-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpython_read_file.py
More file actions
40 lines (35 loc) · 1.24 KB
/
python_read_file.py
File metadata and controls
40 lines (35 loc) · 1.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
from PyPDF2 import PdfFileReader
import sys
import re
def extract_information(pdf_path):
    """Print the name and piece count of every item found in a PDF.

    For each page, the extracted text is split on the marker
    "MJ Cena v Kč"; the segment following the first marker is scanned
    (or the whole page text when the marker is absent).  Two kinds of
    entries are recognised:

    * "<digit>KS <name>"   -- sold by piece; the leading digit is the count.
    * "<weight> KG <name>" -- sold by weight; counted as a single piece.

    For every entry the item name is printed on one line and the count
    on the following line.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        None.  All results are written to standard output.
    """
    # Compiled once so the patterns are not rebuilt for every page.
    # NOTE(review): the "." in both patterns is unescaped and therefore
    # matches any character, not only a decimal separator -- confirm
    # whether "[.,]" was intended before tightening it.
    item_pattern = re.compile(r"([0-9]KS \D*)|([0-9].?[0-9]* KG \D*)")
    weight_prefix = re.compile(r"[0-9].?[0-9]* KG")

    with open(pdf_path, 'rb') as f:
        pdf = PdfFileReader(f)
        for page_index in range(pdf.getNumPages()):
            text = pdf.getPage(page_index).extractText()

            # str.split always returns at least one element, so an explicit
            # length check replaces the former bare "except:" fallback
            # without hiding unrelated errors.
            sections = text.split("MJ Cena v Kč")
            to_cut = sections[1] if len(sections) > 1 else sections[0]

            # findall yields one (by_piece, by_weight) tuple per match;
            # exactly one of the two groups is non-empty.
            for by_piece, by_weight in item_pattern.findall(to_cut):
                if by_piece != '':
                    # The match starts with the piece-count digit.
                    number_pieces = re.search(r'\d*', by_piece).group()
                    # Drop the 4-character "<digit>KS " prefix and the last
                    # two characters (presumably trailing separators from
                    # the PDF text -- verify against real input).
                    only_name = by_piece[4:-2]
                else:
                    number_pieces = 1
                    # Remove the "<weight> KG" prefix, then the space that
                    # followed it and the last two characters.
                    only_name = weight_prefix.sub("", by_weight)[1:-2]
                print(only_name)
                print(number_pieces)
if __name__ == '__main__':
    # Command-line entry point: expects exactly one argument, the PDF path.
    if len(sys.argv) != 2:
        # Report misuse instead of exiting silently with no output;
        # sys.exit with a string prints it to stderr and exits with status 1.
        sys.exit("usage: {} <pdf_path>".format(sys.argv[0]))
    extract_information(sys.argv[1])