major: integrate ocr to pdf reader
This commit is contained in:
parent
0f6f76a66b
commit
d695e90c82
@ -4,6 +4,7 @@ from dotenv import load_dotenv
|
|||||||
basedir = os.path.abspath(os.path.dirname(__file__))
|
basedir = os.path.abspath(os.path.dirname(__file__))
|
||||||
load_dotenv(os.path.join(basedir, '.env'))
|
load_dotenv(os.path.join(basedir, '.env'))
|
||||||
|
|
||||||
|
os.environ["TESSDATA_PREFIX"] = os.path.join(basedir, 'tessdata')
|
||||||
class Config(object):
|
class Config(object):
|
||||||
SECRET_KEY = os.environ.get('SECRET_KEY') or "s0m37h!n6-obfu5c471ng"
|
SECRET_KEY = os.environ.get('SECRET_KEY') or "s0m37h!n6-obfu5c471ng"
|
||||||
SQLALCHEMY_DATABASE_URI = os.environ.get('DATABASE_URL', '').replace(
|
SQLALCHEMY_DATABASE_URI = os.environ.get('DATABASE_URL', '').replace(
|
||||||
|
|||||||
BIN
backend/configs/tessdata/ScrollView.jar
Normal file
BIN
backend/configs/tessdata/ScrollView.jar
Normal file
Binary file not shown.
1
backend/configs/tessdata/configs/alto
Normal file
1
backend/configs/tessdata/configs/alto
Normal file
@ -0,0 +1 @@
|
|||||||
|
tessedit_create_alto 1
|
||||||
7
backend/configs/tessdata/configs/ambigs.train
Normal file
7
backend/configs/tessdata/configs/ambigs.train
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
tessedit_ambigs_training 1
|
||||||
|
load_freq_dawg 0
|
||||||
|
load_punc_dawg 0
|
||||||
|
load_system_dawg 0
|
||||||
|
load_number_dawg 0
|
||||||
|
ambigs_debug_level 3
|
||||||
|
load_fixed_length_dawgs 0
|
||||||
1
backend/configs/tessdata/configs/api_config
Normal file
1
backend/configs/tessdata/configs/api_config
Normal file
@ -0,0 +1 @@
|
|||||||
|
tessedit_zero_rejection T
|
||||||
5
backend/configs/tessdata/configs/bigram
Normal file
5
backend/configs/tessdata/configs/bigram
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
load_bigram_dawg True
|
||||||
|
tessedit_enable_bigram_correction True
|
||||||
|
tessedit_bigram_debug 3
|
||||||
|
save_raw_choices True
|
||||||
|
save_alt_choices True
|
||||||
12
backend/configs/tessdata/configs/box.train
Normal file
12
backend/configs/tessdata/configs/box.train
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
disable_character_fragments T
|
||||||
|
file_type .bl
|
||||||
|
textord_fast_pitch_test T
|
||||||
|
tessedit_zero_rejection T
|
||||||
|
tessedit_minimal_rejection F
|
||||||
|
tessedit_write_rep_codes F
|
||||||
|
edges_children_fix F
|
||||||
|
edges_childarea 0.65
|
||||||
|
edges_boxarea 0.9
|
||||||
|
tessedit_resegment_from_boxes T
|
||||||
|
tessedit_train_from_boxes T
|
||||||
|
textord_no_rejects T
|
||||||
13
backend/configs/tessdata/configs/box.train.stderr
Normal file
13
backend/configs/tessdata/configs/box.train.stderr
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
file_type .bl
|
||||||
|
#tessedit_use_nn F
|
||||||
|
textord_fast_pitch_test T
|
||||||
|
tessedit_zero_rejection T
|
||||||
|
tessedit_minimal_rejection F
|
||||||
|
tessedit_write_rep_codes F
|
||||||
|
edges_children_fix F
|
||||||
|
edges_childarea 0.65
|
||||||
|
edges_boxarea 0.9
|
||||||
|
tessedit_resegment_from_boxes T
|
||||||
|
tessedit_train_from_boxes T
|
||||||
|
#textord_repeat_extraction F
|
||||||
|
textord_no_rejects T
|
||||||
1
backend/configs/tessdata/configs/digits
Normal file
1
backend/configs/tessdata/configs/digits
Normal file
@ -0,0 +1 @@
|
|||||||
|
tessedit_char_whitelist 0123456789-.
|
||||||
1
backend/configs/tessdata/configs/get.images
Normal file
1
backend/configs/tessdata/configs/get.images
Normal file
@ -0,0 +1 @@
|
|||||||
|
tessedit_write_images T
|
||||||
2
backend/configs/tessdata/configs/hocr
Normal file
2
backend/configs/tessdata/configs/hocr
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
tessedit_create_hocr 1
|
||||||
|
hocr_font_info 0
|
||||||
2
backend/configs/tessdata/configs/inter
Normal file
2
backend/configs/tessdata/configs/inter
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
interactive_display_mode T
|
||||||
|
tessedit_display_outwords T
|
||||||
4
backend/configs/tessdata/configs/kannada
Normal file
4
backend/configs/tessdata/configs/kannada
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
textord_skewsmooth_offset 8
|
||||||
|
textord_skewsmooth_offset2 8
|
||||||
|
textord_merge_desc 0.5
|
||||||
|
textord_no_rejects 1
|
||||||
2
backend/configs/tessdata/configs/linebox
Normal file
2
backend/configs/tessdata/configs/linebox
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
tessedit_resegment_from_line_boxes 1
|
||||||
|
tessedit_make_boxes_from_boxes 1
|
||||||
1
backend/configs/tessdata/configs/logfile
Normal file
1
backend/configs/tessdata/configs/logfile
Normal file
@ -0,0 +1 @@
|
|||||||
|
debug_file tesseract.log
|
||||||
11
backend/configs/tessdata/configs/lstm.train
Normal file
11
backend/configs/tessdata/configs/lstm.train
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
file_type .bl
|
||||||
|
textord_fast_pitch_test T
|
||||||
|
tessedit_zero_rejection T
|
||||||
|
tessedit_minimal_rejection F
|
||||||
|
tessedit_write_rep_codes F
|
||||||
|
edges_children_fix F
|
||||||
|
edges_childarea 0.65
|
||||||
|
edges_boxarea 0.9
|
||||||
|
tessedit_train_line_recognizer T
|
||||||
|
textord_no_rejects T
|
||||||
|
tessedit_init_config_only T
|
||||||
1
backend/configs/tessdata/configs/lstmbox
Normal file
1
backend/configs/tessdata/configs/lstmbox
Normal file
@ -0,0 +1 @@
|
|||||||
|
tessedit_create_lstmbox 1
|
||||||
4
backend/configs/tessdata/configs/lstmdebug
Normal file
4
backend/configs/tessdata/configs/lstmdebug
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
stopper_debug_level 1
|
||||||
|
classify_debug_level 1
|
||||||
|
segsearch_debug_level 1
|
||||||
|
language_model_debug_level 3
|
||||||
1
backend/configs/tessdata/configs/makebox
Normal file
1
backend/configs/tessdata/configs/makebox
Normal file
@ -0,0 +1 @@
|
|||||||
|
tessedit_create_boxfile 1
|
||||||
1
backend/configs/tessdata/configs/pdf
Normal file
1
backend/configs/tessdata/configs/pdf
Normal file
@ -0,0 +1 @@
|
|||||||
|
tessedit_create_pdf 1
|
||||||
1
backend/configs/tessdata/configs/quiet
Normal file
1
backend/configs/tessdata/configs/quiet
Normal file
@ -0,0 +1 @@
|
|||||||
|
debug_file /dev/null
|
||||||
2
backend/configs/tessdata/configs/rebox
Normal file
2
backend/configs/tessdata/configs/rebox
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
tessedit_resegment_from_boxes 1
|
||||||
|
tessedit_make_boxes_from_boxes 1
|
||||||
12
backend/configs/tessdata/configs/strokewidth
Normal file
12
backend/configs/tessdata/configs/strokewidth
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
textord_show_blobs 0
|
||||||
|
textord_debug_tabfind 3
|
||||||
|
textord_tabfind_show_partitions 1
|
||||||
|
textord_tabfind_show_initial_partitions 1
|
||||||
|
textord_tabfind_show_columns 1
|
||||||
|
textord_tabfind_show_blocks 1
|
||||||
|
textord_tabfind_show_initialtabs 1
|
||||||
|
textord_tabfind_show_finaltabs 1
|
||||||
|
textord_tabfind_show_strokewidths 1
|
||||||
|
textord_tabfind_show_vlines 0
|
||||||
|
textord_tabfind_show_images 1
|
||||||
|
tessedit_dump_pageseg_images 0
|
||||||
1
backend/configs/tessdata/configs/tsv
Normal file
1
backend/configs/tessdata/configs/tsv
Normal file
@ -0,0 +1 @@
|
|||||||
|
tessedit_create_tsv 1
|
||||||
3
backend/configs/tessdata/configs/txt
Normal file
3
backend/configs/tessdata/configs/txt
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
# This config file should be used with other config files which create renderers.
|
||||||
|
# usage example: tesseract eurotext.tif eurotext txt hocr pdf
|
||||||
|
tessedit_create_txt 1
|
||||||
2
backend/configs/tessdata/configs/unlv
Normal file
2
backend/configs/tessdata/configs/unlv
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
tessedit_write_unlv 1
|
||||||
|
unlv_tilde_crunching T
|
||||||
1
backend/configs/tessdata/configs/wordstrbox
Normal file
1
backend/configs/tessdata/configs/wordstrbox
Normal file
@ -0,0 +1 @@
|
|||||||
|
tessedit_create_wordstrbox 1
|
||||||
BIN
backend/configs/tessdata/deu.traineddata.old
Normal file
BIN
backend/configs/tessdata/deu.traineddata.old
Normal file
Binary file not shown.
BIN
backend/configs/tessdata/eng.traineddata
Normal file
BIN
backend/configs/tessdata/eng.traineddata
Normal file
Binary file not shown.
2
backend/configs/tessdata/eng.user-patterns
Normal file
2
backend/configs/tessdata/eng.user-patterns
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
1-\d\d\d-GOOG-411
|
||||||
|
www.\n\\\*.com
|
||||||
5
backend/configs/tessdata/eng.user-words
Normal file
5
backend/configs/tessdata/eng.user-words
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
the
|
||||||
|
quick
|
||||||
|
brown
|
||||||
|
fox
|
||||||
|
jumped
|
||||||
BIN
backend/configs/tessdata/jaxb-api-2.3.1.jar
Normal file
BIN
backend/configs/tessdata/jaxb-api-2.3.1.jar
Normal file
Binary file not shown.
BIN
backend/configs/tessdata/osd.traineddata
Normal file
BIN
backend/configs/tessdata/osd.traineddata
Normal file
Binary file not shown.
BIN
backend/configs/tessdata/pdf.ttf
Normal file
BIN
backend/configs/tessdata/pdf.ttf
Normal file
Binary file not shown.
BIN
backend/configs/tessdata/piccolo2d-core-3.0.1.jar
Normal file
BIN
backend/configs/tessdata/piccolo2d-core-3.0.1.jar
Normal file
Binary file not shown.
BIN
backend/configs/tessdata/piccolo2d-extras-3.0.1.jar
Normal file
BIN
backend/configs/tessdata/piccolo2d-extras-3.0.1.jar
Normal file
Binary file not shown.
1
backend/configs/tessdata/tessconfigs/batch
Normal file
1
backend/configs/tessdata/tessconfigs/batch
Normal file
@ -0,0 +1 @@
|
|||||||
|
# No content needed as all defaults are correct.
|
||||||
2
backend/configs/tessdata/tessconfigs/batch.nochop
Normal file
2
backend/configs/tessdata/tessconfigs/batch.nochop
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
chop_enable 0
|
||||||
|
wordrec_enable_assoc 0
|
||||||
7
backend/configs/tessdata/tessconfigs/matdemo
Normal file
7
backend/configs/tessdata/tessconfigs/matdemo
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
#################################################
|
||||||
|
# Adaptive Matcher Using PreAdapted Templates
|
||||||
|
#################################################
|
||||||
|
|
||||||
|
classify_enable_adaptive_debugger 1
|
||||||
|
matcher_debug_flags 6
|
||||||
|
matcher_debug_level 1
|
||||||
12
backend/configs/tessdata/tessconfigs/msdemo
Normal file
12
backend/configs/tessdata/tessconfigs/msdemo
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
#################################################
|
||||||
|
# Adaptive Matcher Using PreAdapted Templates
|
||||||
|
#################################################
|
||||||
|
|
||||||
|
classify_enable_adaptive_debugger 1
|
||||||
|
matcher_debug_flags 6
|
||||||
|
matcher_debug_level 1
|
||||||
|
|
||||||
|
wordrec_display_splits 0
|
||||||
|
wordrec_display_all_blobs 1
|
||||||
|
wordrec_display_segmentations 2
|
||||||
|
classify_debug_level 1
|
||||||
1
backend/configs/tessdata/tessconfigs/nobatch
Normal file
1
backend/configs/tessdata/tessconfigs/nobatch
Normal file
@ -0,0 +1 @@
|
|||||||
|
|
||||||
9
backend/configs/tessdata/tessconfigs/segdemo
Normal file
9
backend/configs/tessdata/tessconfigs/segdemo
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
#################################################
|
||||||
|
# Adaptive Matcher Using PreAdapted Templates
|
||||||
|
#################################################
|
||||||
|
|
||||||
|
wordrec_display_splits 0
|
||||||
|
wordrec_display_all_blobs 1
|
||||||
|
wordrec_display_segmentations 2
|
||||||
|
classify_debug_level 1
|
||||||
|
stopper_debug_level 1
|
||||||
@ -0,0 +1,38 @@
|
|||||||
|
"""raise bonid digits
|
||||||
|
|
||||||
|
Revision ID: 926395732c3e
|
||||||
|
Revises: 2a64d3b9235a
|
||||||
|
Create Date: 2024-08-24 10:33:39.109944
|
||||||
|
|
||||||
|
"""
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision = '926395732c3e'
|
||||||
|
down_revision = '2a64d3b9235a'
|
||||||
|
branch_labels = None
|
||||||
|
depends_on = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade():
    """Widen the ``receipt.bonid`` column from 24 to 28 digits (scale 0)."""
    # Column types before and after this migration.
    previous_type = sa.NUMERIC(precision=24, scale=0)
    widened_type = sa.Numeric(precision=28, scale=0)

    # Batch mode is what Alembic auto-generated for this table; keep it.
    with op.batch_alter_table('receipt', schema=None) as batch_op:
        batch_op.alter_column(
            'bonid',
            existing_type=previous_type,
            type_=widened_type,
            existing_nullable=True,
        )
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade():
    """Shrink the ``receipt.bonid`` column back from 28 to 24 digits (scale 0)."""
    # Column types as left by upgrade() and as they were before it.
    widened_type = sa.Numeric(precision=28, scale=0)
    previous_type = sa.NUMERIC(precision=24, scale=0)

    # Mirror image of upgrade(): same table, same column, types swapped.
    with op.batch_alter_table('receipt', schema=None) as batch_op:
        batch_op.alter_column(
            'bonid',
            existing_type=widened_type,
            type_=previous_type,
            existing_nullable=True,
        )
|
||||||
@ -4,7 +4,7 @@ from src import db
|
|||||||
class Receipt(db.Model):
|
class Receipt(db.Model):
|
||||||
id = db.Column(db.BigInteger, primary_key=True, autoincrement=True)
|
id = db.Column(db.BigInteger, primary_key=True, autoincrement=True)
|
||||||
date = db.Column(db.Date, nullable=False)
|
date = db.Column(db.Date, nullable=False)
|
||||||
bonid = db.Column(db.Numeric(precision=24, scale=0), unique=True)
|
bonid = db.Column(db.Numeric(precision=28, scale=0), unique=True)
|
||||||
from_user = db.Column(db.ForeignKey("login_token.token"),
|
from_user = db.Column(db.ForeignKey("login_token.token"),
|
||||||
server_onupdate=db.FetchedValue())
|
server_onupdate=db.FetchedValue())
|
||||||
registered = db.Column(db.Boolean, nullable=False,
|
registered = db.Column(db.Boolean, nullable=False,
|
||||||
|
|||||||
@ -40,7 +40,7 @@ def upload_receipt(establishment: int):
|
|||||||
db.session.commit()
|
db.session.commit()
|
||||||
if pdfReceipt:
|
if pdfReceipt:
|
||||||
rename(f"{PDFDir}/temp.pdf", f"{PDFDir}{secure_filename(f'{dbReceipt.id}.pdf')}")
|
rename(f"{PDFDir}/temp.pdf", f"{PDFDir}{secure_filename(f'{dbReceipt.id}.pdf')}")
|
||||||
LOGGER.debug(receipt.text)
|
LOGGER.debug(receipt.words)
|
||||||
return redirect(url_for("receipts.check_items.confirm_receipt_items", receipt_id = dbReceipt.id))
|
return redirect(url_for("receipts.check_items.confirm_receipt_items", receipt_id = dbReceipt.id))
|
||||||
else:
|
else:
|
||||||
LOGGER.debug(form.errors)
|
LOGGER.debug(form.errors)
|
||||||
|
|||||||
@ -0,0 +1,20 @@
|
|||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
def getDictFromWords(words: list[tuple]):
    """Extract date, receipt number and items from the words of an EDEKA receipt.

    Each entry of ``words`` is indexed as ``word[4]`` (text), ``word[5]``,
    ``word[6]`` and ``word[7]``; these are presumably the text, block number,
    line number and word number of a PyMuPDF ``get_text("words")`` tuple
    (TODO confirm against the caller).

    Returns a dict with the keys ``"items"`` (list of dicts holding
    ``"itemname"`` and, when found, ``"price"``), ``"date"`` (a
    ``datetime.date``) and ``"bonid"`` (the receipt number as text).

    Raises ``IndexError`` when ``words`` is too short to contain the footer
    and ``ValueError`` when the date word is not formatted ``dd.mm.yy``.
    """
    results = {"items": []}
    # The date is read from the sixth-to-last word and the receipt number
    # from the last word.
    results['date'] = datetime.strptime(words[-6][4], "%d.%m.%y").date()
    results['bonid'] = words[-1][4]

    # None (not 0) marks "no block seen yet": 0 is a possible block number,
    # and using it as the start value made the first item crash with
    # IndexError when it lived in block 0.
    currentblock = None
    skipwords = 14  # words skipped before the item list is parsed
    for word in words[skipwords:]:
        text, block_no, line_no, word_no = word[4:8]

        if currentblock != block_no:
            # A new block starts a new item.
            results['items'].append({"itemname": text})
            currentblock = block_no
        elif line_no == 0:
            # Further words on the first line of the block extend the name.
            results['items'][-1]["itemname"] += " " + text

        if line_no == 1 and word_no == 1:
            # Only the part of this word before the first "*" is the price.
            results['items'][-1]["price"] = text.split("*")[0]

        if "----" in text:
            # A dashed word ends the item list; drop the last item and stop.
            del results['items'][-1]
            break
    return results
|
||||||
@ -0,0 +1,27 @@
|
|||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
def getDictFromWords(words: list[tuple]):
    """Extract date, receipt number and items from the words of a Kaufland receipt.

    Each entry of ``words`` is indexed as ``word[4]`` (text), ``word[5]``,
    ``word[6]`` and ``word[7]``; these are presumably the text, block number,
    line number and word number of a PyMuPDF ``get_text("words")`` tuple
    (TODO confirm against the caller).

    Returns a dict with the keys ``"items"`` (list of dicts holding
    ``"itemname"`` and, when found, ``"amount"`` and ``"price"``) and
    ``"bonid"`` (text of the last word).  ``"date"`` (a ``datetime.date``)
    is only present when a ``Datum:`` label followed by another word exists.

    Raises ``IndexError`` when ``words`` is empty and ``ValueError`` when the
    word after ``Datum:`` is not formatted ``dd.mm.yyyy``.
    """
    results = {"items": []}
    results['bonid'] = words[-1][4]

    # None (not 0) marks "no block seen yet": 0 is a possible block number,
    # and using it as the start value made the first item crash with
    # IndexError when it lived in block 0.
    currentblock = None
    skipwords = 9  # words skipped before the item list is parsed
    for index, word in enumerate(words[skipwords:], start=skipwords):
        text, block_no, line_no, word_no = word[4:8]

        if currentblock != block_no:
            # A new block starts a new item.
            results['items'].append({"itemname": text})
            currentblock = block_no
        elif line_no == 0:
            # Further words on the first line of the block extend the name.
            results['items'][-1]["itemname"] += " " + text

        if line_no == 1 and word_no == 0:
            if text.lower() == "x":
                # The word after an "x" marker is stored as the amount; skip
                # it when the "x" is the very last word instead of indexing
                # past the end of the list.
                if index + 1 < len(words):
                    results['items'][-1]["amount"] = words[index + 1][4]
            else:
                results['items'][-1]["price"] = text
        elif line_no == 2:
            results['items'][-1]["price"] = text

        if text.lower() == "gesamt":
            # "Gesamt" ends the item list; drop the last item and stop.
            del results['items'][-1]
            break

    # The date is the word right after the first "Datum:" label.  Pairing each
    # word with its successor avoids the previous wrap-around to words[0]
    # when the label was the final word.
    for label, follower in zip(words, words[1:]):
        if label[4].lower() == "datum:":
            results['date'] = datetime.strptime(follower[4], "%d.%m.%Y").date()
            break
    return results
|
||||||
@ -1,5 +1,7 @@
|
|||||||
import fitz
|
import fitz
|
||||||
from datetime import datetime, date
|
from datetime import datetime, date
|
||||||
|
from .edeka.edeka_parser import getDictFromWords as edekaparser
|
||||||
|
from .kaufland.kaufland_parser import getDictFromWords as kauflandparser
|
||||||
from re import search
|
from re import search
|
||||||
|
|
||||||
class PDFReceipt:
|
class PDFReceipt:
|
||||||
@ -10,22 +12,29 @@ class PDFReceipt:
|
|||||||
parser -- A keyword in lowercase to tell how the receipt is formated.
|
parser -- A keyword in lowercase to tell how the receipt is formated.
|
||||||
Currently supported: 'edeka'
|
Currently supported: 'edeka'
|
||||||
"""
|
"""
|
||||||
def __init__(self, bPDFFile, parser: str = "edeka") -> None:
|
def __init__(self, strPDFFile) -> None:
|
||||||
try:
|
try:
|
||||||
self.text = PDFReceipt._getTextFromPDF(bPDFFile)
|
self.words = PDFReceipt._getWordsFromPDF(strPDFFile)
|
||||||
self.id, self.date, self.items = PDFReceipt._getInfosFromText(self.text, parser)
|
storename = PDFReceipt._getStoreName(self.words)
|
||||||
|
self.id, self.date, self.items = PDFReceipt._getInfosFromText(self.words, store = storename)
|
||||||
except:
|
except:
|
||||||
self.text = "PDF konnte nicht geladen werden."
|
self.words = "PDF konnte nicht geladen werden."
|
||||||
self.date = date.today()
|
self.date = date.today()
|
||||||
self.id = None
|
self.id = None
|
||||||
self.items = []
|
self.items = []
|
||||||
|
|
||||||
def _getTextFromPDF(file):
|
def _getWordsFromPDF(file):
|
||||||
with fitz.open(file, filetype="pdf") as doc:
|
with fitz.open(file, filetype="pdf") as doc:
|
||||||
text = ""
|
words = []
|
||||||
for page in doc:
|
for page in doc:
|
||||||
text += page.get_text()
|
words.extend(page.get_text("words", textpage=page.get_textpage_ocr(), sort=True))
|
||||||
return text.strip()
|
return words
|
||||||
|
|
||||||
|
def _getStoreName(words: list[tuple]) -> str:
|
||||||
|
for word in words:
|
||||||
|
if word[4].lower() in ("edeka", "kaufland"):
|
||||||
|
return word[4].lower()
|
||||||
|
return "unknown"
|
||||||
|
|
||||||
def _getItemsTextFromText(text, start="", end=""):
|
def _getItemsTextFromText(text, start="", end=""):
|
||||||
return text[text.index(start)+len(start):text.index(end)].strip()
|
return text[text.index(start)+len(start):text.index(end)].strip()
|
||||||
@ -43,21 +52,23 @@ class PDFReceipt:
|
|||||||
i += 2
|
i += 2
|
||||||
return resultsArr
|
return resultsArr
|
||||||
|
|
||||||
def _getInfosFromText(text: str, parser: str = "edeka"):
|
def _getInfosFromText(words: str, store: str = "edeka"):
|
||||||
if parser.lower() == "edeka":
|
if store == "edeka":
|
||||||
items = PDFReceipt._convertItemsTextToDict(PDFReceipt._getItemsTextFromText(text, start="EUR", end="----------"))
|
result = edekaparser(words)
|
||||||
strDate = text.split("\n")[-1].split(" ")[0]
|
elif store == "kaufland":
|
||||||
date = datetime.strptime(strDate, "%d.%m.%y").date()
|
result = kauflandparser(words)
|
||||||
strReceiptNumber = text.split("\n")[-1].split(" ")[-1]
|
items = result.get("items")
|
||||||
|
date = result.get("date")
|
||||||
|
strReceiptNumber = result.get("bonid")
|
||||||
try:
|
try:
|
||||||
intReceiptNumber = int(strReceiptNumber)
|
intReceiptNumber = int(strReceiptNumber)
|
||||||
except:
|
except:
|
||||||
raise ValueError("Receipt Number not an integer.")
|
raise ValueError("Receipt Number not an integer.")
|
||||||
return (intReceiptNumber, date, items)
|
return (intReceiptNumber, date, items)
|
||||||
|
|
||||||
def getPDFReceiptFromFile(strPDFFile: str, parser: str = "edeka"):
|
def getPDFReceiptFromFile(strPDFFile: str):
|
||||||
try:
|
try:
|
||||||
with open(strPDFFile) as doc:
|
with open(strPDFFile) as doc:
|
||||||
return PDFReceipt(doc, parser)
|
return PDFReceipt(doc)
|
||||||
except FileNotFoundError as e:
|
except FileNotFoundError as e:
|
||||||
return PDFReceipt(None)
|
return PDFReceipt(None)
|
||||||
Loading…
x
Reference in New Issue
Block a user