Compare commits

..

9 Commits

Author SHA1 Message Date
f24880c09a fix: raise user password char length (#18)
All checks were successful
Create Release / release (push) Successful in 28s
Create Release / docker (push) Successful in 8m55s
Co-authored-by: Lunaresk <lingsonb@gmail.com>
Reviewed-on: #18
2025-06-03 21:39:00 +02:00
3622cbca05 fix: workflow login
All checks were successful
Create Release / release (push) Successful in 21s
Create Release / docker (push) Successful in 8m43s
2025-04-09 01:29:54 +02:00
637a5f7ad5 fix: stylesheet
Some checks failed
Create Release / release (push) Successful in 2m1s
Create Release / docker (push) Failing after 8m22s
2025-04-09 00:50:15 +02:00
15ba3c060a Workflow Action for Docker Repo (#2)
Reviewed-on: #2
2024-11-03 17:57:37 +00:00
bde9a50767 fix: update dockerfile 2024-11-03 15:47:29 +01:00
60e221146a Actions Workflow Fix 2024-11-03 10:51:25 +01:00
d34f2adb2f fix: correct OCR language 2024-11-02 23:46:02 +01:00
a6f229e7fb minor: set folder for receipts 2024-08-26 19:43:47 +02:00
5a453a140e major: integrate ocr to pdf reader 2024-08-25 22:19:53 +02:00
58 changed files with 339 additions and 61 deletions

View File

@ -4,19 +4,47 @@ on:
tags: tags:
- '*' - '*'
jobs: jobs:
build: release:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@master - uses: https://gitea.com/actions/checkout@master
- name: Archive Server - name: Zip Artifacts
uses: thedoctor0/zip-release@master uses: https://github.com/thedoctor0/zip-release@master
with: with:
type: 'zip' type: 'zip'
filename: 'server.zip' filename: 'server.zip'
exclusions: '*.git*' exclusions: '*.git*'
- name: Release Archive - name: Release Archive
uses: ncipollo/release-action@v1 uses: https://gitea.com/actions/gitea-release-action@v1
with: with:
allowUpdates: true server_url: https://gitea.wpgcommunity.net
artifacts: "server.zip" files: 'server.zip'
token: ${{ secrets.GITHUB_TOKEN }} docker:
runs-on: ubuntu-latest
steps:
- uses: https://gitea.com/actions/checkout@master
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: |
${{ vars.DOCKER_REPO }}/costhive
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
with:
config-inline: |
[registry."${{ vars.DOCKER_REPO }}"]
http = true
insecure = true
- name: Login to Gitea Container Registry
uses: docker/login-action@v3
with:
registry: ${{ vars.DOCKER_REPO }}
username: ${{ vars.DOCKER_USER }}
password: ${{ secrets.DOCKER_PASS }}
- name: Build and push
uses: https://github.com/docker/build-push-action@v6
with:
push: true
context: .
tags: ${{ steps.meta.outputs.tags }}

View File

@ -1,5 +1,5 @@
FROM python@sha256:21c9f0b22213295a13bd678c5b45aa587ff6cb01cd99b6cf0e6928f4c777006b FROM python@sha256:c66cf219ac0083a9af2ff90e16530f16cd503c59eb7909feb3b8f3524dc1a87e
# python:3.11.4-slim-bullseye (arm/v7) # python:3.12.2-slim-bullseye (amd64)
RUN useradd costhive RUN useradd costhive
WORKDIR /home/costhive WORKDIR /home/costhive
@ -16,7 +16,7 @@ RUN python -m venv venv; \
COPY backend backend COPY backend backend
ENV FLASK_APP run.py ENV FLASK_APP=run.py
RUN chmod +x boot.sh; \ RUN chmod +x boot.sh; \
chown -R costhive:costhive . chown -R costhive:costhive .

View File

@ -4,6 +4,7 @@ from dotenv import load_dotenv
basedir = os.path.abspath(os.path.dirname(__file__)) basedir = os.path.abspath(os.path.dirname(__file__))
load_dotenv(os.path.join(basedir, '.env')) load_dotenv(os.path.join(basedir, '.env'))
os.environ["TESSDATA_PREFIX"] = os.path.join(basedir, 'tessdata')
class Config(object): class Config(object):
SECRET_KEY = os.environ.get('SECRET_KEY') or "s0m37h!n6-obfu5c471ng" SECRET_KEY = os.environ.get('SECRET_KEY') or "s0m37h!n6-obfu5c471ng"
SQLALCHEMY_DATABASE_URI = os.environ.get('DATABASE_URL', '').replace( SQLALCHEMY_DATABASE_URI = os.environ.get('DATABASE_URL', '').replace(
@ -19,3 +20,4 @@ class Config(object):
MAIL_PASSWORD = os.environ.get('MAIL_PASSWORD') MAIL_PASSWORD = os.environ.get('MAIL_PASSWORD')
ADMINS = ['postmaster@wpgcommunity.net'] ADMINS = ['postmaster@wpgcommunity.net']
POSTS_PER_PAGE = 15 POSTS_PER_PAGE = 15
RECEIPT_FOLDER = f"{basedir}/../PDFReceipts"

Binary file not shown.

View File

@ -0,0 +1 @@
tessedit_create_alto 1

View File

@ -0,0 +1,7 @@
tessedit_ambigs_training 1
load_freq_dawg 0
load_punc_dawg 0
load_system_dawg 0
load_number_dawg 0
ambigs_debug_level 3
load_fixed_length_dawgs 0

View File

@ -0,0 +1 @@
tessedit_zero_rejection T

View File

@ -0,0 +1,5 @@
load_bigram_dawg True
tessedit_enable_bigram_correction True
tessedit_bigram_debug 3
save_raw_choices True
save_alt_choices True

View File

@ -0,0 +1,12 @@
disable_character_fragments T
file_type .bl
textord_fast_pitch_test T
tessedit_zero_rejection T
tessedit_minimal_rejection F
tessedit_write_rep_codes F
edges_children_fix F
edges_childarea 0.65
edges_boxarea 0.9
tessedit_resegment_from_boxes T
tessedit_train_from_boxes T
textord_no_rejects T

View File

@ -0,0 +1,13 @@
file_type .bl
#tessedit_use_nn F
textord_fast_pitch_test T
tessedit_zero_rejection T
tessedit_minimal_rejection F
tessedit_write_rep_codes F
edges_children_fix F
edges_childarea 0.65
edges_boxarea 0.9
tessedit_resegment_from_boxes T
tessedit_train_from_boxes T
#textord_repeat_extraction F
textord_no_rejects T

View File

@ -0,0 +1 @@
tessedit_char_whitelist 0123456789-.

View File

@ -0,0 +1 @@
tessedit_write_images T

View File

@ -0,0 +1,2 @@
tessedit_create_hocr 1
hocr_font_info 0

View File

@ -0,0 +1,2 @@
interactive_display_mode T
tessedit_display_outwords T

View File

@ -0,0 +1,4 @@
textord_skewsmooth_offset 8
textord_skewsmooth_offset2 8
textord_merge_desc 0.5
textord_no_rejects 1

View File

@ -0,0 +1,2 @@
tessedit_resegment_from_line_boxes 1
tessedit_make_boxes_from_boxes 1

View File

@ -0,0 +1 @@
debug_file tesseract.log

View File

@ -0,0 +1,11 @@
file_type .bl
textord_fast_pitch_test T
tessedit_zero_rejection T
tessedit_minimal_rejection F
tessedit_write_rep_codes F
edges_children_fix F
edges_childarea 0.65
edges_boxarea 0.9
tessedit_train_line_recognizer T
textord_no_rejects T
tessedit_init_config_only T

View File

@ -0,0 +1 @@
tessedit_create_lstmbox 1

View File

@ -0,0 +1,4 @@
stopper_debug_level 1
classify_debug_level 1
segsearch_debug_level 1
language_model_debug_level 3

View File

@ -0,0 +1 @@
tessedit_create_boxfile 1

View File

@ -0,0 +1 @@
tessedit_create_pdf 1

View File

@ -0,0 +1 @@
debug_file /dev/null

View File

@ -0,0 +1,2 @@
tessedit_resegment_from_boxes 1
tessedit_make_boxes_from_boxes 1

View File

@ -0,0 +1,12 @@
textord_show_blobs 0
textord_debug_tabfind 3
textord_tabfind_show_partitions 1
textord_tabfind_show_initial_partitions 1
textord_tabfind_show_columns 1
textord_tabfind_show_blocks 1
textord_tabfind_show_initialtabs 1
textord_tabfind_show_finaltabs 1
textord_tabfind_show_strokewidths 1
textord_tabfind_show_vlines 0
textord_tabfind_show_images 1
tessedit_dump_pageseg_images 0

View File

@ -0,0 +1 @@
tessedit_create_tsv 1

View File

@ -0,0 +1,3 @@
# This config file should be used with other config files which create renderers.
# usage example: tesseract eurotext.tif eurotext txt hocr pdf
tessedit_create_txt 1

View File

@ -0,0 +1,2 @@
tessedit_write_unlv 1
unlv_tilde_crunching T

View File

@ -0,0 +1 @@
tessedit_create_wordstrbox 1

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,2 @@
1-\d\d\d-GOOG-411
www.\n\\\*.com

View File

@ -0,0 +1,5 @@
the
quick
brown
fox
jumped

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1 @@
# No content needed as all defaults are correct.

View File

@ -0,0 +1,2 @@
chop_enable 0
wordrec_enable_assoc 0

View File

@ -0,0 +1,7 @@
#################################################
# Adaptive Matcher Using PreAdapted Templates
#################################################
classify_enable_adaptive_debugger 1
matcher_debug_flags 6
matcher_debug_level 1

View File

@ -0,0 +1,12 @@
#################################################
# Adaptive Matcher Using PreAdapted Templates
#################################################
classify_enable_adaptive_debugger 1
matcher_debug_flags 6
matcher_debug_level 1
wordrec_display_splits 0
wordrec_display_all_blobs 1
wordrec_display_segmentations 2
classify_debug_level 1

View File

@ -0,0 +1 @@

View File

@ -0,0 +1,9 @@
#################################################
# Adaptive Matcher Using PreAdapted Templates
#################################################
wordrec_display_splits 0
wordrec_display_all_blobs 1
wordrec_display_segmentations 2
classify_debug_level 1
stopper_debug_level 1

View File

@ -0,0 +1,38 @@
"""raise password char length
Revision ID: 782a2409df41
Revises: 926395732c3e
Create Date: 2025-06-03 21:01:23.169897
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = '782a2409df41'
down_revision = '926395732c3e'
branch_labels = None
depends_on = None
def upgrade():
    """Change ``user.password_hash`` from VARCHAR(128) to String(255)."""
    previous_type = sa.VARCHAR(length=128)
    widened_type = sa.String(length=255)
    # ### commands auto generated by Alembic - please adjust! ###
    with op.batch_alter_table('user', schema=None) as batch_op:
        batch_op.alter_column(
            'password_hash',
            existing_type=previous_type,
            type_=widened_type,
            existing_nullable=False,
        )
    # ### end Alembic commands ###
def downgrade():
    """Change ``user.password_hash`` back from String(255) to VARCHAR(128)."""
    widened_type = sa.String(length=255)
    previous_type = sa.VARCHAR(length=128)
    # ### commands auto generated by Alembic - please adjust! ###
    with op.batch_alter_table('user', schema=None) as batch_op:
        batch_op.alter_column(
            'password_hash',
            existing_type=widened_type,
            type_=previous_type,
            existing_nullable=False,
        )
    # ### end Alembic commands ###

View File

@ -0,0 +1,38 @@
"""raise bonid digits
Revision ID: 926395732c3e
Revises: 2a64d3b9235a
Create Date: 2024-08-24 10:33:39.109944
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = '926395732c3e'
down_revision = '2a64d3b9235a'
branch_labels = None
depends_on = None
def upgrade():
    """Change ``receipt.bonid`` from NUMERIC(24, 0) to Numeric(28, 0)."""
    previous_type = sa.NUMERIC(precision=24, scale=0)
    widened_type = sa.Numeric(precision=28, scale=0)
    # ### commands auto generated by Alembic - please adjust! ###
    with op.batch_alter_table('receipt', schema=None) as batch_op:
        batch_op.alter_column(
            'bonid',
            existing_type=previous_type,
            type_=widened_type,
            existing_nullable=True,
        )
    # ### end Alembic commands ###
def downgrade():
    """Change ``receipt.bonid`` back from Numeric(28, 0) to NUMERIC(24, 0)."""
    widened_type = sa.Numeric(precision=28, scale=0)
    previous_type = sa.NUMERIC(precision=24, scale=0)
    # ### commands auto generated by Alembic - please adjust! ###
    with op.batch_alter_table('receipt', schema=None) as batch_op:
        batch_op.alter_column(
            'bonid',
            existing_type=widened_type,
            type_=previous_type,
            existing_nullable=True,
        )
    # ### end Alembic commands ###

View File

@ -4,7 +4,7 @@ from src import db
class Receipt(db.Model): class Receipt(db.Model):
id = db.Column(db.BigInteger, primary_key=True, autoincrement=True) id = db.Column(db.BigInteger, primary_key=True, autoincrement=True)
date = db.Column(db.Date, nullable=False) date = db.Column(db.Date, nullable=False)
bonid = db.Column(db.Numeric(precision=24, scale=0), unique=True) bonid = db.Column(db.Numeric(precision=28, scale=0), unique=True)
from_user = db.Column(db.ForeignKey("login_token.token"), from_user = db.Column(db.ForeignKey("login_token.token"),
server_onupdate=db.FetchedValue()) server_onupdate=db.FetchedValue())
registered = db.Column(db.Boolean, nullable=False, registered = db.Column(db.Boolean, nullable=False,

View File

@ -9,7 +9,7 @@ from werkzeug.security import generate_password_hash, check_password_hash
class User(UserMixin, db.Model): class User(UserMixin, db.Model):
id = db.Column(db.BigInteger, primary_key=True, autoincrement=True) id = db.Column(db.BigInteger, primary_key=True, autoincrement=True)
email = db.Column(db.String(255), nullable=False, unique=True) email = db.Column(db.String(255), nullable=False, unique=True)
password_hash = db.Column(db.String(128), nullable=False) password_hash = db.Column(db.String(255), nullable=False)
LoginToken = db.relationship("LoginToken", backref='User', lazy='dynamic') LoginToken = db.relationship("LoginToken", backref='User', lazy='dynamic')
Bought = db.relationship("Bought", secondary="login_token", Bought = db.relationship("Bought", secondary="login_token",

View File

@ -38,6 +38,8 @@ migrate = Migrate(transaction_per_migration=True)
def create_app(config_class=Config): def create_app(config_class=Config):
if not exists(config_class.RECEIPT_FOLDER):
makedirs(config_class.RECEIPT_FOLDER)
app = Flask(__name__, template_folder="../web/templates", static_folder="../web/static") app = Flask(__name__, template_folder="../web/templates", static_folder="../web/static")
app.config.from_object(config_class) app.config.from_object(config_class)
bootstrap.init_app(app) bootstrap.init_app(app)

View File

@ -10,5 +10,4 @@ def show_item(item: int):
itemschema = ItemSchema().dump(itemobj) itemschema = ItemSchema().dump(itemobj)
itemschema['PriceChange'].sort(key=lambda d: d['date'], reverse=True) itemschema['PriceChange'].sort(key=lambda d: d['date'], reverse=True)
itemschema['AmountChange'].sort(key=lambda d: d['date'], reverse=True) itemschema['AmountChange'].sort(key=lambda d: d['date'], reverse=True)
print(itemschema)
return render_template('item/details/show_item.html', item = itemschema) return render_template('item/details/show_item.html', item = itemschema)

View File

@ -73,6 +73,4 @@ class CheckItemsForm(FlaskForm):
item['itemname'], item['price'], item['amount'] if 'amount' in item else 1, 0)) item['itemname'], item['price'], item['amount'] if 'amount' in item else 1, 0))
check_items = CheckItems(check_items_entry) check_items = CheckItems(check_items_entry)
form = cls(obj=check_items) form = cls(obj=check_items)
print(f"{form.items.entries}")
return form return form

View File

@ -1,5 +1,5 @@
from datetime import date from datetime import date
from flask import abort, request, url_for from flask import abort, current_app, request, url_for
from flask_login import current_user, login_required from flask_login import current_user, login_required
from . import bp from . import bp
from .forms import CheckCustomItemsEntryForm, CheckItemsEntryForm, CheckItemsForm, get_choices from .forms import CheckCustomItemsEntryForm, CheckItemsEntryForm, CheckItemsForm, get_choices
@ -9,16 +9,16 @@ from models import AmountChange, Item, LoginToken, PriceChange, Receipt, Receipt
from src.utils.modules.receipt_parser.pdf_receipt_parser import PDFReceipt from src.utils.modules.receipt_parser.pdf_receipt_parser import PDFReceipt
from src.utils.routes_utils import render_custom_template as render_template from src.utils.routes_utils import render_custom_template as render_template
PDFDir = "./"
@bp.route('/<int:receipt_id>', methods=['GET', 'POST']) @bp.route('/<int:receipt_id>', methods=['GET', 'POST'])
@login_required @login_required
def confirm_receipt_items(receipt_id: int): def confirm_receipt_items(receipt_id: int):
"""Check items from a receipt if they should be accounted for payment. """Check items from a receipt if they should be accounted for payment.
Get those items from the receipt PDF itself.""" Get those items from the receipt PDF itself."""
PDFDir: str = current_app.config["RECEIPT_FOLDER"]
receipt_details: Receipt = Receipt.query.get(receipt_id) receipt_details: Receipt = Receipt.query.get(receipt_id)
if current_user.is_authenticated and current_user.id == receipt_details.LoginToken.Establishment.owner: if current_user.is_authenticated and current_user.id == receipt_details.LoginToken.Establishment.owner:
receipt: PDFReceipt = PDFReceipt.getPDFReceiptFromFile(PDFDir + f"{receipt_details.id}.pdf") receipt: PDFReceipt = PDFReceipt.getPDFReceiptFromFile(PDFDir + f"/{receipt_details.id}.pdf")
form: CheckItemsForm = CheckItemsForm.new(receipt.items) form: CheckItemsForm = CheckItemsForm.new(receipt.items)
_template = CheckCustomItemsEntryForm(prefix="custom_items-_-") _template = CheckCustomItemsEntryForm(prefix="custom_items-_-")
# TODO: Precheck if items are already in database. If yes, check if item is present only once or multiple # TODO: Precheck if items are already in database. If yes, check if item is present only once or multiple

View File

@ -1,5 +1,5 @@
from flask_wtf import FlaskForm from flask_wtf import FlaskForm
from flask_wtf.file import FileAllowed, FileField, FileRequired from flask_wtf.file import FileAllowed, FileField
from wtforms import DateField, SelectField, SubmitField from wtforms import DateField, SelectField, SubmitField
from models import Establishment from models import Establishment

View File

@ -1,4 +1,4 @@
from flask import abort, redirect, request, url_for from flask import abort, current_app, redirect, request, url_for
from flask_login import current_user, login_required from flask_login import current_user, login_required
from os import rename from os import rename
from werkzeug.utils import secure_filename from werkzeug.utils import secure_filename
@ -10,11 +10,11 @@ from models.login_token import LoginToken
from src.utils.modules.receipt_parser.pdf_receipt_parser import PDFReceipt from src.utils.modules.receipt_parser.pdf_receipt_parser import PDFReceipt
from src.utils.routes_utils import render_custom_template as render_template from src.utils.routes_utils import render_custom_template as render_template
PDFDir = "./"
@bp.route('/<int:establishment>', methods=['GET', 'POST']) @bp.route('/<int:establishment>', methods=['GET', 'POST'])
@login_required @login_required
def upload_receipt(establishment: int): def upload_receipt(establishment: int):
"""Upload of a receipt.""" """Upload of a receipt."""
PDFDir: str = current_app.config["RECEIPT_FOLDER"]
if current_user.is_anonymous: if current_user.is_anonymous:
abort(403) abort(403)
if LoginToken.query.filter_by(establishment=establishment, user=current_user.id).first(): if LoginToken.query.filter_by(establishment=establishment, user=current_user.id).first():
@ -39,8 +39,8 @@ def upload_receipt(establishment: int):
db.session.add(dbReceipt) db.session.add(dbReceipt)
db.session.commit() db.session.commit()
if pdfReceipt: if pdfReceipt:
rename(f"{PDFDir}/temp.pdf", f"{PDFDir}{secure_filename(f'{dbReceipt.id}.pdf')}") rename(f"{PDFDir}/temp.pdf", f"{PDFDir}/{secure_filename(f'{dbReceipt.id}.pdf')}")
LOGGER.debug(receipt.text) LOGGER.debug(receipt.words)
return redirect(url_for("receipts.check_items.confirm_receipt_items", receipt_id = dbReceipt.id)) return redirect(url_for("receipts.check_items.confirm_receipt_items", receipt_id = dbReceipt.id))
else: else:
LOGGER.debug(form.errors) LOGGER.debug(form.errors)

View File

@ -0,0 +1,20 @@
from datetime import datetime
def getDictFromWords(words: list[tuple]):
    """Extract date, receipt id and item list from the words of a receipt.

    Every entry of ``words`` is read positionally: ``[4]`` is the text,
    ``[5]`` the block number, ``[6]`` the line number inside the block and
    ``[7]`` the word number inside the line.
    NOTE(review): this matches the tuples PyMuPDF returns from
    ``page.get_text("words")`` -- confirm against the caller.

    Args:
        words: Word tuples of the whole receipt.

    Returns:
        A dict with the keys ``"items"`` (list of dicts holding
        ``"itemname"`` and, when found, ``"price"``), ``"date"``
        (a ``datetime.date``) and ``"bonid"`` (text of the last word).

    Raises:
        IndexError: If ``words`` has fewer than six entries.
        ValueError: If the sixth word from the end is not a ``dd.mm.yy`` date.
    """
    results = {"items": []}
    # The date is read from the sixth-last word, the receipt id from the last.
    results['date'] = datetime.strptime(words[-6][4], "%d.%m.%y").date()
    results['bonid'] = words[-1][4]
    # None (not 0) marks "no block seen yet": 0 is a valid block number, and
    # using it as the sentinel made the first item of block 0 fail with an
    # IndexError because no item had been opened for it.
    currentblock = None
    skipwords = 14  # leading words that are skipped before items are read
    for word in words[skipwords:]:
        if currentblock != word[5]:
            # A new block starts a new item; its first word opens the name.
            results['items'].append({"itemname": word[4]})
            currentblock = word[5]
        elif word[6] == 0:
            # Further words on the first line of the block extend the name.
            results['items'][-1]["itemname"] += " " + word[4]
        if word[6] == 1 and word[7] == 1:
            # Second word of the second line is the price; anything from the
            # first "*" onwards is cut off.
            results['items'][-1]["price"] = word[4].split("*")[0]
        if "----" in word[4]:
            # The dashed separator ends the item list; drop the entry that
            # was just opened or extended by the separator itself.
            results['items'].pop()
            break
    return results

View File

@ -0,0 +1,27 @@
from datetime import datetime
def getDictFromWords(words: list[tuple]):
    """Extract receipt id, item list and date from the words of a receipt.

    Every entry of ``words`` is read positionally: ``[4]`` is the text,
    ``[5]`` the block number, ``[6]`` the line number inside the block and
    ``[7]`` the word number inside the line.
    NOTE(review): this matches the tuples PyMuPDF returns from
    ``page.get_text("words")`` -- confirm against the caller.

    Args:
        words: Word tuples of the whole receipt.

    Returns:
        A dict with the keys ``"items"`` (list of dicts holding
        ``"itemname"`` and, when found, ``"price"`` and ``"amount"``) and
        ``"bonid"`` (text of the last word). ``"date"`` (a ``datetime.date``)
        is only present when a ``datum:`` label followed by a value is found.

    Raises:
        IndexError: If ``words`` is empty, or an ``x`` amount marker is the
            very last word.
        ValueError: If the word after a ``datum:`` label is not a
            ``dd.mm.YYYY`` date.
    """
    results = {"items": []}
    results['bonid'] = words[-1][4]
    # None (not 0) marks "no block seen yet": 0 is a valid block number, and
    # using it as the sentinel made the first item of block 0 fail with an
    # IndexError because no item had been opened for it.
    currentblock = None
    skipwords = 9  # leading words that are skipped before items are read
    for position, word in enumerate(words[skipwords:], start=skipwords):
        if currentblock != word[5]:
            # A new block starts a new item; its first word opens the name.
            results['items'].append({"itemname": word[4]})
            currentblock = word[5]
        elif word[6] == 0:
            # Further words on the first line of the block extend the name.
            results['items'][-1]["itemname"] += " " + word[4]
        if word[6] == 1 and word[7] == 0:
            if word[4].lower() == "x":
                # An "x" marker is followed by the amount.
                results['items'][-1]["amount"] = words[position + 1][4]
            else:
                results['items'][-1]["price"] = word[4]
        elif word[6] == 2:
            # Any word on the third line is taken as the price.
            results['items'][-1]["price"] = word[4]
        if word[4].lower() == "gesamt":
            # The total line ends the item list; drop the entry that was
            # just opened or extended by the "gesamt" word itself.
            results['items'].pop()
            break
    # Scan back to front without stopping, so the match closest to the start
    # of the receipt is the one that remains in the result.
    reversed_words = words[::-1]  # built once instead of once per iteration
    for i, word in enumerate(reversed_words):
        # i == 0 means the label is the last word and no value follows it;
        # indexing i - 1 there would wrap around to the first word.
        if i > 0 and word[4].lower() == "datum:":
            results['date'] = datetime.strptime(reversed_words[i - 1][4], "%d.%m.%Y").date()
    return results

View File

@ -1,5 +1,7 @@
import fitz import fitz
from datetime import datetime, date from datetime import date
from .edeka.edeka_parser import getDictFromWords as edekaparser
from .kaufland.kaufland_parser import getDictFromWords as kauflandparser
from re import search from re import search
class PDFReceipt: class PDFReceipt:
@ -10,54 +12,47 @@ class PDFReceipt:
parser -- A keyword in lowercase to tell how the receipt is formated. parser -- A keyword in lowercase to tell how the receipt is formated.
Currently supported: 'edeka' Currently supported: 'edeka'
""" """
def __init__(self, bPDFFile, parser: str = "edeka") -> None: def __init__(self, strPDFFile) -> None:
try: try:
self.text = PDFReceipt._getTextFromPDF(bPDFFile) self.words = PDFReceipt._getWordsFromPDF(strPDFFile)
self.id, self.date, self.items = PDFReceipt._getInfosFromText(self.text, parser) storename = PDFReceipt._getStoreName(self.words)
self.id, self.date, self.items = PDFReceipt._getInfosFromText(self.words, store = storename)
except: except:
self.text = "PDF konnte nicht geladen werden." self.words = "PDF konnte nicht geladen werden."
self.date = date.today() self.date = date.today()
self.id = None self.id = None
self.items = [] self.items = []
def _getTextFromPDF(file): def _getWordsFromPDF(file):
with fitz.open(file, filetype="pdf") as doc: with fitz.open(file, filetype="pdf") as doc:
text = "" words = []
for page in doc: for page in doc:
text += page.get_text() words.extend(page.get_text("words", textpage=page.get_textpage_ocr(language = 'deu'), sort=True))
return text.strip() return words
def _getStoreName(words: list[tuple]) -> str:
for word in words:
if word[4].lower() in ("edeka", "kaufland"):
return word[4].lower()
return "unknown"
def _getItemsTextFromText(text, start="", end=""): def _getInfosFromText(words: str, store: str = "edeka"):
return text[text.index(start)+len(start):text.index(end)].strip() if store == "edeka":
result = edekaparser(words)
def _convertItemsTextToDict(text): elif store == "kaufland":
temp = text.split("\n") result = kauflandparser(words)
resultsArr = [] items = result.get("items")
i = 0 date = result.get("date")
while i < len(temp): strReceiptNumber = result.get("bonid")
if search("(\d+) x", temp[i]): try:
resultsArr.append({"itemname": temp[i+2], "price": temp[i+1], "amount": temp[i][:-2]}) intReceiptNumber = int(strReceiptNumber)
i += 4 except:
else: raise ValueError("Receipt Number not an integer.")
resultsArr.append({"itemname": temp[i], "price": temp[i+1][:-2]})
i += 2
return resultsArr
def _getInfosFromText(text: str, parser: str = "edeka"):
if parser.lower() == "edeka":
items = PDFReceipt._convertItemsTextToDict(PDFReceipt._getItemsTextFromText(text, start="EUR", end="----------"))
strDate = text.split("\n")[-1].split(" ")[0]
date = datetime.strptime(strDate, "%d.%m.%y").date()
strReceiptNumber = text.split("\n")[-1].split(" ")[-1]
try:
intReceiptNumber = int(strReceiptNumber)
except:
raise ValueError("Receipt Number not an integer.")
return (intReceiptNumber, date, items) return (intReceiptNumber, date, items)
def getPDFReceiptFromFile(strPDFFile: str, parser: str = "edeka"): def getPDFReceiptFromFile(strPDFFile: str):
try: try:
with open(strPDFFile) as doc: with open(strPDFFile) as doc:
return PDFReceipt(doc, parser) return PDFReceipt(doc)
except FileNotFoundError as e: except FileNotFoundError as e:
return PDFReceipt(None) return PDFReceipt(None)