Compare commits

..

No commits in common. "main" and "v0.15.2" have entirely different histories.

65 changed files with 110 additions and 350 deletions

View File

@ -4,47 +4,19 @@ on:
tags: tags:
- '*' - '*'
jobs: jobs:
release: build:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: https://gitea.com/actions/checkout@master - uses: actions/checkout@master
- name: Zip Artifacts - name: Archive Server
uses: https://github.com/thedoctor0/zip-release@master uses: thedoctor0/zip-release@master
with: with:
type: 'zip' type: 'zip'
filename: 'server.zip' filename: 'server.zip'
exclusions: '*.git*' exclusions: '*.git*'
- name: Release Archive - name: Release Archive
uses: https://gitea.com/actions/gitea-release-action@v1 uses: ncipollo/release-action@v1
with: with:
server_url: https://gitea.wpgcommunity.net allowUpdates: true
files: 'server.zip' artifacts: "server.zip"
docker: token: ${{ secrets.GITHUB_TOKEN }}
runs-on: ubuntu-latest
steps:
- uses: https://gitea.com/actions/checkout@master
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: |
${{ vars.DOCKER_REPO }}/costhive
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
with:
config-inline: |
[registry."${{ vars.DOCKER_REPO }}"]
http = true
insecure = true
- name: Login to Gitea Container Registry
uses: docker/login-action@v3
with:
registry: ${{ vars.DOCKER_REPO }}
username: ${{ vars.DOCKER_USER }}
password: ${{ secrets.DOCKER_PASS }}
- name: Build and push
uses: https://github.com/docker/build-push-action@v6
with:
push: true
context: .
tags: ${{ steps.meta.outputs.tags }}

1
.gitignore vendored
View File

@ -355,4 +355,3 @@ tests*
*.backup *.backup
Dockerfile.* Dockerfile.*
docker-compose.* docker-compose.*
*debug.py

View File

@ -1,5 +1,5 @@
FROM python@sha256:c66cf219ac0083a9af2ff90e16530f16cd503c59eb7909feb3b8f3524dc1a87e FROM python@sha256:21c9f0b22213295a13bd678c5b45aa587ff6cb01cd99b6cf0e6928f4c777006b
# python:3.12.2-slim-bullseye (amd64) # python:3.11.4-slim-bullseye (arm/v7)
RUN useradd costhive RUN useradd costhive
WORKDIR /home/costhive WORKDIR /home/costhive
@ -16,7 +16,7 @@ RUN python -m venv venv; \
COPY backend backend COPY backend backend
ENV FLASK_APP=run.py ENV FLASK_APP run.py
RUN chmod +x boot.sh; \ RUN chmod +x boot.sh; \
chown -R costhive:costhive . chown -R costhive:costhive .

View File

@ -4,7 +4,6 @@ from dotenv import load_dotenv
basedir = os.path.abspath(os.path.dirname(__file__)) basedir = os.path.abspath(os.path.dirname(__file__))
load_dotenv(os.path.join(basedir, '.env')) load_dotenv(os.path.join(basedir, '.env'))
os.environ["TESSDATA_PREFIX"] = os.path.join(basedir, 'tessdata')
class Config(object): class Config(object):
SECRET_KEY = os.environ.get('SECRET_KEY') or "s0m37h!n6-obfu5c471ng" SECRET_KEY = os.environ.get('SECRET_KEY') or "s0m37h!n6-obfu5c471ng"
SQLALCHEMY_DATABASE_URI = os.environ.get('DATABASE_URL', '').replace( SQLALCHEMY_DATABASE_URI = os.environ.get('DATABASE_URL', '').replace(
@ -20,4 +19,3 @@ class Config(object):
MAIL_PASSWORD = os.environ.get('MAIL_PASSWORD') MAIL_PASSWORD = os.environ.get('MAIL_PASSWORD')
ADMINS = ['postmaster@wpgcommunity.net'] ADMINS = ['postmaster@wpgcommunity.net']
POSTS_PER_PAGE = 15 POSTS_PER_PAGE = 15
RECEIPT_FOLDER = f"{basedir}/../PDFReceipts"

View File

@ -1 +0,0 @@
tessedit_create_alto 1

View File

@ -1,7 +0,0 @@
tessedit_ambigs_training 1
load_freq_dawg 0
load_punc_dawg 0
load_system_dawg 0
load_number_dawg 0
ambigs_debug_level 3
load_fixed_length_dawgs 0

View File

@ -1 +0,0 @@
tessedit_zero_rejection T

View File

@ -1,5 +0,0 @@
load_bigram_dawg True
tessedit_enable_bigram_correction True
tessedit_bigram_debug 3
save_raw_choices True
save_alt_choices True

View File

@ -1,12 +0,0 @@
disable_character_fragments T
file_type .bl
textord_fast_pitch_test T
tessedit_zero_rejection T
tessedit_minimal_rejection F
tessedit_write_rep_codes F
edges_children_fix F
edges_childarea 0.65
edges_boxarea 0.9
tessedit_resegment_from_boxes T
tessedit_train_from_boxes T
textord_no_rejects T

View File

@ -1,13 +0,0 @@
file_type .bl
#tessedit_use_nn F
textord_fast_pitch_test T
tessedit_zero_rejection T
tessedit_minimal_rejection F
tessedit_write_rep_codes F
edges_children_fix F
edges_childarea 0.65
edges_boxarea 0.9
tessedit_resegment_from_boxes T
tessedit_train_from_boxes T
#textord_repeat_extraction F
textord_no_rejects T

View File

@ -1 +0,0 @@
tessedit_char_whitelist 0123456789-.

View File

@ -1 +0,0 @@
tessedit_write_images T

View File

@ -1,2 +0,0 @@
tessedit_create_hocr 1
hocr_font_info 0

View File

@ -1,2 +0,0 @@
interactive_display_mode T
tessedit_display_outwords T

View File

@ -1,4 +0,0 @@
textord_skewsmooth_offset 8
textord_skewsmooth_offset2 8
textord_merge_desc 0.5
textord_no_rejects 1

View File

@ -1,2 +0,0 @@
tessedit_resegment_from_line_boxes 1
tessedit_make_boxes_from_boxes 1

View File

@ -1 +0,0 @@
debug_file tesseract.log

View File

@ -1,11 +0,0 @@
file_type .bl
textord_fast_pitch_test T
tessedit_zero_rejection T
tessedit_minimal_rejection F
tessedit_write_rep_codes F
edges_children_fix F
edges_childarea 0.65
edges_boxarea 0.9
tessedit_train_line_recognizer T
textord_no_rejects T
tessedit_init_config_only T

View File

@ -1 +0,0 @@
tessedit_create_lstmbox 1

View File

@ -1,4 +0,0 @@
stopper_debug_level 1
classify_debug_level 1
segsearch_debug_level 1
language_model_debug_level 3

View File

@ -1 +0,0 @@
tessedit_create_boxfile 1

View File

@ -1 +0,0 @@
tessedit_create_pdf 1

View File

@ -1 +0,0 @@
debug_file /dev/null

View File

@ -1,2 +0,0 @@
tessedit_resegment_from_boxes 1
tessedit_make_boxes_from_boxes 1

View File

@ -1,12 +0,0 @@
textord_show_blobs 0
textord_debug_tabfind 3
textord_tabfind_show_partitions 1
textord_tabfind_show_initial_partitions 1
textord_tabfind_show_columns 1
textord_tabfind_show_blocks 1
textord_tabfind_show_initialtabs 1
textord_tabfind_show_finaltabs 1
textord_tabfind_show_strokewidths 1
textord_tabfind_show_vlines 0
textord_tabfind_show_images 1
tessedit_dump_pageseg_images 0

View File

@ -1 +0,0 @@
tessedit_create_tsv 1

View File

@ -1,3 +0,0 @@
# This config file should be used with other config files which create renderers.
# usage example: tesseract eurotext.tif eurotext txt hocr pdf
tessedit_create_txt 1

View File

@ -1,2 +0,0 @@
tessedit_write_unlv 1
unlv_tilde_crunching T

View File

@ -1 +0,0 @@
tessedit_create_wordstrbox 1

View File

@ -1,2 +0,0 @@
1-\d\d\d-GOOG-411
www.\n\\\*.com

View File

@ -1,5 +0,0 @@
the
quick
brown
fox
jumped

Binary file not shown.

View File

@ -1 +0,0 @@
# No content needed as all defaults are correct.

View File

@ -1,2 +0,0 @@
chop_enable 0
wordrec_enable_assoc 0

View File

@ -1,7 +0,0 @@
#################################################
# Adaptive Matcher Using PreAdapted Templates
#################################################
classify_enable_adaptive_debugger 1
matcher_debug_flags 6
matcher_debug_level 1

View File

@ -1,12 +0,0 @@
#################################################
# Adaptive Matcher Using PreAdapted Templates
#################################################
classify_enable_adaptive_debugger 1
matcher_debug_flags 6
matcher_debug_level 1
wordrec_display_splits 0
wordrec_display_all_blobs 1
wordrec_display_segmentations 2
classify_debug_level 1

View File

@ -1,9 +0,0 @@
#################################################
# Adaptive Matcher Using PreAdapted Templates
#################################################
wordrec_display_splits 0
wordrec_display_all_blobs 1
wordrec_display_segmentations 2
classify_debug_level 1
stopper_debug_level 1

View File

@ -0,0 +1,40 @@
"""new table for bought entries with unknown items
Revision ID: 24b8e319c0d0
Revises: 015f4256bb4c
Create Date: 2024-06-02 13:14:38.681605
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = '24b8e319c0d0'
down_revision = '015f4256bb4c'
branch_labels = None
depends_on = None
def upgrade():
    """Make ``payment.token`` nullable and recreate its foreign key.

    The existing ``payment_token_fkey`` constraint is dropped and a new
    foreign key to ``login_token.token`` is created with no ON UPDATE /
    ON DELETE options.
    """
    # ### commands auto generated by Alembic - adjusted ###
    with op.batch_alter_table('payment', schema=None) as batch_op:
        batch_op.alter_column('token',
                              existing_type=sa.VARCHAR(length=15),
                              nullable=True)
        batch_op.drop_constraint('payment_token_fkey', type_='foreignkey')
        # Give the new constraint an explicit name instead of None: an
        # unnamed constraint cannot be dropped by name afterwards.
        # NOTE(review): 'payment_token_fkey' is also what PostgreSQL would
        # generate by default for this column - confirm the target database.
        batch_op.create_foreign_key('payment_token_fkey', 'login_token', ['token'], ['token'])
    # ### end Alembic commands ###
def downgrade():
    """Restore the cascading foreign key and the NOT NULL rule on ``payment.token``.

    Drops the foreign key on ``payment.token``, recreates it as
    ``payment_token_fkey`` with ON UPDATE / ON DELETE CASCADE, then makes
    the column non-nullable again.
    """
    # ### commands auto generated by Alembic - adjusted ###
    with op.batch_alter_table('payment', schema=None) as batch_op:
        # The autogenerated call passed None as the constraint name, which
        # cannot be turned into a DROP CONSTRAINT statement.
        # NOTE(review): assumes the constraint is called 'payment_token_fkey'
        # (the PostgreSQL default name for this column) - confirm on the
        # target database.
        batch_op.drop_constraint('payment_token_fkey', type_='foreignkey')
        batch_op.create_foreign_key('payment_token_fkey', 'login_token', ['token'], ['token'], onupdate='CASCADE', ondelete='CASCADE')
        # NOTE(review): this step will fail if rows with a NULL token exist.
        batch_op.alter_column('token',
                              existing_type=sa.VARCHAR(length=15),
                              nullable=False)
    # ### end Alembic commands ###

View File

@ -1,7 +1,7 @@
"""new table for bought entries with unknown items 2 """new table for bought entries with unknown items 2
Revision ID: 2a64d3b9235a Revision ID: 2a64d3b9235a
Revises: 015f4256bb4c Revises: 24b8e319c0d0
Create Date: 2024-06-02 13:19:59.901053 Create Date: 2024-06-02 13:19:59.901053
""" """
@ -11,7 +11,7 @@ import sqlalchemy as sa
# revision identifiers, used by Alembic. # revision identifiers, used by Alembic.
revision = '2a64d3b9235a' revision = '2a64d3b9235a'
down_revision = '015f4256bb4c' down_revision = '24b8e319c0d0'
branch_labels = None branch_labels = None
depends_on = None depends_on = None

View File

@ -1,38 +0,0 @@
"""raise password char length
Revision ID: 782a2409df41
Revises: 926395732c3e
Create Date: 2025-06-03 21:01:23.169897
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = '782a2409df41'
down_revision = '926395732c3e'
branch_labels = None
depends_on = None
def upgrade():
    """Widen ``user.password_hash`` from VARCHAR(128) to String(255)."""
    # ### commands auto generated by Alembic - please adjust! ###
    previous_type = sa.VARCHAR(length=128)
    widened_type = sa.String(length=255)
    with op.batch_alter_table('user', schema=None) as batch_op:
        batch_op.alter_column(
            'password_hash',
            existing_type=previous_type,
            type_=widened_type,
            existing_nullable=False,
        )
    # ### end Alembic commands ###
def downgrade():
    """Shrink ``user.password_hash`` back from String(255) to VARCHAR(128)."""
    # ### commands auto generated by Alembic - please adjust! ###
    current_type = sa.String(length=255)
    narrowed_type = sa.VARCHAR(length=128)
    with op.batch_alter_table('user', schema=None) as batch_op:
        batch_op.alter_column(
            'password_hash',
            existing_type=current_type,
            type_=narrowed_type,
            existing_nullable=False,
        )
    # ### end Alembic commands ###

View File

@ -1,38 +0,0 @@
"""raise bonid digits
Revision ID: 926395732c3e
Revises: 2a64d3b9235a
Create Date: 2024-08-24 10:33:39.109944
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = '926395732c3e'
down_revision = '2a64d3b9235a'
branch_labels = None
depends_on = None
def upgrade():
    """Raise the precision of ``receipt.bonid`` from 24 to 28 digits."""
    # ### commands auto generated by Alembic - please adjust! ###
    previous_type = sa.NUMERIC(precision=24, scale=0)
    widened_type = sa.Numeric(precision=28, scale=0)
    with op.batch_alter_table('receipt', schema=None) as batch_op:
        batch_op.alter_column(
            'bonid',
            existing_type=previous_type,
            type_=widened_type,
            existing_nullable=True,
        )
    # ### end Alembic commands ###
def downgrade():
    """Lower the precision of ``receipt.bonid`` back from 28 to 24 digits."""
    # ### commands auto generated by Alembic - please adjust! ###
    current_type = sa.Numeric(precision=28, scale=0)
    narrowed_type = sa.NUMERIC(precision=24, scale=0)
    with op.batch_alter_table('receipt', schema=None) as batch_op:
        batch_op.alter_column(
            'bonid',
            existing_type=current_type,
            type_=narrowed_type,
            existing_nullable=True,
        )
    # ### end Alembic commands ###

View File

@ -4,7 +4,7 @@ from src import db
class Payment(db.Model): class Payment(db.Model):
id = db.Column(db.BigInteger, primary_key=True, autoincrement=True) id = db.Column(db.BigInteger, primary_key=True, autoincrement=True)
token = db.Column(db.ForeignKey('login_token.token'), token = db.Column(db.ForeignKey('login_token.token'),
server_onupdate=db.FetchedValue(), nullable=False) server_onupdate=db.FetchedValue())
date = db.Column(db.Date, nullable=False, server_default=db.func.now()) date = db.Column(db.Date, nullable=False, server_default=db.func.now())
amount = db.Column(db.BigInteger, nullable=False, server_default=str(0)) amount = db.Column(db.BigInteger, nullable=False, server_default=str(0))

View File

@ -4,7 +4,7 @@ from src import db
class Receipt(db.Model): class Receipt(db.Model):
id = db.Column(db.BigInteger, primary_key=True, autoincrement=True) id = db.Column(db.BigInteger, primary_key=True, autoincrement=True)
date = db.Column(db.Date, nullable=False) date = db.Column(db.Date, nullable=False)
bonid = db.Column(db.Numeric(precision=28, scale=0), unique=True) bonid = db.Column(db.Numeric(precision=24, scale=0), unique=True)
from_user = db.Column(db.ForeignKey("login_token.token"), from_user = db.Column(db.ForeignKey("login_token.token"),
server_onupdate=db.FetchedValue()) server_onupdate=db.FetchedValue())
registered = db.Column(db.Boolean, nullable=False, registered = db.Column(db.Boolean, nullable=False,

View File

@ -9,7 +9,7 @@ from werkzeug.security import generate_password_hash, check_password_hash
class User(UserMixin, db.Model): class User(UserMixin, db.Model):
id = db.Column(db.BigInteger, primary_key=True, autoincrement=True) id = db.Column(db.BigInteger, primary_key=True, autoincrement=True)
email = db.Column(db.String(255), nullable=False, unique=True) email = db.Column(db.String(255), nullable=False, unique=True)
password_hash = db.Column(db.String(255), nullable=False) password_hash = db.Column(db.String(128), nullable=False)
LoginToken = db.relationship("LoginToken", backref='User', lazy='dynamic') LoginToken = db.relationship("LoginToken", backref='User', lazy='dynamic')
Bought = db.relationship("Bought", secondary="login_token", Bought = db.relationship("Bought", secondary="login_token",

View File

@ -38,8 +38,6 @@ migrate = Migrate(transaction_per_migration=True)
def create_app(config_class=Config): def create_app(config_class=Config):
if not exists(config_class.RECEIPT_FOLDER):
makedirs(config_class.RECEIPT_FOLDER)
app = Flask(__name__, template_folder="../web/templates", static_folder="../web/static") app = Flask(__name__, template_folder="../web/templates", static_folder="../web/static")
app.config.from_object(config_class) app.config.from_object(config_class)
bootstrap.init_app(app) bootstrap.init_app(app)

View File

@ -1,12 +1,12 @@
from models import LoginToken from models import LoginToken
from flask_wtf import FlaskForm from flask_wtf import FlaskForm
from wtforms import DateField, DecimalField, SelectField, SubmitField from wtforms import DateField, FloatField, IntegerField, SelectField, SelectMultipleField, StringField, SubmitField
from wtforms.validators import DataRequired from wtforms.validators import DataRequired, Optional
class NewPaymentForm(FlaskForm): class NewPaymentForm(FlaskForm):
token = SelectField("User", validators=[DataRequired()], render_kw={"class": "form-control"}) token = SelectField("User", validators=[DataRequired()], render_kw={"class": "form-control"})
date = DateField("Date", validators=[DataRequired()], render_kw={"class": "form-control"}) date = DateField("Date", validators=[DataRequired()], render_kw={"class": "form-control"})
amount = DecimalField("Amount (€)", validators=[DataRequired()], render_kw={"class": "form-control"}) amount = IntegerField("Amount (in ct)", validators=[DataRequired()], render_kw={"class": "form-control"})
submit = SubmitField("Submit", render_kw={"class": "btn btn-primary mt-3"}) submit = SubmitField("Submit", render_kw={"class": "btn btn-primary mt-3"})
@classmethod @classmethod

View File

@ -17,7 +17,7 @@ def insert_payment(establishment_id: int):
if form.validate_on_submit(): if form.validate_on_submit():
new_payment = Payment(token = form.token.data, new_payment = Payment(token = form.token.data,
date = form.date.data, date = form.date.data,
amount = int(form.amount.data*100)) amount = form.amount.data)
db.session.add(new_payment) db.session.add(new_payment)
db.session.commit() db.session.commit()
return redirect(url_for("main.index")) return redirect(url_for("main.index"))

View File

@ -10,4 +10,5 @@ def show_item(item: int):
itemschema = ItemSchema().dump(itemobj) itemschema = ItemSchema().dump(itemobj)
itemschema['PriceChange'].sort(key=lambda d: d['date'], reverse=True) itemschema['PriceChange'].sort(key=lambda d: d['date'], reverse=True)
itemschema['AmountChange'].sort(key=lambda d: d['date'], reverse=True) itemschema['AmountChange'].sort(key=lambda d: d['date'], reverse=True)
print(itemschema)
return render_template('item/details/show_item.html', item = itemschema) return render_template('item/details/show_item.html', item = itemschema)

View File

@ -73,4 +73,6 @@ class CheckItemsForm(FlaskForm):
item['itemname'], item['price'], item['amount'] if 'amount' in item else 1, 0)) item['itemname'], item['price'], item['amount'] if 'amount' in item else 1, 0))
check_items = CheckItems(check_items_entry) check_items = CheckItems(check_items_entry)
form = cls(obj=check_items) form = cls(obj=check_items)
print(f"{form.items.entries}")
return form return form

View File

@ -1,5 +1,5 @@
from datetime import date from datetime import date
from flask import abort, current_app, request, url_for from flask import abort, request, url_for
from flask_login import current_user, login_required from flask_login import current_user, login_required
from . import bp from . import bp
from .forms import CheckCustomItemsEntryForm, CheckItemsEntryForm, CheckItemsForm, get_choices from .forms import CheckCustomItemsEntryForm, CheckItemsEntryForm, CheckItemsForm, get_choices
@ -9,16 +9,16 @@ from models import AmountChange, Item, LoginToken, PriceChange, Receipt, Receipt
from src.utils.modules.receipt_parser.pdf_receipt_parser import PDFReceipt from src.utils.modules.receipt_parser.pdf_receipt_parser import PDFReceipt
from src.utils.routes_utils import render_custom_template as render_template from src.utils.routes_utils import render_custom_template as render_template
PDFDir = "./"
@bp.route('/<int:receipt_id>', methods=['GET', 'POST']) @bp.route('/<int:receipt_id>', methods=['GET', 'POST'])
@login_required @login_required
def confirm_receipt_items(receipt_id: int): def confirm_receipt_items(receipt_id: int):
"""Check items from a receipt if they should be accounted for payment. """Check items from a receipt if they should be accounted for payment.
Get those items from the receipt PDF itself.""" Get those items from the receipt PDF itself."""
PDFDir: str = current_app.config["RECEIPT_FOLDER"]
receipt_details: Receipt = Receipt.query.get(receipt_id) receipt_details: Receipt = Receipt.query.get(receipt_id)
if current_user.is_authenticated and current_user.id == receipt_details.LoginToken.Establishment.owner: if current_user.is_authenticated and current_user.id == receipt_details.LoginToken.Establishment.owner:
receipt: PDFReceipt = PDFReceipt.getPDFReceiptFromFile(PDFDir + f"/{receipt_details.id}.pdf") receipt: PDFReceipt = PDFReceipt.getPDFReceiptFromFile(PDFDir + f"{receipt_details.id}.pdf")
form: CheckItemsForm = CheckItemsForm.new(receipt.items) form: CheckItemsForm = CheckItemsForm.new(receipt.items)
_template = CheckCustomItemsEntryForm(prefix="custom_items-_-") _template = CheckCustomItemsEntryForm(prefix="custom_items-_-")
# TODO: Precheck if items are already in database. If yes, check if item is present only once or multiple # TODO: Precheck if items are already in database. If yes, check if item is present only once or multiple

View File

@ -1,5 +1,4 @@
from datetime import date from datetime import date
from decimal import Decimal
from models import AmountChange, Item, PriceChange, Receipt, ReceiptItem from models import AmountChange, Item, PriceChange, Receipt, ReceiptItem
from src import db, LOGGER from src import db, LOGGER
@ -36,7 +35,7 @@ def insert_existing_item(formitemdict: dict[str: str], receipt_date: date = None
db.session.commit() db.session.commit()
def insert_item_to_receipt(receipt: Receipt, item_dict: dict[str: str], item_index:int=0): def insert_item_to_receipt(receipt: Receipt, item_dict: dict[str: str], item_index:int=0):
receipt.ReceiptItem.append(ReceiptItem(item=item_index, name=item_dict.get('itemname'), amount=item_dict.get('amount'), price=int(item_dict.get('price')*100))) receipt.ReceiptItem.append(ReceiptItem(item=item_index, name=item_dict.get('itemname'), amount=item_dict.get('amount'), price=int(str(item_dict.get('price')).replace(',','').replace('.', ''))))
db.session.add(receipt) db.session.add(receipt)
db.session.commit() db.session.commit()

View File

@ -1,5 +1,5 @@
from flask_wtf import FlaskForm from flask_wtf import FlaskForm
from flask_wtf.file import FileAllowed, FileField from flask_wtf.file import FileAllowed, FileField, FileRequired
from wtforms import DateField, SelectField, SubmitField from wtforms import DateField, SelectField, SubmitField
from models import Establishment from models import Establishment

View File

@ -1,4 +1,4 @@
from flask import abort, current_app, redirect, request, url_for from flask import abort, redirect, request, url_for
from flask_login import current_user, login_required from flask_login import current_user, login_required
from os import rename from os import rename
from werkzeug.utils import secure_filename from werkzeug.utils import secure_filename
@ -10,11 +10,11 @@ from models.login_token import LoginToken
from src.utils.modules.receipt_parser.pdf_receipt_parser import PDFReceipt from src.utils.modules.receipt_parser.pdf_receipt_parser import PDFReceipt
from src.utils.routes_utils import render_custom_template as render_template from src.utils.routes_utils import render_custom_template as render_template
PDFDir = "./"
@bp.route('/<int:establishment>', methods=['GET', 'POST']) @bp.route('/<int:establishment>', methods=['GET', 'POST'])
@login_required @login_required
def upload_receipt(establishment: int): def upload_receipt(establishment: int):
"""Upload of a receipt.""" """Upload of a receipt."""
PDFDir: str = current_app.config["RECEIPT_FOLDER"]
if current_user.is_anonymous: if current_user.is_anonymous:
abort(403) abort(403)
if LoginToken.query.filter_by(establishment=establishment, user=current_user.id).first(): if LoginToken.query.filter_by(establishment=establishment, user=current_user.id).first():
@ -39,8 +39,8 @@ def upload_receipt(establishment: int):
db.session.add(dbReceipt) db.session.add(dbReceipt)
db.session.commit() db.session.commit()
if pdfReceipt: if pdfReceipt:
rename(f"{PDFDir}/temp.pdf", f"{PDFDir}/{secure_filename(f'{dbReceipt.id}.pdf')}") rename(f"{PDFDir}/temp.pdf", f"{PDFDir}{secure_filename(f'{dbReceipt.id}.pdf')}")
LOGGER.debug(receipt.words) LOGGER.debug(receipt.text)
return redirect(url_for("receipts.check_items.confirm_receipt_items", receipt_id = dbReceipt.id)) return redirect(url_for("receipts.check_items.confirm_receipt_items", receipt_id = dbReceipt.id))
else: else:
LOGGER.debug(form.errors) LOGGER.debug(form.errors)

View File

@ -1,20 +0,0 @@
from datetime import datetime
def getDictFromWords(words: list[tuple]):
    """Build a receipt dict from the word tuples of an EDEKA receipt.

    Each entry of ``words`` is indexed as ``word[4]`` (text), ``word[5]``,
    ``word[6]`` and ``word[7]``.
    NOTE(review): presumably these are PyMuPDF "words" tuples
    ``(x0, y0, x1, y1, text, block_no, line_no, word_no)`` - confirm
    against the caller.

    Args:
        words: Word tuples of the whole receipt in reading order.

    Returns:
        A dict with the keys ``"items"`` (list of dicts holding
        ``"itemname"`` and, when found, ``"price"``), ``"date"``
        (``datetime.date`` parsed from the sixth-last word) and ``"bonid"``
        (text of the last word).

    Raises:
        IndexError: If ``words`` holds fewer than six entries.
        ValueError: If the sixth-last word is not a ``dd.mm.yy`` date.
    """
    results = {"items": []}
    results['date'] = datetime.strptime(words[-6][4], "%d.%m.%y").date()
    results['bonid'] = words[-1][4]
    # None (not 0) marks "no block seen yet": 0 is a valid block number and
    # would otherwise make the first word look like a continuation of an
    # item that does not exist.
    current_block = None
    skipwords = 14  # leading words that are skipped before the item list
    for word in words[skipwords:]:
        text, block_no, line_no, word_no = word[4], word[5], word[6], word[7]
        if current_block != block_no:
            # A new block starts a new item.
            results['items'].append({"itemname": text})
            current_block = block_no
        elif line_no == 0:
            # Further words on the first line extend the item name.
            results['items'][-1]["itemname"] += " " + text
        if line_no == 1 and word_no == 1:
            # Price token; anything from the first "*" on is cut off.
            results['items'][-1]["price"] = text.split("*")[0]
        if "----" in text:
            # A dashed word ends the item list; drop the entry it belongs to.
            del results['items'][-1]
            break
    return results

View File

@ -1,27 +0,0 @@
from datetime import datetime
def getDictFromWords(words: list[tuple]):
    """Build a receipt dict from the word tuples of a Kaufland receipt.

    Each entry of ``words`` is indexed as ``word[4]`` (text), ``word[5]``,
    ``word[6]`` and ``word[7]``.
    NOTE(review): presumably these are PyMuPDF "words" tuples
    ``(x0, y0, x1, y1, text, block_no, line_no, word_no)`` - confirm
    against the caller.

    Args:
        words: Word tuples of the whole receipt in reading order.

    Returns:
        A dict with the keys ``"items"`` (list of dicts holding
        ``"itemname"`` and, when found, ``"price"`` / ``"amount"``) and
        ``"bonid"`` (text of the last word). ``"date"`` is only present when
        a ``"datum:"`` word followed by another word exists.

    Raises:
        IndexError: If ``words`` is empty.
        ValueError: If the word after ``"datum:"`` is not a ``dd.mm.YYYY`` date.
    """
    results = {"items": []}
    results['bonid'] = words[-1][4]
    total = len(words)
    # None (not 0) marks "no block seen yet": 0 is a valid block number and
    # would otherwise make the first word look like a continuation of an
    # item that does not exist.
    current_block = None
    skipwords = 9  # leading words that are skipped before the item list
    for pos in range(skipwords, total):
        word = words[pos]
        text, block_no, line_no, word_no = word[4], word[5], word[6], word[7]
        if current_block != block_no:
            # A new block starts a new item.
            results['items'].append({"itemname": text})
            current_block = block_no
        elif line_no == 0:
            # Further words on the first line extend the item name.
            results['items'][-1]["itemname"] += " " + text
        if line_no == 1 and word_no == 0:
            if text.lower() == "x":
                # The quantity follows the "x" marker; skip it when the
                # marker is the very last word instead of indexing past
                # the end of the list.
                if pos + 1 < total:
                    results['items'][-1]["amount"] = words[pos + 1][4]
            else:
                results['items'][-1]["price"] = text
        elif line_no == 2:
            results['items'][-1]["price"] = text
        if text.lower() == "gesamt":
            # The total line ends the item list; drop the entry it belongs to.
            del results['items'][-1]
            break
    # Scan back to front so the earliest "datum:" in the document wins.
    # A "datum:" without a following word is ignored; the old index
    # arithmetic wrapped around and read the first word of the document.
    for pos in range(total - 1, -1, -1):
        if words[pos][4].lower() == "datum:" and pos + 1 < total:
            results['date'] = datetime.strptime(words[pos + 1][4], "%d.%m.%Y").date()
    return results

View File

@ -1,7 +1,5 @@
import fitz import fitz
from datetime import date from datetime import datetime, date
from .edeka.edeka_parser import getDictFromWords as edekaparser
from .kaufland.kaufland_parser import getDictFromWords as kauflandparser
from re import search from re import search
class PDFReceipt: class PDFReceipt:
@ -12,47 +10,54 @@ class PDFReceipt:
parser -- A keyword in lowercase to tell how the receipt is formated. parser -- A keyword in lowercase to tell how the receipt is formated.
Currently supported: 'edeka' Currently supported: 'edeka'
""" """
def __init__(self, strPDFFile) -> None: def __init__(self, bPDFFile, parser: str = "edeka") -> None:
try: try:
self.words = PDFReceipt._getWordsFromPDF(strPDFFile) self.text = PDFReceipt._getTextFromPDF(bPDFFile)
storename = PDFReceipt._getStoreName(self.words) self.id, self.date, self.items = PDFReceipt._getInfosFromText(self.text, parser)
self.id, self.date, self.items = PDFReceipt._getInfosFromText(self.words, store = storename)
except: except:
self.words = "PDF konnte nicht geladen werden." self.text = "PDF konnte nicht geladen werden."
self.date = date.today() self.date = date.today()
self.id = None self.id = None
self.items = [] self.items = []
def _getWordsFromPDF(file): def _getTextFromPDF(file):
with fitz.open(file, filetype="pdf") as doc: with fitz.open(file, filetype="pdf") as doc:
words = [] text = ""
for page in doc: for page in doc:
words.extend(page.get_text("words", textpage=page.get_textpage_ocr(language = 'deu'), sort=True)) text += page.get_text()
return words return text.strip()
def _getStoreName(words: list[tuple]) -> str: def _getItemsTextFromText(text, start="", end=""):
for word in words: return text[text.index(start)+len(start):text.index(end)].strip()
if word[4].lower() in ("edeka", "kaufland"):
return word[4].lower()
return "unknown"
def _getInfosFromText(words: str, store: str = "edeka"): def _convertItemsTextToDict(text):
if store == "edeka": temp = text.split("\n")
result = edekaparser(words) resultsArr = []
elif store == "kaufland": i = 0
result = kauflandparser(words) while i < len(temp):
items = result.get("items") if search("(\d+) x", temp[i]):
date = result.get("date") resultsArr.append({"itemname": temp[i+2], "price": temp[i+1], "amount": temp[i][:-2]})
strReceiptNumber = result.get("bonid") i += 4
try: else:
intReceiptNumber = int(strReceiptNumber) resultsArr.append({"itemname": temp[i], "price": temp[i+1][:-2]})
except: i += 2
raise ValueError("Receipt Number not an integer.") return resultsArr
def _getInfosFromText(text: str, parser: str = "edeka"):
if parser.lower() == "edeka":
items = PDFReceipt._convertItemsTextToDict(PDFReceipt._getItemsTextFromText(text, start="EUR", end="----------"))
strDate = text.split("\n")[-1].split(" ")[0]
date = datetime.strptime(strDate, "%d.%m.%y").date()
strReceiptNumber = text.split("\n")[-1].split(" ")[-1]
try:
intReceiptNumber = int(strReceiptNumber)
except:
raise ValueError("Receipt Number not an integer.")
return (intReceiptNumber, date, items) return (intReceiptNumber, date, items)
def getPDFReceiptFromFile(strPDFFile: str): def getPDFReceiptFromFile(strPDFFile: str, parser: str = "edeka"):
try: try:
with open(strPDFFile) as doc: with open(strPDFFile) as doc:
return PDFReceipt(doc) return PDFReceipt(doc, parser)
except FileNotFoundError as e: except FileNotFoundError as e:
return PDFReceipt(None) return PDFReceipt(None)