Compare commits

..

10 Commits

Author SHA1 Message Date
f24880c09a fix: raise user password char length (#18)
All checks were successful
Create Release / release (push) Successful in 28s
Create Release / docker (push) Successful in 8m55s
Co-authored-by: Lunaresk <lingsonb@gmail.com>
Reviewed-on: #18
2025-06-03 21:39:00 +02:00
3622cbca05 fix: workflow login
All checks were successful
Create Release / release (push) Successful in 21s
Create Release / docker (push) Successful in 8m43s
2025-04-09 01:29:54 +02:00
637a5f7ad5 fix: stylesheet
Some checks failed
Create Release / release (push) Successful in 2m1s
Create Release / docker (push) Failing after 8m22s
2025-04-09 00:50:15 +02:00
15ba3c060a Workflow Action for Docker Repo (#2)
Reviewed-on: #2
2024-11-03 17:57:37 +00:00
bde9a50767 fix: update dockerfile 2024-11-03 15:47:29 +01:00
60e221146a Actions Workflow Fix 2024-11-03 10:51:25 +01:00
d34f2adb2f fix: correct OCR language 2024-11-02 23:46:02 +01:00
a6f229e7fb minor: set folder for receipts 2024-08-26 19:43:47 +02:00
5a453a140e major: integrate ocr to pdf reader 2024-08-25 22:19:53 +02:00
0f6f76a66b fix: money can now be entered in decimal 2024-08-18 18:39:56 +02:00
62 changed files with 347 additions and 67 deletions

View File

@ -4,19 +4,47 @@ on:
tags:
- '*'
jobs:
build:
release:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@master
- name: Archive Server
uses: thedoctor0/zip-release@master
- uses: https://gitea.com/actions/checkout@master
- name: Zip Artifacts
uses: https://github.com/thedoctor0/zip-release@master
with:
type: 'zip'
filename: 'server.zip'
exclusions: '*.git*'
- name: Release Archive
uses: ncipollo/release-action@v1
uses: https://gitea.com/actions/gitea-release-action@v1
with:
allowUpdates: true
artifacts: "server.zip"
token: ${{ secrets.GITHUB_TOKEN }}
server_url: https://gitea.wpgcommunity.net
files: 'server.zip'
docker:
runs-on: ubuntu-latest
steps:
- uses: https://gitea.com/actions/checkout@master
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: |
${{ vars.DOCKER_REPO }}/costhive
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
with:
config-inline: |
[registry."${{ vars.DOCKER_REPO }}"]
http = true
insecure = true
- name: Login to Gitea Container Registry
uses: docker/login-action@v3
with:
registry: ${{ vars.DOCKER_REPO }}
username: ${{ vars.DOCKER_USER }}
password: ${{ secrets.DOCKER_PASS }}
- name: Build and push
uses: https://github.com/docker/build-push-action@v6
with:
push: true
context: .
tags: ${{ steps.meta.outputs.tags }}

1
.gitignore vendored
View File

@ -355,3 +355,4 @@ tests*
*.backup
Dockerfile.*
docker-compose.*
*debug.py

View File

@ -1,5 +1,5 @@
FROM python@sha256:21c9f0b22213295a13bd678c5b45aa587ff6cb01cd99b6cf0e6928f4c777006b
# python:3.11.4-slim-bullseye (arm/v7)
FROM python@sha256:c66cf219ac0083a9af2ff90e16530f16cd503c59eb7909feb3b8f3524dc1a87e
# python:3.12.2-slim-bullseye (amd64)
RUN useradd costhive
WORKDIR /home/costhive
@ -16,7 +16,7 @@ RUN python -m venv venv; \
COPY backend backend
ENV FLASK_APP run.py
ENV FLASK_APP=run.py
RUN chmod +x boot.sh; \
chown -R costhive:costhive .

View File

@ -4,6 +4,7 @@ from dotenv import load_dotenv
basedir = os.path.abspath(os.path.dirname(__file__))
load_dotenv(os.path.join(basedir, '.env'))
os.environ["TESSDATA_PREFIX"] = os.path.join(basedir, 'tessdata')
class Config(object):
SECRET_KEY = os.environ.get('SECRET_KEY') or "s0m37h!n6-obfu5c471ng"
SQLALCHEMY_DATABASE_URI = os.environ.get('DATABASE_URL', '').replace(
@ -19,3 +20,4 @@ class Config(object):
MAIL_PASSWORD = os.environ.get('MAIL_PASSWORD')
ADMINS = ['postmaster@wpgcommunity.net']
POSTS_PER_PAGE = 15
RECEIPT_FOLDER = f"{basedir}/../PDFReceipts"

Binary file not shown.

View File

@ -0,0 +1 @@
tessedit_create_alto 1

View File

@ -0,0 +1,7 @@
tessedit_ambigs_training 1
load_freq_dawg 0
load_punc_dawg 0
load_system_dawg 0
load_number_dawg 0
ambigs_debug_level 3
load_fixed_length_dawgs 0

View File

@ -0,0 +1 @@
tessedit_zero_rejection T

View File

@ -0,0 +1,5 @@
load_bigram_dawg True
tessedit_enable_bigram_correction True
tessedit_bigram_debug 3
save_raw_choices True
save_alt_choices True

View File

@ -0,0 +1,12 @@
disable_character_fragments T
file_type .bl
textord_fast_pitch_test T
tessedit_zero_rejection T
tessedit_minimal_rejection F
tessedit_write_rep_codes F
edges_children_fix F
edges_childarea 0.65
edges_boxarea 0.9
tessedit_resegment_from_boxes T
tessedit_train_from_boxes T
textord_no_rejects T

View File

@ -0,0 +1,13 @@
file_type .bl
#tessedit_use_nn F
textord_fast_pitch_test T
tessedit_zero_rejection T
tessedit_minimal_rejection F
tessedit_write_rep_codes F
edges_children_fix F
edges_childarea 0.65
edges_boxarea 0.9
tessedit_resegment_from_boxes T
tessedit_train_from_boxes T
#textord_repeat_extraction F
textord_no_rejects T

View File

@ -0,0 +1 @@
tessedit_char_whitelist 0123456789-.

View File

@ -0,0 +1 @@
tessedit_write_images T

View File

@ -0,0 +1,2 @@
tessedit_create_hocr 1
hocr_font_info 0

View File

@ -0,0 +1,2 @@
interactive_display_mode T
tessedit_display_outwords T

View File

@ -0,0 +1,4 @@
textord_skewsmooth_offset 8
textord_skewsmooth_offset2 8
textord_merge_desc 0.5
textord_no_rejects 1

View File

@ -0,0 +1,2 @@
tessedit_resegment_from_line_boxes 1
tessedit_make_boxes_from_boxes 1

View File

@ -0,0 +1 @@
debug_file tesseract.log

View File

@ -0,0 +1,11 @@
file_type .bl
textord_fast_pitch_test T
tessedit_zero_rejection T
tessedit_minimal_rejection F
tessedit_write_rep_codes F
edges_children_fix F
edges_childarea 0.65
edges_boxarea 0.9
tessedit_train_line_recognizer T
textord_no_rejects T
tessedit_init_config_only T

View File

@ -0,0 +1 @@
tessedit_create_lstmbox 1

View File

@ -0,0 +1,4 @@
stopper_debug_level 1
classify_debug_level 1
segsearch_debug_level 1
language_model_debug_level 3

View File

@ -0,0 +1 @@
tessedit_create_boxfile 1

View File

@ -0,0 +1 @@
tessedit_create_pdf 1

View File

@ -0,0 +1 @@
debug_file /dev/null

View File

@ -0,0 +1,2 @@
tessedit_resegment_from_boxes 1
tessedit_make_boxes_from_boxes 1

View File

@ -0,0 +1,12 @@
textord_show_blobs 0
textord_debug_tabfind 3
textord_tabfind_show_partitions 1
textord_tabfind_show_initial_partitions 1
textord_tabfind_show_columns 1
textord_tabfind_show_blocks 1
textord_tabfind_show_initialtabs 1
textord_tabfind_show_finaltabs 1
textord_tabfind_show_strokewidths 1
textord_tabfind_show_vlines 0
textord_tabfind_show_images 1
tessedit_dump_pageseg_images 0

View File

@ -0,0 +1 @@
tessedit_create_tsv 1

View File

@ -0,0 +1,3 @@
# This config file should be used with other config files which create renderers.
# usage example: tesseract eurotext.tif eurotext txt hocr pdf
tessedit_create_txt 1

View File

@ -0,0 +1,2 @@
tessedit_write_unlv 1
unlv_tilde_crunching T

View File

@ -0,0 +1 @@
tessedit_create_wordstrbox 1

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,2 @@
1-\d\d\d-GOOG-411
www.\n\\\*.com

View File

@ -0,0 +1,5 @@
the
quick
brown
fox
jumped

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1 @@
# No content needed as all defaults are correct.

View File

@ -0,0 +1,2 @@
chop_enable 0
wordrec_enable_assoc 0

View File

@ -0,0 +1,7 @@
#################################################
# Adaptive Matcher Using PreAdapted Templates
#################################################
classify_enable_adaptive_debugger 1
matcher_debug_flags 6
matcher_debug_level 1

View File

@ -0,0 +1,12 @@
#################################################
# Adaptive Matcher Using PreAdapted Templates
#################################################
classify_enable_adaptive_debugger 1
matcher_debug_flags 6
matcher_debug_level 1
wordrec_display_splits 0
wordrec_display_all_blobs 1
wordrec_display_segmentations 2
classify_debug_level 1

View File

@ -0,0 +1 @@

View File

@ -0,0 +1,9 @@
#################################################
# Adaptive Matcher Using PreAdapted Templates
#################################################
wordrec_display_splits 0
wordrec_display_all_blobs 1
wordrec_display_segmentations 2
classify_debug_level 1
stopper_debug_level 1

View File

@ -0,0 +1,38 @@
"""raise password char length
Revision ID: 782a2409df41
Revises: 926395732c3e
Create Date: 2025-06-03 21:01:23.169897
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
# `revision` is this migration's own ID ("Revision ID" in the module docstring).
revision = '782a2409df41'
# `down_revision` is the migration this one revises ("Revises" in the docstring).
down_revision = '926395732c3e'
# No branch labels and no cross-migration dependencies are declared.
branch_labels = None
depends_on = None
def upgrade():
    """Widen ``user.password_hash`` from VARCHAR(128) to String(255).

    The column stays NOT NULL (``existing_nullable=False``).
    """
    # ### commands auto generated by Alembic - please adjust! ###
    previous_type = sa.VARCHAR(length=128)
    widened_type = sa.String(length=255)
    with op.batch_alter_table('user', schema=None) as batch_op:
        batch_op.alter_column(
            'password_hash',
            existing_type=previous_type,
            type_=widened_type,
            existing_nullable=False,
        )
    # ### end Alembic commands ###
def downgrade():
    """Revert ``user.password_hash`` from String(255) back to VARCHAR(128).

    The column stays NOT NULL (``existing_nullable=False``).
    """
    # NOTE(review): narrowing the column may fail or truncate if hashes longer
    # than 128 characters were stored after the upgrade - confirm before use.
    # ### commands auto generated by Alembic - please adjust! ###
    widened_type = sa.String(length=255)
    previous_type = sa.VARCHAR(length=128)
    with op.batch_alter_table('user', schema=None) as batch_op:
        batch_op.alter_column(
            'password_hash',
            existing_type=widened_type,
            type_=previous_type,
            existing_nullable=False,
        )
    # ### end Alembic commands ###

View File

@ -0,0 +1,38 @@
"""raise bonid digits
Revision ID: 926395732c3e
Revises: 2a64d3b9235a
Create Date: 2024-08-24 10:33:39.109944
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
# `revision` is this migration's own ID ("Revision ID" in the module docstring).
revision = '926395732c3e'
# `down_revision` is the migration this one revises ("Revises" in the docstring).
down_revision = '2a64d3b9235a'
# No branch labels and no cross-migration dependencies are declared.
branch_labels = None
depends_on = None
def upgrade():
    """Raise the precision of ``receipt.bonid`` from NUMERIC(24, 0) to Numeric(28, 0).

    The column stays nullable (``existing_nullable=True``).
    """
    # ### commands auto generated by Alembic - please adjust! ###
    previous_type = sa.NUMERIC(precision=24, scale=0)
    widened_type = sa.Numeric(precision=28, scale=0)
    with op.batch_alter_table('receipt', schema=None) as batch_op:
        batch_op.alter_column(
            'bonid',
            existing_type=previous_type,
            type_=widened_type,
            existing_nullable=True,
        )
    # ### end Alembic commands ###
def downgrade():
    """Revert ``receipt.bonid`` from Numeric(28, 0) back to NUMERIC(24, 0).

    The column stays nullable (``existing_nullable=True``).
    """
    # NOTE(review): values with more than 24 digits stored after the upgrade
    # presumably no longer fit after this downgrade - confirm before use.
    # ### commands auto generated by Alembic - please adjust! ###
    widened_type = sa.Numeric(precision=28, scale=0)
    previous_type = sa.NUMERIC(precision=24, scale=0)
    with op.batch_alter_table('receipt', schema=None) as batch_op:
        batch_op.alter_column(
            'bonid',
            existing_type=widened_type,
            type_=previous_type,
            existing_nullable=True,
        )
    # ### end Alembic commands ###

View File

@ -4,7 +4,7 @@ from src import db
class Receipt(db.Model):
id = db.Column(db.BigInteger, primary_key=True, autoincrement=True)
date = db.Column(db.Date, nullable=False)
bonid = db.Column(db.Numeric(precision=24, scale=0), unique=True)
bonid = db.Column(db.Numeric(precision=28, scale=0), unique=True)
from_user = db.Column(db.ForeignKey("login_token.token"),
server_onupdate=db.FetchedValue())
registered = db.Column(db.Boolean, nullable=False,

View File

@ -9,7 +9,7 @@ from werkzeug.security import generate_password_hash, check_password_hash
class User(UserMixin, db.Model):
id = db.Column(db.BigInteger, primary_key=True, autoincrement=True)
email = db.Column(db.String(255), nullable=False, unique=True)
password_hash = db.Column(db.String(128), nullable=False)
password_hash = db.Column(db.String(255), nullable=False)
LoginToken = db.relationship("LoginToken", backref='User', lazy='dynamic')
Bought = db.relationship("Bought", secondary="login_token",

View File

@ -38,6 +38,8 @@ migrate = Migrate(transaction_per_migration=True)
def create_app(config_class=Config):
if not exists(config_class.RECEIPT_FOLDER):
makedirs(config_class.RECEIPT_FOLDER)
app = Flask(__name__, template_folder="../web/templates", static_folder="../web/static")
app.config.from_object(config_class)
bootstrap.init_app(app)

View File

@ -1,12 +1,12 @@
from models import LoginToken
from flask_wtf import FlaskForm
from wtforms import DateField, FloatField, IntegerField, SelectField, SelectMultipleField, StringField, SubmitField
from wtforms.validators import DataRequired, Optional
from wtforms import DateField, DecimalField, SelectField, SubmitField
from wtforms.validators import DataRequired
class NewPaymentForm(FlaskForm):
token = SelectField("User", validators=[DataRequired()], render_kw={"class": "form-control"})
date = DateField("Date", validators=[DataRequired()], render_kw={"class": "form-control"})
amount = IntegerField("Amount (in ct)", validators=[DataRequired()], render_kw={"class": "form-control"})
amount = DecimalField("Amount (€)", validators=[DataRequired()], render_kw={"class": "form-control"})
submit = SubmitField("Submit", render_kw={"class": "btn btn-primary mt-3"})
@classmethod

View File

@ -17,7 +17,7 @@ def insert_payment(establishment_id: int):
if form.validate_on_submit():
new_payment = Payment(token = form.token.data,
date = form.date.data,
amount = form.amount.data)
amount = int(form.amount.data*100))
db.session.add(new_payment)
db.session.commit()
return redirect(url_for("main.index"))

View File

@ -10,5 +10,4 @@ def show_item(item: int):
itemschema = ItemSchema().dump(itemobj)
itemschema['PriceChange'].sort(key=lambda d: d['date'], reverse=True)
itemschema['AmountChange'].sort(key=lambda d: d['date'], reverse=True)
print(itemschema)
return render_template('item/details/show_item.html', item = itemschema)

View File

@ -73,6 +73,4 @@ class CheckItemsForm(FlaskForm):
item['itemname'], item['price'], item['amount'] if 'amount' in item else 1, 0))
check_items = CheckItems(check_items_entry)
form = cls(obj=check_items)
print(f"{form.items.entries}")
return form

View File

@ -1,5 +1,5 @@
from datetime import date
from flask import abort, request, url_for
from flask import abort, current_app, request, url_for
from flask_login import current_user, login_required
from . import bp
from .forms import CheckCustomItemsEntryForm, CheckItemsEntryForm, CheckItemsForm, get_choices
@ -9,16 +9,16 @@ from models import AmountChange, Item, LoginToken, PriceChange, Receipt, Receipt
from src.utils.modules.receipt_parser.pdf_receipt_parser import PDFReceipt
from src.utils.routes_utils import render_custom_template as render_template
PDFDir = "./"
@bp.route('/<int:receipt_id>', methods=['GET', 'POST'])
@login_required
def confirm_receipt_items(receipt_id: int):
"""Check items from a receipt if they should be accounted for payment.
Get those items from the receipt PDF itself."""
PDFDir: str = current_app.config["RECEIPT_FOLDER"]
receipt_details: Receipt = Receipt.query.get(receipt_id)
if current_user.is_authenticated and current_user.id == receipt_details.LoginToken.Establishment.owner:
receipt: PDFReceipt = PDFReceipt.getPDFReceiptFromFile(PDFDir + f"{receipt_details.id}.pdf")
receipt: PDFReceipt = PDFReceipt.getPDFReceiptFromFile(PDFDir + f"/{receipt_details.id}.pdf")
form: CheckItemsForm = CheckItemsForm.new(receipt.items)
_template = CheckCustomItemsEntryForm(prefix="custom_items-_-")
# TODO: Precheck if items are already in database. If yes, check if item is present only once or multiple

View File

@ -1,4 +1,5 @@
from datetime import date
from decimal import Decimal
from models import AmountChange, Item, PriceChange, Receipt, ReceiptItem
from src import db, LOGGER
@ -35,7 +36,7 @@ def insert_existing_item(formitemdict: dict[str: str], receipt_date: date = None
db.session.commit()
def insert_item_to_receipt(receipt: Receipt, item_dict: dict[str: str], item_index:int=0):
receipt.ReceiptItem.append(ReceiptItem(item=item_index, name=item_dict.get('itemname'), amount=item_dict.get('amount'), price=int(str(item_dict.get('price')).replace(',','').replace('.', ''))))
receipt.ReceiptItem.append(ReceiptItem(item=item_index, name=item_dict.get('itemname'), amount=item_dict.get('amount'), price=int(item_dict.get('price')*100)))
db.session.add(receipt)
db.session.commit()

View File

@ -1,5 +1,5 @@
from flask_wtf import FlaskForm
from flask_wtf.file import FileAllowed, FileField, FileRequired
from flask_wtf.file import FileAllowed, FileField
from wtforms import DateField, SelectField, SubmitField
from models import Establishment

View File

@ -1,4 +1,4 @@
from flask import abort, redirect, request, url_for
from flask import abort, current_app, redirect, request, url_for
from flask_login import current_user, login_required
from os import rename
from werkzeug.utils import secure_filename
@ -10,11 +10,11 @@ from models.login_token import LoginToken
from src.utils.modules.receipt_parser.pdf_receipt_parser import PDFReceipt
from src.utils.routes_utils import render_custom_template as render_template
PDFDir = "./"
@bp.route('/<int:establishment>', methods=['GET', 'POST'])
@login_required
def upload_receipt(establishment: int):
"""Upload of a receipt."""
PDFDir: str = current_app.config["RECEIPT_FOLDER"]
if current_user.is_anonymous:
abort(403)
if LoginToken.query.filter_by(establishment=establishment, user=current_user.id).first():
@ -39,8 +39,8 @@ def upload_receipt(establishment: int):
db.session.add(dbReceipt)
db.session.commit()
if pdfReceipt:
rename(f"{PDFDir}/temp.pdf", f"{PDFDir}{secure_filename(f'{dbReceipt.id}.pdf')}")
LOGGER.debug(receipt.text)
rename(f"{PDFDir}/temp.pdf", f"{PDFDir}/{secure_filename(f'{dbReceipt.id}.pdf')}")
LOGGER.debug(receipt.words)
return redirect(url_for("receipts.check_items.confirm_receipt_items", receipt_id = dbReceipt.id))
else:
LOGGER.debug(form.errors)

View File

@ -0,0 +1,20 @@
from datetime import datetime
def getDictFromWords(words: list[tuple]):
    """Extract date, receipt number and items from the words of an Edeka receipt.

    Every entry of ``words`` is read positionally: index 4 is the word's
    text, index 5 identifies the group a word belongs to (a change of this
    value starts a new item), index 6 is the line index inside that group
    and index 7 the word index inside that line.
    NOTE(review): presumably these are PyMuPDF "words" tuples
    ``(x0, y0, x1, y1, text, block_no, line_no, word_no)`` - confirm.

    Args:
        words: Word tuples of the whole receipt, in reading order.

    Returns:
        A dict with the keys ``"items"`` (list of dicts holding ``"itemname"``
        and, when found, ``"price"`` as the raw text before the first ``*``),
        ``"date"`` (``datetime.date`` parsed as ``dd.mm.yy`` from the
        sixth-to-last word) and ``"bonid"`` (text of the last word).

    Raises:
        IndexError: If ``words`` holds fewer than six entries.
        ValueError: If the sixth-to-last word is not a ``dd.mm.yy`` date.
    """
    results = {"items": []}
    results['date'] = datetime.strptime(words[-6][4], "%d.%m.%y").date()
    results['bonid'] = words[-1][4]
    items = results['items']
    # ``None`` can never equal a real group number, so the very first word
    # always opens an item - even when its group number is 0.
    current_block = None
    skipwords = 14  # words in front of the item list that are ignored
    for word in words[skipwords:]:
        text = word[4]
        block_no = word[5]
        line_no = word[6]
        if block_no != current_block:
            items.append({"itemname": text})
            current_block = block_no
        elif line_no == 0:
            # Further words on the first line extend the item name.
            items[-1]["itemname"] += " " + text
        if line_no == 1 and word[7] == 1:
            # Only the part before the first "*" is kept as the price.
            items[-1]["price"] = text.split("*")[0]
        if "----" in text:
            # The dashed separator ends the item list; drop the entry that
            # was just opened (or extended) by the separator itself.
            del items[-1]
            break
    return results

View File

@ -0,0 +1,27 @@
from datetime import datetime
def getDictFromWords(words: list[tuple]):
    """Extract receipt number, items and date from the words of a Kaufland receipt.

    Every entry of ``words`` is read positionally: index 4 is the word's
    text, index 5 identifies the group a word belongs to (a change of this
    value starts a new item), index 6 is the line index inside that group
    and index 7 the word index inside that line.
    NOTE(review): presumably these are PyMuPDF "words" tuples
    ``(x0, y0, x1, y1, text, block_no, line_no, word_no)`` - confirm.

    Args:
        words: Word tuples of the whole receipt, in reading order.

    Returns:
        A dict with the keys ``"items"`` (list of dicts holding ``"itemname"``
        and, when found, ``"price"`` and ``"amount"`` as raw text) and
        ``"bonid"`` (text of the last word). ``"date"`` (``datetime.date``)
        is only present when a ``datum:`` label followed by another word
        was found.

    Raises:
        IndexError: If ``words`` is empty.
        ValueError: If the word after ``datum:`` is not a ``dd.mm.YYYY`` date.
    """
    results = {"items": []}
    results['bonid'] = words[-1][4]
    items = results['items']
    # ``None`` can never equal a real group number, so the very first word
    # always opens an item - even when its group number is 0.
    current_block = None
    skipwords = 9  # words in front of the item list that are ignored
    for index, word in enumerate(words[skipwords:], start=skipwords):
        text = word[4]
        block_no = word[5]
        line_no = word[6]
        if block_no != current_block:
            items.append({"itemname": text})
            current_block = block_no
        elif line_no == 0:
            # Further words on the first line extend the item name.
            items[-1]["itemname"] += " " + text
        if line_no == 1 and word[7] == 0:
            if text.lower() == "x":
                # The word right after the "x" marker is stored as the
                # amount; skip it when the marker is the very last word.
                if index + 1 < len(words):
                    items[-1]["amount"] = words[index + 1][4]
            else:
                items[-1]["price"] = text
        elif line_no == 2:
            items[-1]["price"] = text
        if text.lower() == "gesamt":
            # The total line ends the item list; drop the entry that was
            # just opened (or extended) by the total line itself.
            del items[-1]
            break
    # The date is the word following the first "datum:" label. The final
    # word is excluded from the search because nothing can follow it.
    for index, word in enumerate(words[:-1]):
        if word[4].lower() == "datum:":
            results['date'] = datetime.strptime(words[index + 1][4], "%d.%m.%Y").date()
            break
    return results

View File

@ -1,5 +1,7 @@
import fitz
from datetime import datetime, date
from datetime import date
from .edeka.edeka_parser import getDictFromWords as edekaparser
from .kaufland.kaufland_parser import getDictFromWords as kauflandparser
from re import search
class PDFReceipt:
@ -10,54 +12,47 @@ class PDFReceipt:
parser -- A keyword in lowercase to tell how the receipt is formatted.
Currently supported: 'edeka'
"""
def __init__(self, bPDFFile, parser: str = "edeka") -> None:
def __init__(self, strPDFFile) -> None:
try:
self.text = PDFReceipt._getTextFromPDF(bPDFFile)
self.id, self.date, self.items = PDFReceipt._getInfosFromText(self.text, parser)
self.words = PDFReceipt._getWordsFromPDF(strPDFFile)
storename = PDFReceipt._getStoreName(self.words)
self.id, self.date, self.items = PDFReceipt._getInfosFromText(self.words, store = storename)
except:
self.text = "PDF konnte nicht geladen werden."
self.words = "PDF konnte nicht geladen werden."
self.date = date.today()
self.id = None
self.items = []
def _getTextFromPDF(file):
def _getWordsFromPDF(file):
with fitz.open(file, filetype="pdf") as doc:
text = ""
words = []
for page in doc:
text += page.get_text()
return text.strip()
words.extend(page.get_text("words", textpage=page.get_textpage_ocr(language = 'deu'), sort=True))
return words
def _getItemsTextFromText(text, start="", end=""):
return text[text.index(start)+len(start):text.index(end)].strip()
def _getStoreName(words: list[tuple]) -> str:
for word in words:
if word[4].lower() in ("edeka", "kaufland"):
return word[4].lower()
return "unknown"
def _convertItemsTextToDict(text):
temp = text.split("\n")
resultsArr = []
i = 0
while i < len(temp):
if search("(\d+) x", temp[i]):
resultsArr.append({"itemname": temp[i+2], "price": temp[i+1], "amount": temp[i][:-2]})
i += 4
else:
resultsArr.append({"itemname": temp[i], "price": temp[i+1][:-2]})
i += 2
return resultsArr
def _getInfosFromText(text: str, parser: str = "edeka"):
if parser.lower() == "edeka":
items = PDFReceipt._convertItemsTextToDict(PDFReceipt._getItemsTextFromText(text, start="EUR", end="----------"))
strDate = text.split("\n")[-1].split(" ")[0]
date = datetime.strptime(strDate, "%d.%m.%y").date()
strReceiptNumber = text.split("\n")[-1].split(" ")[-1]
try:
intReceiptNumber = int(strReceiptNumber)
except:
raise ValueError("Receipt Number not an integer.")
def _getInfosFromText(words: str, store: str = "edeka"):
if store == "edeka":
result = edekaparser(words)
elif store == "kaufland":
result = kauflandparser(words)
items = result.get("items")
date = result.get("date")
strReceiptNumber = result.get("bonid")
try:
intReceiptNumber = int(strReceiptNumber)
except:
raise ValueError("Receipt Number not an integer.")
return (intReceiptNumber, date, items)
def getPDFReceiptFromFile(strPDFFile: str, parser: str = "edeka"):
def getPDFReceiptFromFile(strPDFFile: str):
try:
with open(strPDFFile) as doc:
return PDFReceipt(doc, parser)
return PDFReceipt(doc)
except FileNotFoundError as e:
return PDFReceipt(None)