diff --git a/backend/configs/tessdata/deu.traineddata b/backend/configs/tessdata/deu.traineddata new file mode 100644 index 0000000..af9cc91 Binary files /dev/null and b/backend/configs/tessdata/deu.traineddata differ diff --git a/backend/configs/tessdata/eng.traineddata b/backend/configs/tessdata/eng.traineddata index af9cc91..bbef467 100644 Binary files a/backend/configs/tessdata/eng.traineddata and b/backend/configs/tessdata/eng.traineddata differ diff --git a/backend/src/utils/modules/receipt_parser/pdf_receipt_parser.py b/backend/src/utils/modules/receipt_parser/pdf_receipt_parser.py index 92293d7..9e8d6e9 100644 --- a/backend/src/utils/modules/receipt_parser/pdf_receipt_parser.py +++ b/backend/src/utils/modules/receipt_parser/pdf_receipt_parser.py @@ -27,7 +27,7 @@ class PDFReceipt: with fitz.open(file, filetype="pdf") as doc: words = [] for page in doc: - words.extend(page.get_text("words", textpage=page.get_textpage_ocr(), sort=True)) + words.extend(page.get_text("words", textpage=page.get_textpage_ocr(language = 'deu'), sort=True)) return words def _getStoreName(words: list[tuple]) -> str: