fix: correct OCR language

2024-11-02 23:46:02 +01:00 · 2024-11-02 23:46:02 +01:00 · d34f2adb2f
commit d34f2adb2f
parent a6f229e7fb
3 changed files with 1 addition and 1 deletion
--- a/backend/configs/tessdata/deu.traineddata
+++ b/backend/configs/tessdata/deu.traineddata
--- a/backend/configs/tessdata/eng.traineddata
+++ b/backend/configs/tessdata/eng.traineddata
--- a/backend/src/utils/modules/receipt_parser/pdf_receipt_parser.py
+++ b/backend/src/utils/modules/receipt_parser/pdf_receipt_parser.py
@@ -27,7 +27,7 @@ class PDFReceipt:
        with fitz.open(file, filetype="pdf") as doc:
            words = []
            for page in doc:
-                words.extend(page.get_text("words", textpage=page.get_textpage_ocr(), sort=True))
+                words.extend(page.get_text("words", textpage=page.get_textpage_ocr(language = 'deu'), sort=True))
        return words
    def _getStoreName(words: list[tuple]) -> str: