fix: correct OCR language
This commit is contained in:
parent
a6f229e7fb
commit
d34f2adb2f
BIN
backend/configs/tessdata/deu.traineddata
Normal file
BIN
backend/configs/tessdata/deu.traineddata
Normal file
Binary file not shown.
Binary file not shown.
@@ -27,7 +27,7 @@ class PDFReceipt:
|
|||||||
with fitz.open(file, filetype="pdf") as doc:
|
with fitz.open(file, filetype="pdf") as doc:
|
||||||
words = []
|
words = []
|
||||||
for page in doc:
|
for page in doc:
|
||||||
words.extend(page.get_text("words", textpage=page.get_textpage_ocr(), sort=True))
|
words.extend(page.get_text("words", textpage=page.get_textpage_ocr(language = 'deu'), sort=True))
|
||||||
return words
|
return words
|
||||||
|
|
||||||
def _getStoreName(words: list[tuple]) -> str:
|
def _getStoreName(words: list[tuple]) -> str:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user