fix: [ocr] fix OCR supported languages (sanitize languages in the caller instead of inside get_model_languages)

This commit is contained in:
terrtia 2024-04-26 10:49:24 +02:00
parent 1d1671c00f
commit 5503d8134a
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0

View file

@ -24,7 +24,7 @@ from lib.objects import Ocrs
# Default to eng # Default to eng
def get_model_languages(obj, ocr_languages, add_en=True): def get_model_languages(obj, add_en=True):
if add_en: if add_en:
model_languages = {'en'} model_languages = {'en'}
else: else:
@ -54,8 +54,6 @@ def get_model_languages(obj, ocr_languages, add_en=True):
model_languages.add(lang) model_languages.add(lang)
return model_languages return model_languages
model_languages = Ocrs.sanityze_ocr_languages(model_languages, ocr_languages=ocr_languages)
return model_languages return model_languages
# TODO thread # TODO thread
@ -100,7 +98,8 @@ class OcrExtractor(AbstractModule):
if not ocr.exists(): if not ocr.exists():
path = image.get_filepath() path = image.get_filepath()
languages = get_model_languages(image, self.ocr_languages) languages = get_model_languages(image)
languages = Ocrs.sanityze_ocr_languages(languages, ocr_languages=self.ocr_languages)
print(image.id, languages) print(image.id, languages)
texts = Ocrs.extract_text(path, languages) texts = Ocrs.extract_text(path, languages)
if texts: if texts: