ail-framework/bin/modules/OcrExtractor.py

98 lines
2.4 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
The OcrExtractor Module
======================
"""
##################################
# Import External packages
##################################
import os
import sys
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from modules.abstract_module import AbstractModule
from lib import chats_viewer
from lib.objects import Messages
from lib.objects import Ocrs
# Languages handed to the OCR model; English ('en') is included by default.
def get_model_languages(obj, add_en=True):
    """
    Build the set of languages to use when running OCR on ``obj``.

    The language is looked up on the objects correlated with ``obj``, in this
    order: 'message', then 'chat-subchannel', then 'chat'. The first lookup
    that yields a language wins; later ones are not queried.

    :param obj: object exposing ``get_first_correlation(obj_type)``
    :param add_en: when true, the returned set always contains 'en'
    :return: set of languages ({'en'} or an empty set when nothing is found)
    """
    languages = {'en'} if add_en else set()

    # 1) Language of the correlated message, if any.
    message_ref = obj.get_first_correlation('message')
    if message_ref:
        # Keep only the part after the second ':'
        # (presumably a 'type:subtype:id' global id - TODO confirm).
        message_id = message_ref.split(':', 2)[-1]
        detected = Messages.Message(message_id).get_language()
        if detected:
            languages.add(detected)
            return languages

    # 2) Fall back on the main language of the sub-channel, then of the chat.
    for chat_type in ('chat-subchannel', 'chat'):
        chat_ref = obj.get_first_correlation(chat_type)
        if not chat_ref:
            continue
        chat = chats_viewer.get_obj_chat_from_global_id(chat_ref)
        detected = chat.get_main_language()
        if detected:
            languages.add(detected)
            return languages

    # Nothing found: only the default (possibly empty) set.
    return languages
# TODO thread
class OcrExtractor(AbstractModule):
    """
    OcrExtractor for AIL framework

    For each object handed to the module, extracts text from its file with
    ``Ocrs.extract_text`` and, when text is found, creates the matching Ocr
    object and pushes it to the module queue.
    """

    def __init__(self):
        super().__init__()

        # Waiting time in seconds between two processed messages
        self.pending_seconds = 1

        # Send module state to logs
        self.logger.info(f'Module {self.module_name} initialized')

    def compute(self, message):
        """
        Run OCR on the current object and queue the resulting Ocr object.

        :param message: not used here; the object to process is obtained
                        through ``self.get_obj()``
        """
        # Presumably an image object (see the debug snippet at the end of
        # the file) - TODO confirm.
        img = self.get_obj()
        filepath = img.get_filepath()
        print(img)

        model_languages = get_model_languages(img)
        print(model_languages)

        ocr = Ocrs.Ocr(img.id)
        # NOTE(review): delete() is called right before the exists() check,
        # which looks like it forces a fresh extraction every time -
        # confirm this is intended and not debugging residue.
        ocr.delete()
        if ocr.exists():
            return

        extracted = Ocrs.extract_text(filepath, model_languages)
        if not extracted:
            # No text extracted: nothing to create or forward.
            return

        ocr = Ocrs.create(img.id, extracted)
        self.add_message_to_queue(ocr)
if __name__ == '__main__':
    # Script entry point: instantiate the module and call run(), which is
    # not defined in this file (inherited through AbstractModule).
    module = OcrExtractor()
    module.run()

    # Manual debugging on a single object (kept disabled):
    # from lib.objects import Images
    # module.obj = Images.Image('')
    # module.compute('')