mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-25 15:27:17 +00:00
merge lang
This commit is contained in:
commit
9d481bd0b0
13 changed files with 364 additions and 96 deletions
|
@ -206,8 +206,7 @@ class AbstractChatFeeder(DefaultFeeder, ABC):
|
|||
subchannel = ChatSubChannels.ChatSubChannel(f'{self.get_chat_id()}/{meta["id"]}', self.get_chat_instance_uuid())
|
||||
thread = None
|
||||
|
||||
# TODO correlation with obj = message/image
|
||||
subchannel.add(date)
|
||||
subchannel.add(date, obj)
|
||||
|
||||
if meta.get('date'): # TODO check if already exists
|
||||
subchannel.set_created_at(int(meta['date']['timestamp']))
|
||||
|
@ -358,7 +357,58 @@ class AbstractChatFeeder(DefaultFeeder, ABC):
|
|||
# CHAT
|
||||
chat_objs = self.process_chat(new_objs, obj, date, timestamp, reply_id=reply_id)
|
||||
|
||||
# Message forward
|
||||
# # TODO HANDLE OTHERS OBJECT TYPE
|
||||
# # TODO MAKE IT GENERIC FOR OTHERS CHATS !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
# # Message forward + Discussion
|
||||
# if self.get_json_meta().get('forward'):
|
||||
# discussion_id = self.get_json_meta().get('discussion')
|
||||
# forward_from = self.get_message_forward()
|
||||
#
|
||||
# if discussion_id: # TODO HANDLE FORWARDED MESSAGES FROM EXTERNAL CHANNELS
|
||||
# chat_forward_id = forward_from['from']['id']
|
||||
# message_forward_id = forward_from['from']['channel_post']
|
||||
#
|
||||
# # if chat_forward_id == discussion_id:
|
||||
# # linked_chat = Chat(chat_forward_id, self.get_chat_instance_uuid())
|
||||
# # if linked_chat.exists():
|
||||
# # # create thread
|
||||
# # # add message replies for each childrens
|
||||
#
|
||||
# # TODO HANDLE THREAD
|
||||
# # TODO Change FORWARD META FIELDS
|
||||
# # meta['forward'] = {}
|
||||
# # # CHAT ID
|
||||
# # # SUBCHANNEL ID -> can be None
|
||||
# # # Message ID
|
||||
#
|
||||
# # meta['forward']['origin']
|
||||
# # # same as 'forward'
|
||||
#
|
||||
# if self.get_json_meta().get('forward'):
|
||||
# forward = self.get_message_forward()
|
||||
# f_chat = forward['chat']
|
||||
# f_subchannel = forward.get('subchannel')
|
||||
# f_id = forward.get('id')
|
||||
# if not f_subchannel:
|
||||
# chat_forward = Chat(f_chat, self.get_chat_instance_uuid())
|
||||
# if chat_forward.exists():
|
||||
# for chat_obj in chat_objs:
|
||||
# if chat_obj.type == 'chat':
|
||||
# chat_forward.add_relationship(chat_obj.get_global_id(), 'forward')
|
||||
# # TODO LIST FORWARDED MESSAGES
|
||||
#
|
||||
#
|
||||
# # Discord -> serverID + subchannel ID + message ID
|
||||
# # Telegram -> chat ID + Message ID
|
||||
# # + ORIGIN IDs
|
||||
#
|
||||
#
|
||||
#
|
||||
# # TODO create relationships graph
|
||||
#
|
||||
#
|
||||
# # TODO REMOVE ME
|
||||
# # Message forward # TODO handle subchannel + message ID
|
||||
# if self.get_json_meta().get('forward'):
|
||||
# forward_from = self.get_message_forward()
|
||||
# print('-----------------------------------------------------------')
|
||||
|
|
|
@ -7,6 +7,7 @@ import sys
|
|||
import html2text
|
||||
|
||||
import gcld3
|
||||
from lexilang.detector import detect as lexilang_detect
|
||||
from libretranslatepy import LibreTranslateAPI
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
|
@ -264,7 +265,10 @@ def _get_html2text(content, ignore_links=False):
|
|||
h = html2text.HTML2Text()
|
||||
h.ignore_links = ignore_links
|
||||
h.ignore_images = ignore_links
|
||||
return h.handle(content)
|
||||
content = h.handle(content)
|
||||
if content == '\n\n':
|
||||
content = ''
|
||||
return content
|
||||
|
||||
def _clean_text_to_translate(content, html=False, keys_blocks=True):
|
||||
if html:
|
||||
|
@ -323,67 +327,105 @@ def get_objs_languages(obj_type, obj_subtype=''):
|
|||
def get_obj_languages(obj_type, obj_subtype, obj_id):
|
||||
return r_lang.smembers(f'obj:lang:{obj_type}:{obj_subtype}:{obj_id}')
|
||||
|
||||
def get_obj_language_stats(obj_type, obj_subtype, obj_id):
|
||||
return r_lang.zrange(f'obj:langs:stat:{obj_type}:{obj_subtype}:{obj_id}', 0, -1, withscores=True)
|
||||
|
||||
# TODO ADD language to CHAT GLOBAL SET
|
||||
def add_obj_language(language, obj_type, obj_subtype, obj_id): # (s)
|
||||
def add_obj_language(language, obj_type, obj_subtype, obj_id, objs_containers=set()): # (s)
|
||||
if not obj_subtype:
|
||||
obj_subtype = ''
|
||||
obj_global_id = f'{obj_type}:{obj_subtype}:{obj_id}'
|
||||
|
||||
r_lang.sadd(f'objs:langs:{obj_type}', language)
|
||||
r_lang.sadd(f'objs:lang:{obj_type}:{obj_subtype}', language)
|
||||
r_lang.sadd(f'obj:lang:{obj_global_id}', language)
|
||||
new = r_lang.sadd(f'obj:lang:{obj_global_id}', language)
|
||||
|
||||
r_lang.sadd(f'languages:{language}', f'{obj_type}:{obj_subtype}') ################### REMOVE ME ???
|
||||
r_lang.sadd(f'langs:{obj_type}:{obj_subtype}:{language}', obj_global_id)
|
||||
|
||||
def remove_obj_language(language, obj_type, obj_subtype, obj_id):
|
||||
if new:
|
||||
for global_id in objs_containers:
|
||||
r_lang.zincrby(f'obj:langs:stat:{global_id}', 1, language)
|
||||
|
||||
|
||||
def remove_obj_language(language, obj_type, obj_subtype, obj_id, objs_containers=set()):
|
||||
if not obj_subtype:
|
||||
obj_subtype = ''
|
||||
obj_global_id = f'{obj_type}:{obj_subtype}:{obj_id}'
|
||||
r_lang.srem(f'obj:lang:{obj_global_id}', language)
|
||||
rem = r_lang.srem(f'obj:lang:{obj_global_id}', language)
|
||||
|
||||
delete_obj_translation(obj_global_id, language)
|
||||
|
||||
r_lang.srem(f'langs:{obj_type}:{obj_subtype}:{language}', obj_global_id)
|
||||
if not r_lang.exists(f'langs:{obj_type}:{obj_subtype}:{language}'):
|
||||
r_lang.srem(f'objs:lang:{obj_type}:{obj_subtype}', language)
|
||||
r_lang.srem(f'languages:{language}', f'{obj_type}:{obj_subtype}')
|
||||
if not r_lang.exists(f'objs:lang:{obj_type}:{obj_subtype}'):
|
||||
if r_lang.scard(f'objs:langs:{obj_type}', language) <= 1:
|
||||
if r_lang.scard(f'objs:langs:{obj_type}') <= 1:
|
||||
r_lang.srem(f'objs:langs:{obj_type}', language)
|
||||
|
||||
def edit_obj_language(language, obj_type, obj_subtype, obj_id):
|
||||
remove_obj_language(language, obj_type, obj_subtype, obj_id)
|
||||
add_obj_language(language, obj_type, obj_subtype, obj_id)
|
||||
if rem:
|
||||
for global_id in objs_containers:
|
||||
r = r_lang.zincrby(f'obj:langs:stat:{global_id}', -1, language)
|
||||
if r < 1:
|
||||
r_lang.zrem(f'obj:langs:stat:{global_id}', language)
|
||||
|
||||
# TODO handle fields
|
||||
def detect_obj_language(obj_type, obj_subtype, obj_id, content, objs_containers=set()):
|
||||
detector = LanguagesDetector(nb_langs=1)
|
||||
language = detector.detect(content)
|
||||
if language:
|
||||
language = language[0]
|
||||
previous_lang = get_obj_languages(obj_type, obj_subtype, obj_id)
|
||||
if previous_lang:
|
||||
previous_lang = previous_lang.pop()
|
||||
if language != previous_lang:
|
||||
remove_obj_language(previous_lang, obj_type, obj_subtype, obj_id, objs_containers=objs_containers)
|
||||
add_obj_language(language, obj_type, obj_subtype, obj_id, objs_containers=objs_containers)
|
||||
else:
|
||||
add_obj_language(language, obj_type, obj_subtype, obj_id, objs_containers=objs_containers)
|
||||
return language
|
||||
|
||||
## Translation
|
||||
def _get_obj_translation(obj_global_id, language, field=''):
|
||||
def r_get_obj_translation(obj_global_id, language, field=''):
|
||||
return r_lang.hget(f'tr:{obj_global_id}:{field}', language)
|
||||
|
||||
def get_obj_translation(obj_global_id, language, source=None, content=None, field=''):
|
||||
def _get_obj_translation(obj_global_id, language, source=None, content=None, field='', objs_containers=set()):
|
||||
"""
|
||||
Returns translated content
|
||||
"""
|
||||
translation = r_cache.get(f'translation:{language}:{obj_global_id}:{field}')
|
||||
r_cache.expire(f'translation:{language}:{obj_global_id}:{field}', 0)
|
||||
if translation:
|
||||
# DEBUG
|
||||
# print('cache')
|
||||
# r_cache.expire(f'translation:{language}:{obj_global_id}:{field}', 0)
|
||||
return translation
|
||||
# TODO HANDLE FIELDS TRANSLATION
|
||||
translation = _get_obj_translation(obj_global_id, language, field=field)
|
||||
translation = r_get_obj_translation(obj_global_id, language, field=field)
|
||||
if not translation:
|
||||
translation = LanguageTranslator().translate(content, source=source, target=language)
|
||||
source, translation = LanguageTranslator().translate(content, source=source, target=language)
|
||||
if source:
|
||||
obj_type, subtype, obj_id = obj_global_id.split(':', 2)
|
||||
add_obj_language(source, obj_type, subtype, obj_id, objs_containers=objs_containers)
|
||||
if translation:
|
||||
r_cache.set(f'translation:{language}:{obj_global_id}:{field}', translation)
|
||||
r_cache.expire(f'translation:{language}:{obj_global_id}:{field}', 300)
|
||||
return translation
|
||||
|
||||
def get_obj_translation(obj_global_id, language, source=None, content=None, field='', objs_containers=set()):
|
||||
return _get_obj_translation(obj_global_id, language, source=source, content=content, field=field, objs_containers=objs_containers)
|
||||
|
||||
|
||||
# TODO Force to edit ????
|
||||
|
||||
def set_obj_translation(obj_global_id, language, translation, field=''):
|
||||
r_cache.delete(f'translation:{language}:{obj_global_id}:')
|
||||
return r_lang.hset(f'tr:{obj_global_id}:{field}', language, translation)
|
||||
|
||||
def delete_obj_translation(obj_global_id, language, field=''):
|
||||
r_cache.delete(f'translation:{language}:{obj_global_id}:')
|
||||
r_lang.hdel(f'tr:{obj_global_id}:{field}', language)
|
||||
|
||||
## --LANGUAGE ENGINE-- ##
|
||||
|
||||
|
@ -410,11 +452,22 @@ class LanguagesDetector:
|
|||
if self.min_len > 0:
|
||||
if len(content) < self.min_len:
|
||||
return languages
|
||||
# p = self.detector.FindTopNMostFreqLangs(content, num_langs=3)
|
||||
# for lang in p:
|
||||
# print(lang.language, lang.probability, lang.proportion, lang.is_reliable)
|
||||
# print('------------------------------------------------')
|
||||
for lang in self.detector.FindTopNMostFreqLangs(content, num_langs=self.nb_langs):
|
||||
if lang.proportion >= self.min_proportion and lang.probability >= self.min_probability and lang.is_reliable:
|
||||
languages.append(lang.language)
|
||||
return languages
|
||||
|
||||
def detect_lexilang(self, content): # TODO clean text ??? - TODO REMOVE SEPARATOR
|
||||
language, prob = lexilang_detect(content)
|
||||
if prob > 0:
|
||||
return [language]
|
||||
else:
|
||||
return []
|
||||
|
||||
def detect_libretranslate(self, content):
|
||||
languages = []
|
||||
try:
|
||||
|
@ -431,19 +484,35 @@ class LanguagesDetector:
|
|||
languages.append(language)
|
||||
return languages
|
||||
|
||||
def detect(self, content, force_gcld3=False):
|
||||
def detect(self, content, force_gcld3=False): # TODO detect length between 20-200 ????
|
||||
if not content:
|
||||
return None
|
||||
content = _clean_text_to_translate(content, html=True)
|
||||
if not content:
|
||||
return None
|
||||
# DEBUG
|
||||
# print('-------------------------------------------------------')
|
||||
# print(content)
|
||||
# print(len(content))
|
||||
# lexilang
|
||||
if len(content) < 150:
|
||||
# print('lexilang')
|
||||
languages = self.detect_lexilang(content)
|
||||
# gcld3
|
||||
if len(content) >= 200 or not self.lt or force_gcld3:
|
||||
language = self.detect_gcld3(content)
|
||||
# libretranslate
|
||||
else:
|
||||
language = self.detect_libretranslate(content)
|
||||
return language
|
||||
# if len(content) >= 200 or not self.lt or force_gcld3:
|
||||
# print('gcld3')
|
||||
languages = self.detect_gcld3(content)
|
||||
# libretranslate
|
||||
# else:
|
||||
# languages = self.detect_libretranslate(content)
|
||||
return languages
|
||||
|
||||
class LanguageTranslator:
|
||||
|
||||
def __init__(self):
|
||||
self.lt = LibreTranslateAPI(get_translator_instance())
|
||||
self.ld = LanguagesDetector(nb_langs=1)
|
||||
|
||||
def languages(self):
|
||||
languages = []
|
||||
|
@ -473,13 +542,13 @@ class LanguageTranslator:
|
|||
return language[0].get('language')
|
||||
|
||||
def detect(self, content):
|
||||
# gcld3
|
||||
if len(content) >= 200:
|
||||
language = self.detect_gcld3(content)
|
||||
# libretranslate
|
||||
else:
|
||||
language = self.detect_libretranslate(content)
|
||||
return language
|
||||
# print('++++++++++++++++++++++++++++++++++++++++++++++++++++++')
|
||||
# print(content)
|
||||
language = self.ld.detect(content)
|
||||
if language:
|
||||
# print(language[0])
|
||||
# print('##############################################################')
|
||||
return language[0]
|
||||
|
||||
def translate(self, content, source=None, target="en"): # TODO source target
|
||||
if target not in get_translation_languages():
|
||||
|
@ -498,9 +567,9 @@ class LanguageTranslator:
|
|||
translation = None
|
||||
# TODO LOG and display error
|
||||
if translation == content:
|
||||
print('EQUAL')
|
||||
# print('EQUAL')
|
||||
translation = None
|
||||
return translation
|
||||
return source, translation
|
||||
|
||||
|
||||
LIST_LANGUAGES = {}
|
||||
|
|
|
@ -323,7 +323,6 @@ def get_username_meta_from_global_id(username_global_id):
|
|||
username = Usernames.Username(username_id, instance_uuid)
|
||||
return username.get_meta()
|
||||
|
||||
|
||||
# TODO Filter
|
||||
## Instance type
|
||||
## Chats IDS
|
||||
|
@ -380,6 +379,22 @@ def get_nb_messages_iterator(filters={}):
|
|||
nb_messages += chat.get_nb_messages()
|
||||
return nb_messages
|
||||
|
||||
|
||||
#### FIX ####
|
||||
|
||||
def fix_correlations_subchannel_message():
|
||||
for instance_uuid in get_chat_service_instances():
|
||||
for chat_id in ChatServiceInstance(instance_uuid).get_chats():
|
||||
chat = Chats.Chat(chat_id, instance_uuid)
|
||||
# subchannels
|
||||
for subchannel_gid in chat.get_subchannels():
|
||||
_, _, subchannel_id = subchannel_gid.split(':', 2)
|
||||
subchannel = ChatSubChannels.ChatSubChannel(subchannel_id, instance_uuid)
|
||||
messages, _ = subchannel._get_messages(nb=-1)
|
||||
for mess in messages:
|
||||
_, _, message_id = mess[0].split(':', )
|
||||
subchannel.add_correlation('message', '', message_id)
|
||||
|
||||
#### API ####
|
||||
|
||||
def api_get_chat_service_instance(chat_instance_uuid):
|
||||
|
@ -392,6 +407,7 @@ def api_get_chat(chat_id, chat_instance_uuid, translation_target=None, nb=-1, pa
|
|||
chat = Chats.Chat(chat_id, chat_instance_uuid)
|
||||
if not chat.exists():
|
||||
return {"status": "error", "reason": "Unknown chat"}, 404
|
||||
# print(chat.get_obj_language_stats())
|
||||
meta = chat.get_meta({'created_at', 'icon', 'info', 'nb_participants', 'subchannels', 'threads', 'translation', 'username'}, translation_target=translation_target)
|
||||
if meta['username']:
|
||||
meta['username'] = get_username_meta_from_global_id(meta['username'])
|
||||
|
@ -437,6 +453,7 @@ def api_get_subchannel(chat_id, chat_instance_uuid, translation_target=None, nb=
|
|||
subchannel = ChatSubChannels.ChatSubChannel(chat_id, chat_instance_uuid)
|
||||
if not subchannel.exists():
|
||||
return {"status": "error", "reason": "Unknown subchannel"}, 404
|
||||
# print(subchannel.get_obj_language_stats())
|
||||
meta = subchannel.get_meta({'chat', 'created_at', 'icon', 'nb_messages', 'nb_participants', 'threads', 'translation'}, translation_target=translation_target)
|
||||
if meta['chat']:
|
||||
meta['chat'] = get_chat_meta_from_global_id(meta['chat'])
|
||||
|
@ -451,6 +468,7 @@ def api_get_thread(thread_id, thread_instance_uuid, translation_target=None, nb=
|
|||
thread = ChatThreads.ChatThread(thread_id, thread_instance_uuid)
|
||||
if not thread.exists():
|
||||
return {"status": "error", "reason": "Unknown thread"}, 404
|
||||
# print(thread.get_obj_language_stats())
|
||||
meta = thread.get_meta({'chat', 'nb_messages', 'nb_participants'})
|
||||
# if meta['chat']:
|
||||
# meta['chat'] = get_chat_meta_from_global_id(meta['chat'])
|
||||
|
@ -461,18 +479,32 @@ def api_get_message(message_id, translation_target=None):
|
|||
message = Messages.Message(message_id)
|
||||
if not message.exists():
|
||||
return {"status": "error", "reason": "Unknown uuid"}, 404
|
||||
meta = message.get_meta({'chat', 'content', 'files-names', 'icon', 'images', 'link', 'parent', 'parent_meta', 'reactions', 'thread', 'translation', 'user-account'}, translation_target=translation_target)
|
||||
meta = message.get_meta({'chat', 'content', 'files-names', 'icon', 'images', 'language', 'link', 'parent', 'parent_meta', 'reactions', 'thread', 'translation', 'user-account'}, translation_target=translation_target)
|
||||
return meta, 200
|
||||
|
||||
def api_manually_translate_message(message_id, translation_target, translation):
|
||||
def api_message_detect_language(message_id):
|
||||
message = Messages.Message(message_id)
|
||||
if not message.exists():
|
||||
return {"status": "error", "reason": "Unknown uuid"}, 404
|
||||
lang = message.detect_language()
|
||||
return {"language": lang}, 200
|
||||
|
||||
def api_manually_translate_message(message_id, source, translation_target, translation):
|
||||
message = Messages.Message(message_id)
|
||||
if not message.exists():
|
||||
return {"status": "error", "reason": "Unknown uuid"}, 404
|
||||
if len(translation) > 200000: # TODO REVIEW LIMIT
|
||||
return {"status": "error", "reason": "Max Size reached"}, 400
|
||||
if translation_target not in Language.get_translation_languages():
|
||||
return {"status": "error", "reason": "Unknown Language"}, 400
|
||||
if translation:
|
||||
if len(translation) > 200000: # TODO REVIEW LIMIT
|
||||
return {"status": "error", "reason": "Max Size reached"}, 400
|
||||
all_languages = Language.get_translation_languages()
|
||||
if source not in all_languages:
|
||||
return {"status": "error", "reason": "Unknown source Language"}, 400
|
||||
message_language = message.get_language()
|
||||
if message_language != source:
|
||||
message.edit_language(message_language, source)
|
||||
if translation:
|
||||
if translation_target not in all_languages:
|
||||
return {"status": "error", "reason": "Unknown target Language"}, 400
|
||||
message.set_translation(translation_target, translation)
|
||||
# TODO SANITYZE translation
|
||||
return None, 200
|
||||
|
|
|
@ -107,9 +107,24 @@ class Message(AbstractObject):
|
|||
return message_id
|
||||
|
||||
def get_chat_id(self): # TODO optimize -> use me to tag Chat
|
||||
chat_id = self.get_basename().rsplit('_', 1)[0]
|
||||
return chat_id
|
||||
c_id = self.id.split('/')
|
||||
return c_id[2]
|
||||
|
||||
def get_chat(self):
|
||||
c_id = self.id.split('/')
|
||||
return f'chat:{c_id[0]}:{c_id[2]}'
|
||||
|
||||
def get_subchannel(self):
|
||||
subchannel = self.get_correlation('chat-subchannel')
|
||||
if subchannel.get('chat-subchannel'):
|
||||
return f'chat-subchannel:{subchannel["chat-subchannel"].pop()}'
|
||||
|
||||
def get_current_thread(self):
|
||||
subchannel = self.get_correlation('chat-thread')
|
||||
if subchannel.get('chat-thread'):
|
||||
return f'chat-thread:{subchannel["chat-thread"].pop()}'
|
||||
|
||||
# children thread
|
||||
def get_thread(self):
|
||||
for child in self.get_childrens():
|
||||
obj_type, obj_subtype, obj_id = child.split(':', 2)
|
||||
|
@ -180,24 +195,12 @@ class Message(AbstractObject):
|
|||
# message media
|
||||
# flag is deleted -> event or missing from feeder pass ???
|
||||
|
||||
def get_translation(self, content=None, source=None, target='fr'):
|
||||
"""
|
||||
Returns translated content
|
||||
"""
|
||||
|
||||
# return self._get_field('translated')
|
||||
global_id = self.get_global_id()
|
||||
translation = r_cache.get(f'translation:{target}:{global_id}')
|
||||
r_cache.expire(f'translation:{target}:{global_id}', 0)
|
||||
if translation:
|
||||
return translation
|
||||
if not content:
|
||||
content = self.get_content()
|
||||
translation = Language.LanguageTranslator().translate(content, source=source, target=target)
|
||||
if translation:
|
||||
r_cache.set(f'translation:{target}:{global_id}', translation)
|
||||
r_cache.expire(f'translation:{target}:{global_id}', 300)
|
||||
return translation
|
||||
def get_language(self):
|
||||
languages = self.get_languages()
|
||||
if languages:
|
||||
return languages.pop()
|
||||
else:
|
||||
return None
|
||||
|
||||
def _set_translation(self, translation):
|
||||
"""
|
||||
|
@ -296,8 +299,16 @@ class Message(AbstractObject):
|
|||
meta['files-names'] = self.get_files_names()
|
||||
if 'reactions' in options:
|
||||
meta['reactions'] = self.get_reactions()
|
||||
if 'language' in options:
|
||||
meta['language'] = self.get_language()
|
||||
if 'translation' in options and translation_target:
|
||||
meta['translation'] = self.translate(content=meta.get('content'), target=translation_target)
|
||||
if meta.get('language'):
|
||||
source = meta['language']
|
||||
else:
|
||||
source = None
|
||||
meta['translation'] = self.translate(content=meta.get('content'), source=source, target=translation_target)
|
||||
if 'language' in options:
|
||||
meta['language'] = self.get_language()
|
||||
|
||||
# meta['encoding'] = None
|
||||
return meta
|
||||
|
@ -311,11 +322,29 @@ class Message(AbstractObject):
|
|||
# self._set_translation(translated)
|
||||
# return translated
|
||||
|
||||
def create(self, content, translation=None, tags=[]):
|
||||
## Language ##
|
||||
|
||||
def get_objs_container(self):
|
||||
objs_containers = set()
|
||||
# chat
|
||||
objs_containers.add(self.get_chat())
|
||||
subchannel = self.get_subchannel()
|
||||
if subchannel:
|
||||
objs_containers.add(subchannel)
|
||||
thread = self.get_current_thread()
|
||||
if thread:
|
||||
objs_containers.add(thread)
|
||||
return objs_containers
|
||||
|
||||
#- Language -#
|
||||
|
||||
def create(self, content, language=None, translation=None, tags=[]):
|
||||
self._set_field('content', content)
|
||||
# r_content.get(f'content:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', content)
|
||||
if translation:
|
||||
if not language and content:
|
||||
language = self.detect_language()
|
||||
if translation and content:
|
||||
self._set_translation(translation)
|
||||
self.set_translation(language, translation)
|
||||
for tag in tags:
|
||||
self.add_tag(tag)
|
||||
|
||||
|
|
|
@ -226,7 +226,7 @@ class AbstractChatObject(AbstractSubtypeObject, ABC):
|
|||
def get_message_meta(self, message, timestamp=None, translation_target='', options=None): # TODO handle file message
|
||||
message = Messages.Message(message[9:])
|
||||
if not options:
|
||||
options = {'content', 'files-names', 'images', 'link', 'parent', 'parent_meta', 'reactions', 'thread', 'translation', 'user-account'}
|
||||
options = {'content', 'files-names', 'images', 'language', 'link', 'parent', 'parent_meta', 'reactions', 'thread', 'translation', 'user-account'}
|
||||
meta = message.get_meta(options=options, timestamp=timestamp, translation_target=translation_target)
|
||||
return meta
|
||||
|
||||
|
|
|
@ -25,7 +25,7 @@ from lib import Duplicate
|
|||
from lib.correlations_engine import get_nb_correlations, get_correlations, add_obj_correlation, delete_obj_correlation, delete_obj_correlations, exists_obj_correlation, is_obj_correlated, get_nb_correlation_by_correl_type, get_obj_inter_correlation
|
||||
from lib.Investigations import is_object_investigated, get_obj_investigations, delete_obj_investigations
|
||||
from lib.relationships_engine import get_obj_nb_relationships, add_obj_relationship
|
||||
from lib.Language import get_obj_languages, add_obj_language, remove_obj_language, get_obj_translation, set_obj_translation
|
||||
from lib.Language import get_obj_languages, add_obj_language, remove_obj_language, detect_obj_language, get_obj_language_stats, get_obj_translation, set_obj_translation, delete_obj_translation
|
||||
from lib.Tracker import is_obj_tracked, get_obj_trackers, delete_obj_trackers
|
||||
|
||||
logging.config.dictConfig(ail_logger.get_config(name='ail'))
|
||||
|
@ -305,28 +305,45 @@ class AbstractObject(ABC):
|
|||
|
||||
## -Relationship- ##
|
||||
|
||||
def get_objs_container(self):
|
||||
return set()
|
||||
|
||||
## Language ##
|
||||
|
||||
def get_languages(self):
|
||||
return get_obj_languages(self.type, self.get_subtype(r_str=True), self.id)
|
||||
|
||||
def add_language(self, language):
|
||||
return add_obj_language(language, self.type, self.get_subtype(r_str=True), self.id)
|
||||
return add_obj_language(language, self.type, self.get_subtype(r_str=True), self.id, objs_containers=self.get_objs_container())
|
||||
|
||||
def remove_language(self, language):
|
||||
return remove_obj_language(language, self.type, self.get_subtype(r_str=True), self.id)
|
||||
return remove_obj_language(language, self.type, self.get_subtype(r_str=True), self.id, objs_containers=self.get_objs_container())
|
||||
|
||||
def edit_language(self, old_language, new_language):
|
||||
if old_language:
|
||||
self.remove_language(old_language)
|
||||
self.add_language(new_language)
|
||||
|
||||
def detect_language(self, field=''):
|
||||
return detect_obj_language(self.type, self.get_subtype(r_str=True), self.id, self.get_content(), objs_containers=self.get_objs_container())
|
||||
|
||||
def get_obj_language_stats(self):
|
||||
return get_obj_language_stats(self.type, self.get_subtype(r_str=True), self.id)
|
||||
|
||||
def get_translation(self, language, field=''):
|
||||
return get_obj_translation(self.get_global_id(), language, field=field)
|
||||
return get_obj_translation(self.get_global_id(), language, field=field, objs_containers=self.get_objs_container())
|
||||
|
||||
def set_translation(self, language, translation, field=''):
|
||||
return set_obj_translation(self.get_global_id(), language, translation, field=field)
|
||||
|
||||
def delete_translation(self, language, field=''):
|
||||
return delete_obj_translation(self.get_global_id(), language, field=field)
|
||||
|
||||
def translate(self, content=None, field='', source=None, target='en'):
|
||||
global_id = self.get_global_id()
|
||||
if not content:
|
||||
content = self.get_content()
|
||||
translation = get_obj_translation(global_id, target, source=source, content=content, field=field)
|
||||
translation = get_obj_translation(global_id, target, source=source, content=content, field=field, objs_containers=self.get_objs_container())
|
||||
return translation
|
||||
|
||||
## -Language- ##
|
||||
|
|
|
@ -33,6 +33,10 @@ class Languages(AbstractModule):
|
|||
for lang in obj.get_languages(min_probability=0.8, force_gcld3=True):
|
||||
print(lang)
|
||||
domain.add_language(lang)
|
||||
# Detect Chat Message Language
|
||||
# elif obj.type == 'message':
|
||||
# lang = obj.detect_language()
|
||||
# print(self.obj.id, lang)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
@ -20,17 +20,39 @@ from lib.ail_core import is_object_type
|
|||
from lib import ail_queues
|
||||
from lib.objects import ail_objects
|
||||
|
||||
def reprocess_message_objects(object_type):
|
||||
queue = ail_queues.AILQueue('FeederModuleImporter', -1)
|
||||
for obj in ail_objects.obj_iterator(object_type, filters={}):
|
||||
queue.send_message(obj.get_global_id(), message='reprocess')
|
||||
queue.end()
|
||||
# from modules.ApiKey import ApiKey
|
||||
# from modules.Categ import Categ
|
||||
# from modules.CreditCards import CreditCards
|
||||
# from modules.DomClassifier import DomClassifier
|
||||
# from modules.Global import Global
|
||||
# from modules.Keys import Keys
|
||||
# from modules.Onion import Onion
|
||||
# from modules.Telegram import Telegram
|
||||
|
||||
from modules.Languages import Languages
|
||||
|
||||
MODULES = {
|
||||
'Languages': Languages
|
||||
}
|
||||
|
||||
def reprocess_message_objects(object_type, module_name=None):
|
||||
if module_name:
|
||||
module = MODULES[module_name]()
|
||||
for obj in ail_objects.obj_iterator(object_type, filters={}):
|
||||
module.obj = obj
|
||||
module.compute(None)
|
||||
else:
|
||||
queue = ail_queues.AILQueue('FeederModuleImporter', -1)
|
||||
for obj in ail_objects.obj_iterator(object_type, filters={}):
|
||||
queue.send_message(obj.get_global_id(), message='reprocess')
|
||||
queue.end()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
parser = argparse.ArgumentParser(description='Reprocess AIL Objects')
|
||||
parser.add_argument('-t', '--type', type=str, help='AIL Object Type', required=True)
|
||||
parser.add_argument('-m', '--module', type=str, help='AIL Module Name')
|
||||
|
||||
args = parser.parse_args()
|
||||
if not args.type:
|
||||
|
@ -43,4 +65,7 @@ if __name__ == "__main__":
|
|||
if obj_type not in ['item', 'message']: # TODO image
|
||||
raise Exception(f'Currently not supported Object Type: {obj_type}')
|
||||
|
||||
reprocess_message_objects(obj_type)
|
||||
modulename = args.module
|
||||
if modulename not in MODULES:
|
||||
raise Exception(f'Currently not supported Module: {modulename}')
|
||||
reprocess_message_objects(obj_type, module_name=modulename)
|
||||
|
|
|
@ -10,6 +10,7 @@ sys.path.append(os.environ['AIL_HOME'])
|
|||
##################################
|
||||
from update.bin.ail_updater import AIL_Updater
|
||||
from lib import ail_updates
|
||||
from lib import chats_viewer
|
||||
|
||||
class Updater(AIL_Updater):
|
||||
"""default Updater."""
|
||||
|
@ -19,6 +20,7 @@ class Updater(AIL_Updater):
|
|||
|
||||
|
||||
if __name__ == '__main__':
|
||||
chats_viewer.fix_correlations_subchannel_message()
|
||||
updater = Updater('v5.4')
|
||||
updater.run_update()
|
||||
|
||||
|
|
|
@ -23,6 +23,7 @@ git submodule update
|
|||
echo ""
|
||||
echo -e $GREEN"Updating python packages ..."$DEFAULT
|
||||
echo ""
|
||||
pip install -U pyail
|
||||
pip install -U pylacus
|
||||
pip install -U git+https://github.com/ail-project/demoji
|
||||
pip install -U lexilang
|
||||
|
|
|
@ -244,15 +244,34 @@ def objects_message():
|
|||
@login_read_only
|
||||
def objects_message_translate():
|
||||
message_id = request.form.get('id')
|
||||
source = request.form.get('language_target')
|
||||
target = request.form.get('target')
|
||||
translation = request.form.get('translation')
|
||||
if target == "Don't Translate":
|
||||
target = None
|
||||
resp = chats_viewer.api_manually_translate_message(message_id, target, translation)
|
||||
resp = chats_viewer.api_manually_translate_message(message_id, source, target, translation)
|
||||
if resp[1] != 200:
|
||||
return create_json_response(resp[0], resp[1])
|
||||
else:
|
||||
return redirect(url_for('chats_explorer.objects_message', id=message_id, target=target))
|
||||
if request.referrer:
|
||||
return redirect(request.referrer)
|
||||
else:
|
||||
return redirect(url_for('chats_explorer.objects_message', id=message_id, target=target))
|
||||
|
||||
@chats_explorer.route("/objects/message/detect/language", methods=['GET'])
|
||||
@login_required
|
||||
@login_read_only
|
||||
def objects_message_detect_language():
|
||||
message_id = request.args.get('id')
|
||||
target = request.args.get('target')
|
||||
resp = chats_viewer.api_message_detect_language(message_id)
|
||||
if resp[1] != 200:
|
||||
return create_json_response(resp[0], resp[1])
|
||||
else:
|
||||
if request.referrer:
|
||||
return redirect(request.referrer)
|
||||
else:
|
||||
return redirect(url_for('chats_explorer.objects_message', id=message_id, target=target))
|
||||
|
||||
@chats_explorer.route("/objects/user-account", methods=['GET'])
|
||||
@login_required
|
||||
|
|
|
@ -81,24 +81,6 @@
|
|||
<hr class="m-1">
|
||||
<pre class="my-0 text-secondary">{{ message['translation'] }}</pre>
|
||||
|
||||
{% set mess_id_escape= message['id'] | replace("/", "_") %}
|
||||
<button class="btn btn-light p-0" type="button" data-toggle="collapse" data-target="#collapseTrans{{ mess_id_escape }}" aria-expanded="false" aria-controls="collapseTrans{{ mess_id_escape }}">
|
||||
<i class="fas fa-language"></i>
|
||||
</button>
|
||||
<div class="collapse" id="collapseTrans{{ mess_id_escape }}">
|
||||
<div class="card card-body">
|
||||
<form method="post" action="{{ url_for('chats_explorer.objects_message_translate') }}" target="_blank">
|
||||
<input type="text" id="id" name="id" value="{{message['id']}}" hidden>
|
||||
<input type="text" id="target" name="target" value="{{translation_target}}" hidden>
|
||||
<span>{{translation_target}}:</span>
|
||||
<textarea class="form-control" id="translation" name="translation">{{ message['translation'] }}</textarea>
|
||||
<button class="btn btn-primary" type="submit">
|
||||
<i class="fas fa-pen-alt"> Manual Translation</i>
|
||||
</button>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{% endif %}
|
||||
{% for reaction in message['reactions'] %}
|
||||
<span class="border rounded px-1">{{ reaction }} {{ message['reactions'][reaction] }}</span>
|
||||
|
@ -113,10 +95,47 @@
|
|||
<span class="badge badge-{{ bootstrap_label[loop.index0 % 5] }}">{{ tag }}</span>
|
||||
{% endfor %}
|
||||
<div class="">
|
||||
|
||||
{% set mess_id_escape= message['id'] | replace("/", "_") %}
|
||||
<span class="btn btn-outline-dark p-0 px-1" type="button" data-toggle="collapse" data-target="#collapseTrans{{ mess_id_escape }}" aria-expanded="false" aria-controls="collapseTrans{{ mess_id_escape }}">
|
||||
<i class="fas fa-language"></i> {% if message['language'] %}{{ message['language'] }}{% endif %}
|
||||
</span>
|
||||
<div class="collapse" id="collapseTrans{{ mess_id_escape }}">
|
||||
<div class="card card-body">
|
||||
<form method="post" action="{{ url_for('chats_explorer.objects_message_translate') }}">
|
||||
<input type="text" id="id" name="id" value="{{message['id']}}" hidden>
|
||||
<span class="badge badge-primary">Source:</span>
|
||||
<span class="">
|
||||
<select id="language_target" name="language_target" class="form-select" aria-label="Message Language" onchange="$('#translation').val('');">
|
||||
<option selected value="{{ message['language'] }}">{{ message['language'] }}</option>
|
||||
{% for language in translation_languages %}
|
||||
<option value="{{ language }}">{{ translation_languages[language] }}</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
</span>
|
||||
{% if translation_target %}
|
||||
<input type="text" id="target" name="target" value="{{translation_target}}" hidden>
|
||||
<span class="badge badge-primary">Target:</span><span>{{translation_target}}</span>
|
||||
<textarea class="form-control" id="translation" name="translation">{{ message['translation'] }}</textarea>
|
||||
<button class="btn btn-dark" type="submit">
|
||||
<i class="fas fa-pen-alt"> Update Language or Translation</i>
|
||||
</button>
|
||||
{% else %}
|
||||
<button class="btn btn-dark" type="submit">
|
||||
<i class="fas fa-pen-alt"> Update Language</i>
|
||||
</button>
|
||||
{% endif %}
|
||||
</form>
|
||||
<div>
|
||||
<a class="btn btn-primary" href="{{ url_for('chats_explorer.objects_message_detect_language')}}?id={{ message['id'] }}">
|
||||
<i class="fas fa-redo"></i> Detect Language
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<a class="btn btn-light btn-sm text-secondary px-1" href="{{ url_for('correlation.show_correlation')}}?type={{ message['type'] }}&subtype={{ message['subtype'] }}&id={{ message['id'] }}"><i class="fas fa-project-diagram"></i></a>
|
||||
<a class="btn btn-light btn-sm text-secondary px-1" href="{{ message['link'] }}"><i class="fas fa-eye"></i></a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
<option selected>Don't Translate</option>
|
||||
{% else %}
|
||||
<option selected value="{{ translation_target }}">{{ translation_target }}</option>
|
||||
<option>Don't Translate</option>
|
||||
{% endif %}
|
||||
{% for language in translation_languages %}
|
||||
<option value="{{ language }}">{{ translation_languages[language] }}</option>
|
||||
|
|
Loading…
Reference in a new issue