chg: [chats] translate messages on demand

This commit is contained in:
terrtia 2023-12-04 15:47:58 +01:00
parent 941838ab76
commit bef4e69a68
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
11 changed files with 205 additions and 21 deletions

View file

@ -4,6 +4,20 @@
import os
import sys
import cld3
from libretranslatepy import LibreTranslateAPI
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.ConfigLoader import ConfigLoader
# Read the LibreTranslate URL from the 'libretranslate' key of the [Translation] config section.
config_loader = ConfigLoader()
TRANSLATOR_URL = config_loader.get_config_str('Translation', 'libretranslate')
# Drop the loader reference once the setting has been read.
config_loader = None
dict_iso_languages = {
'af': 'Afrikaans',
'am': 'Amharic',
@ -237,3 +251,77 @@ def get_iso_from_languages(l_languages, sort=False):
if sort:
l_iso = sorted(l_iso)
return l_iso
class LanguageDetector:
    """Placeholder class: no detection logic is implemented here yet."""
def get_translator_instance():
    """Return the translator URL held in the module-level ``TRANSLATOR_URL``."""
    translator_url = TRANSLATOR_URL
    return translator_url
class LanguageTranslator:
    """Wrapper around a LibreTranslate client, with cld3 used to detect long texts.

    Every call to the translation backend is best-effort: a failure is
    reported to the caller as an empty list or ``None``, never as an exception.
    """

    def __init__(self):
        self.lt = LibreTranslateAPI(get_translator_instance())

    def languages(self):
        """Return the languages offered by the translator.

        :return: list of ``{'iso': <code>, 'language': <name>}`` dicts. Entries
                 collected before a backend failure are kept; the list is
                 empty when nothing could be fetched.
        """
        languages = []
        try:
            for dict_lang in self.lt.languages():
                languages.append({'iso': dict_lang['code'], 'language': dict_lang['name']})
        except Exception:  # TODO LOG the error
            # Best effort. ``Exception`` (rather than a bare ``except``) lets
            # KeyboardInterrupt / SystemExit propagate.
            pass
        return languages

    def detect_cld3(self, content):
        """Return the most frequent language reported by cld3, or None."""
        for lang in cld3.get_frequent_languages(content, num_langs=1):
            return lang.language
        return None

    def detect_libretranslate(self, content):
        """Return the first language reported by LibreTranslate, or None.

        None is returned both when the backend call fails and when it
        returns no candidate.
        """
        try:
            language = self.lt.detect(content)
        except Exception:  # TODO ERROR MESSAGE
            return None
        if language:
            return language[0].get('language')
        return None

    def detect(self, content):  # TODO replace by gcld3
        """Detect the language of ``content``.

        cld3 is used for contents of at least 200 characters, LibreTranslate
        for shorter ones.
        """
        if len(content) >= 200:
            return self.detect_cld3(content)
        return self.detect_libretranslate(content)

    def translate(self, content, source=None, target="en"):  # TODO source target
        """Translate ``content`` into ``target``.

        :param content: text to translate
        :param source: language code of ``content``; detected when not given
        :param target: language code to translate into
        :return: the translation, or None when the content is empty, the
                 source language is unknown or equal to ``target``, the
                 backend call fails, or the result is identical to ``content``.
        """
        if not content:
            return None
        if not source:
            source = self.detect(content)
        if not source or source == target:
            return None
        try:
            translation = self.lt.translate(content, source, target)
        except Exception:  # TODO LOG and display error
            return None
        # An unchanged text is reported as "no translation".
        if translation == content:
            return None
        return translation
# Language list fetched once, when this module is imported; languages() returns what it collected (possibly nothing) if the lookup fails.
LIST_LANGUAGES = LanguageTranslator().languages()
def get_translation_languages():
    """Return the module-level ``LIST_LANGUAGES`` list of translator languages."""
    available_languages = LIST_LANGUAGES
    return available_languages
if __name__ == '__main__':
    # Manual check: build a translator and query its supported languages.
    t_content = ''
    translator = LanguageTranslator()
    # translator.translate(t_content, source='ru')
    translator.languages()

View file

@ -21,6 +21,7 @@ from lib.objects import Chats
from lib.objects import ChatSubChannels
from lib.objects import ChatThreads
from lib.objects import Messages
from lib.objects import UsersAccount
from lib.objects import Usernames
config_loader = ConfigLoader()
@ -307,7 +308,7 @@ def api_get_chat_service_instance(chat_instance_uuid):
return {"status": "error", "reason": "Unknown uuid"}, 404
return chat_instance.get_meta({'chats'}), 200
def api_get_chat(chat_id, chat_instance_uuid):
def api_get_chat(chat_id, chat_instance_uuid, translation_target=None):
chat = Chats.Chat(chat_id, chat_instance_uuid)
if not chat.exists():
return {"status": "error", "reason": "Unknown chat"}, 404
@ -317,7 +318,7 @@ def api_get_chat(chat_id, chat_instance_uuid):
if meta['subchannels']:
meta['subchannels'] = get_subchannels_meta_from_global_id(meta['subchannels'])
else:
meta['messages'], meta['tags_messages'] = chat.get_messages()
meta['messages'], meta['tags_messages'] = chat.get_messages(translation_target=translation_target)
return meta, 200
def api_get_nb_message_by_week(chat_id, chat_instance_uuid):
@ -328,7 +329,7 @@ def api_get_nb_message_by_week(chat_id, chat_instance_uuid):
# week = chat.get_nb_message_by_week('20231109')
return week, 200
def api_get_subchannel(chat_id, chat_instance_uuid):
def api_get_subchannel(chat_id, chat_instance_uuid, translation_target=None):
subchannel = ChatSubChannels.ChatSubChannel(chat_id, chat_instance_uuid)
if not subchannel.exists():
return {"status": "error", "reason": "Unknown subchannel"}, 404
@ -339,17 +340,17 @@ def api_get_subchannel(chat_id, chat_instance_uuid):
meta['threads'] = get_threads_metas(meta['threads'])
if meta.get('username'):
meta['username'] = get_username_meta_from_global_id(meta['username'])
meta['messages'], meta['tags_messages'] = subchannel.get_messages()
meta['messages'], meta['tags_messages'] = subchannel.get_messages(translation_target=translation_target)
return meta, 200
def api_get_thread(thread_id, thread_instance_uuid):
def api_get_thread(thread_id, thread_instance_uuid, translation_target=None):
# Returns (meta, 200) for an existing thread, or an error dict with 404 otherwise.
thread = ChatThreads.ChatThread(thread_id, thread_instance_uuid)
if not thread.exists():
return {"status": "error", "reason": "Unknown thread"}, 404
meta = thread.get_meta({'chat', 'nb_messages'})
# if meta['chat']:
# meta['chat'] = get_chat_meta_from_global_id(meta['chat'])
meta['messages'], meta['tags_messages'] = thread.get_messages()
# translation_target is forwarded unchanged to get_messages(); it defaults to None here.
meta['messages'], meta['tags_messages'] = thread.get_messages(translation_target=translation_target)
return meta, 200
def api_get_message(message_id):
@ -362,6 +363,14 @@ def api_get_message(message_id):
# # meta['chat'] =
return meta, 200
def api_get_user_account(user_id, instance_uuid):
    """Return the meta of a user account.

    :param user_id: user account id
    :param instance_uuid: uuid passed to ``UsersAccount.UserAccount`` together with the id
    :return: ``(meta, 200)`` with the 'icon' and 'username' options requested,
             or ``(error dict, 404)`` when the account does not exist
    """
    user_account = UsersAccount.UserAccount(user_id, instance_uuid)
    if not user_account.exists():
        return {"status": "error", "reason": "Unknown user-account"}, 404
    # The debug print of the meta dict was removed: it wrote to stdout on every call.
    meta = user_account.get_meta({'icon', 'username'})
    return meta, 200
# # # # # # # # # # LATER
# #
# ChatCategory #

View file

@ -18,6 +18,7 @@ sys.path.append(os.environ['AIL_BIN'])
from lib.ail_core import get_ail_uuid
from lib.objects.abstract_object import AbstractObject
from lib.ConfigLoader import ConfigLoader
from lib import Language
from lib.objects import UsersAccount
from lib.data_retention_engine import update_obj_date, get_obj_date_first
# TODO Set all messages ???
@ -76,7 +77,13 @@ class Message(AbstractObject):
"""
Returns content
"""
content = self._get_field('content')
global_id = self.get_global_id()
content = r_cache.get(f'content:{global_id}')
if not content:
content = self._get_field('content')
if content:
r_cache.set(f'content:{global_id}', content)
r_cache.expire(f'content:{global_id}', 300)
if r_type == 'str':
return content
elif r_type == 'bytes':
@ -153,11 +160,23 @@ class Message(AbstractObject):
# message from channel ???
# message media
def get_translation(self): # TODO support multiple translated languages ?????
def get_translation(self, content=None, source=None, target='fr'):
    """
    Returns translated content

    The translation is computed on demand and cached for 300 seconds under
    a key built from the target language and the message global id.

    :param content: message content; fetched with get_content() when not given
    :param source: source language code, passed to the translator (None: let it detect)
    :param target: language code to translate into
    :return: the translation, or None when the translator returns nothing
    """
    # return self._get_field('translated')
    global_id = self.get_global_id()
    cache_key = f'translation:{target}:{global_id}'
    translation = r_cache.get(cache_key)
    # The cached entry is left in place on a hit: expiring it with a timeout
    # of 0 here deleted the key on every call, so the cache was never reused.
    if translation:
        return translation
    if not content:
        content = self.get_content()
    translation = Language.LanguageTranslator().translate(content, source=source, target=target)
    if translation:
        r_cache.set(cache_key, translation)
        r_cache.expire(cache_key, 300)
    return translation
def _set_translation(self, translation):
"""
@ -209,7 +228,7 @@ class Message(AbstractObject):
# return r_object.hget(f'meta:item::{self.id}', 'url')
# options: set of optional meta fields
def get_meta(self, options=None, timestamp=None):
def get_meta(self, options=None, timestamp=None, translation_target='en'):
"""
:type options: set
:type timestamp: float
@ -239,7 +258,7 @@ class Message(AbstractObject):
parent_type, _, parent_id = meta['parent'].split(':', 3)
if parent_type == 'message':
message = Message(parent_id)
meta['reply_to'] = message.get_meta(options=options)
meta['reply_to'] = message.get_meta(options=options, translation_target=translation_target)
if 'investigations' in options:
meta['investigations'] = self.get_investigations()
if 'link' in options:
@ -262,6 +281,8 @@ class Message(AbstractObject):
meta['files-names'] = self.get_files_names()
if 'reactions' in options:
meta['reactions'] = self.get_reactions()
if 'translation' in options and translation_target:
meta['translation'] = self.get_translation(content=meta.get('content'), target=translation_target)
# meta['encoding'] = None
return meta

View file

@ -179,12 +179,12 @@ class AbstractChatObject(AbstractSubtypeObject, ABC):
week_date = Date.get_current_week_day()
return self.get_nb_message_by_week(week_date)
def get_message_meta(self, message, timestamp=None): # TODO handle file message
def get_message_meta(self, message, timestamp=None, translation_target='en'): # TODO handle file message
# Builds the meta dict of one message with a fixed set of options.
# message[9:] drops the first 9 characters before building the Message object
# (presumably a type prefix in front of the message id; TODO confirm).
message = Messages.Message(message[9:])
meta = message.get_meta(options={'content', 'files-names', 'images', 'link', 'parent', 'parent_meta', 'reactions', 'thread', 'user-account'}, timestamp=timestamp)
# The 'translation' option is requested and translation_target is forwarded to Message.get_meta().
meta = message.get_meta(options={'content', 'files-names', 'images', 'link', 'parent', 'parent_meta', 'reactions', 'thread', 'translation', 'user-account'}, timestamp=timestamp, translation_target=translation_target)
return meta
def get_messages(self, start=0, page=1, nb=500, unread=False): # threads ???? # TODO ADD last/first message timestamp + return page
def get_messages(self, start=0, page=1, nb=500, unread=False, translation_target='en'): # threads ???? # TODO ADD last/first message timestamp + return page
# TODO return message meta
tags = {}
messages = {}
@ -195,7 +195,7 @@ class AbstractChatObject(AbstractSubtypeObject, ABC):
if date_day != curr_date:
messages[date_day] = []
curr_date = date_day
mess_dict = self.get_message_meta(message[0], timestamp=timestamp)
mess_dict = self.get_message_meta(message[0], timestamp=timestamp, translation_target=translation_target)
messages[date_day].append(mess_dict)
if mess_dict.get('tags'):

View file

@ -262,6 +262,9 @@ default_har = True
default_screenshot = True
onion_proxy = onion.foundation
[Translation]
libretranslate =
[IP]
# list of comma-separated CIDR that you wish to be alerted for. e.g:
#networks = 192.168.34.0/24,10.0.0.0/8,192.168.33.0/24

View file

@ -21,6 +21,7 @@ sys.path.append(os.environ['AIL_BIN'])
##################################
from lib import ail_core
from lib import chats_viewer
from lib import Language
from lib import Tag
# ============ BLUEPRINT ============
@ -80,12 +81,14 @@ def chats_explorer_instance():
def chats_explorer_chat():
# Renders chat_viewer.html for the chat given by the 'id' and 'uuid' query parameters.
chat_id = request.args.get('id')
instance_uuid = request.args.get('uuid')
chat = chats_viewer.api_get_chat(chat_id, instance_uuid)
# Optional 'target' query parameter, passed on as the translation target (None when absent).
# NOTE(review): the value comes straight from the query string and is not checked against the supported languages.
target = request.args.get('target')
chat = chats_viewer.api_get_chat(chat_id, instance_uuid, translation_target=target)
if chat[1] != 200:
return create_json_response(chat[0], chat[1])
else:
chat = chat[0]
return render_template('chat_viewer.html', chat=chat, bootstrap_label=bootstrap_label)
# The language list and the requested target are handed to the template.
languages = Language.get_translation_languages()
return render_template('chat_viewer.html', chat=chat, bootstrap_label=bootstrap_label, translation_languages=languages, translation_target=target)
@chats_explorer.route("chats/explorer/messages/stats/week", methods=['GET'])
@login_required
@ -105,12 +108,14 @@ def chats_explorer_messages_stats_week():
def objects_subchannel_messages():
# Renders SubChannelMessages.html for the subchannel given by the 'id' and 'uuid' query parameters.
subchannel_id = request.args.get('id')
instance_uuid = request.args.get('uuid')
subchannel = chats_viewer.api_get_subchannel(subchannel_id, instance_uuid)
# Optional 'target' query parameter, passed on as the translation target (None when absent).
# NOTE(review): the value comes straight from the query string and is not checked against the supported languages.
target = request.args.get('target')
subchannel = chats_viewer.api_get_subchannel(subchannel_id, instance_uuid, translation_target=target)
if subchannel[1] != 200:
return create_json_response(subchannel[0], subchannel[1])
else:
subchannel = subchannel[0]
return render_template('SubChannelMessages.html', subchannel=subchannel, bootstrap_label=bootstrap_label)
# The language list and the requested target are handed to the template.
languages = Language.get_translation_languages()
return render_template('SubChannelMessages.html', subchannel=subchannel, bootstrap_label=bootstrap_label, translation_languages=languages, translation_target=target)
@chats_explorer.route("/chats/explorer/thread", methods=['GET'])
@login_required
@ -118,12 +123,14 @@ def objects_subchannel_messages():
def objects_thread_messages():
# Renders ThreadMessages.html for the thread given by the 'id' and 'uuid' query parameters.
thread_id = request.args.get('id')
instance_uuid = request.args.get('uuid')
thread = chats_viewer.api_get_thread(thread_id, instance_uuid)
# Optional 'target' query parameter, passed on as the translation target (None when absent).
# NOTE(review): the value comes straight from the query string and is not checked against the supported languages.
target = request.args.get('target')
thread = chats_viewer.api_get_thread(thread_id, instance_uuid, translation_target=target)
if thread[1] != 200:
return create_json_response(thread[0], thread[1])
else:
meta = thread[0]
return render_template('ThreadMessages.html', meta=meta, bootstrap_label=bootstrap_label)
# The language list and the requested target are handed to the template.
languages = Language.get_translation_languages()
return render_template('ThreadMessages.html', meta=meta, bootstrap_label=bootstrap_label, translation_languages=languages, translation_target=target)
@chats_explorer.route("/objects/message", methods=['GET'])
@login_required
@ -135,5 +142,6 @@ def objects_message():
return create_json_response(message[0], message[1])
else:
message = message[0]
languages = Language.get_translation_languages()
return render_template('ChatMessage.html', meta=message, bootstrap_label=bootstrap_label,
modal_add_tags=Tag.get_modal_add_tags(message['id'], object_type='message'))

View file

@ -184,6 +184,9 @@
<span class="mt-3">
{% include 'objects/image/block_blur_img_slider.html' %}
</span>
{% with translate_url=url_for('chats_explorer.objects_subchannel_messages', uuid=subchannel['subtype']), obj_id=subchannel['id'] %}
{% include 'chats_explorer/block_translation.html' %}
{% endwith %}
<div class="position-relative">
<div class="chat-messages p-2">

View file

@ -133,6 +133,9 @@
<span class="mt-3">
{% include 'objects/image/block_blur_img_slider.html' %}
</span>
{% with translate_url=url_for('chats_explorer.objects_thread_messages', uuid=meta['subtype']), obj_id=meta['id'] %}
{% include 'chats_explorer/block_translation.html' %}
{% endwith %}
<div class="position-relative">
<div class="chat-messages p-2">

View file

@ -48,6 +48,10 @@
{% endif %}
</div>
<pre class="my-0">{{ message['reply_to']['content'] }}</pre>
{% if message['reply_to']['translation'] %}
<hr class="m-1">
<div class="my-0 text-secondary">{{ message['reply_to']['translation'] }}</div>
{% endif %}
{% for tag in message['reply_to']['tags'] %}
<span class="badge badge-{{ bootstrap_label[loop.index0 % 5] }}">{{ tag }}</span>
{% endfor %}
@ -71,6 +75,10 @@
{% endfor %}
{% endif %}
<pre class="my-0">{{ message['content'] }}</pre>
{% if message['translation'] %}
<hr class="m-1">
<pre class="my-0 text-secondary">{{ message['translation'] }}</pre>
{% endif %}
{% for reaction in message['reactions'] %}
<span class="border rounded px-1">{{ reaction }} {{ message['reactions'][reaction] }}</span>
{% endfor %}

View file

@ -0,0 +1,37 @@
<div class="card border-secondary">
<div class="card-body py-2">
<div class="row">
<div class="col-md-3 text-center px-0">
Translation
</div>
<div class="col-md-6 text-center pl-0 pt-1">
<select id="translation_selector_target" class="form-select" aria-label="Default select example">
{% if not translation_target or translation_target == "Don't Translate" %}
<option selected>Don't Translate</option>
{% else %}
<option selected value="{{ translation_target }}">{{ translation_target }}</option>
{% endif %}
{% for language in translation_languages %}
<option value="{{ language['iso'] }}">{{ language['language'] }}</option>
{% endfor %}
</select>
</div>
<div class="col-md-3 text-center">
<button class="btn btn-sm btn-primary" onclick="translate_selector();">
<i class="fas fa-language"></i>
<span class="label-icon">Translate</span>
</button>
</div>
</div>
</div>
</div>
<script>
// Reload the current object page with the language chosen in the selector.
// The placeholder entry has no value attribute, so its value is its text
// ("Don't Translate"); in that case no target is sent at all, instead of
// asking the server to translate into a non-existent language.
function translate_selector(){
    var selector = document.getElementById("translation_selector_target");
    var target = selector.value;
    var url = "{{ translate_url }}&id={{ obj_id }}";
    if (target && target !== "Don't Translate") {
        // Encode the value so it cannot break or extend the query string.
        url += "&target=" + encodeURIComponent(target);
    }
    window.location.replace(url);
}
</script>

View file

@ -159,6 +159,10 @@
{% include 'objects/image/block_blur_img_slider.html' %}
</span>
{% with translate_url=url_for('chats_explorer.chats_explorer_chat', uuid=chat['subtype']), obj_id=chat['id'] %}
{% include 'chats_explorer/block_translation.html' %}
{% endwith %}
<div class="position-relative">
<div class="chat-messages p-2">