chg: [chats] translate messages on demand

This commit is contained in:
terrtia 2023-12-04 15:47:58 +01:00
parent 941838ab76
commit bef4e69a68
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
11 changed files with 205 additions and 21 deletions

View file

@ -4,6 +4,20 @@
import os import os
import sys import sys
import cld3
from libretranslatepy import LibreTranslateAPI
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.ConfigLoader import ConfigLoader
config_loader = ConfigLoader()
TRANSLATOR_URL = config_loader.get_config_str('Translation', 'libretranslate')
config_loader = None
dict_iso_languages = { dict_iso_languages = {
'af': 'Afrikaans', 'af': 'Afrikaans',
'am': 'Amharic', 'am': 'Amharic',
@ -237,3 +251,77 @@ def get_iso_from_languages(l_languages, sort=False):
if sort: if sort:
l_iso = sorted(l_iso) l_iso = sorted(l_iso)
return l_iso return l_iso
class LanguageDetector:
    """Placeholder class: it currently defines no attributes or methods."""
def get_translator_instance():
    """Return the translator URL loaded from the 'Translation' / 'libretranslate' config entry."""
    translator_url = TRANSLATOR_URL
    return translator_url
class LanguageTranslator:
    """Wrapper around a LibreTranslate client used to detect and translate text.

    Every call to the translation backend is best-effort: a failure is turned
    into an empty result (``[]`` or ``None``) instead of being raised.
    """

    def __init__(self):
        # Client bound to the URL returned by get_translator_instance().
        self.lt = LibreTranslateAPI(get_translator_instance())

    def languages(self):
        """Return the languages reported by the backend.

        :return: list of ``{'iso': <code>, 'language': <name>}`` dicts; the
                 entries collected so far (possibly none) if the backend fails.
        """
        languages = []
        try:
            for dict_lang in self.lt.languages():
                languages.append({'iso': dict_lang['code'], 'language': dict_lang['name']})
        except Exception:  # best-effort: backend unreachable or unexpected answer
            pass
        return languages

    def detect_cld3(self, content):
        """Return the most frequent language found by cld3, or None if there is none."""
        for lang in cld3.get_frequent_languages(content, num_langs=1):
            return lang.language
        return None

    def detect_libretranslate(self, content):
        """Return the first language detected by the backend, or None on failure."""
        try:
            language = self.lt.detect(content)
        except Exception:  # TODO ERROR MESSAGE
            language = None
        if language:
            return language[0].get('language')
        return None

    def detect(self, content):  # TODO replace by gcld3
        """Detect the language of content.

        cld3 is used for content of 200 characters or more, the translation
        backend for anything shorter.
        """
        if len(content) >= 200:
            return self.detect_cld3(content)
        return self.detect_libretranslate(content)

    def translate(self, content, source=None, target="en"):  # TODO source target
        """Translate content from source to target.

        :param content: text to translate
        :param source: source language code; detected from content when not set
        :param target: target language code
        :return: the translation, or None when content is empty, the source
                 language is unknown or equal to target, the backend call
                 fails, or the translation is identical to content.
        """
        if not content:
            return None
        if not source:
            source = self.detect(content)
        # Nothing to do without a source language, or when it is already the target.
        if not source or source == target:
            return None
        try:
            translation = self.lt.translate(content, source, target)
        except Exception:
            # TODO LOG and display error
            return None
        # An unchanged text is not a useful translation.
        if translation == content:
            return None
        return translation
# The language list is fetched once, when this module is imported.
LIST_LANGUAGES = LanguageTranslator().languages()


def get_translation_languages():
    """Return the module-level LIST_LANGUAGES list."""
    available_languages = LIST_LANGUAGES
    return available_languages
if __name__ == '__main__':
    # Manual check: build a translator and query the backend for its languages.
    t_content = ''
    translator = LanguageTranslator()
    # translator.translate(t_content, source='ru')
    translator.languages()

View file

@ -21,6 +21,7 @@ from lib.objects import Chats
from lib.objects import ChatSubChannels from lib.objects import ChatSubChannels
from lib.objects import ChatThreads from lib.objects import ChatThreads
from lib.objects import Messages from lib.objects import Messages
from lib.objects import UsersAccount
from lib.objects import Usernames from lib.objects import Usernames
config_loader = ConfigLoader() config_loader = ConfigLoader()
@ -307,7 +308,7 @@ def api_get_chat_service_instance(chat_instance_uuid):
return {"status": "error", "reason": "Unknown uuid"}, 404 return {"status": "error", "reason": "Unknown uuid"}, 404
return chat_instance.get_meta({'chats'}), 200 return chat_instance.get_meta({'chats'}), 200
def api_get_chat(chat_id, chat_instance_uuid): def api_get_chat(chat_id, chat_instance_uuid, translation_target=None):
chat = Chats.Chat(chat_id, chat_instance_uuid) chat = Chats.Chat(chat_id, chat_instance_uuid)
if not chat.exists(): if not chat.exists():
return {"status": "error", "reason": "Unknown chat"}, 404 return {"status": "error", "reason": "Unknown chat"}, 404
@ -317,7 +318,7 @@ def api_get_chat(chat_id, chat_instance_uuid):
if meta['subchannels']: if meta['subchannels']:
meta['subchannels'] = get_subchannels_meta_from_global_id(meta['subchannels']) meta['subchannels'] = get_subchannels_meta_from_global_id(meta['subchannels'])
else: else:
meta['messages'], meta['tags_messages'] = chat.get_messages() meta['messages'], meta['tags_messages'] = chat.get_messages(translation_target=translation_target)
return meta, 200 return meta, 200
def api_get_nb_message_by_week(chat_id, chat_instance_uuid): def api_get_nb_message_by_week(chat_id, chat_instance_uuid):
@ -328,7 +329,7 @@ def api_get_nb_message_by_week(chat_id, chat_instance_uuid):
# week = chat.get_nb_message_by_week('20231109') # week = chat.get_nb_message_by_week('20231109')
return week, 200 return week, 200
def api_get_subchannel(chat_id, chat_instance_uuid): def api_get_subchannel(chat_id, chat_instance_uuid, translation_target=None):
subchannel = ChatSubChannels.ChatSubChannel(chat_id, chat_instance_uuid) subchannel = ChatSubChannels.ChatSubChannel(chat_id, chat_instance_uuid)
if not subchannel.exists(): if not subchannel.exists():
return {"status": "error", "reason": "Unknown subchannel"}, 404 return {"status": "error", "reason": "Unknown subchannel"}, 404
@ -339,17 +340,17 @@ def api_get_subchannel(chat_id, chat_instance_uuid):
meta['threads'] = get_threads_metas(meta['threads']) meta['threads'] = get_threads_metas(meta['threads'])
if meta.get('username'): if meta.get('username'):
meta['username'] = get_username_meta_from_global_id(meta['username']) meta['username'] = get_username_meta_from_global_id(meta['username'])
meta['messages'], meta['tags_messages'] = subchannel.get_messages() meta['messages'], meta['tags_messages'] = subchannel.get_messages(translation_target=translation_target)
return meta, 200 return meta, 200
def api_get_thread(thread_id, thread_instance_uuid): def api_get_thread(thread_id, thread_instance_uuid, translation_target=None):
thread = ChatThreads.ChatThread(thread_id, thread_instance_uuid) thread = ChatThreads.ChatThread(thread_id, thread_instance_uuid)
if not thread.exists(): if not thread.exists():
return {"status": "error", "reason": "Unknown thread"}, 404 return {"status": "error", "reason": "Unknown thread"}, 404
meta = thread.get_meta({'chat', 'nb_messages'}) meta = thread.get_meta({'chat', 'nb_messages'})
# if meta['chat']: # if meta['chat']:
# meta['chat'] = get_chat_meta_from_global_id(meta['chat']) # meta['chat'] = get_chat_meta_from_global_id(meta['chat'])
meta['messages'], meta['tags_messages'] = thread.get_messages() meta['messages'], meta['tags_messages'] = thread.get_messages(translation_target=translation_target)
return meta, 200 return meta, 200
def api_get_message(message_id): def api_get_message(message_id):
@ -362,6 +363,14 @@ def api_get_message(message_id):
# # meta['chat'] = # # meta['chat'] =
return meta, 200 return meta, 200
def api_get_user_account(user_id, instance_uuid):
    """Return the meta of a user account together with an HTTP status code.

    :param user_id: id of the user account
    :param instance_uuid: uuid passed to UsersAccount.UserAccount with user_id
    :return: ``(meta, 200)`` with the 'icon' and 'username' options requested,
             or ``(error dict, 404)`` when the account does not exist.
    """
    user_account = UsersAccount.UserAccount(user_id, instance_uuid)
    if not user_account.exists():
        return {"status": "error", "reason": "Unknown user-account"}, 404
    meta = user_account.get_meta({'icon', 'username'})
    return meta, 200
# # # # # # # # # # LATER # # # # # # # # # # LATER
# # # #
# ChatCategory # # ChatCategory #

View file

@ -18,6 +18,7 @@ sys.path.append(os.environ['AIL_BIN'])
from lib.ail_core import get_ail_uuid from lib.ail_core import get_ail_uuid
from lib.objects.abstract_object import AbstractObject from lib.objects.abstract_object import AbstractObject
from lib.ConfigLoader import ConfigLoader from lib.ConfigLoader import ConfigLoader
from lib import Language
from lib.objects import UsersAccount from lib.objects import UsersAccount
from lib.data_retention_engine import update_obj_date, get_obj_date_first from lib.data_retention_engine import update_obj_date, get_obj_date_first
# TODO Set all messages ??? # TODO Set all messages ???
@ -76,7 +77,13 @@ class Message(AbstractObject):
""" """
Returns content Returns content
""" """
global_id = self.get_global_id()
content = r_cache.get(f'content:{global_id}')
if not content:
content = self._get_field('content') content = self._get_field('content')
if content:
r_cache.set(f'content:{global_id}', content)
r_cache.expire(f'content:{global_id}', 300)
if r_type == 'str': if r_type == 'str':
return content return content
elif r_type == 'bytes': elif r_type == 'bytes':
@ -153,11 +160,23 @@ class Message(AbstractObject):
# message from channel ??? # message from channel ???
# message media # message media
def get_translation(self, content=None, source=None, target='fr'):
    """
    Returns translated content

    The translation is looked up in r_cache first. On a miss it is requested
    from Language.LanguageTranslator and, when one is returned, cached for
    300 seconds.

    :param content: text to translate; loaded with get_content() when not set
    :param source: source language, passed through to the translator
    :param target: target language, also part of the cache key
    :return: the translation, or a falsy value when none is available
    """
    cache_key = f'translation:{target}:{self.get_global_id()}'
    # A cache hit is returned untouched: its expiry is left alone so the
    # 300 seconds set below are honoured instead of being reset on each read.
    translation = r_cache.get(cache_key)
    if translation:
        return translation
    if not content:
        content = self.get_content()
    translation = Language.LanguageTranslator().translate(content, source=source, target=target)
    if translation:
        r_cache.set(cache_key, translation)
        r_cache.expire(cache_key, 300)
    return translation
def _set_translation(self, translation): def _set_translation(self, translation):
""" """
@ -209,7 +228,7 @@ class Message(AbstractObject):
# return r_object.hget(f'meta:item::{self.id}', 'url') # return r_object.hget(f'meta:item::{self.id}', 'url')
# options: set of optional meta fields # options: set of optional meta fields
def get_meta(self, options=None, timestamp=None): def get_meta(self, options=None, timestamp=None, translation_target='en'):
""" """
:type options: set :type options: set
:type timestamp: float :type timestamp: float
@ -239,7 +258,7 @@ class Message(AbstractObject):
parent_type, _, parent_id = meta['parent'].split(':', 3) parent_type, _, parent_id = meta['parent'].split(':', 3)
if parent_type == 'message': if parent_type == 'message':
message = Message(parent_id) message = Message(parent_id)
meta['reply_to'] = message.get_meta(options=options) meta['reply_to'] = message.get_meta(options=options, translation_target=translation_target)
if 'investigations' in options: if 'investigations' in options:
meta['investigations'] = self.get_investigations() meta['investigations'] = self.get_investigations()
if 'link' in options: if 'link' in options:
@ -262,6 +281,8 @@ class Message(AbstractObject):
meta['files-names'] = self.get_files_names() meta['files-names'] = self.get_files_names()
if 'reactions' in options: if 'reactions' in options:
meta['reactions'] = self.get_reactions() meta['reactions'] = self.get_reactions()
if 'translation' in options and translation_target:
meta['translation'] = self.get_translation(content=meta.get('content'), target=translation_target)
# meta['encoding'] = None # meta['encoding'] = None
return meta return meta

View file

@ -179,12 +179,12 @@ class AbstractChatObject(AbstractSubtypeObject, ABC):
week_date = Date.get_current_week_day() week_date = Date.get_current_week_day()
return self.get_nb_message_by_week(week_date) return self.get_nb_message_by_week(week_date)
def get_message_meta(self, message, timestamp=None): # TODO handle file message def get_message_meta(self, message, timestamp=None, translation_target='en'): # TODO handle file message
message = Messages.Message(message[9:]) message = Messages.Message(message[9:])
meta = message.get_meta(options={'content', 'files-names', 'images', 'link', 'parent', 'parent_meta', 'reactions', 'thread', 'user-account'}, timestamp=timestamp) meta = message.get_meta(options={'content', 'files-names', 'images', 'link', 'parent', 'parent_meta', 'reactions', 'thread', 'translation', 'user-account'}, timestamp=timestamp, translation_target=translation_target)
return meta return meta
def get_messages(self, start=0, page=1, nb=500, unread=False): # threads ???? # TODO ADD last/first message timestamp + return page def get_messages(self, start=0, page=1, nb=500, unread=False, translation_target='en'): # threads ???? # TODO ADD last/first message timestamp + return page
# TODO return message meta # TODO return message meta
tags = {} tags = {}
messages = {} messages = {}
@ -195,7 +195,7 @@ class AbstractChatObject(AbstractSubtypeObject, ABC):
if date_day != curr_date: if date_day != curr_date:
messages[date_day] = [] messages[date_day] = []
curr_date = date_day curr_date = date_day
mess_dict = self.get_message_meta(message[0], timestamp=timestamp) mess_dict = self.get_message_meta(message[0], timestamp=timestamp, translation_target=translation_target)
messages[date_day].append(mess_dict) messages[date_day].append(mess_dict)
if mess_dict.get('tags'): if mess_dict.get('tags'):

View file

@ -262,6 +262,9 @@ default_har = True
default_screenshot = True default_screenshot = True
onion_proxy = onion.foundation onion_proxy = onion.foundation
[Translation]
libretranslate =
[IP] [IP]
# list of comma-separated CIDR that you wish to be alerted for. e.g: # list of comma-separated CIDR that you wish to be alerted for. e.g:
#networks = 192.168.34.0/24,10.0.0.0/8,192.168.33.0/24 #networks = 192.168.34.0/24,10.0.0.0/8,192.168.33.0/24

View file

@ -21,6 +21,7 @@ sys.path.append(os.environ['AIL_BIN'])
################################## ##################################
from lib import ail_core from lib import ail_core
from lib import chats_viewer from lib import chats_viewer
from lib import Language
from lib import Tag from lib import Tag
# ============ BLUEPRINT ============ # ============ BLUEPRINT ============
@ -80,12 +81,14 @@ def chats_explorer_instance():
def chats_explorer_chat(): def chats_explorer_chat():
chat_id = request.args.get('id') chat_id = request.args.get('id')
instance_uuid = request.args.get('uuid') instance_uuid = request.args.get('uuid')
chat = chats_viewer.api_get_chat(chat_id, instance_uuid) target = request.args.get('target')
chat = chats_viewer.api_get_chat(chat_id, instance_uuid, translation_target=target)
if chat[1] != 200: if chat[1] != 200:
return create_json_response(chat[0], chat[1]) return create_json_response(chat[0], chat[1])
else: else:
chat = chat[0] chat = chat[0]
return render_template('chat_viewer.html', chat=chat, bootstrap_label=bootstrap_label) languages = Language.get_translation_languages()
return render_template('chat_viewer.html', chat=chat, bootstrap_label=bootstrap_label, translation_languages=languages, translation_target=target)
@chats_explorer.route("chats/explorer/messages/stats/week", methods=['GET']) @chats_explorer.route("chats/explorer/messages/stats/week", methods=['GET'])
@login_required @login_required
@ -105,12 +108,14 @@ def chats_explorer_messages_stats_week():
def objects_subchannel_messages(): def objects_subchannel_messages():
subchannel_id = request.args.get('id') subchannel_id = request.args.get('id')
instance_uuid = request.args.get('uuid') instance_uuid = request.args.get('uuid')
subchannel = chats_viewer.api_get_subchannel(subchannel_id, instance_uuid) target = request.args.get('target')
subchannel = chats_viewer.api_get_subchannel(subchannel_id, instance_uuid, translation_target=target)
if subchannel[1] != 200: if subchannel[1] != 200:
return create_json_response(subchannel[0], subchannel[1]) return create_json_response(subchannel[0], subchannel[1])
else: else:
subchannel = subchannel[0] subchannel = subchannel[0]
return render_template('SubChannelMessages.html', subchannel=subchannel, bootstrap_label=bootstrap_label) languages = Language.get_translation_languages()
return render_template('SubChannelMessages.html', subchannel=subchannel, bootstrap_label=bootstrap_label, translation_languages=languages, translation_target=target)
@chats_explorer.route("/chats/explorer/thread", methods=['GET']) @chats_explorer.route("/chats/explorer/thread", methods=['GET'])
@login_required @login_required
@ -118,12 +123,14 @@ def objects_subchannel_messages():
def objects_thread_messages(): def objects_thread_messages():
thread_id = request.args.get('id') thread_id = request.args.get('id')
instance_uuid = request.args.get('uuid') instance_uuid = request.args.get('uuid')
thread = chats_viewer.api_get_thread(thread_id, instance_uuid) target = request.args.get('target')
thread = chats_viewer.api_get_thread(thread_id, instance_uuid, translation_target=target)
if thread[1] != 200: if thread[1] != 200:
return create_json_response(thread[0], thread[1]) return create_json_response(thread[0], thread[1])
else: else:
meta = thread[0] meta = thread[0]
return render_template('ThreadMessages.html', meta=meta, bootstrap_label=bootstrap_label) languages = Language.get_translation_languages()
return render_template('ThreadMessages.html', meta=meta, bootstrap_label=bootstrap_label, translation_languages=languages, translation_target=target)
@chats_explorer.route("/objects/message", methods=['GET']) @chats_explorer.route("/objects/message", methods=['GET'])
@login_required @login_required
@ -135,5 +142,6 @@ def objects_message():
return create_json_response(message[0], message[1]) return create_json_response(message[0], message[1])
else: else:
message = message[0] message = message[0]
languages = Language.get_translation_languages()
return render_template('ChatMessage.html', meta=message, bootstrap_label=bootstrap_label, return render_template('ChatMessage.html', meta=message, bootstrap_label=bootstrap_label,
modal_add_tags=Tag.get_modal_add_tags(message['id'], object_type='message')) modal_add_tags=Tag.get_modal_add_tags(message['id'], object_type='message'))

View file

@ -184,6 +184,9 @@
<span class="mt-3"> <span class="mt-3">
{% include 'objects/image/block_blur_img_slider.html' %} {% include 'objects/image/block_blur_img_slider.html' %}
</span> </span>
{% with translate_url=url_for('chats_explorer.objects_subchannel_messages', uuid=subchannel['subtype']), obj_id=subchannel['id'] %}
{% include 'chats_explorer/block_translation.html' %}
{% endwith %}
<div class="position-relative"> <div class="position-relative">
<div class="chat-messages p-2"> <div class="chat-messages p-2">

View file

@ -133,6 +133,9 @@
<span class="mt-3"> <span class="mt-3">
{% include 'objects/image/block_blur_img_slider.html' %} {% include 'objects/image/block_blur_img_slider.html' %}
</span> </span>
{% with translate_url=url_for('chats_explorer.objects_thread_messages', uuid=meta['subtype']), obj_id=meta['id'] %}
{% include 'chats_explorer/block_translation.html' %}
{% endwith %}
<div class="position-relative"> <div class="position-relative">
<div class="chat-messages p-2"> <div class="chat-messages p-2">

View file

@ -48,6 +48,10 @@
{% endif %} {% endif %}
</div> </div>
<pre class="my-0">{{ message['reply_to']['content'] }}</pre> <pre class="my-0">{{ message['reply_to']['content'] }}</pre>
{% if message['reply_to']['translation'] %}
<hr class="m-1">
<div class="my-0 text-secondary">{{ message['reply_to']['translation'] }}</div>
{% endif %}
{% for tag in message['reply_to']['tags'] %} {% for tag in message['reply_to']['tags'] %}
<span class="badge badge-{{ bootstrap_label[loop.index0 % 5] }}">{{ tag }}</span> <span class="badge badge-{{ bootstrap_label[loop.index0 % 5] }}">{{ tag }}</span>
{% endfor %} {% endfor %}
@ -71,6 +75,10 @@
{% endfor %} {% endfor %}
{% endif %} {% endif %}
<pre class="my-0">{{ message['content'] }}</pre> <pre class="my-0">{{ message['content'] }}</pre>
{% if message['translation'] %}
<hr class="m-1">
<pre class="my-0 text-secondary">{{ message['translation'] }}</pre>
{% endif %}
{% for reaction in message['reactions'] %} {% for reaction in message['reactions'] %}
<span class="border rounded px-1">{{ reaction }} {{ message['reactions'][reaction] }}</span> <span class="border rounded px-1">{{ reaction }} {{ message['reactions'][reaction] }}</span>
{% endfor %} {% endfor %}

View file

@ -0,0 +1,37 @@
<div class="card border-secondary">
<div class="card-body py-2">
<div class="row">
<div class="col-md-3 text-center px-0">
Translation
</div>
<div class="col-md-6 text-center pl-0 pt-1">
<select id="translation_selector_target" class="form-select" aria-label="Default select example">
{% if not translation_target or translation_target == "Don't Translate" %}
<option selected>Don't Translate</option>
{% else %}
<option selected value="{{ translation_target }}">{{ translation_target }}</option>
{% endif %}
{% for language in translation_languages %}
<option value="{{ language['iso'] }}">{{ language['language'] }}</option>
{% endfor %}
</select>
</div>
<div class="col-md-3 text-center">
<button class="btn btn-sm btn-primary" onclick="translate_selector();">
<i class="fas fa-language"></i>
<span class="label-icon">Translate</span>
</button>
</div>
</div>
</div>
</div>
<script>
// Reload the current view with the language picked in the translation selector.
function translate_selector(){
    var selector = document.getElementById("translation_selector_target");
    var selected = selector.options[selector.selectedIndex];
    var url = "{{ translate_url }}&id={{ obj_id }}";
    // The "Don't Translate" entry has no value attribute; in that case no
    // target is sent, so its label is never used as a language code.
    if (selected && selected.hasAttribute("value")) {
        // Encode the value so it cannot break the query string.
        url += "&target=" + encodeURIComponent(selected.value);
    }
    window.location.replace(url);
}
</script>

View file

@ -159,6 +159,10 @@
{% include 'objects/image/block_blur_img_slider.html' %} {% include 'objects/image/block_blur_img_slider.html' %}
</span> </span>
{% with translate_url=url_for('chats_explorer.chats_explorer_chat', uuid=chat['subtype']), obj_id=chat['id'] %}
{% include 'chats_explorer/block_translation.html' %}
{% endwith %}
<div class="position-relative"> <div class="position-relative">
<div class="chat-messages p-2"> <div class="chat-messages p-2">