From edf0c4c454b4b736605d477d3b081ac396a503e5 Mon Sep 17 00:00:00 2001 From: terrtia Date: Tue, 16 Jan 2024 14:38:29 +0100 Subject: [PATCH 01/15] chg: [message] UI translate message object --- bin/lib/Language.py | 2 +- bin/lib/chats_viewer.py | 4 ++-- var/www/blueprints/chats_explorer.py | 6 +++++- var/www/templates/chats_explorer/ChatMessage.html | 3 +++ 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/bin/lib/Language.py b/bin/lib/Language.py index 70d5b685..2b9bcc9b 100755 --- a/bin/lib/Language.py +++ b/bin/lib/Language.py @@ -388,7 +388,7 @@ class LanguageTranslator: return language def translate(self, content, source=None, target="en"): # TODO source target - if target not in LIST_LANGUAGES: + if target not in get_translation_languages(): return None translation = None if content: diff --git a/bin/lib/chats_viewer.py b/bin/lib/chats_viewer.py index 5b2e2da6..3c46db5f 100755 --- a/bin/lib/chats_viewer.py +++ b/bin/lib/chats_viewer.py @@ -394,11 +394,11 @@ def api_get_thread(thread_id, thread_instance_uuid, translation_target=None, nb= meta['messages'], meta['pagination'], meta['tags_messages'] = thread.get_messages(translation_target=translation_target, nb=nb, page=page) return meta, 200 -def api_get_message(message_id): +def api_get_message(message_id, translation_target=None): message = Messages.Message(message_id) if not message.exists(): return {"status": "error", "reason": "Unknown uuid"}, 404 - meta = message.get_meta({'chat', 'content', 'icon', 'images', 'link', 'parent', 'parent_meta', 'user-account'}) + meta = message.get_meta({'chat', 'content', 'files-names', 'icon', 'images', 'link', 'parent', 'parent_meta', 'reactions', 'thread', 'translation', 'user-account'}, translation_target=translation_target) # if meta['chat']: # print(meta['chat']) # # meta['chat'] = diff --git a/var/www/blueprints/chats_explorer.py b/var/www/blueprints/chats_explorer.py index c24370fb..eb817da3 100644 --- a/var/www/blueprints/chats_explorer.py +++ 
b/var/www/blueprints/chats_explorer.py @@ -163,13 +163,17 @@ def chats_explorer_chat_participants(): @login_read_only def objects_message(): message_id = request.args.get('id') - message = chats_viewer.api_get_message(message_id) + target = request.args.get('target') + if target == "Don't Translate": + target = None + message = chats_viewer.api_get_message(message_id, translation_target=target) if message[1] != 200: return create_json_response(message[0], message[1]) else: message = message[0] languages = Language.get_translation_languages() return render_template('ChatMessage.html', meta=message, bootstrap_label=bootstrap_label, + translation_languages=languages, translation_target=target, modal_add_tags=Tag.get_modal_add_tags(message['id'], object_type='message')) @chats_explorer.route("/objects/user-account", methods=['GET']) diff --git a/var/www/templates/chats_explorer/ChatMessage.html b/var/www/templates/chats_explorer/ChatMessage.html index 5a129e66..1e314541 100644 --- a/var/www/templates/chats_explorer/ChatMessage.html +++ b/var/www/templates/chats_explorer/ChatMessage.html @@ -135,6 +135,9 @@ {% include 'objects/image/block_blur_img_slider.html' %} + {% with translate_url=url_for('chats_explorer.objects_message', id=meta['id']), obj_id=meta['id'] %} + {% include 'chats_explorer/block_translation.html' %} + {% endwith %}
From 9c6619aed365765388435cfe3da003c32fcc277f Mon Sep 17 00:00:00 2001 From: terrtia Date: Fri, 26 Jan 2024 15:31:32 +0100 Subject: [PATCH 02/15] fix: [crawler] log UNKNOWN timeout --- bin/crawlers/Crawler.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bin/crawlers/Crawler.py b/bin/crawlers/Crawler.py index 0d6cb67c..c1ba0e0c 100755 --- a/bin/crawlers/Crawler.py +++ b/bin/crawlers/Crawler.py @@ -146,6 +146,7 @@ class Crawler(AbstractModule): task = capture.get_task() task.reset() capture.delete() + self.logger.warning(f'capture UNKNOWN Timeout, {task.uuid} Send back in queue') else: capture.update(status) else: From 699453f079a8e54b531ed8ba2e72a1c7377a20bd Mon Sep 17 00:00:00 2001 From: terrtia Date: Fri, 26 Jan 2024 15:42:46 +0100 Subject: [PATCH 03/15] chg: [relationships] add relationship engine + WIP relationships between forwarded messages/chats --- bin/importer/FeederImporter.py | 6 +- bin/importer/feeders/abstract_chats_feeder.py | 54 +- bin/lib/objects/Chats.py | 3 + bin/lib/objects/Messages.py | 33 +- bin/lib/objects/abstract_object.py | 17 + bin/lib/objects/ail_objects.py | 19 + bin/lib/relationships_engine.py | 111 +++ configs/6383.conf | 1 + configs/core.cfg.sample | 5 + var/www/blueprints/correlation.py | 63 +- .../correlation/metadata_card_chat.html | 78 ++ .../correlation/show_correlation.html | 9 + .../correlation/show_relationship.html | 719 ++++++++++++++++++ 13 files changed, 1097 insertions(+), 21 deletions(-) create mode 100755 bin/lib/relationships_engine.py create mode 100644 var/www/templates/correlation/metadata_card_chat.html create mode 100644 var/www/templates/correlation/show_relationship.html diff --git a/bin/importer/FeederImporter.py b/bin/importer/FeederImporter.py index 881cb8a9..34ccca8c 100755 --- a/bin/importer/FeederImporter.py +++ b/bin/importer/FeederImporter.py @@ -100,14 +100,15 @@ class FeederImporter(AbstractImporter): else: objs = set() - objs.add(data_obj) + if data_obj: + objs.add(data_obj) for obj in objs: if 
obj.type == 'item': # object save on disk as file (Items) gzip64_content = feeder.get_gzip64_content() return obj, f'{feeder_name} {gzip64_content}' else: # Messages save on DB - if obj.exists(): + if obj.exists() and obj.type != 'chat': return obj, f'{feeder_name}' @@ -136,4 +137,5 @@ class FeederModuleImporter(AbstractModule): # Launch Importer if __name__ == '__main__': module = FeederModuleImporter() + # module.debug = True module.run() diff --git a/bin/importer/feeders/abstract_chats_feeder.py b/bin/importer/feeders/abstract_chats_feeder.py index 1dadb970..6b8f1041 100755 --- a/bin/importer/feeders/abstract_chats_feeder.py +++ b/bin/importer/feeders/abstract_chats_feeder.py @@ -93,7 +93,10 @@ class AbstractChatFeeder(DefaultFeeder, ABC): return self.json_data['meta'].get('reactions', []) def get_message_timestamp(self): - return self.json_data['meta']['date']['timestamp'] # TODO CREATE DEFAULT TIMESTAMP + if not self.json_data['meta'].get('date'): + return None + else: + return self.json_data['meta']['date']['timestamp'] # if self.json_data['meta'].get('date'): # date = datetime.datetime.fromtimestamp( self.json_data['meta']['date']['timestamp']) # date = date.strftime('%Y/%m/%d') @@ -115,17 +118,29 @@ class AbstractChatFeeder(DefaultFeeder, ABC): def get_message_reply_id(self): return self.json_data['meta'].get('reply_to', {}).get('message_id') + def get_message_forward(self): + return self.json_data['meta'].get('forward') + def get_message_content(self): decoded = base64.standard_b64decode(self.json_data['data']) return _gunzip_bytes_obj(decoded) - def get_obj(self): # TODO handle others objects -> images, pdf, ... 
+ def get_obj(self): #### TIMESTAMP #### timestamp = self.get_message_timestamp() #### Create Object ID #### chat_id = self.get_chat_id() - message_id = self.get_message_id() + try: + message_id = self.get_message_id() + except KeyError: + if chat_id: + self.obj = Chat(chat_id, self.get_chat_instance_uuid()) + return self.obj + else: + self.obj = None + return None + thread_id = self.get_thread_id() # channel id # thread id @@ -236,7 +251,10 @@ class AbstractChatFeeder(DefaultFeeder, ABC): # # ADD NEW MESSAGE REF (used by discord) def process_sender(self, new_objs, obj, date, timestamp): - meta = self.json_data['meta']['sender'] + meta = self.json_data['meta'].get('sender') + if not meta: + return None + user_account = UsersAccount.UserAccount(meta['id'], self.get_chat_instance_uuid()) # date stat + correlation @@ -286,8 +304,6 @@ class AbstractChatFeeder(DefaultFeeder, ABC): # REPLY reply_id = self.get_message_reply_id() - # TODO Translation - print(self.obj.type) # TODO FILES + FILES REF @@ -295,7 +311,7 @@ class AbstractChatFeeder(DefaultFeeder, ABC): # get object by meta object type if self.obj.type == 'message': # Content - obj = Messages.create(self.obj.id, self.get_message_content()) # TODO translation + obj = Messages.create(self.obj.id, self.get_message_content()) # FILENAME media_name = self.get_media_name() @@ -305,7 +321,8 @@ class AbstractChatFeeder(DefaultFeeder, ABC): for reaction in self.get_reactions(): obj.add_reaction(reaction['reaction'], int(reaction['count'])) - + elif self.obj.type == 'chat': + pass else: chat_id = self.get_chat_id() thread_id = self.get_thread_id() @@ -341,12 +358,29 @@ class AbstractChatFeeder(DefaultFeeder, ABC): # CHAT chat_objs = self.process_chat(new_objs, obj, date, timestamp, reply_id=reply_id) + # Message forward + # if self.get_json_meta().get('forward'): + # forward_from = self.get_message_forward() + # print('-----------------------------------------------------------') + # print(forward_from) + # if forward_from: 
+ # forward_from_type = forward_from['from']['type'] + # if forward_from_type == 'channel' or forward_from_type == 'chat': + # chat_forward_id = forward_from['from']['id'] + # chat_forward = Chat(chat_forward_id, self.get_chat_instance_uuid()) + # if chat_forward.exists(): + # for chat_obj in chat_objs: + # if chat_obj.type == 'chat': + # chat_forward.add_relationship(chat_obj.get_global_id(), 'forward') + # # chat_forward.add_relationship(obj.get_global_id(), 'forward') + # SENDER # TODO HANDLE NULL SENDER user_account = self.process_sender(new_objs, obj, date, timestamp) + if user_account: # UserAccount---ChatObjects - for obj_chat in chat_objs: - user_account.add_correlation(obj_chat.type, obj_chat.get_subtype(r_str=True), obj_chat.id) + for obj_chat in chat_objs: + user_account.add_correlation(obj_chat.type, obj_chat.get_subtype(r_str=True), obj_chat.id) # if chat: # TODO Chat---Username correlation ??? # # Chat---Username => need to handle members and participants diff --git a/bin/lib/objects/Chats.py b/bin/lib/objects/Chats.py index 040c3ea5..e0dffd9d 100755 --- a/bin/lib/objects/Chats.py +++ b/bin/lib/objects/Chats.py @@ -76,6 +76,7 @@ class Chat(AbstractChatObject): meta['tags'] = self.get_tags(r_list=True) if 'icon' in options: meta['icon'] = self.get_icon() + meta['img'] = meta['icon'] if 'info' in options: meta['info'] = self.get_info() if 'participants' in options: @@ -93,6 +94,8 @@ class Chat(AbstractChatObject): if 'threads' in options: meta['threads'] = self.get_threads() print(meta['threads']) + if 'tags_safe' in options: + meta['tags_safe'] = self.is_tags_safe(meta['tags']) return meta def get_misp_object(self): diff --git a/bin/lib/objects/Messages.py b/bin/lib/objects/Messages.py index 2655c2ee..659047be 100755 --- a/bin/lib/objects/Messages.py +++ b/bin/lib/objects/Messages.py @@ -131,7 +131,7 @@ class Message(AbstractObject): if meta: _, user_account_subtype, user_account_id = user_account.split(':', 3) user_account = 
UsersAccount.UserAccount(user_account_id, user_account_subtype).get_meta(options={'icon', 'username', 'username_meta'}) - return user_account + return user_account def get_files_names(self): names = [] @@ -148,15 +148,32 @@ class Message(AbstractObject): def add_reaction(self, reactions, nb_reaction): r_object.hset(f'meta:reactions:{self.type}::{self.id}', reactions, nb_reaction) - # Update value on import - # reply to -> parent ? - # reply/comment - > children ? + # Interactions between users -> use replies # nb views - # reactions - # nb fowards - # room ??? - # message from channel ??? + # MENTIONS -> Messages + Chats + # # relationship -> mention - Chat -> Chat + # - Message -> Chat + # - Message -> Message ??? fetch mentioned messages + # FORWARDS + # TODO Create forward CHAT -> message + # message (is forwarded) -> message (is forwarded from) ??? + # # TODO get source message timestamp + # + # # is forwarded + # # forwarded from -> check if relationship + # # nb forwarded -> scard relationship + # + # Messages -> CHATS -> NB forwarded + # CHAT -> NB forwarded by chats -> NB messages -> parse full set ???? + # + # + # + # + # + # + # show users chats # message media + # flag is deleted -> event or missing from feeder pass ??? 
def get_translation(self, content=None, source=None, target='fr'): """ diff --git a/bin/lib/objects/abstract_object.py b/bin/lib/objects/abstract_object.py index d651761f..86eacc44 100755 --- a/bin/lib/objects/abstract_object.py +++ b/bin/lib/objects/abstract_object.py @@ -24,6 +24,7 @@ from lib.ConfigLoader import ConfigLoader from lib import Duplicate from lib.correlations_engine import get_nb_correlations, get_correlations, add_obj_correlation, delete_obj_correlation, delete_obj_correlations, exists_obj_correlation, is_obj_correlated, get_nb_correlation_by_correl_type, get_obj_inter_correlation from lib.Investigations import is_object_investigated, get_obj_investigations, delete_obj_investigations +from lib.relationships_engine import get_obj_nb_relationships, add_obj_relationship from lib.Tracker import is_obj_tracked, get_obj_trackers, delete_obj_trackers logging.config.dictConfig(ail_logger.get_config(name='ail')) @@ -284,6 +285,22 @@ class AbstractObject(ABC): ## -Correlation- ## + ## Relationship ## + + def get_nb_relationships(self, filter=[]): + return get_obj_nb_relationships(self.get_global_id()) + + def add_relationship(self, obj2_global_id, relationship, source=True): + # is source + if source: + print(self.get_global_id(), obj2_global_id, relationship) + add_obj_relationship(self.get_global_id(), obj2_global_id, relationship) + # is target + else: + add_obj_relationship(obj2_global_id, self.get_global_id(), relationship) + + ## -Relationship- ## + ## Parent ## def is_parent(self): diff --git a/bin/lib/objects/ail_objects.py b/bin/lib/objects/ail_objects.py index 1b01c2c1..15717c78 100755 --- a/bin/lib/objects/ail_objects.py +++ b/bin/lib/objects/ail_objects.py @@ -10,6 +10,7 @@ sys.path.append(os.environ['AIL_BIN']) from lib.ConfigLoader import ConfigLoader from lib.ail_core import get_all_objects, get_object_all_subtypes from lib import correlations_engine +from lib import relationships_engine from lib import btc_ail from lib import Tag @@ -468,6 
+469,24 @@ def get_correlations_graph_node(obj_type, subtype, obj_id, filter_types=[], max_ # --- CORRELATION --- # +def get_obj_nb_relationships(obj_type, subtype, obj_id, filter_types=[]): + obj = get_object(obj_type, subtype, obj_id) + return obj.get_nb_relationships(filter=filter_types) + +def get_relationships_graph_node(obj_type, subtype, obj_id, filter_types=[], max_nodes=300, level=1, + objs_hidden=set(), + flask_context=False): + obj_global_id = get_obj_global_id(obj_type, subtype, obj_id) + nodes, links, meta = relationships_engine.get_relationship_graph(obj_global_id, + filter_types=filter_types, + max_nodes=max_nodes, level=level, + objs_hidden=objs_hidden) + # print(meta) + meta['objs'] = list(meta['objs']) + return {"nodes": create_correlation_graph_nodes(nodes, obj_global_id, flask_context=flask_context), + "links": links, + "meta": meta} + # if __name__ == '__main__': # r = get_objects([{'lvl': 1, 'type': 'item', 'subtype': '', 'id': 'crawled/2020/09/14/circl.lu0f4976a4-dda4-4189-ba11-6618c4a8c951'}]) diff --git a/bin/lib/relationships_engine.py b/bin/lib/relationships_engine.py new file mode 100755 index 00000000..6791214a --- /dev/null +++ b/bin/lib/relationships_engine.py @@ -0,0 +1,111 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import sys + +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from lib.ConfigLoader import ConfigLoader + +config_loader = ConfigLoader() +r_rel = config_loader.get_db_conn("Kvrocks_Relationships") +config_loader = None + + +RELATIONSHIPS = { + "forward", + "mention" +} +def get_relationships(): + return RELATIONSHIPS + + +def get_obj_relationships_by_type(obj_global_id, relationship): + return r_rel.smembers(f'rel:{relationship}:{obj_global_id}') + +def get_obj_nb_relationships_by_type(obj_global_id, relationship): + return r_rel.scard(f'rel:{relationship}:{obj_global_id}') + +def 
get_obj_relationships(obj_global_id): + relationships = [] + for relationship in get_relationships(): + for rel in get_obj_relationships_by_type(obj_global_id, relationship): + meta = {'relationship': relationship} + direction, obj_id = rel.split(':', 1) + if direction == 'i': + meta['source'] = obj_id + meta['target'] = obj_global_id + else: + meta['target'] = obj_id + meta['source'] = obj_global_id + + if not obj_id.startswith('chat'): + continue + + meta['id'] = obj_id + # meta['direction'] = direction + relationships.append(meta) + return relationships + +def get_obj_nb_relationships(obj_global_id): + nb = {} + for relationship in get_relationships(): + nb[relationship] = get_obj_nb_relationships_by_type(obj_global_id, relationship) + return nb + + +# TODO Filter by obj type ??? +def add_obj_relationship(source, target, relationship): + r_rel.sadd(f'rel:{relationship}:{source}', f'o:{target}') + r_rel.sadd(f'rel:{relationship}:{target}', f'i:{source}') + # r_rel.sadd(f'rels:{source}', relationship) + # r_rel.sadd(f'rels:{target}', relationship) + + +def get_relationship_graph(obj_global_id, filter_types=[], max_nodes=300, level=1, objs_hidden=set()): + links = [] + nodes = set() + meta = {'complete': True, 'objs': set()} + done = set() + done_link = set() + + _get_relationship_graph(obj_global_id, links, nodes, meta, level, max_nodes, filter_types=filter_types, objs_hidden=objs_hidden, done=done, done_link=done_link) + return nodes, links, meta + +def _get_relationship_graph(obj_global_id, links, nodes, meta, level, max_nodes, filter_types=[], objs_hidden=set(), done=set(), done_link=set()): + meta['objs'].add(obj_global_id) + nodes.add(obj_global_id) + + for rel in get_obj_relationships(obj_global_id): + meta['objs'].add(rel['id']) + + if rel['id'] in done: + continue + + if len(nodes) > max_nodes != 0: + meta['complete'] = False + break + + nodes.add(rel['id']) + + str_link = f"{rel['source']}{rel['target']}{rel['relationship']}" + if str_link not in 
done_link: + links.append({"source": rel['source'], "target": rel['target'], "relationship": rel['relationship']}) + done_link.add(str_link) + + if level > 0: + next_level = level - 1 + + _get_relationship_graph(rel['id'], links, nodes, meta, next_level, max_nodes, filter_types=filter_types, objs_hidden=objs_hidden, done=done, done_link=done_link) + + # done.add(rel['id']) + + +if __name__ == '__main__': + source = '' + target = '' + add_obj_relationship(source, target, 'forward') + # print(get_obj_relationships(source)) diff --git a/configs/6383.conf b/configs/6383.conf index a06d4e69..dfc1f205 100644 --- a/configs/6383.conf +++ b/configs/6383.conf @@ -664,6 +664,7 @@ namespace.db ail_datas namespace.dup ail_dups namespace.obj ail_objs namespace.tl ail_tls +namespace.rel ail_rels namespace.stat ail_stats namespace.tag ail_tags namespace.track ail_trackers diff --git a/configs/core.cfg.sample b/configs/core.cfg.sample index 0e0b900f..f93ec08a 100644 --- a/configs/core.cfg.sample +++ b/configs/core.cfg.sample @@ -196,6 +196,11 @@ host = localhost port = 6383 password = ail_objs +[Kvrocks_Relationships] +host = localhost +port = 6383 +password = ail_rels + [Kvrocks_Timeline] host = localhost port = 6383 diff --git a/var/www/blueprints/correlation.py b/var/www/blueprints/correlation.py index 4cf9ce53..2b81d96f 100644 --- a/var/www/blueprints/correlation.py +++ b/var/www/blueprints/correlation.py @@ -203,7 +203,7 @@ def get_description(): return Response(json.dumps({"status": "error", "reason": "404 Not Found"}, indent=2, sort_keys=True), mimetype='application/json'), 404 # object exist else: - res = ail_objects.get_object_meta(obj_type, subtype, obj_id, options={'tags', 'tags_safe'}, + res = ail_objects.get_object_meta(obj_type, subtype, obj_id, options={'icon', 'tags', 'tags_safe'}, flask_context=True) if 'tags' in res: res['tags'] = list(res['tags']) @@ -292,3 +292,64 @@ def correlation_tags_add(): max_nodes=nb_max, hidden=hidden, hidden_str=",".join(hidden), 
filter=",".join(filter_types))) + +##################################################################################### + +@correlation.route('/relationships/graph_node_json') +@login_required +@login_read_only +def relationships_graph_node_json(): + obj_id = request.args.get('id') + subtype = request.args.get('subtype') + obj_type = request.args.get('type') + max_nodes = sanitise_nb_max_nodes(request.args.get('max_nodes')) + level = sanitise_level(request.args.get('level')) + + json_graph = ail_objects.get_relationships_graph_node(obj_type, subtype, obj_id, max_nodes=max_nodes, level=level, flask_context=True) + return jsonify(json_graph) + + +@correlation.route('/relationship/show', methods=['GET', 'POST']) +@login_required +@login_read_only +def show_relationship(): + if request.method == 'POST': + object_type = request.form.get('obj_type') + subtype = request.form.get('subtype') + obj_id = request.form.get('obj_id') + max_nodes = request.form.get('max_nb_nodes_in') + level = sanitise_level(request.form.get('level')) + + # redirect to keep history and bookmark + return redirect(url_for('correlation.show_relationship', type=object_type, subtype=subtype, id=obj_id, + max_nodes=max_nodes, level=level)) + + # request.method == 'GET' + else: + obj_type = request.args.get('type') + subtype = request.args.get('subtype', '') + obj_id = request.args.get('id') + max_nodes = sanitise_nb_max_nodes(request.args.get('max_nodes')) + level = sanitise_level(request.args.get('level')) + + # check if obj_id exist + if not ail_objects.exists_obj(obj_type, subtype, obj_id): + return abort(404) + # object exist + else: # TODO remove old dict key + dict_object = {"type": obj_type, + "id": obj_id, + "object_type": obj_type, + "max_nodes": max_nodes, "level": level, + "correlation_id": obj_id, + "metadata": ail_objects.get_object_meta(obj_type, subtype, obj_id, options={'tags', 'info', 'icon', 'username'}, flask_context=True), + "nb_relation": 
ail_objects.get_obj_nb_relationships(obj_type, subtype, obj_id) + } + if subtype: + dict_object["subtype"] = subtype + dict_object["metadata"]['type_id'] = subtype + else: + dict_object["subtype"] = '' + dict_object["metadata_card"] = ail_objects.get_object_card_meta(obj_type, subtype, obj_id) + return render_template("show_relationship.html", dict_object=dict_object, bootstrap_label=bootstrap_label, + tags_selector_data=Tag.get_tags_selector_data()) diff --git a/var/www/templates/correlation/metadata_card_chat.html b/var/www/templates/correlation/metadata_card_chat.html new file mode 100644 index 00000000..4e672e1a --- /dev/null +++ b/var/www/templates/correlation/metadata_card_chat.html @@ -0,0 +1,78 @@ + + + +{#{% with modal_add_tags=dict_object['metadata_card']['add_tags_modal']%}#} +{# {% include 'modals/add_tags.html' %}#} +{#{% endwith %}#} + +{% include 'modals/edit_tag.html' %} + +
+
+

{{ dict_object["correlation_id"] }}

+ {{ dict_object }} +
{{ dict_object["correlation_id"] }}
+
    +
  • +
    +
    + + + + + + + + + + + + + + + + + +
    Object typeFirst seenLast seenNb seen
    + + + + {{ dict_object["metadata"]["icon"]["icon"] }} + + + {{ dict_object["object_type"] }} + {{ dict_object["metadata"]['first_seen'] }}{{ dict_object["metadata"]['last_seen'] }}{{ dict_object["metadata"]['nb_seen'] }}
    +
    +
    +
    +
    +
    +
  • + +
  • +
    +
    + Tags: + {% for tag in dict_object["metadata"]['tags'] %} + + {% endfor %} + +
    +
  • +
+ + {# the investigation modal must register this chat object (type 'chat' + its instance subtype) #}{% with obj_type='chat', obj_id=dict_object['correlation_id'], obj_subtype=dict_object['metadata']['type_id'] %} + {% include 'modals/investigations_register_obj.html' %} + {% endwith %} + + +
+
+ diff --git a/var/www/templates/correlation/show_correlation.html b/var/www/templates/correlation/show_correlation.html index cda58f1c..4005e13c 100644 --- a/var/www/templates/correlation/show_correlation.html +++ b/var/www/templates/correlation/show_correlation.html @@ -541,6 +541,15 @@ d3.json(url) .on("drag", dragged) .on("end", drag_end)); + /* + node.append("image") + .attr("xlink:href", "https://circl.lu/assets/images/circl-logo.png") + .attr("height", 20) + .attr("width", 20) + .attr("x", -10) + .attr("y", -10); + + */ node.append("circle") .attr("r", function(d) { diff --git a/var/www/templates/correlation/show_relationship.html b/var/www/templates/correlation/show_relationship.html new file mode 100644 index 00000000..bff41724 --- /dev/null +++ b/var/www/templates/correlation/show_relationship.html @@ -0,0 +1,719 @@ + + + + + + + AIL - framework + + + + + + + + + + + + + + + + + + {% include 'nav_bar.html' %} + +
+
+ + {% include 'sidebars/sidebar_objects.html' %} + +
+ + {% if dict_object["object_type"] == "pgp" %} + {% include 'correlation/metadata_card_pgp.html' %} + {% elif dict_object["object_type"] == "cryptocurrency" %} + {% include 'correlation/metadata_card_cryptocurrency.html' %} + {% elif dict_object["object_type"] == "username" %} + {% include 'correlation/metadata_card_username.html' %} + {% elif dict_object["object_type"] == "decoded" %} + {% include 'correlation/metadata_card_decoded.html' %} + {% elif dict_object["object_type"] == "chat" %} + {% include 'correlation/metadata_card_chat.html' %} + {% elif dict_object["object_type"] == "cve" %} + {% include 'correlation/metadata_card_cve.html' %} + {% elif dict_object["object_type"] == "domain" %} + {% include 'correlation/metadata_card_domain.html' %} + {% elif dict_object["object_type"] == "screenshot" %} + {% include 'correlation/metadata_card_screenshot.html' %} + {% elif dict_object["object_type"] == "title" %} + {% include 'correlation/metadata_card_title.html' %} + {% elif dict_object["object_type"] == "cookie-name" %} + {% include 'correlation/metadata_card_cookie_name.html' %} + {% elif dict_object["object_type"] == "etag" %} + {% include 'correlation/metadata_card_etag.html' %} + {% elif dict_object["object_type"] == "hhhash" %} + {% include 'correlation/metadata_card_hhhash.html' %} + {% elif dict_object["object_type"] == "item" %} + {% include 'correlation/metadata_card_item.html' %} + {% endif %} + +
+
+ +
+
+ Graph + +
+
+
+
+ +
+
+ +
+
+ +
+
+
+
+
+{# #} +{# {% if dict_object["object_type"] != "" %}#} +{# {% with obj_type=dict_object["object_type"], obj_id=dict_object["correlation_id"], obj_subtype=dict_object["metadata"]["type_id"],obj_lvl=1%}#} +{# {% include 'import_export/block_add_user_object_to_export.html' %}#} +{# {% endwith %}#} +{# {% endif %}#} +{# #} + + + + + + +
+  Graph Incomplete, Max Nodes Reached. +
+
+
+
+
+ Loading... +
+ Loading... +
+
+
+
+
+ +{#

Press H on an object / node to hide it.

#} +{# {% if dict_object["hidden"] %}#} +{#
Hidden objects:
#} +{# {% for obj_hidden in dict_object["hidden"] %}#} +{# {{ obj_hidden }}
#} +{# {% endfor %}#} +{# {% endif %}#} + +
+ +
+ +
+
+ +
    +
  • Relationship
  • +
    + + + + + +{#
  • #} +{#
    #} +{# #} +{# #} +{#
    #} +{#
    #} +{# #} +{# #} +{#
    #} +{##} +{#
  • #} +
  • + +
    + + +
    + + +
  • +
  • + +
    + + +
    + +
    + +
    + + +
  • +
    +
+ +
    +
  • +
  • +

    Double click on a node to open this object

    + + + + + + Current Object
    +

    +
  • +
+
    +
  • Direct Relationships
  • +
  • + {% for relationship in dict_object['nb_relation'] %} +
    +
    + {{ relationship }} +
    +
    + {{ dict_object['nb_relation'][relationship] }} +
    +
    + {% endfor %} +
  • +
+ +
+
+
+
+ +
+
+ + {% include 'correlation/legend_graph_correlation.html' %} + +
+
+ + + +
+
+

Tags All Objects

+
+
+
+ + + + + + + + {% include 'tags/block_tags_selector.html' %} + +
+
+
+ +
+
+
+ + + + + + + + + + From 74e41017a1afb0157eeadcbcc38f1377c06c9191 Mon Sep 17 00:00:00 2001 From: terrtia Date: Fri, 26 Jan 2024 15:55:19 +0100 Subject: [PATCH 04/15] chg: [v5.3] add v5.3 update --- update/v5.3/Update.py | 24 ++++++++++++++++++++++++ update/v5.3/Update.sh | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100755 update/v5.3/Update.py create mode 100755 update/v5.3/Update.sh diff --git a/update/v5.3/Update.py b/update/v5.3/Update.py new file mode 100755 index 00000000..20eed0af --- /dev/null +++ b/update/v5.3/Update.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import sys + +sys.path.append(os.environ['AIL_HOME']) +################################## +# Import Project packages +################################## +from update.bin.ail_updater import AIL_Updater +from lib import ail_updates + +class Updater(AIL_Updater): + """default Updater.""" + + def __init__(self, version): + super(Updater, self).__init__(version) + + +if __name__ == '__main__': + updater = Updater('v5.3') + updater.run_update() + diff --git a/update/v5.3/Update.sh b/update/v5.3/Update.sh new file mode 100755 index 00000000..1e040200 --- /dev/null +++ b/update/v5.3/Update.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment."
&& exit 1; + +export PATH=$AIL_HOME:$PATH +export PATH=$AIL_REDIS:$PATH +export PATH=$AIL_BIN:$PATH +export PATH=$AIL_FLASK:$PATH + +GREEN="\\033[1;32m" +DEFAULT="\\033[0;39m" + +echo -e $GREEN"Shutting down AIL ..."$DEFAULT +bash ${AIL_BIN}/LAUNCH.sh -ks +wait + +# SUBMODULES # +git submodule update + +echo "" +echo -e $GREEN"Updating python packages ..."$DEFAULT +echo "" +pip install -U gcld3 +pip install -U libretranslatepy +pip install -U xxhash +pip install -U DomainClassifier + +echo "" +echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT +echo "" +python ${AIL_HOME}/update/v5.3/Update.py +wait +echo "" +echo "" + +exit 0 From 61bccecdab60adb7e20c574bd90ac3c10efd4f5a Mon Sep 17 00:00:00 2001 From: terrtia Date: Fri, 26 Jan 2024 16:06:42 +0100 Subject: [PATCH 05/15] chg: [chats] show NB messages by chat --- bin/lib/chats_viewer.py | 2 +- bin/lib/objects/Chats.py | 2 ++ var/www/blueprints/chats_explorer.py | 2 +- var/www/templates/chats_explorer/chat_instance.html | 4 +++- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/bin/lib/chats_viewer.py b/bin/lib/chats_viewer.py index 3c46db5f..69b913f4 100755 --- a/bin/lib/chats_viewer.py +++ b/bin/lib/chats_viewer.py @@ -178,7 +178,7 @@ class ChatServiceInstance: if 'chats' in options: meta['chats'] = [] for chat_id in self.get_chats(): - meta['chats'].append(Chats.Chat(chat_id, self.uuid).get_meta({'created_at', 'icon', 'nb_subchannels'})) + meta['chats'].append(Chats.Chat(chat_id, self.uuid).get_meta({'created_at', 'icon', 'nb_subchannels', 'nb_messages'})) return meta def get_nb_chats(self): diff --git a/bin/lib/objects/Chats.py b/bin/lib/objects/Chats.py index e0dffd9d..746f0dea 100755 --- a/bin/lib/objects/Chats.py +++ b/bin/lib/objects/Chats.py @@ -83,6 +83,8 @@ class Chat(AbstractChatObject): meta['participants'] = self.get_participants() if 'nb_participants' in options: meta['nb_participants'] = self.get_nb_participants() + if 'nb_messages' in options: + meta['nb_messages'] = self.get_nb_messages() 
if 'username' in options: meta['username'] = self.get_username() if 'subchannels' in options: diff --git a/var/www/blueprints/chats_explorer.py b/var/www/blueprints/chats_explorer.py index eb817da3..38d6c413 100644 --- a/var/www/blueprints/chats_explorer.py +++ b/var/www/blueprints/chats_explorer.py @@ -63,7 +63,7 @@ def chats_explorer_networks(): return render_template('chats_networks.html', protocol=protocol, networks=networks) -@chats_explorer.route("chats/explorer/instance", methods=['GET']) +@chats_explorer.route("chats/explorer/instances", methods=['GET']) @login_required @login_read_only def chats_explorer_instance(): diff --git a/var/www/templates/chats_explorer/chat_instance.html b/var/www/templates/chats_explorer/chat_instance.html index 557ade0a..0f5e25ed 100644 --- a/var/www/templates/chats_explorer/chat_instance.html +++ b/var/www/templates/chats_explorer/chat_instance.html @@ -70,7 +70,8 @@ Created at First Seen Last Seen - NB SubChannels + SubChannels + Messages @@ -94,6 +95,7 @@ {% endif %} {{ chat['nb_subchannels'] }} + {{ chat['nb_messages'] }} {% endfor %} From 6a24c58c8be15ed8b27d0a86f05fe2fd8b41e3a1 Mon Sep 17 00:00:00 2001 From: terrtia Date: Mon, 29 Jan 2024 10:30:53 +0100 Subject: [PATCH 06/15] fix: [heatmap] fix tooltip position --- var/www/static/js/d3/heatmap_week_hour.js | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/var/www/static/js/d3/heatmap_week_hour.js b/var/www/static/js/d3/heatmap_week_hour.js index f41e8985..4696a052 100644 --- a/var/www/static/js/d3/heatmap_week_hour.js +++ b/var/www/static/js/d3/heatmap_week_hour.js @@ -50,7 +50,12 @@ const create_heatmap_week_hour = (container_id, data, options) => { .style("stroke", "black") //.style("stroke-opacity", 1) + var xPosition = d3.mouse(this)[0] + margin.left; + var yPosition = d3.mouse(this)[1] + margin.top + window.scrollY + 100; + tooltip.html(d.date + " " + d.hour + "-" + (d.hour + 1) + "h: " + d.count + " messages") + .style("left", xPosition + "px") + .style("top", 
yPosition + "px"); } const mouseleave = function(d) { tooltip.style("opacity", 0) From 6363a4f1cfb18e036c1773c3bde4f8759aa194a9 Mon Sep 17 00:00:00 2001 From: terrtia Date: Mon, 29 Jan 2024 10:52:18 +0100 Subject: [PATCH 07/15] fix: [chat view] fix created_at + filter --- bin/lib/chats_viewer.py | 6 +----- bin/lib/objects/ChatSubChannels.py | 5 +++-- bin/lib/objects/Chats.py | 1 - .../chats_explorer/block_obj_time_search.html | 10 +++++----- 4 files changed, 9 insertions(+), 13 deletions(-) diff --git a/bin/lib/chats_viewer.py b/bin/lib/chats_viewer.py index 69b913f4..a8346bac 100755 --- a/bin/lib/chats_viewer.py +++ b/bin/lib/chats_viewer.py @@ -280,7 +280,6 @@ def create_chat_service_instance(protocol, network=None, address=None): ####################################################################################### def get_obj_chat(chat_type, chat_subtype, chat_id): - print(chat_type, chat_subtype, chat_id) if chat_type == 'chat': return Chats.Chat(chat_id, chat_subtype) elif chat_type == 'chat-subchannel': @@ -305,7 +304,7 @@ def get_subchannels_meta_from_global_id(subchannels): for sub in subchannels: _, instance_uuid, sub_id = sub.split(':', 2) subchannel = ChatSubChannels.ChatSubChannel(sub_id, instance_uuid) - meta.append(subchannel.get_meta({'nb_messages'})) + meta.append(subchannel.get_meta({'nb_messages', 'created_at', 'icon'})) return meta def get_chat_meta_from_global_id(chat_global_id): @@ -399,9 +398,6 @@ def api_get_message(message_id, translation_target=None): if not message.exists(): return {"status": "error", "reason": "Unknown uuid"}, 404 meta = message.get_meta({'chat', 'content', 'files-names', 'icon', 'images', 'link', 'parent', 'parent_meta', 'reactions', 'thread', 'translation', 'user-account'}, translation_target=translation_target) - # if meta['chat']: - # print(meta['chat']) - # # meta['chat'] = return meta, 200 def api_get_user_account(user_id, instance_uuid): diff --git a/bin/lib/objects/ChatSubChannels.py 
b/bin/lib/objects/ChatSubChannels.py index 7a799240..ef343baa 100755 --- a/bin/lib/objects/ChatSubChannels.py +++ b/bin/lib/objects/ChatSubChannels.py @@ -82,8 +82,9 @@ class ChatSubChannel(AbstractChatObject): meta['name'] = self.get_name() if 'chat' in options: meta['chat'] = self.get_chat() - if 'img' in options: - meta['img'] = self.get_img() + if 'icon' in options: + meta['icon'] = self.get_icon() + meta['img'] = meta['icon'] if 'nb_messages' in options: meta['nb_messages'] = self.get_nb_messages() if 'created_at' in options: diff --git a/bin/lib/objects/Chats.py b/bin/lib/objects/Chats.py index 746f0dea..c894cc26 100755 --- a/bin/lib/objects/Chats.py +++ b/bin/lib/objects/Chats.py @@ -95,7 +95,6 @@ class Chat(AbstractChatObject): meta['created_at'] = self.get_created_at(date=True) if 'threads' in options: meta['threads'] = self.get_threads() - print(meta['threads']) if 'tags_safe' in options: meta['tags_safe'] = self.is_tags_safe(meta['tags']) return meta diff --git a/var/www/templates/chats_explorer/block_obj_time_search.html b/var/www/templates/chats_explorer/block_obj_time_search.html index 515a8ea7..60401d42 100644 --- a/var/www/templates/chats_explorer/block_obj_time_search.html +++ b/var/www/templates/chats_explorer/block_obj_time_search.html @@ -39,9 +39,9 @@
- - - +{##} +{##} +{##} From 896b411eafbb437039c3319c0c6621d7a7a2fc1a Mon Sep 17 00:00:00 2001 From: terrtia Date: Mon, 29 Jan 2024 14:36:53 +0100 Subject: [PATCH 08/15] chg: [translation] translate chats info, users info and subchannels names --- bin/lib/Language.py | 20 +++++++++++++++++ bin/lib/chats_viewer.py | 16 +++++++------- bin/lib/objects/ChatSubChannels.py | 4 +++- bin/lib/objects/Chats.py | 4 +++- bin/lib/objects/Messages.py | 3 ++- bin/lib/objects/UsersAccount.py | 14 +++++++++++- bin/lib/objects/abstract_object.py | 11 ++++++++++ var/www/blueprints/chats_explorer.py | 9 ++++++-- .../chats_explorer/SubChannelMessages.html | 3 +++ .../chats_explorer/chat_instance.html | 2 +- .../templates/chats_explorer/chat_viewer.html | 22 ++++++++++++++----- .../chats_explorer/user_account.html | 10 +++++++-- 12 files changed, 95 insertions(+), 23 deletions(-) diff --git a/bin/lib/Language.py b/bin/lib/Language.py index 2b9bcc9b..1fee96f7 100755 --- a/bin/lib/Language.py +++ b/bin/lib/Language.py @@ -16,6 +16,7 @@ sys.path.append(os.environ['AIL_BIN']) from lib.ConfigLoader import ConfigLoader config_loader = ConfigLoader() +r_cache = config_loader.get_redis_conn("Redis_Cache") TRANSLATOR_URL = config_loader.get_config_str('Translation', 'libretranslate') config_loader = None @@ -298,6 +299,25 @@ def _clean_text_to_translate(content, html=False, keys_blocks=True): content = content.replace(it, '') return content +#### AIL Objects #### + +def get_obj_translation(obj_global_id, content, field='', source=None, target='en'): + """ + Returns translated content + """ + translation = r_cache.get(f'translation:{target}:{obj_global_id}:{field}') + if translation: + # DEBUG + # print('cache') + # r_cache.expire(f'translation:{target}:{obj_global_id}:{field}', 0) + return translation + translation = LanguageTranslator().translate(content, source=source, target=target) + if translation: + r_cache.set(f'translation:{target}:{obj_global_id}:{field}', translation) + 
r_cache.expire(f'translation:{target}:{obj_global_id}:{field}', 300) + return translation + +## --AIL Objects-- ## class LanguagesDetector: diff --git a/bin/lib/chats_viewer.py b/bin/lib/chats_viewer.py index a8346bac..797a9ed8 100755 --- a/bin/lib/chats_viewer.py +++ b/bin/lib/chats_viewer.py @@ -299,12 +299,12 @@ def get_obj_chat_meta(obj_chat, new_options=set()): options.add(option) return obj_chat.get_meta(options=options) -def get_subchannels_meta_from_global_id(subchannels): +def get_subchannels_meta_from_global_id(subchannels, translation_target=None): meta = [] for sub in subchannels: _, instance_uuid, sub_id = sub.split(':', 2) subchannel = ChatSubChannels.ChatSubChannel(sub_id, instance_uuid) - meta.append(subchannel.get_meta({'nb_messages', 'created_at', 'icon'})) + meta.append(subchannel.get_meta({'nb_messages', 'created_at', 'icon', 'translation'}, translation_target=translation_target)) return meta def get_chat_meta_from_global_id(chat_global_id): @@ -335,13 +335,13 @@ def api_get_chat(chat_id, chat_instance_uuid, translation_target=None, nb=-1, pa chat = Chats.Chat(chat_id, chat_instance_uuid) if not chat.exists(): return {"status": "error", "reason": "Unknown chat"}, 404 - meta = chat.get_meta({'created_at', 'icon', 'info', 'nb_participants', 'subchannels', 'threads', 'username'}) + meta = chat.get_meta({'created_at', 'icon', 'info', 'nb_participants', 'subchannels', 'threads', 'translation', 'username'}, translation_target=translation_target) if meta['username']: meta['username'] = get_username_meta_from_global_id(meta['username']) if meta['subchannels']: - meta['subchannels'] = get_subchannels_meta_from_global_id(meta['subchannels']) + meta['subchannels'] = get_subchannels_meta_from_global_id(meta['subchannels'], translation_target=translation_target) else: - if translation_target not in Language.LIST_LANGUAGES: + if translation_target not in Language.get_translation_languages(): translation_target = None meta['messages'], meta['pagination'], 
meta['tags_messages'] = chat.get_messages(translation_target=translation_target, nb=nb, page=page) return meta, 200 @@ -373,7 +373,7 @@ def api_get_subchannel(chat_id, chat_instance_uuid, translation_target=None, nb= subchannel = ChatSubChannels.ChatSubChannel(chat_id, chat_instance_uuid) if not subchannel.exists(): return {"status": "error", "reason": "Unknown subchannel"}, 404 - meta = subchannel.get_meta({'chat', 'created_at', 'icon', 'nb_messages', 'nb_participants', 'threads'}) + meta = subchannel.get_meta({'chat', 'created_at', 'icon', 'nb_messages', 'nb_participants', 'threads', 'translation'}, translation_target=translation_target) if meta['chat']: meta['chat'] = get_chat_meta_from_global_id(meta['chat']) if meta.get('threads'): @@ -400,11 +400,11 @@ def api_get_message(message_id, translation_target=None): meta = message.get_meta({'chat', 'content', 'files-names', 'icon', 'images', 'link', 'parent', 'parent_meta', 'reactions', 'thread', 'translation', 'user-account'}, translation_target=translation_target) return meta, 200 -def api_get_user_account(user_id, instance_uuid): +def api_get_user_account(user_id, instance_uuid, translation_target=None): user_account = UsersAccount.UserAccount(user_id, instance_uuid) if not user_account.exists(): return {"status": "error", "reason": "Unknown user-account"}, 404 - meta = user_account.get_meta({'chats', 'icon', 'info', 'subchannels', 'threads', 'username', 'username_meta'}) + meta = user_account.get_meta({'chats', 'icon', 'info', 'subchannels', 'threads', 'translation', 'username', 'username_meta'}, translation_target=translation_target) return meta, 200 # # # # # # # # # # LATER diff --git a/bin/lib/objects/ChatSubChannels.py b/bin/lib/objects/ChatSubChannels.py index ef343baa..331c29ea 100755 --- a/bin/lib/objects/ChatSubChannels.py +++ b/bin/lib/objects/ChatSubChannels.py @@ -76,7 +76,7 @@ class ChatSubChannel(AbstractChatObject): # TODO TIME LAST MESSAGES - def get_meta(self, options=set()): + def 
get_meta(self, options=set(), translation_target=None): meta = self._get_meta(options=options) meta['tags'] = self.get_tags(r_list=True) meta['name'] = self.get_name() @@ -95,6 +95,8 @@ class ChatSubChannel(AbstractChatObject): meta['participants'] = self.get_participants() if 'nb_participants' in options: meta['nb_participants'] = self.get_nb_participants() + if 'translation' in options and translation_target: + meta['translation_name'] = self.translate(meta['name'], field='name', target=translation_target) return meta def get_misp_object(self): diff --git a/bin/lib/objects/Chats.py b/bin/lib/objects/Chats.py index c894cc26..dde776b0 100755 --- a/bin/lib/objects/Chats.py +++ b/bin/lib/objects/Chats.py @@ -70,7 +70,7 @@ class Chat(AbstractChatObject): icon = '\uf086' return {'style': style, 'icon': icon, 'color': '#4dffff', 'radius': 5} - def get_meta(self, options=set()): + def get_meta(self, options=set(), translation_target=None): meta = self._get_meta(options=options) meta['name'] = self.get_name() meta['tags'] = self.get_tags(r_list=True) @@ -79,6 +79,8 @@ class Chat(AbstractChatObject): meta['img'] = meta['icon'] if 'info' in options: meta['info'] = self.get_info() + if 'translation' in options and translation_target: + meta['translation_info'] = self.translate(meta['info'], field='info', target=translation_target) if 'participants' in options: meta['participants'] = self.get_participants() if 'nb_participants' in options: diff --git a/bin/lib/objects/Messages.py b/bin/lib/objects/Messages.py index 659047be..a88eb6da 100755 --- a/bin/lib/objects/Messages.py +++ b/bin/lib/objects/Messages.py @@ -179,6 +179,7 @@ class Message(AbstractObject): """ Returns translated content """ + # return self._get_field('translated') global_id = self.get_global_id() translation = r_cache.get(f'translation:{target}:{global_id}') @@ -289,7 +290,7 @@ class Message(AbstractObject): if 'reactions' in options: meta['reactions'] = self.get_reactions() if 'translation' in options and 
translation_target: - meta['translation'] = self.get_translation(content=meta.get('content'), target=translation_target) + meta['translation'] = self.translate(content=meta.get('content'), target=translation_target) # meta['encoding'] = None return meta diff --git a/bin/lib/objects/UsersAccount.py b/bin/lib/objects/UsersAccount.py index 2148697a..27dbf30c 100755 --- a/bin/lib/objects/UsersAccount.py +++ b/bin/lib/objects/UsersAccount.py @@ -5,6 +5,7 @@ import os import sys # import re +# from datetime import datetime from flask import url_for from pymisp import MISPObject @@ -88,6 +89,13 @@ class UserAccount(AbstractSubtypeObject): def set_info(self, info): return self._set_field('info', info) + # def get_created_at(self, date=False): + # created_at = self._get_field('created_at') + # if date and created_at: + # created_at = datetime.fromtimestamp(float(created_at)) + # created_at = created_at.isoformat(' ') + # return created_at + # TODO MESSAGES: # 1) ALL MESSAGES + NB # 2) ALL MESSAGES TIMESTAMP @@ -122,7 +130,7 @@ class UserAccount(AbstractSubtypeObject): def get_messages_by_chat_obj(self, chat_obj): return self.get_correlation_iter_obj(chat_obj, 'message') - def get_meta(self, options=set()): # TODO Username timeline + def get_meta(self, options=set(), translation_target=None): # TODO Username timeline meta = self._get_meta(options=options) meta['id'] = self.id meta['subtype'] = self.subtype @@ -141,6 +149,10 @@ class UserAccount(AbstractSubtypeObject): meta['icon'] = self.get_icon() if 'info' in options: meta['info'] = self.get_info() + if 'translation' in options and translation_target: + meta['translation_info'] = self.translate(meta['info'], field='info', target=translation_target) + # if 'created_at': + # meta['created_at'] = self.get_created_at(date=True) if 'chats' in options: meta['chats'] = self.get_chats() if 'subchannels' in options: diff --git a/bin/lib/objects/abstract_object.py b/bin/lib/objects/abstract_object.py index 86eacc44..64697b1e 100755 
--- a/bin/lib/objects/abstract_object.py +++ b/bin/lib/objects/abstract_object.py @@ -25,6 +25,7 @@ from lib import Duplicate from lib.correlations_engine import get_nb_correlations, get_correlations, add_obj_correlation, delete_obj_correlation, delete_obj_correlations, exists_obj_correlation, is_obj_correlated, get_nb_correlation_by_correl_type, get_obj_inter_correlation from lib.Investigations import is_object_investigated, get_obj_investigations, delete_obj_investigations from lib.relationships_engine import get_obj_nb_relationships, add_obj_relationship +from lib.Language import get_obj_translation from lib.Tracker import is_obj_tracked, get_obj_trackers, delete_obj_trackers logging.config.dictConfig(ail_logger.get_config(name='ail')) @@ -301,6 +302,16 @@ class AbstractObject(ABC): ## -Relationship- ## + ## Translation ## + + def translate(self, content=None, field='', source=None, target='en'): + global_id = self.get_global_id() + if not content: + content = self.get_content() + return get_obj_translation(global_id, content, field=field, source=source, target=target) + + ## -Translation- ## + ## Parent ## def is_parent(self): diff --git a/var/www/blueprints/chats_explorer.py b/var/www/blueprints/chats_explorer.py index 38d6c413..ef385b44 100644 --- a/var/www/blueprints/chats_explorer.py +++ b/var/www/blueprints/chats_explorer.py @@ -182,9 +182,14 @@ def objects_message(): def objects_user_account(): instance_uuid = request.args.get('subtype') user_id = request.args.get('id') - user_account = chats_viewer.api_get_user_account(user_id, instance_uuid) + target = request.args.get('target') + if target == "Don't Translate": + target = None + user_account = chats_viewer.api_get_user_account(user_id, instance_uuid, translation_target=target) if user_account[1] != 200: return create_json_response(user_account[0], user_account[1]) else: user_account = user_account[0] - return render_template('user_account.html', meta=user_account, bootstrap_label=bootstrap_label) \ No 
newline at end of file + languages = Language.get_translation_languages() + return render_template('user_account.html', meta=user_account, bootstrap_label=bootstrap_label, + translation_languages=languages, translation_target=target) diff --git a/var/www/templates/chats_explorer/SubChannelMessages.html b/var/www/templates/chats_explorer/SubChannelMessages.html index 858a1159..5eb6153b 100644 --- a/var/www/templates/chats_explorer/SubChannelMessages.html +++ b/var/www/templates/chats_explorer/SubChannelMessages.html @@ -75,6 +75,9 @@ {{ subchannel['name'] }} + {% if subchannel['translation_name'] %} +
{{ subchannel['translation_name'] }}
+ {% endif %} {{ subchannel["created_at"] }} diff --git a/var/www/templates/chats_explorer/chat_instance.html b/var/www/templates/chats_explorer/chat_instance.html index 0f5e25ed..70524e40 100644 --- a/var/www/templates/chats_explorer/chat_instance.html +++ b/var/www/templates/chats_explorer/chat_instance.html @@ -71,7 +71,7 @@ First Seen Last Seen SubChannels - Messages + diff --git a/var/www/templates/chats_explorer/chat_viewer.html b/var/www/templates/chats_explorer/chat_viewer.html index 64f14295..0506d444 100644 --- a/var/www/templates/chats_explorer/chat_viewer.html +++ b/var/www/templates/chats_explorer/chat_viewer.html @@ -100,6 +100,10 @@ {% if chat['info'] %}
  • {{ chat['info'] }}
    + {% if chat['translation_info'] %} +
    +
    {{ chat['translation_info'] }}
    + {% endif %}
  • {% endif %} @@ -112,8 +116,12 @@ {{ tag }} {{ chat['tags_messages'][tag] }} {% endfor %} + {% with translate_url=url_for('chats_explorer.chats_explorer_chat', uuid=chat['subtype']), obj_id=chat['id'], pagination=chat['pagination'] %} + {% include 'chats_explorer/block_translation.html' %} + {% endwith %} + {% if chat['subchannels'] %} -

    Sub-Channels:

    +

    Sub-Channels:

    @@ -123,7 +131,7 @@ - + @@ -132,7 +140,12 @@ - + - @@ -56,7 +55,6 @@ -
    Created at First Seen Last SeenNB Messages
    {{ meta['id'] }} {{ meta['name'] }} + {{ meta['name'] }} + {% if meta['translation_name'] %} +
    {{ meta['translation_name'] }}
    + {% endif %} +
    {{ meta['id'] }} {{ meta['created_at'] }} @@ -161,9 +174,6 @@ {% include 'objects/image/block_blur_img_slider.html' %} - {% with translate_url=url_for('chats_explorer.chats_explorer_chat', uuid=chat['subtype']), obj_id=chat['id'], pagination=chat['pagination'] %} - {% include 'chats_explorer/block_translation.html' %} - {% endwith %} {% with obj_subtype=chat['subtype'], obj_id=chat['id'], url_endpoint=url_for("chats_explorer.chats_explorer_chat"), nb=chat['pagination']['nb'] %} {% set date_from=chat['first_seen'] %} {% set date_to=chat['last_seen'] %} diff --git a/var/www/templates/chats_explorer/user_account.html b/var/www/templates/chats_explorer/user_account.html index ee8d4e88..7d869733 100644 --- a/var/www/templates/chats_explorer/user_account.html +++ b/var/www/templates/chats_explorer/user_account.html @@ -46,7 +46,6 @@
    username IDCreated at First Seen Last Seen NB Chats
    {{ meta['username']['id'] }} {{ meta['id'] }}{{ meta['created_at'] }} {% if meta['first_seen'] %} {{ meta['first_seen'][0:4] }}-{{ meta['first_seen'][4:6] }}-{{ meta['first_seen'][6:8] }} @@ -74,6 +72,10 @@ {% if meta['info'] %}
  • {{ meta['info'] }}
    + {% if meta['translation_info'] %} +
    +
    {{ meta['translation_info'] }}
    + {% endif %}
  • {% endif %} @@ -100,6 +102,10 @@ + {% with translate_url=url_for('chats_explorer.objects_user_account', subtype=meta['subtype']), obj_id=meta['id'] %} + {% include 'chats_explorer/block_translation.html' %} + {% endwith %} + {# {% if meta['subchannels'] %}#} {#

    Sub-Channels:

    #} From a10119fb6a5d4179b666c678ed476e517b01cf24 Mon Sep 17 00:00:00 2001 From: terrtia Date: Mon, 29 Jan 2024 16:41:59 +0100 Subject: [PATCH 09/15] chg: [kvrocks] j -4 install + update to latest version --- bin/LAUNCH.sh | 5 +- configs/6383.conf | 430 ++++++++++++++++++++++++++++++++---------- installing_deps.sh | 2 +- update/v5.3/Update.sh | 13 +- 4 files changed, 345 insertions(+), 105 deletions(-) diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index 68c20ac7..862a49a0 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -602,7 +602,7 @@ function launch_all { function menu_display { - options=("Redis" "Ardb" "Kvrocks" "Logs" "Scripts" "Flask" "Killall" "Update" "Update-config" "Update-thirdparty") + options=("Redis" "Kvrocks" "Logs" "Scripts" "Flask" "Killall" "Update" "Update-config" "Update-thirdparty") menu() { echo "What do you want to Launch?:" @@ -630,9 +630,6 @@ function menu_display { Redis) launch_redis; ;; - Ardb) - launch_ardb; - ;; Kvrocks) launch_kvrocks; ;; diff --git a/configs/6383.conf b/configs/6383.conf index dfc1f205..0b889dbe 100644 --- a/configs/6383.conf +++ b/configs/6383.conf @@ -1,14 +1,14 @@ ################################ GENERAL ##################################### -# By default kvrocks listens for connections from all the network interfaces -# available on the server. It is possible to listen to just one or multiple -# interfaces using the "bind" configuration directive, followed by one or -# more IP addresses. +# By default kvrocks listens for connections from localhost interface. +# It is possible to listen to just one or multiple interfaces using +# the "bind" configuration directive, followed by one or more IP addresses. # # Examples: # # bind 192.168.1.100 10.0.0.1 # bind 127.0.0.1 ::1 +# bind 0.0.0.0 bind 127.0.0.1 # Unix socket. 
@@ -26,32 +26,52 @@ port 6383 # Close the connection after a client is idle for N seconds (0 to disable) timeout 0 -# The number of worker's threads, increase or decrease it would effect the performance. +# The number of worker's threads, increase or decrease would affect the performance. workers 8 -# By default kvrocks does not run as a daemon. Use 'yes' if you need it. -# Note that kvrocks will write a pid file in /var/run/kvrocks.pid when daemonized. +# By default, kvrocks does not run as a daemon. Use 'yes' if you need it. +# Note that kvrocks will write a PID file in /var/run/kvrocks.pid when daemonized daemonize no -# Kvrocks implements cluster solution that is similar with redis cluster solution. +# Kvrocks implements the cluster solution that is similar to the Redis cluster solution. # You can get cluster information by CLUSTER NODES|SLOTS|INFO command, it also is -# adapted to redis-cli, redis-benchmark, redis cluster SDK and redis cluster proxy. -# But kvrocks doesn't support to communicate with each others, so you must set +# adapted to redis-cli, redis-benchmark, Redis cluster SDK, and Redis cluster proxy. +# But kvrocks doesn't support communicating with each other, so you must set # cluster topology by CLUSTER SETNODES|SETNODEID commands, more details: #219. # # PLEASE NOTE: # If you enable cluster, kvrocks will encode key with its slot id calculated by -# CRC16 and modulo 16384, endoding key with its slot id makes it efficient to -# migrate keys based on slot. So if you enabled at first time, cluster mode must +# CRC16 and modulo 16384, encoding key with its slot id makes it efficient to +# migrate keys based on the slot. So if you enabled at first time, cluster mode must # not be disabled after restarting, and vice versa. That is to say, data is not # compatible between standalone mode with cluster mode, you must migrate data # if you want to change mode, otherwise, kvrocks will make data corrupt. 
# # Default: no + cluster-enabled no +# By default, namespaces are stored in the configuration file and won't be replicated +# to replicas. This option allows to change this behavior, so that namespaces are also +# propagated to slaves. Note that: +# 1) it won't replicate the 'masterauth' to prevent breaking master/replica replication +# 2) it will overwrite replica's namespace with master's namespace, so be careful of in-using namespaces +# 3) cannot switch off the namespace replication once it's enabled +# +# Default: no +repl-namespace-enabled no + +# Persist the cluster nodes topology in local file($dir/nodes.conf). This configuration +# takes effect only if the cluster mode was enabled. +# +# If yes, it will try to load the cluster topology from the local file when starting, +# and dump the cluster nodes into the file if it was changed. +# +# Default: yes +persist-cluster-nodes-enabled yes + # Set the max number of connected clients at the same time. By default -# this limit is set to 10000 clients, however if the server is not +# this limit is set to 10000 clients. However, if the server is not # able to configure the process file limit to allow for the specified limit # the max number of allowed clients is set to the current file limit # @@ -71,18 +91,17 @@ maxclients 10000 # 150k passwords per second against a good box. This means that you should # use a very strong password otherwise it will be very easy to break. # -# requirepass foobared requirepass ail # If the master is password protected (using the "masterauth" configuration # directive below) it is possible to tell the slave to authenticate before -# starting the replication synchronization process, otherwise the master will +# starting the replication synchronization process. Otherwise, the master will # refuse the slave request. # # masterauth foobared # Master-Salve replication would check db name is matched. if not, the slave should -# refuse to sync the db from master. 
Don't use default value, set the db-name to identify +# refuse to sync the db from master. Don't use the default value, set the db-name to identify # the cluster. db-name change.me.db @@ -98,7 +117,22 @@ dir DATA_KVROCKS # # log-dir stdout -# When running daemonized, kvrocks writes a pid file in ${CONFIG_DIR}/kvrocks.pid by +# Log level +# Possible values: info, warning, error, fatal +# Default: info +log-level info + +# You can configure log-retention-days to control whether to enable the log cleaner +# and the maximum retention days that the INFO level logs will be kept. +# +# if set to -1, that means to disable the log cleaner. +# if set to 0, all previous INFO level logs will be immediately removed. +# if set to between 0 to INT_MAX, that means it will retent latest N(log-retention-days) day logs. + +# By default the log-retention-days is -1. +log-retention-days -1 + +# When running in daemonize mode, kvrocks writes a PID file in ${CONFIG_DIR}/kvrocks.pid by # default. You can specify a custom pid file location here. # pidfile /var/run/kvrocks.pid pidfile DATA_KVROCKS/kvrocks.pid @@ -146,7 +180,7 @@ tcp-backlog 511 master-use-repl-port no # Currently, master only checks sequence number when replica asks for PSYNC, -# that is not enough since they may have different replication history even +# that is not enough since they may have different replication histories even # the replica asking sequence is in the range of the master current WAL. # # We design 'Replication Sequence ID' PSYNC, we add unique replication id for @@ -180,11 +214,11 @@ use-rsid-psync no # is still in progress, the slave can act in two different ways: # # 1) if slave-serve-stale-data is set to 'yes' (the default) the slave will -# still reply to client requests, possibly with out of date data, or the +# still reply to client requests, possibly with out-of-date data, or the # data set may just be empty if this is the first synchronization. 
# # 2) if slave-serve-stale-data is set to 'no' the slave will reply with -# an error "SYNC with master in progress" to all the kind of commands +# an error "SYNC with master in progress" to all kinds of commands # but to INFO and SLAVEOF. # slave-serve-stale-data yes @@ -203,6 +237,35 @@ slave-serve-stale-data yes # Default: no slave-empty-db-before-fullsync no +# A Kvrocks master is able to list the address and port of the attached +# replicas in different ways. For example the "INFO replication" section +# offers this information, which is used, among other tools, by +# Redis Sentinel in order to discover replica instances. +# Another place where this info is available is in the output of the +# "ROLE" command of a master. +# +# The listed IP address and port normally reported by a replica is +# obtained in the following way: +# +# IP: The address is auto detected by checking the peer address +# of the socket used by the replica to connect with the master. +# +# Port: The port is communicated by the replica during the replication +# handshake, and is normally the port that the replica is using to +# listen for connections. +# +# However when port forwarding or Network Address Translation (NAT) is +# used, the replica may actually be reachable via different IP and port +# pairs. The following two options can be used by a replica in order to +# report to its master a specific set of IP and port, so that both INFO +# and ROLE will report those values. +# +# There is no need to use both the options if you need to override just +# the port or the IP address. +# +# replica-announce-ip 5.5.5.5 +# replica-announce-port 1234 + # If replicas need full synchronization with master, master need to create # checkpoint for feeding replicas, and replicas also stage a checkpoint of # the master. If we also keep the backup, it maybe occupy extra disk space. 
@@ -212,7 +275,7 @@ slave-empty-db-before-fullsync no # Default: no purge-backup-on-fullsync no -# The maximum allowed rate (in MB/s) that should be used by Replication. +# The maximum allowed rate (in MB/s) that should be used by replication. # If the rate exceeds max-replication-mb, replication will slow down. # Default: 0 (i.e. no limit) max-replication-mb 0 @@ -220,8 +283,8 @@ max-replication-mb 0 # The maximum allowed aggregated write rate of flush and compaction (in MB/s). # If the rate exceeds max-io-mb, io will slow down. # 0 is no limit -# Default: 500 -max-io-mb 500 +# Default: 0 +max-io-mb 0 # The maximum allowed space (in GB) that should be used by RocksDB. # If the total size of the SST files exceeds max_allowed_space, writes to RocksDB will fail. @@ -231,7 +294,7 @@ max-db-size 0 # The maximum backup to keep, server cron would run every minutes to check the num of current # backup, and purge the old backup if exceed the max backup num to keep. If max-backup-to-keep -# is 0, no backup would be keep. But now, we only support 0 or 1. +# is 0, no backup would be kept. But now, we only support 0 or 1. max-backup-to-keep 1 # The maximum hours to keep the backup. If max-backup-keep-hours is 0, wouldn't purge any backup. @@ -243,6 +306,115 @@ max-backup-keep-hours 24 # Default: 16 max-bitmap-to-string-mb 16 +# Whether to enable SCAN-like cursor compatible with Redis. +# If enabled, the cursor will be unsigned 64-bit integers. +# If disabled, the cursor will be a string. +# Default: no +redis-cursor-compatible yes + +# Whether to enable the RESP3 protocol. +# NOTICE: RESP3 is still under development, don't enable it in production environment. +# +# Default: no +# resp3-enabled no + +# Maximum nesting depth allowed when parsing and serializing +# JSON documents while using JSON commands like JSON.SET. 
+# Default: 1024 +json-max-nesting-depth 1024 + +# The underlying storage format of JSON data type +# NOTE: This option only affects newly written/updated key-values +# The CBOR format may reduce the storage size and speed up JSON commands +# Available values: json, cbor +# Default: json +json-storage-format json + +################################## TLS ################################### + +# By default, TLS/SSL is disabled, i.e. `tls-port` is set to 0. +# To enable it, `tls-port` can be used to define TLS-listening ports. +# tls-port 0 + +# Configure a X.509 certificate and private key to use for authenticating the +# server to connected clients, masters or cluster peers. +# These files should be PEM formatted. +# +# tls-cert-file kvrocks.crt +# tls-key-file kvrocks.key + +# If the key file is encrypted using a passphrase, it can be included here +# as well. +# +# tls-key-file-pass secret + +# Configure a CA certificate(s) bundle or directory to authenticate TLS/SSL +# clients and peers. Kvrocks requires an explicit configuration of at least one +# of these, and will not implicitly use the system wide configuration. +# +# tls-ca-cert-file ca.crt +# tls-ca-cert-dir /etc/ssl/certs + +# By default, clients on a TLS port are required +# to authenticate using valid client side certificates. +# +# If "no" is specified, client certificates are not required and not accepted. +# If "optional" is specified, client certificates are accepted and must be +# valid if provided, but are not required. +# +# tls-auth-clients no +# tls-auth-clients optional + +# By default, only TLSv1.2 and TLSv1.3 are enabled and it is highly recommended +# that older formally deprecated versions are kept disabled to reduce the attack surface. +# You can explicitly specify TLS versions to support. +# Allowed values are case insensitive and include "TLSv1", "TLSv1.1", "TLSv1.2", +# "TLSv1.3" (OpenSSL >= 1.1.1) or any combination. 
+# To enable only TLSv1.2 and TLSv1.3, use: +# +# tls-protocols "TLSv1.2 TLSv1.3" + +# Configure allowed ciphers. See the ciphers(1ssl) manpage for more information +# about the syntax of this string. +# +# Note: this configuration applies only to <= TLSv1.2. +# +# tls-ciphers DEFAULT:!MEDIUM + +# Configure allowed TLSv1.3 ciphersuites. See the ciphers(1ssl) manpage for more +# information about the syntax of this string, and specifically for TLSv1.3 +# ciphersuites. +# +# tls-ciphersuites TLS_CHACHA20_POLY1305_SHA256 + +# When choosing a cipher, use the server's preference instead of the client +# preference. By default, the server follows the client's preference. +# +# tls-prefer-server-ciphers yes + +# By default, TLS session caching is enabled to allow faster and less expensive +# reconnections by clients that support it. Use the following directive to disable +# caching. +# +# tls-session-caching no + +# Change the default number of TLS sessions cached. A zero value sets the cache +# to unlimited size. The default size is 20480. +# +# tls-session-cache-size 5000 + +# Change the default timeout of cached TLS sessions. The default timeout is 300 +# seconds. +# +# tls-session-cache-timeout 60 + +# By default, a replica does not attempt to establish a TLS connection +# with its master. +# +# Use the following directive to enable TLS on replication links. +# +# tls-replication yes + ################################## SLOW LOG ################################### # The Kvrocks Slow Log is a mechanism to log queries that exceeded a specified @@ -301,8 +473,8 @@ supervised no # Default: empty # profiling-sample-commands "" -# Ratio of the samples would be recorded. We simply use the rand to determine -# whether to record the sample or not. +# Ratio of the samples would be recorded. It is a number between 0 and 100. +# We simply use the rand to determine whether to record the sample or not. 
# # Default: 0 profiling-sample-ratio 0 @@ -331,15 +503,27 @@ profiling-sample-record-threshold-ms 100 # 0-7am every day. compaction-checker-range 0-7 -# Bgsave scheduler, auto bgsave at schedule time +# When the compaction checker is triggered, the db will periodically pick the SST file +# with the highest "deleted percentage" (i.e. the percentage of deleted keys in the SST +# file) to compact, in order to free disk space. +# However, if a specific SST file was created more than "force-compact-file-age" seconds +# ago, and its percentage of deleted keys is higher than +# "force-compact-file-min-deleted-percentage", it will be forcibly compacted as well. + +# Default: 172800 seconds; Range: [60, INT64_MAX]; +# force-compact-file-age 172800 +# Default: 10 %; Range: [1, 100]; +# force-compact-file-min-deleted-percentage 10 + +# Bgsave scheduler, auto bgsave at scheduled time # time expression format is the same as crontab(currently only support * and int) # e.g. bgsave-cron 0 3 * * * 0 4 * * * -# would bgsave the db at 3am and 4am everyday +# would bgsave the db at 3am and 4am every day # Command renaming. # # It is possible to change the name of dangerous commands in a shared -# environment. For instance the KEYS command may be renamed into something +# environment. For instance, the KEYS command may be renamed into something # hard to guess so that it will still be available for internal-use tools # but not available for general clients. # @@ -352,39 +536,26 @@ compaction-checker-range 0-7 # # rename-command KEYS "" -# The key-value size may so be quite different in many scenes, and use 256MiB as SST file size -# may cause data loading(large index/filter block) ineffective when the key-value was too small. -# kvrocks supports user-defined SST file in config(rocksdb.target_file_size_base), -# but it still too trivial and inconvenient to adjust the different sizes for different instances.
-# so we want to periodic auto-adjust the SST size in-flight with user avg key-value size. -# -# If enabled, kvrocks will auto resize rocksdb.target_file_size_base -# and rocksdb.write_buffer_size in-flight with user avg key-value size. -# Please see #118. -# -# Default: yes -auto-resize-block-and-sst yes - ################################ MIGRATE ##################################### # If the network bandwidth is completely consumed by the migration task, # it will affect the availability of kvrocks. To avoid this situation, -# migrate-speed is adpoted to limit the migrating speed. -# Migrating speed is limited by controling the duraiton between sending data, -# the duation is calculated by: 1000000 * migrate-pipeline-size / migrate-speed (us). +# migrate-speed is adopted to limit the migrating speed. +# Migrating speed is limited by controlling the duration between sending data, +# the duration is calculated by: 1000000 * migrate-pipeline-size / migrate-speed (us). # Value: [0,INT_MAX], 0 means no limit # # Default: 4096 migrate-speed 4096 -# In order to reduce data transimission times and improve the efficiency of data migration, +# In order to reduce data transmission times and improve the efficiency of data migration, # pipeline is adopted to send multiple data at once. Pipeline size can be set by this option. # Value: [1, INT_MAX], it can't be 0 # # Default: 16 migrate-pipeline-size 16 -# In order to reduce the write forbidden time during migrating slot, we will migrate the incremetal -# data sevral times to reduce the amount of incremetal data. Until the quantity of incremetal +# In order to reduce the write forbidden time during migrating slot, we will migrate the incremental +# data several times to reduce the amount of incremental data. Until the quantity of incremental # data is reduced to a certain threshold, slot will be forbidden write. The threshold is set by # this option. 
# Value: [1, INT_MAX], it can't be 0 @@ -394,22 +565,21 @@ migrate-sequence-gap 10000 ################################ ROCKSDB ##################################### -# Specify the capacity of metadata column family block cache. Larger block cache -# may make request faster while more keys would be cached. Max Size is 200*1024. -# Default: 2048MB -rocksdb.metadata_block_cache_size 2048 +# Specify the capacity of column family block cache. A larger block cache +# may make requests faster while more keys would be cached. Max Size is 400*1024. +# Default: 4096MB +rocksdb.block_cache_size 4096 -# Specify the capacity of subkey column family block cache. Larger block cache -# may make request faster while more keys would be cached. Max Size is 200*1024. -# Default: 2048MB -rocksdb.subkey_block_cache_size 2048 - -# Metadata column family and subkey column family will share a single block cache -# if set 'yes'. The capacity of shared block cache is -# metadata_block_cache_size + subkey_block_cache_size +# Specify the type of cache used in the block cache. +# Accept value: "lru", "hcc" +# "lru" stands for the cache with the LRU(Least Recently Used) replacement policy. # -# Default: yes -rocksdb.share_metadata_and_subkey_block_cache yes +# "hcc" stands for the Hyper Clock Cache, a lock-free cache alternative +# that offers much improved CPU efficiency vs. LRU cache under high parallel +# load or high contention. +# +# default lru +rocksdb.block_cache_type lru # A global cache for table-level rows in RocksDB. If almost always point # lookups, enlarging row cache may improve read performance. Otherwise, @@ -423,7 +593,7 @@ rocksdb.row_cache_size 0 # files opened are always kept open. You can estimate number of files based # on target_file_size_base and target_file_size_multiplier for level-based # compaction. For universal-style compaction, you can usually set it to -1. 
-# Default: 4096 +# Default: 8096 rocksdb.max_open_files 8096 # Amount of data to build up in memory (backed by an unsorted log @@ -442,7 +612,7 @@ rocksdb.max_open_files 8096 # default is 64MB rocksdb.write_buffer_size 64 -# Target file size for compaction, target file size for Leve N can be caculated +# Target file size for compaction, target file size for Level N can be calculated # by target_file_size_base * (target_file_size_multiplier ^ (L-1)) # # Default: 128MB @@ -457,20 +627,29 @@ rocksdb.target_file_size_base 128 # allowed. rocksdb.max_write_buffer_number 4 +# Maximum number of concurrent background jobs (compactions and flushes). +# For backwards compatibility we will set `max_background_jobs = +# max_background_compactions + max_background_flushes` in the case where user +# sets at least one of `max_background_compactions` or `max_background_flushes` +# (we replace -1 by 1 in case one option is unset). +rocksdb.max_background_jobs 4 + +# DEPRECATED: it is automatically decided based on the value of rocksdb.max_background_jobs # Maximum number of concurrent background compaction jobs, submitted to # the default LOW priority thread pool. -rocksdb.max_background_compactions 4 +rocksdb.max_background_compactions -1 +# DEPRECATED: it is automatically decided based on the value of rocksdb.max_background_jobs # Maximum number of concurrent background memtable flush jobs, submitted by # default to the HIGH priority thread pool. If the HIGH priority thread pool # is configured to have zero threads, flush jobs will share the LOW priority # thread pool with compaction jobs. -rocksdb.max_background_flushes 4 +rocksdb.max_background_flushes -1 # This value represents the maximum number of threads that will # concurrently perform a compaction job by breaking it into multiple, # smaller ones that are run simultaneously. -# Default: 2 (i.e. 
no subcompactions) +# Default: 2 rocksdb.max_sub_compactions 2 # In order to limit the size of WALs, RocksDB uses DBOptions::max_total_wal_size @@ -494,8 +673,8 @@ rocksdb.max_sub_compactions 2 # default is 512MB rocksdb.max_total_wal_size 512 -# We impl the repliction with rocksdb WAL, it would trigger full sync when the seq was out of range. -# wal_ttl_seconds and wal_size_limit_mb would affect how archived logswill be deleted. +# We implement the replication with rocksdb WAL, it would trigger full sync when the seq was out of range. +# wal_ttl_seconds and wal_size_limit_mb would affect how archived logs will be deleted. # If WAL_ttl_seconds is not 0, then WAL files will be checked every WAL_ttl_seconds / 2 and those that # are older than WAL_ttl_seconds will be deleted# # @@ -505,26 +684,26 @@ rocksdb.wal_ttl_seconds 10800 # If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0, # WAL files will be checked every 10 min and if total size is greater # then WAL_size_limit_MB, they will be deleted starting with the -# earliest until size_limit is met. All empty files will be deleted +# earliest until size_limit is met. All empty files will be deleted # Default: 16GB rocksdb.wal_size_limit_mb 16384 # Approximate size of user data packed per block. Note that the -# block size specified here corresponds to uncompressed data. The +# block size specified here corresponds to uncompressed data. The # actual size of the unit read from disk may be smaller if # compression is enabled. # -# Default: 4KB +# Default: 16KB rocksdb.block_size 16384 # Indicating if we'd put index/filter blocks to the block cache # -# Default: no +# Default: yes rocksdb.cache_index_and_filter_blocks yes # Specify the compression to use. Only compress level greater # than 2 to improve performance. 
-# Accept value: "no", "snappy" +# Accept value: "no", "snappy", "lz4", "zstd", "zlib" # default snappy rocksdb.compression snappy @@ -579,7 +758,7 @@ rocksdb.stats_dump_period_sec 0 # Default: no rocksdb.disable_auto_compactions no -# BlobDB(key-value separation) is essentially RocksDB for large-value use cases. +# BlobDB(key-value separation) is essentially RocksDB for large-value use cases. # Since 6.18.0, The new implementation is integrated into the RocksDB core. # When set, large values (blobs) are written to separate blob files, and only # pointers to them are stored in SST files. This can reduce write amplification @@ -608,7 +787,7 @@ rocksdb.blob_file_size 268435456 # Enables garbage collection of blobs. Valid blobs residing in blob files # older than a cutoff get relocated to new files as they are encountered # during compaction, which makes it possible to clean up blob files once -# they contain nothing but obsolete/garbage blobs. +# they contain nothing but obsolete/garbage blobs. # See also rocksdb.blob_garbage_collection_age_cutoff below. # # Default: yes @@ -623,16 +802,16 @@ rocksdb.enable_blob_garbage_collection yes rocksdb.blob_garbage_collection_age_cutoff 25 -# The purpose of following three options are to dynamically adjust the upper limit of -# the data that each layer can store according to the size of the different +# The purpose of the following three options are to dynamically adjust the upper limit of +# the data that each layer can store according to the size of the different # layers of the LSM. Enabling this option will bring some improvements in -# deletion efficiency and space amplification, but it will lose a certain +# deletion efficiency and space amplification, but it will lose a certain # amount of read performance. 
-# If you want know more details about Levels' Target Size, you can read RocksDB wiki: +# If you want to know more details about Levels' Target Size, you can read RocksDB wiki: # https://github.com/facebook/rocksdb/wiki/Leveled-Compaction#levels-target-size # -# Default: no -rocksdb.level_compaction_dynamic_level_bytes no +# Default: yes +rocksdb.level_compaction_dynamic_level_bytes yes # The total file size of level-1 sst. # @@ -641,39 +820,92 @@ rocksdb.max_bytes_for_level_base 268435456 # Multiplication factor for the total file size of L(n+1) layers. # This option is a double type number in RocksDB, but kvrocks is -# not support double data type number yet, so we use int data +# not support the double data type number yet, so we use integer # number instead of double currently. # # Default: 10 rocksdb.max_bytes_for_level_multiplier 10 +# This feature only takes effect in Iterators and MultiGet. +# If yes, RocksDB will try to read asynchronously and in parallel as much as possible to hide IO latency. +# In iterators, it will prefetch data asynchronously in the background for each file being iterated on. +# In MultiGet, it will read the necessary data blocks from those files in parallel as much as possible. + +# Default no +rocksdb.read_options.async_io no + +# If yes, the write will be flushed from the operating system +# buffer cache before the write is considered complete. +# If this flag is enabled, writes will be slower. +# If this flag is disabled, and the machine crashes, some recent +# writes may be lost. Note that if it is just the process that +# crashes (i.e., the machine does not reboot), no writes will be +# lost even if sync==false. +# +# Default: no +rocksdb.write_options.sync no + +# If yes, writes will not first go to the write ahead log, +# and the write may get lost after a crash. +# You must keep wal enabled if you use replication.
+# +# Default: no +rocksdb.write_options.disable_wal no + +# If enabled and we need to wait or sleep for the write request, fails +# immediately. +# +# Default: no +rocksdb.write_options.no_slowdown no + +# If enabled, write requests are of lower priority if compaction is +# behind. In this case, no_slowdown = true, the request will be canceled +# immediately. Otherwise, it will be slowed down. +# The slowdown value is determined by RocksDB to guarantee +# it introduces minimum impacts to high priority writes. +# +# Default: no +rocksdb.write_options.low_pri no + +# If enabled, this writebatch will maintain the last insert positions of each +# memtable as hints in concurrent write. It can improve write performance +# in concurrent writes if keys in one writebatch are sequential. +# +# Default: no +rocksdb.write_options.memtable_insert_hint_per_batch no + + +# Support RocksDB auto-tune rate limiter for the background IO +# if enabled, Rate limiter will limit the compaction write if flush write is high +# Please see https://rocksdb.org/blog/2017/12/18/17-auto-tuned-rate-limiter.html +# +# Default: yes +rocksdb.rate_limiter_auto_tuned yes + +# Enable this option will schedule the deletion of obsolete files in a background thread +# on iterator destruction. It can reduce the latency if there are many files to be removed. +# see https://github.com/facebook/rocksdb/wiki/IO#avoid-blocking-io +# +# Default: yes +# rocksdb.avoid_unnecessary_blocking_io yes + ################################ NAMESPACE ##################################### # namespace.test change.me + +-# investigation -> db ???? +-# ail2ail -> a2a ???? 
+ + backup-dir DATA_KVROCKS/backup -fullsync-recv-file-delay 0 log-dir DATA_KVROCKS -unixsocketperm 26 - - - - namespace.cor ail_correls namespace.crawl ail_crawlers namespace.db ail_datas namespace.dup ail_dups namespace.obj ail_objs -namespace.tl ail_tls namespace.rel ail_rels namespace.stat ail_stats namespace.tag ail_tags +namespace.tl ail_tls namespace.track ail_trackers - -# investigation -> db ???? -# ail2ail -> a2a ????? - - - - - - diff --git a/installing_deps.sh b/installing_deps.sh index e6f907a1..c681249b 100755 --- a/installing_deps.sh +++ b/installing_deps.sh @@ -88,7 +88,7 @@ DEFAULT_HOME=$(pwd) #### KVROCKS #### test ! -d kvrocks/ && git clone https://github.com/apache/incubator-kvrocks.git kvrocks pushd kvrocks -./x.py build +./x.py build -j 4 popd DEFAULT_KVROCKS_DATA=$DEFAULT_HOME/DATA_KVROCKS diff --git a/update/v5.3/Update.sh b/update/v5.3/Update.sh index 1e040200..534cd295 100755 --- a/update/v5.3/Update.sh +++ b/update/v5.3/Update.sh @@ -14,7 +14,7 @@ GREEN="\\033[1;32m" DEFAULT="\\033[0;39m" echo -e $GREEN"Shutting down AIL ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -ks +bash ${AIL_BIN}/LAUNCH.sh -k wait # SUBMODULES # @@ -28,6 +28,17 @@ pip install -U libretranslatepy pip install -U xxhash pip install -U DomainClassifier +echo "" +echo -e $GREEN"Updating KVROCKS ..."$DEFAULT +echo "" +pushd ${AIL_HOME}/kvrocks +git pull +./x.py build -j 4 +popd + +bash ${AIL_BIN}/LAUNCH.sh -lrv +bash ${AIL_BIN}/LAUNCH.sh -lkv + echo "" echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT echo "" From 2db8587d03642e16cfa8d468e9b35f4835c0c58b Mon Sep 17 00:00:00 2001 From: terrtia Date: Tue, 30 Jan 2024 10:28:50 +0100 Subject: [PATCH 10/15] chg: [Hosts] improve perf + regex timeout + cache DNS results --- bin/modules/DomClassifier.py | 30 ++++++++++++++++++------------ bin/modules/Hosts.py | 36 +++++++++++++++++++----------------- 2 files changed, 37 insertions(+), 29 deletions(-) diff --git a/bin/modules/DomClassifier.py b/bin/modules/DomClassifier.py index 
94cf53db..b4620ee2 100755 --- a/bin/modules/DomClassifier.py +++ b/bin/modules/DomClassifier.py @@ -41,7 +41,13 @@ class DomClassifier(AbstractModule): addr_dns = config_loader.get_config_str("DomClassifier", "dns") - self.c = DomainClassifier.domainclassifier.Extract(rawtext="", nameservers=[addr_dns]) + redis_host = config_loader.get_config_str('Redis_Cache', 'host') + redis_port = config_loader.get_config_int('Redis_Cache', 'port') + redis_db = config_loader.get_config_int('Redis_Cache', 'db') + self.dom_classifier = DomainClassifier.domainclassifier.Extract(rawtext="", nameservers=[addr_dns], + redis_host=redis_host, + redis_port=redis_port, redis_db=redis_db, + re_timeout=30) self.cc = config_loader.get_config_str("DomClassifier", "cc") self.cc_tld = config_loader.get_config_str("DomClassifier", "cc_tld") @@ -58,34 +64,34 @@ class DomClassifier(AbstractModule): item_source = item.get_source() try: - self.c.text(rawtext=host) - if not self.c.domain: + self.dom_classifier.text(rawtext=host) + if not self.dom_classifier.domain: return - print(self.c.domain) - self.c.validdomain(passive_dns=True, extended=False) - # self.logger.debug(self.c.vdomain) + print(self.dom_classifier.domain) + self.dom_classifier.validdomain(passive_dns=True, extended=False) + # self.logger.debug(self.dom_classifier.vdomain) - print(self.c.vdomain) + print(self.dom_classifier.vdomain) print() - if self.c.vdomain and d4.is_passive_dns_enabled(): - for dns_record in self.c.vdomain: + if self.dom_classifier.vdomain and d4.is_passive_dns_enabled(): + for dns_record in self.dom_classifier.vdomain: self.add_message_to_queue(obj=None, message=dns_record) if self.cc_tld: - localizeddomains = self.c.include(expression=self.cc_tld) + localizeddomains = self.dom_classifier.include(expression=self.cc_tld) if localizeddomains: print(localizeddomains) self.redis_logger.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc_tld};{item.get_id()}") if 
self.cc: - localizeddomains = self.c.localizedomain(cc=self.cc) + localizeddomains = self.dom_classifier.localizedomain(cc=self.cc) if localizeddomains: print(localizeddomains) self.redis_logger.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc};{item.get_id()}") if r_result: - return self.c.vdomain + return self.dom_classifier.vdomain except IOError as err: self.redis_logger.error(f"Duplicate;{item_source};{item_date};{item_basename};CRC Checksum Failed") diff --git a/bin/modules/Hosts.py b/bin/modules/Hosts.py index 488e7acf..55670777 100755 --- a/bin/modules/Hosts.py +++ b/bin/modules/Hosts.py @@ -18,13 +18,14 @@ import os import re import sys +import DomainClassifier.domainclassifier + sys.path.append(os.environ['AIL_BIN']) ################################## # Import Project packages ################################## from modules.abstract_module import AbstractModule from lib.ConfigLoader import ConfigLoader -from lib.objects.Items import Item class Hosts(AbstractModule): """ @@ -43,28 +44,29 @@ class Hosts(AbstractModule): # Waiting time in seconds between to message processed self.pending_seconds = 1 - self.host_regex = r'\b([a-zA-Z\d-]{,63}(?:\.[a-zA-Z\d-]{,63})+)\b' - re.compile(self.host_regex) - + redis_host = config_loader.get_config_str('Redis_Cache', 'host') + redis_port = config_loader.get_config_int('Redis_Cache', 'port') + redis_db = config_loader.get_config_int('Redis_Cache', 'db') + self.dom_classifier = DomainClassifier.domainclassifier.Extract(rawtext="", + redis_host=redis_host, + redis_port=redis_port, + redis_db=redis_db, + re_timeout=30) self.logger.info(f"Module: {self.module_name} Launched") def compute(self, message): - item = self.get_obj() + obj = self.get_obj() - # mimetype = item_basic.get_item_mimetype(item.get_id()) - # if mimetype.split('/')[0] == "text": - - content = item.get_content() - hosts = self.regex_findall(self.host_regex, item.get_id(), content, r_set=True) - if 
hosts: - print(f'{len(hosts)} host {item.get_id()}') - for host in hosts: - # print(host) - if not host.endswith('.onion'): - self.add_message_to_queue(message=str(host), queue='Host') + content = obj.get_content() + self.dom_classifier.text(content) + if self.dom_classifier.domain: + print(f'{len(self.dom_classifier.domain)} host {obj.get_id()}') + # print(self.dom_classifier.domain) + for domain in self.dom_classifier.domain: + if domain: + self.add_message_to_queue(message=domain, queue='Host') if __name__ == '__main__': - module = Hosts() module.run() From fbd7e2236afbf1e097c8256f97b921189b9186fc Mon Sep 17 00:00:00 2001 From: terrtia Date: Tue, 30 Jan 2024 11:24:12 +0100 Subject: [PATCH 11/15] fix: [crawlers] fix errored capture start time --- bin/lib/crawlers.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bin/lib/crawlers.py b/bin/lib/crawlers.py index f98af175..ed418432 100755 --- a/bin/lib/crawlers.py +++ b/bin/lib/crawlers.py @@ -1331,6 +1331,8 @@ class CrawlerCapture: start_time = self.get_task().get_start_time() if r_str: return start_time + elif not start_time: + return 0 else: start_time = datetime.strptime(start_time, "%Y/%m/%d - %H:%M.%S").timestamp() return int(start_time) From d1608e89e13a4a1df8e83f2c87d941de3147a5f3 Mon Sep 17 00:00:00 2001 From: terrtia Date: Tue, 30 Jan 2024 11:29:42 +0100 Subject: [PATCH 12/15] fix: [crawlers] fix errored capture queue --- bin/crawlers/Crawler.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/bin/crawlers/Crawler.py b/bin/crawlers/Crawler.py index c1ba0e0c..b1a7d7cd 100755 --- a/bin/crawlers/Crawler.py +++ b/bin/crawlers/Crawler.py @@ -142,6 +142,12 @@ class Crawler(AbstractModule): return capture elif status == crawlers.CaptureStatus.UNKNOWN: capture_start = capture.get_start_time(r_str=False) + if capture_start == 0: + task = capture.get_task() + task.delete() + capture.delete() + self.logger.warning(f'capture UNKNOWN ERROR STATE, {task.uuid} Removed from queue') + return None if 
int(time.time()) - capture_start > 600: # TODO ADD in new crawler config task = capture.get_task() task.reset() From 194ae960fc6849786cb040f215f68115676d7e8a Mon Sep 17 00:00:00 2001 From: terrtia Date: Tue, 30 Jan 2024 11:35:43 +0100 Subject: [PATCH 13/15] fix: [crawlers] fix capture return error code --- bin/crawlers/Crawler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/crawlers/Crawler.py b/bin/crawlers/Crawler.py index b1a7d7cd..1f46e938 100755 --- a/bin/crawlers/Crawler.py +++ b/bin/crawlers/Crawler.py @@ -161,7 +161,7 @@ class Crawler(AbstractModule): except ConnectionError: print(capture.uuid) - capture.update(self, -1) + capture.update(-1) self.refresh_lacus_status() time.sleep(self.pending_seconds) From 5fab2326e60ed81dc18e0cfd62fb40a3cec84770 Mon Sep 17 00:00:00 2001 From: terrtia Date: Tue, 30 Jan 2024 11:45:43 +0100 Subject: [PATCH 14/15] fix: [misp export] fix empty event on module start --- bin/exporter/MISPExporter.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/bin/exporter/MISPExporter.py b/bin/exporter/MISPExporter.py index 4a0d3a1c..1fd32045 100755 --- a/bin/exporter/MISPExporter.py +++ b/bin/exporter/MISPExporter.py @@ -319,11 +319,7 @@ class MISPExporterAutoDaily(MISPExporter): def __init__(self, url='', key='', ssl=False): super().__init__(url=url, key=key, ssl=ssl) - # create event if don't exists - try: - self.event_id = self.get_daily_event_id() - except MISPConnectionError: - self.event_id = - 1 + self.event_id = - 1 self.date = datetime.date.today() def export(self, obj, tag): @@ -345,6 +341,7 @@ class MISPExporterAutoDaily(MISPExporter): self.add_event_object(self.event_id, obj) except MISPConnectionError: + self.event_id = - 1 return -1 From e4f21f05cc250b62dd45834f3a894a7e18e01490 Mon Sep 17 00:00:00 2001 From: terrtia Date: Tue, 30 Jan 2024 14:31:09 +0100 Subject: [PATCH 15/15] fix: [D4] fix module cache --- bin/core/D4_client.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 
4 deletions(-) diff --git a/bin/core/D4_client.py b/bin/core/D4_client.py index 9c452912..8bd79ded 100755 --- a/bin/core/D4_client.py +++ b/bin/core/D4_client.py @@ -34,16 +34,20 @@ class D4Client(AbstractModule): self.d4_client = d4.create_d4_client() self.last_refresh = time.time() + self.last_config_check = time.time() # Send module state to logs self.logger.info(f'Module {self.module_name} initialized') def compute(self, dns_record): # Refresh D4 Client - if self.last_refresh < d4.get_config_last_update_time(): - self.d4_client = d4.create_d4_client() - self.last_refresh = time.time() - print('D4 Client: config updated') + if self.last_config_check < int(time.time()) - 30: + print('refresh rrrr') + if self.last_refresh < d4.get_config_last_update_time(): + self.d4_client = d4.create_d4_client() + self.last_refresh = time.time() + print('D4 Client: config updated') + self.last_config_check = time.time() if self.d4_client: # Send DNS Record to D4Server