chg: [message] show trackers + modules matches

2024-11-26 07:47:17 +00:00 · 2024-03-27 13:42:15 +01:00 · 2024-03-27 13:42:15 +01:00 · 5ec0d7f0cf
commit 5ec0d7f0cf
parent a3a664b7f1
10 changed files with 121 additions and 50 deletions
--- a/bin/lib/module_extractor.py
+++ b/bin/lib/module_extractor.py
@ -40,6 +40,11 @@ r_key = regex_helper.generate_redis_cache_key('extractor')

 # TODO UI Link

+CORRELATION_TO_EXTRACT = {
+    'item': ['cve', 'cryptocurrency', 'title', 'username'],
+    'message': ['cve', 'cryptocurrency', 'username']
+}
+
 MODULES = {
    'infoleak:automatic-detection="credit-card"': CreditCards(queue=False),
    'infoleak:automatic-detection="iban"': Iban(queue=False),
@ -57,9 +62,9 @@ tools = Tools(queue=False)
 for tool_name in tools.get_tools():
    MODULES[f'infoleak:automatic-detection="{tool_name}-tool"'] = tools

-def get_correl_match(extract_type, obj_id, content):
+def get_correl_match(extract_type, obj, content):
    extracted = []
-    correl = correlations_engine.get_correlation_by_correl_type('item', '', obj_id, extract_type)
+    correl = correlations_engine.get_correlation_by_correl_type(obj.type, obj.get_subtype(r_str=True), obj.id, extract_type)
    to_extract = []
    map_subtype = {}
    map_value_id = {}
@ -75,18 +80,18 @@ def get_correl_match(extract_type, obj_id, content):
            sha256_val = sha256(value.encode()).hexdigest()
        map_value_id[sha256_val] = value
    if to_extract:
-        objs = regex_helper.regex_finditer(r_key, '|'.join(to_extract), obj_id, content)
-        for obj in objs:
-            if map_subtype.get(obj[2]):
-                subtype = map_subtype[obj[2]]
+        objs = regex_helper.regex_finditer(r_key, '|'.join(to_extract), obj.get_global_id(), content)
+        for ob in objs:
+            if map_subtype.get(ob[2]):
+                subtype = map_subtype[ob[2]]
            else:
                subtype = ''
-                sha256_val = sha256(obj[2].encode()).hexdigest()
+                sha256_val = sha256(ob[2].encode()).hexdigest()
            value_id = map_value_id.get(sha256_val)
            if not value_id:
                logger.critical(f'Error module extractor: {sha256_val}\n{extract_type}\n{subtype}\n{value_id}\n{map_value_id}\n{objs}')
                value_id = 'ERROR'
-            extracted.append([obj[0], obj[1], obj[2], f'{extract_type}:{subtype}:{value_id}'])
+            extracted.append([ob[0], ob[1], ob[2], f'{extract_type}:{subtype}:{value_id}'])
    return extracted

 def _get_yara_match(data):
@ -100,7 +105,7 @@ def _get_yara_match(data):
    return yara.CALLBACK_CONTINUE

 def _get_word_regex(word):
-    return '(?:^|(?<=[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]))' + word + '(?:$|(?=[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]))'
+    return '(?i)(?:^|(?<=[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]))' + word + '(?:$|(?=[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]))'

 def convert_byte_offset_to_string(b_content, offset):
    byte_chunk = b_content[:offset + 1]
@ -115,17 +120,18 @@ def convert_byte_offset_to_string(b_content, offset):

 # TODO RETRO HUNTS
 # TODO TRACKER TYPE IN UI
-def get_tracker_match(obj_id, content):
+def get_tracker_match(obj, content):
    extracted = []
    extracted_yara = []
-    trackers = Tracker.get_obj_trackers('item', '', obj_id)
+    obj_gid = obj.get_global_id()
+    trackers = Tracker.get_obj_trackers(obj.type, obj.get_subtype(r_str=True), obj.id)
    for tracker_uuid in trackers:
        tracker = Tracker.Tracker(tracker_uuid)
        tracker_type = tracker.get_type()
        # print(tracker_type)
        tracked = tracker.get_tracked()
        if tracker_type == 'regex':  # TODO Improve word detection -> word delimiter
-            regex_match = regex_helper.regex_finditer(r_key, tracked, obj_id, content)
+            regex_match = regex_helper.regex_finditer(r_key, tracked, obj_gid, content)
            for match in regex_match:
                extracted.append([int(match[0]), int(match[1]), match[2], f'tracker:{tracker.uuid}'])
        elif tracker_type == 'yara':
@ -147,13 +153,13 @@ def get_tracker_match(obj_id, content):
                words = [tracked]
            for word in words:
                regex = _get_word_regex(word)
-                regex_match = regex_helper.regex_finditer(r_key, regex, obj_id, content)
+                regex_match = regex_helper.regex_finditer(r_key, regex, obj_gid, content)
                # print(regex_match)
                for match in regex_match:
                    extracted.append([int(match[0]), int(match[1]), match[2], f'tracker:{tracker.uuid}'])

    # Retro Hunt
-    retro_hunts = Tracker.get_obj_retro_hunts('item', '', obj_id)
+    retro_hunts = Tracker.get_obj_retro_hunts(obj.type, obj.get_subtype(r_str=True), obj.id)
    for retro_uuid in retro_hunts:
        retro_hunt = Tracker.RetroHunt(retro_uuid)
        rule = retro_hunt.get_rule(r_compile=True)
@ -182,35 +188,36 @@ def get_tracker_match(obj_id, content):
 # Type:subtype:id
 # tag:iban
 # tracker:uuid
-
-def extract(obj_id, content=None):
-    item = Item(obj_id)
-    if not item.exists():
+# Former signature (item-only): extract(obj_id, content=None)
+def extract(obj_type, subtype, obj_id, content=None):
+    obj = ail_objects.get_object(obj_type, subtype, obj_id)
+    if not obj.exists():
        return []
+    obj_gid = obj.get_global_id()

    # CHECK CACHE
-    cached = r_cache.get(f'extractor:cache:{obj_id}')
+    cached = r_cache.get(f'extractor:cache:{obj_gid}')
    # cached = None
    if cached:
-        r_cache.expire(f'extractor:cache:{obj_id}', 300)
+        r_cache.expire(f'extractor:cache:{obj_gid}', 300)
        return json.loads(cached)

    if not content:
-        content = item.get_content()
+        content = obj.get_content()

-    extracted = get_tracker_match(obj_id, content)
+    extracted = get_tracker_match(obj, content)

    # print(item.get_tags())
-    for tag in item.get_tags():
+    for tag in obj.get_tags():
        if MODULES.get(tag):
            # print(tag)
            module = MODULES.get(tag)
-            matches = module.extract(obj_id, content, tag)
+            matches = module.extract(obj, content, tag)
            if matches:
                extracted = extracted + matches

-    for obj_t in ['cve', 'cryptocurrency', 'title', 'username']:  # Decoded, PGP->extract bloc
-        matches = get_correl_match(obj_t, obj_id, content)
+    for obj_t in CORRELATION_TO_EXTRACT[obj.type]:
+        matches = get_correl_match(obj_t, obj, content)
        if matches:
            extracted = extracted + matches

@ -221,8 +228,8 @@ def extract(obj_id, content=None):
    # Save In Cache
    if extracted:
        extracted_dump = json.dumps(extracted)
-        r_cache.set(f'extractor:cache:{obj_id}', extracted_dump)
-        r_cache.expire(f'extractor:cache:{obj_id}', 300)  # TODO Reduce CACHE ???????????????
+        r_cache.set(f'extractor:cache:{obj_gid}', extracted_dump)
+        r_cache.expire(f'extractor:cache:{obj_gid}', 300)  # TODO Reduce CACHE ???????????????

    return extracted

@ -271,15 +278,7 @@ def get_extracted_by_match(extracted):

 # if __name__ == '__main__':
 #     t0 = time.time()
-#     obj_id = 'crawled/2022/09/15/circl.lu179c7903-5b21-452e-9f25-4b61d9934e2b'
-#     obj_id = 'crawled/2022/09/15/circl.lu1e4f9721-06dc-404f-aabf-3c3bd0b533bd'
-#     obj_id = 'submitted/2022/09/13/submitted_ba3ee771-c91c-4f50-9d6a-8558cdac7aeb.gz'
-#     # obj_id = 'tests/2021/01/01/credit_cards.gz'
-#     # obj_id = 'crawled/2020/07/20/circl.luc9301321-f1b1-4d91-9082-5eb452b946c5'
-#     obj_id = 'submitted/2019/09/22/97172282-e4c2-4a1e-b82c-c4fb9490a56e.gz'
-#     obj_id = 'submitted/2019/09/20/4fb7f02d-1241-4ef4-b17e-80ae76038835.gz'
 #     obj_id = 'crawled/2023/02/21/circl.lu1c300acb-0cbe-480f-917e-9afe3ec958e8'
-#
 #     extract(obj_id)
 #
 #     # get_obj_correl('cve', obj_id, content)
--- a/bin/modules/CreditCards.py
+++ b/bin/modules/CreditCards.py
@ -58,9 +58,9 @@ class CreditCards(AbstractModule):
        if lib_refine.is_luhn_valid(clean_card):
            return clean_card

-    def extract(self, obj_id, content, tag):
+    def extract(self, obj, content, tag):
        extracted = []
-        cards = self.regex_finditer(self.regex, obj_id, content)
+        cards = self.regex_finditer(self.regex, obj.get_global_id(), content)
        for card in cards:
            start, end, value = card
            if self.get_valid_card(value):
--- a/bin/modules/Iban.py
+++ b/bin/modules/Iban.py
@ -62,9 +62,9 @@ class Iban(AbstractModule):
            return True
        return False

-    def extract(self, obj_id, content, tag):
+    def extract(self, obj, content, tag):
        extracted = []
-        ibans = self.regex_finditer(self.iban_regex, obj_id, content)
+        ibans = self.regex_finditer(self.iban_regex, obj.get_global_id(), content)
        for iban in ibans:
            start, end, value = iban
            value = ''.join(e for e in value if e.isalnum())
--- a/bin/modules/Mail.py
+++ b/bin/modules/Mail.py
@ -118,10 +118,10 @@ class Mail(AbstractModule):
                    print(e)
        return valid_mxdomain

-    def extract(self, obj_id, content, tag):
+    def extract(self, obj, content, tag):
        extracted = []
        mxdomains = {}
-        mails = self.regex_finditer(self.email_regex, obj_id, content)
+        mails = self.regex_finditer(self.email_regex, obj.get_global_id(), content)
        for mail in mails:
            start, end, value = mail
            mxdomain = value.rsplit('@', 1)[1].lower()
--- a/bin/modules/Onion.py
+++ b/bin/modules/Onion.py
@ -55,9 +55,9 @@ class Onion(AbstractModule):
        # TEMP var: SAVE I2P Domain (future I2P crawler)
        # self.save_i2p = config_loader.get_config_boolean("Onion", "save_i2p")

-    def extract(self, obj_id, content, tag):
+    def extract(self, obj, content, tag):
        extracted = []
-        onions = self.regex_finditer(self.onion_regex, obj_id, content)
+        onions = self.regex_finditer(self.onion_regex, obj.get_global_id(), content)
        for onion in onions:
            start, end, value = onion
            url_unpack = crawlers.unpack_url(value)
--- a/bin/modules/Phone.py
+++ b/bin/modules/Phone.py
@ -41,9 +41,9 @@ class Phone(AbstractModule):
        # Waiting time in seconds between to message processed
        self.pending_seconds = 1

-    def extract(self, obj_id, content, tag):
+    def extract(self, obj, content, tag):
        extracted = []
-        phones = self.regex_phone_iter('ZZ', obj_id, content)
+        phones = self.regex_phone_iter('ZZ', obj.get_global_id(), content)
        for phone in phones:
            extracted.append([phone[0], phone[1], phone[2], f'tag:{tag}'])
        return extracted
--- a/var/www/blueprints/chats_explorer.py
+++ b/var/www/blueprints/chats_explorer.py
@ -23,6 +23,7 @@ from lib import ail_core
 from lib import chats_viewer
 from lib import Language
 from lib import Tag
+from lib import module_extractor

 # ============ BLUEPRINT ============
 chats_explorer = Blueprint('chats_explorer', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/chats_explorer'))
@ -235,6 +236,10 @@ def objects_message():
    else:
        message = message[0]
        languages = Language.get_translation_languages()
+        extracted = module_extractor.extract('message', '', message['id'], content=message['content'])
+        extracted_matches = module_extractor.get_extracted_by_match(extracted)
+        message['extracted'] = extracted
+        message['extracted_matches'] = extracted_matches
        return render_template('ChatMessage.html', meta=message, bootstrap_label=bootstrap_label,
                               translation_languages=languages, translation_target=target,
                               modal_add_tags=Tag.get_modal_add_tags(message['id'], object_type='message'))
--- a/var/www/blueprints/objects_item.py
+++ b/var/www/blueprints/objects_item.py
@ -85,7 +85,7 @@ def showItem():  # # TODO: support post
    else:
        meta['investigations'] = []

-    extracted = module_extractor.extract(item.id, content=meta['content'])
+    extracted = module_extractor.extract('item', '', item.id, content=meta['content'])
    extracted_matches = module_extractor.get_extracted_by_match(extracted)

    return render_template("show_item.html", bootstrap_label=bootstrap_label,
--- a/var/www/templates/chats_explorer/ChatMessage.html
+++ b/var/www/templates/chats_explorer/ChatMessage.html
@ -134,6 +134,65 @@
                </div>


+                {% if meta['extracted_matches'] %}
+                    <div id="accordion_extracted" class="mb-3 mx-3">
+                        <div class="card">
+                            <div class="card-header py-1" id="heading_extracted">
+                                <div class="row">
+                                    <div class="col-11">
+                                        <div class="mt-2">
+                                            <img id="misp-logo" src="{{ url_for('static', filename='image/ail-icon.png')}}" height="32"> Extracted&nbsp;&nbsp;
+                                            <div class="badge badge-warning">{{meta['extracted_matches']|length}}</div>
+                                        </div>
+                                    </div>
+                                    <div class="col-1">
+                                        <button class="btn btn-link btn-lg py-2 float-right rotate down" data-toggle="collapse" data-target="#collapse_extracted" aria-expanded="true" aria-controls="collapse_extracted">
+                                            <i class="fas fa-chevron-circle-down"></i>
+                                        </button>
+                                    </div>
+                                </div>
+                            </div>
+
+                            <div id="collapse_extracted" class="collapse" aria-labelledby="heading_extracted" data-parent="#accordion_extracted">
+                                <div class="card-body">
+                                    <table id="table_extracted" class="table table-striped">
+                                        <thead class="thead-dark">
+                                        <tr>
+                                            <th>Type</th>
+                                            <th>ID</th>
+                                            <th>Extracted</th>
+                                        </tr>
+                                        </thead>
+                                        <tbody>
+                                        {% for match in meta['extracted_matches'] %}
+                                            <tr>
+                                                <td>
+                                                    <svg height="26" width="26">
+                                                        <g class="nodes">
+                                                            <circle cx="13" cy="13" r="13" fill="{{ meta['extracted_matches'][match]['icon']['color'] }}"></circle>
+                                                            <text x="13" y="13" text-anchor="middle" dominant-baseline="central" class="graph_node_icon {{ meta['extracted_matches'][match]['icon']['style'] }}" font-size="16px">{{ meta['extracted_matches'][match]['icon']['icon'] }}</text>
+                                                        </g>
+                                                    </svg>
+                                                    {{ meta['extracted_matches'][match]['subtype'] }}
+                                                </td>
+                                                <td>{{ meta['extracted_matches'][match]['id'] }}</td>
+                                                <td>
+                                                    {% for row in meta['extracted_matches'][match]['matches'] %}
+                                                        <a href="#{{ row[0] }}:{{row[1] }}">{{ row[2] }}</a><br>
+                                                    {% endfor %}
+                                                </td>
+                                            </tr>
+                                        {% endfor %}
+                                        </tbody>
+                                    </table>
+                                </div>
+                            </div>
+
+                        </div>
+                    </div>
+                {% endif %}
+
+
                {% include 'objects/image/block_blur_img_slider.html' %}
                {% with translate_url=url_for('chats_explorer.objects_message', id=meta['id']), obj_id=meta['id'] %}
                    {% include 'chats_explorer/block_translation.html' %}
@ -169,6 +228,10 @@

  });

+$(function () {
+    $('[data-toggle="popover"]').popover()
+})
+
 function toggle_sidebar(){
 	if($('#nav_menu').is(':visible')){
 		$('#nav_menu').hide();
--- a/var/www/templates/chats_explorer/block_message.html
+++ b/var/www/templates/chats_explorer/block_message.html
@ -76,7 +76,11 @@
                </div>
            {%  endfor %}
        {% endif %}
+        {% if not message['extracted'] %}
            <pre class="my-0">{{ message['content'] }}</pre>
+        {% else %}
+            <pre class="my-0">{{ message['content'][:message['extracted'][0][0]] }}{% for row in message['extracted'] %}<span class="hg-text" data-toggle="popover" data-trigger="hover" data-html="true" title="<svg height=&quot;26&quot; width=&quot;26&quot;><g class=&quot;nodes&quot;><circle cx=&quot;13&quot; cy=&quot;13&quot; r=&quot;13&quot; fill=&quot;{{ message['extracted_matches'][row[3]]['icon']['color'] }}&quot;></circle><text x=&quot;13&quot; y=&quot;13&quot; text-anchor=&quot;middle&quot; dominant-baseline=&quot;central&quot; class=&quot;graph_node_icon {{ message['extracted_matches'][row[3]]['icon']['style'] }}&quot; font-size=&quot;16px&quot;>{{ message['extracted_matches'][row[3]]['icon']['icon'] }}</text></g></svg> {{ message['extracted_matches'][row[3]]['subtype'] }}" data-content="{{ message['extracted_matches'][row[3]]['id'] }}" id="{{ row[0] }}:{{ row[1] }}">{{ message['content'][row[0]:row[1]] }}</span>{% if loop.index + 1 > message['extracted']|length %}{{ message['content'][message['extracted'][-1][1]:] }}{% else %}{{ message['content'][row[1]:message['extracted'][loop.index][0]] }}{% endif %}{% endfor %}</pre>
+        {% endif %}
        {% if message['translation'] %}
            <hr class="m-1">
            <pre class="my-0 text-secondary">{{ message['translation'] }}</pre>