chg: [message] show trackers + modules matches

This commit is contained in:
terrtia 2024-03-27 13:42:15 +01:00
parent a3a664b7f1
commit 5ec0d7f0cf
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
10 changed files with 121 additions and 50 deletions

View file

@ -40,6 +40,11 @@ r_key = regex_helper.generate_redis_cache_key('extractor')
# TODO UI Link
CORRELATION_TO_EXTRACT = {
'item': ['cve', 'cryptocurrency', 'title', 'username'],
'message': ['cve', 'cryptocurrency', 'username']
}
MODULES = {
'infoleak:automatic-detection="credit-card"': CreditCards(queue=False),
'infoleak:automatic-detection="iban"': Iban(queue=False),
@ -57,9 +62,9 @@ tools = Tools(queue=False)
for tool_name in tools.get_tools():
MODULES[f'infoleak:automatic-detection="{tool_name}-tool"'] = tools
def get_correl_match(extract_type, obj_id, content):
def get_correl_match(extract_type, obj, content):
extracted = []
correl = correlations_engine.get_correlation_by_correl_type('item', '', obj_id, extract_type)
correl = correlations_engine.get_correlation_by_correl_type(obj.type, obj.get_subtype(r_str=True), obj.id, extract_type)
to_extract = []
map_subtype = {}
map_value_id = {}
@ -75,18 +80,18 @@ def get_correl_match(extract_type, obj_id, content):
sha256_val = sha256(value.encode()).hexdigest()
map_value_id[sha256_val] = value
if to_extract:
objs = regex_helper.regex_finditer(r_key, '|'.join(to_extract), obj_id, content)
for obj in objs:
if map_subtype.get(obj[2]):
subtype = map_subtype[obj[2]]
objs = regex_helper.regex_finditer(r_key, '|'.join(to_extract), obj.get_global_id(), content)
for ob in objs:
if map_subtype.get(ob[2]):
subtype = map_subtype[ob[2]]
else:
subtype = ''
sha256_val = sha256(obj[2].encode()).hexdigest()
sha256_val = sha256(ob[2].encode()).hexdigest()
value_id = map_value_id.get(sha256_val)
if not value_id:
logger.critical(f'Error module extractor: {sha256_val}\n{extract_type}\n{subtype}\n{value_id}\n{map_value_id}\n{objs}')
value_id = 'ERROR'
extracted.append([obj[0], obj[1], obj[2], f'{extract_type}:{subtype}:{value_id}'])
extracted.append([ob[0], ob[1], ob[2], f'{extract_type}:{subtype}:{value_id}'])
return extracted
def _get_yara_match(data):
@ -100,7 +105,7 @@ def _get_yara_match(data):
return yara.CALLBACK_CONTINUE
def _get_word_regex(word):
return '(?:^|(?<=[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]))' + word + '(?:$|(?=[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]))'
return '(?i)(?:^|(?<=[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]))' + word + '(?:$|(?=[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]))'
def convert_byte_offset_to_string(b_content, offset):
byte_chunk = b_content[:offset + 1]
@ -115,17 +120,18 @@ def convert_byte_offset_to_string(b_content, offset):
# TODO RETRO HUNTS
# TODO TRACKER TYPE IN UI
def get_tracker_match(obj_id, content):
def get_tracker_match(obj, content):
extracted = []
extracted_yara = []
trackers = Tracker.get_obj_trackers('item', '', obj_id)
obj_gid = obj.get_global_id()
trackers = Tracker.get_obj_trackers(obj.type, obj.get_subtype(r_str=True), obj.id)
for tracker_uuid in trackers:
tracker = Tracker.Tracker(tracker_uuid)
tracker_type = tracker.get_type()
# print(tracker_type)
tracked = tracker.get_tracked()
if tracker_type == 'regex': # TODO Improve word detection -> word delimiter
regex_match = regex_helper.regex_finditer(r_key, tracked, obj_id, content)
regex_match = regex_helper.regex_finditer(r_key, tracked, obj_gid, content)
for match in regex_match:
extracted.append([int(match[0]), int(match[1]), match[2], f'tracker:{tracker.uuid}'])
elif tracker_type == 'yara':
@ -147,13 +153,13 @@ def get_tracker_match(obj_id, content):
words = [tracked]
for word in words:
regex = _get_word_regex(word)
regex_match = regex_helper.regex_finditer(r_key, regex, obj_id, content)
regex_match = regex_helper.regex_finditer(r_key, regex, obj_gid, content)
# print(regex_match)
for match in regex_match:
extracted.append([int(match[0]), int(match[1]), match[2], f'tracker:{tracker.uuid}'])
# Retro Hunt
retro_hunts = Tracker.get_obj_retro_hunts('item', '', obj_id)
retro_hunts = Tracker.get_obj_retro_hunts(obj.type, obj.get_subtype(r_str=True), obj.id)
for retro_uuid in retro_hunts:
retro_hunt = Tracker.RetroHunt(retro_uuid)
rule = retro_hunt.get_rule(r_compile=True)
@ -182,35 +188,36 @@ def get_tracker_match(obj_id, content):
# Type:subtype:id
# tag:iban
# tracker:uuid
def extract(obj_id, content=None):
item = Item(obj_id)
if not item.exists():
# def extract(obj_id, content=None):
def extract(obj_type, subtype, obj_id, content=None):
obj = ail_objects.get_object(obj_type, subtype, obj_id)
if not obj.exists():
return []
obj_gid = obj.get_global_id()
# CHECK CACHE
cached = r_cache.get(f'extractor:cache:{obj_id}')
cached = r_cache.get(f'extractor:cache:{obj_gid}')
# cached = None
if cached:
r_cache.expire(f'extractor:cache:{obj_id}', 300)
r_cache.expire(f'extractor:cache:{obj_gid}', 300)
return json.loads(cached)
if not content:
content = item.get_content()
content = obj.get_content()
extracted = get_tracker_match(obj_id, content)
extracted = get_tracker_match(obj, content)
# print(item.get_tags())
for tag in item.get_tags():
for tag in obj.get_tags():
if MODULES.get(tag):
# print(tag)
module = MODULES.get(tag)
matches = module.extract(obj_id, content, tag)
matches = module.extract(obj, content, tag)
if matches:
extracted = extracted + matches
for obj_t in ['cve', 'cryptocurrency', 'title', 'username']: # Decoded, PGP->extract bloc
matches = get_correl_match(obj_t, obj_id, content)
for obj_t in CORRELATION_TO_EXTRACT[obj.type]:
matches = get_correl_match(obj_t, obj, content)
if matches:
extracted = extracted + matches
@ -221,8 +228,8 @@ def extract(obj_id, content=None):
# Save In Cache
if extracted:
extracted_dump = json.dumps(extracted)
r_cache.set(f'extractor:cache:{obj_id}', extracted_dump)
r_cache.expire(f'extractor:cache:{obj_id}', 300) # TODO Reduce CACHE ???????????????
r_cache.set(f'extractor:cache:{obj_gid}', extracted_dump)
r_cache.expire(f'extractor:cache:{obj_gid}', 300) # TODO Reduce CACHE ???????????????
return extracted
@ -271,15 +278,7 @@ def get_extracted_by_match(extracted):
# if __name__ == '__main__':
# t0 = time.time()
# obj_id = 'crawled/2022/09/15/circl.lu179c7903-5b21-452e-9f25-4b61d9934e2b'
# obj_id = 'crawled/2022/09/15/circl.lu1e4f9721-06dc-404f-aabf-3c3bd0b533bd'
# obj_id = 'submitted/2022/09/13/submitted_ba3ee771-c91c-4f50-9d6a-8558cdac7aeb.gz'
# # obj_id = 'tests/2021/01/01/credit_cards.gz'
# # obj_id = 'crawled/2020/07/20/circl.luc9301321-f1b1-4d91-9082-5eb452b946c5'
# obj_id = 'submitted/2019/09/22/97172282-e4c2-4a1e-b82c-c4fb9490a56e.gz'
# obj_id = 'submitted/2019/09/20/4fb7f02d-1241-4ef4-b17e-80ae76038835.gz'
# obj_id = 'crawled/2023/02/21/circl.lu1c300acb-0cbe-480f-917e-9afe3ec958e8'
#
# extract(obj_id)
#
# # get_obj_correl('cve', obj_id, content)

View file

@ -58,9 +58,9 @@ class CreditCards(AbstractModule):
if lib_refine.is_luhn_valid(clean_card):
return clean_card
def extract(self, obj_id, content, tag):
def extract(self, obj, content, tag):
extracted = []
cards = self.regex_finditer(self.regex, obj_id, content)
cards = self.regex_finditer(self.regex, obj.get_global_id(), content)
for card in cards:
start, end, value = card
if self.get_valid_card(value):

View file

@ -62,9 +62,9 @@ class Iban(AbstractModule):
return True
return False
def extract(self, obj_id, content, tag):
def extract(self, obj, content, tag):
extracted = []
ibans = self.regex_finditer(self.iban_regex, obj_id, content)
ibans = self.regex_finditer(self.iban_regex, obj.get_global_id(), content)
for iban in ibans:
start, end, value = iban
value = ''.join(e for e in value if e.isalnum())

View file

@ -118,10 +118,10 @@ class Mail(AbstractModule):
print(e)
return valid_mxdomain
def extract(self, obj_id, content, tag):
def extract(self, obj, content, tag):
extracted = []
mxdomains = {}
mails = self.regex_finditer(self.email_regex, obj_id, content)
mails = self.regex_finditer(self.email_regex, obj.get_global_id(), content)
for mail in mails:
start, end, value = mail
mxdomain = value.rsplit('@', 1)[1].lower()

View file

@ -55,9 +55,9 @@ class Onion(AbstractModule):
# TEMP var: SAVE I2P Domain (future I2P crawler)
# self.save_i2p = config_loader.get_config_boolean("Onion", "save_i2p")
def extract(self, obj_id, content, tag):
def extract(self, obj, content, tag):
extracted = []
onions = self.regex_finditer(self.onion_regex, obj_id, content)
onions = self.regex_finditer(self.onion_regex, obj.get_global_id(), content)
for onion in onions:
start, end, value = onion
url_unpack = crawlers.unpack_url(value)

View file

@ -41,9 +41,9 @@ class Phone(AbstractModule):
# Waiting time in seconds between two processed messages
self.pending_seconds = 1
def extract(self, obj_id, content, tag):
def extract(self, obj, content, tag):
extracted = []
phones = self.regex_phone_iter('ZZ', obj_id, content)
phones = self.regex_phone_iter('ZZ', obj.get_global_id(), content)
for phone in phones:
extracted.append([phone[0], phone[1], phone[2], f'tag:{tag}'])
return extracted

View file

@ -23,6 +23,7 @@ from lib import ail_core
from lib import chats_viewer
from lib import Language
from lib import Tag
from lib import module_extractor
# ============ BLUEPRINT ============
chats_explorer = Blueprint('chats_explorer', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/chats_explorer'))
@ -235,6 +236,10 @@ def objects_message():
else:
message = message[0]
languages = Language.get_translation_languages()
extracted = module_extractor.extract('message', '', message['id'], content=message['content'])
extracted_matches = module_extractor.get_extracted_by_match(extracted)
message['extracted'] = extracted
message['extracted_matches'] = extracted_matches
return render_template('ChatMessage.html', meta=message, bootstrap_label=bootstrap_label,
translation_languages=languages, translation_target=target,
modal_add_tags=Tag.get_modal_add_tags(message['id'], object_type='message'))

View file

@ -85,7 +85,7 @@ def showItem(): # # TODO: support post
else:
meta['investigations'] = []
extracted = module_extractor.extract(item.id, content=meta['content'])
extracted = module_extractor.extract('item', '', item.id, content=meta['content'])
extracted_matches = module_extractor.get_extracted_by_match(extracted)
return render_template("show_item.html", bootstrap_label=bootstrap_label,

View file

@ -14,8 +14,8 @@
<!-- JS -->
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
<script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/bootstrap4.min.js') }}"></script>
<script src="{{ url_for('static', filename='js/jquery.dataTables.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/dataTables.bootstrap.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/tags.js') }}"></script>
@ -134,6 +134,65 @@
</div>
{# Collapsible "Extracted" card: one table row per entry of meta['extracted_matches'].
   Rendered only when that mapping is non-empty. Each row shows the entry's icon and
   subtype, its id, and one anchor link per match; the link target is "#<start>:<end>",
   built from row[0] and row[1], and the link text is row[2]. #}
{% if meta['extracted_matches'] %}
<div id="accordion_extracted" class="mb-3 mx-3">
  <div class="card">
    <div class="card-header py-1" id="heading_extracted">
      <div class="row">
        <div class="col-11">
          <div class="mt-2">
            <img id="misp-logo" src="{{ url_for('static', filename='image/ail-icon.png')}}" height="32"> Extracted&nbsp;&nbsp;
            <div class="badge badge-warning">{{meta['extracted_matches']|length}}</div>
          </div>
        </div>
        <div class="col-1">
          {# aria-controls must name the id of the panel this button toggles, and
             aria-expanded starts as "false" because the panel below has no "show" class. #}
          <button class="btn btn-link btn-lg py-2 float-right rotate down" data-toggle="collapse" data-target="#collapse_extracted" aria-expanded="false" aria-controls="collapse_extracted">
            <i class="fas fa-chevron-circle-down"></i>
          </button>
        </div>
      </div>
    </div>
    <div id="collapse_extracted" class="collapse" aria-labelledby="heading_extracted" data-parent="#accordion_extracted">
      <div class="card-body">
        <table id="table_extracted" class="table table-striped">
          <thead class="thead-dark">
            <tr>
              <th>Type</th>
              <th>ID</th>
              <th>Extracted</th>
            </tr>
          </thead>
          <tbody>
            {% for match in meta['extracted_matches'] %}
            <tr>
              <td>
                <svg height="26" width="26">
                  <g class="nodes">
                    <circle cx="13" cy="13" r="13" fill="{{ meta['extracted_matches'][match]['icon']['color'] }}"></circle>
                    <text x="13" y="13" text-anchor="middle" dominant-baseline="central" class="graph_node_icon {{ meta['extracted_matches'][match]['icon']['style'] }}" font-size="16px">{{ meta['extracted_matches'][match]['icon']['icon'] }}</text>
                  </g>
                </svg>
                {{ meta['extracted_matches'][match]['subtype'] }}
              </td>
              <td>{{ meta['extracted_matches'][match]['id'] }}</td>
              <td>
                {% for row in meta['extracted_matches'][match]['matches'] %}
                <a href="#{{ row[0] }}:{{ row[1] }}">{{ row[2] }}</a><br>
                {% endfor %}
              </td>
            </tr>
            {% endfor %}
          </tbody>
        </table>
      </div>
    </div>
  </div>
</div>
{% endif %}
{% include 'objects/image/block_blur_img_slider.html' %}
{% with translate_url=url_for('chats_explorer.objects_message', id=meta['id']), obj_id=meta['id'] %}
{% include 'chats_explorer/block_translation.html' %}
@ -169,6 +228,10 @@
});
$(function () {
$('[data-toggle="popover"]').popover()
})
function toggle_sidebar(){
if($('#nav_menu').is(':visible')){
$('#nav_menu').hide();

View file

@ -76,7 +76,11 @@
</div>
{% endfor %}
{% endif %}
<pre class="my-0">{{ message['content'] }}</pre>
{% if not message['extracted'] %}
<pre class="my-0">{{ message['content'] }}</pre>
{% else %}
<pre class="my-0">{{ message['content'][:message['extracted'][0][0]] }}{% for row in message['extracted'] %}<span class="hg-text" data-toggle="popover" data-trigger="hover" data-html="true" title="<svg height=&quot;26&quot; width=&quot;26&quot;><g class=&quot;nodes&quot;><circle cx=&quot;13&quot; cy=&quot;13&quot; r=&quot;13&quot; fill=&quot;{{ message['extracted_matches'][row[3]]['icon']['color'] }}&quot;></circle><text x=&quot;13&quot; y=&quot;13&quot; text-anchor=&quot;middle&quot; dominant-baseline=&quot;central&quot; class=&quot;graph_node_icon {{ message['extracted_matches'][row[3]]['icon']['style'] }}&quot; font-size=&quot;16px&quot;>{{ message['extracted_matches'][row[3]]['icon']['icon'] }}</text></g></svg> {{ message['extracted_matches'][row[3]]['subtype'] }}" data-content="{{ message['extracted_matches'][row[3]]['id'] }}" id="{{ row[0] }}:{{ row[1] }}">{{ message['content'][row[0]:row[1]] }}</span>{% if loop.index + 1 > message['extracted']|length %}{{ message['content'][message['extracted'][-1][1]:] }}{% else %}{{ message['content'][row[1]:message['extracted'][loop.index][0]] }}{% endif %}{% endfor %}</pre>
{% endif %}
{% if message['translation'] %}
<hr class="m-1">
<pre class="my-0 text-secondary">{{ message['translation'] }}</pre>