mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-10 00:28:22 +00:00
chg: [extractor] add cache + UI extractor + word/set extractor
This commit is contained in:
parent
ab24343b48
commit
0fa27c6a51
8 changed files with 208 additions and 69 deletions
|
@ -1,17 +1,19 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
|
||||
import yara
|
||||
|
||||
from operator import itemgetter
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
import lib.objects.ail_objects
|
||||
from lib.objects import ail_objects
|
||||
from lib.objects.Items import Item
|
||||
from lib import correlations_engine
|
||||
from lib import regex_helper
|
||||
|
@ -25,23 +27,19 @@ from modules.Mail import Mail
|
|||
from modules.Onion import Onion
|
||||
from modules.Tools import Tools
|
||||
|
||||
creditCards = CreditCards()
|
||||
ibans = Iban()
|
||||
mails = Mail()
|
||||
onions = Onion()
|
||||
tools = Tools()
|
||||
|
||||
config_loader = ConfigLoader()
|
||||
r_cache = config_loader.get_redis_conn("Redis_Cache")
|
||||
config_loader = None
|
||||
|
||||
r_key = regex_helper.generate_redis_cache_key('extractor')
|
||||
|
||||
# TODO UI Link
|
||||
|
||||
MODULES = {
|
||||
'infoleak:automatic-detection="credit-card"': creditCards,
|
||||
'infoleak:automatic-detection="iban"': ibans,
|
||||
'infoleak:automatic-detection="mail"': mails,
|
||||
'infoleak:automatic-detection="onion"': onions,
|
||||
'infoleak:automatic-detection="credit-card"': CreditCards(),
|
||||
'infoleak:automatic-detection="iban"': Iban(),
|
||||
'infoleak:automatic-detection="mail"': Mail(),
|
||||
'infoleak:automatic-detection="onion"': Onion(),
|
||||
# APIkey ???
|
||||
# Credentials
|
||||
# Zerobins
|
||||
|
@ -49,20 +47,28 @@ MODULES = {
|
|||
# SQL Injection / Libinjection ???
|
||||
|
||||
}
|
||||
tools = Tools()
|
||||
for tool_name in tools.get_tools():
|
||||
MODULES[f'infoleak:automatic-detection="{tool_name}-tool"'] = tools
|
||||
|
||||
def get_correl_match(extract_type, obj_id, content, filter_subtypes=['']):
|
||||
def get_correl_match(extract_type, obj_id, content):
|
||||
extracted = []
|
||||
correl = correlations_engine.get_correlation_by_correl_type('item', '', obj_id, extract_type)
|
||||
to_extract = []
|
||||
map_subtype = {}
|
||||
for c in correl:
|
||||
subtype, value = c.split(':', 1)
|
||||
# if subtype in filter_subtypes:
|
||||
map_subtype[value] = subtype
|
||||
to_extract.append(value)
|
||||
if to_extract:
|
||||
return regex_helper.regex_finditer(r_key, '|'.join(to_extract), obj_id, content)
|
||||
objs = regex_helper.regex_finditer(r_key, '|'.join(to_extract), obj_id, content)
|
||||
for obj in objs:
|
||||
if map_subtype[obj[2]]:
|
||||
subtype = map_subtype[obj[2]]
|
||||
else:
|
||||
return []
|
||||
subtype = ''
|
||||
extracted.append([obj[0], obj[1], obj[2], f'{extract_type}:{subtype}:{obj[2]}'])
|
||||
return extracted
|
||||
|
||||
def _get_yara_match(data):
|
||||
for row in data.get('strings'):
|
||||
|
@ -73,14 +79,27 @@ def _get_yara_match(data):
|
|||
r_cache.expire(f'extractor:yara:match:{r_key}', 300)
|
||||
return yara.CALLBACK_CONTINUE
|
||||
|
||||
def _get_word_regex(word):
|
||||
return '(?:^|(?<=[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]))' + word + '(?:$|(?=[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]))'
|
||||
|
||||
# TODO RETRO HUNTS
|
||||
# TODO TRACKER TYPE IN UI
|
||||
def get_tracker_match(obj_id, content):
|
||||
cached = r_cache.get(f'extractor:cache:{obj_id}')
|
||||
if cached:
|
||||
r_cache.expire(f'extractor:cache:{obj_id}', 300)
|
||||
return json.loads(cached)
|
||||
|
||||
extracted = []
|
||||
trackers = Tracker.get_obj_all_trackers('item', '', obj_id)
|
||||
for tracker_uuid in trackers:
|
||||
tracker_type = Tracker.get_tracker_type(tracker_uuid)
|
||||
print(tracker_type)
|
||||
tracker = Tracker.get_tracker_by_uuid(tracker_uuid)
|
||||
if tracker_type == 'regex':
|
||||
return regex_helper.regex_finditer(r_key, tracker, obj_id, content)
|
||||
if tracker_type == 'regex': # TODO Improve word detection -> word delimiter
|
||||
regex_match = regex_helper.regex_finditer(r_key, tracker, obj_id, content)
|
||||
for match in regex_match:
|
||||
extracted.append([int(match[0]), int(match[1]), match[2], f'tracker:{tracker_uuid}'])
|
||||
elif tracker_type == 'yara':
|
||||
rule = Tracker.get_yara_rule_by_uuid(tracker_uuid)
|
||||
rule.match(data=content, callback=_get_yara_match,
|
||||
|
@ -90,22 +109,39 @@ def get_tracker_match(obj_id, content):
|
|||
extracted = []
|
||||
for match in yara_match:
|
||||
start, end, value = match.split(':', 2)
|
||||
extracted.append((int(start), int(end), value))
|
||||
extracted.append([int(start), int(end), value, f'tracker:{tracker_uuid}'])
|
||||
|
||||
elif tracker_type == 'word' or tracker_type == 'set':
|
||||
if tracker_type == 'set':
|
||||
tracker = tracker.rsplit(';', 1)[0]
|
||||
words = tracker.split(',')
|
||||
else:
|
||||
words = [tracker]
|
||||
for word in words:
|
||||
regex = _get_word_regex(word)
|
||||
regex_match = regex_helper.regex_finditer(r_key, regex, obj_id, content)
|
||||
print(regex_match)
|
||||
for match in regex_match:
|
||||
extracted.append([int(match[0]), int(match[1]), match[2], f'tracker:{tracker_uuid}'])
|
||||
|
||||
# Save In Cache
|
||||
if extracted:
|
||||
extracted_dump = json.dumps(extracted)
|
||||
r_cache.set(f'extractor:cache:{obj_id}', extracted_dump)
|
||||
r_cache.expire(f'extractor:cache:{obj_id}', 300) # TODO Reduce CACHE ???????????????
|
||||
|
||||
return extracted
|
||||
|
||||
# elif tracker_type == 'term': # TODO
|
||||
#
|
||||
# elif tracker_type == '':
|
||||
return []
|
||||
|
||||
# Type:subtype:id
|
||||
# tag:iban
|
||||
# tracker:uuid
|
||||
|
||||
def extract(obj_id, content=None):
|
||||
item = Item(obj_id)
|
||||
if not content:
|
||||
content = item.get_content()
|
||||
extracted = []
|
||||
|
||||
extracted = extracted + get_tracker_match(obj_id, content)
|
||||
extracted = get_tracker_match(obj_id, content)
|
||||
|
||||
# print(item.get_tags())
|
||||
for tag in item.get_tags():
|
||||
|
@ -121,28 +157,65 @@ def extract(obj_id, content=None):
|
|||
if matches:
|
||||
extracted = extracted + matches
|
||||
|
||||
from operator import itemgetter
|
||||
|
||||
# SORT By Start Pos
|
||||
extracted = sorted(extracted, key=itemgetter(0))
|
||||
print(extracted)
|
||||
# print(extracted)
|
||||
return extracted
|
||||
|
||||
# TODO ADD LINK UI
|
||||
def get_extracted_by_match(extracted):
|
||||
matches = {}
|
||||
for start, end, value, str_obj in extracted:
|
||||
|
||||
if __name__ == '__main__':
|
||||
t0 = time.time()
|
||||
obj_id = 'crawled/2022/09/15/circl.lu179c7903-5b21-452e-9f25-4b61d9934e2b'
|
||||
obj_id = 'crawled/2022/09/15/circl.lu1e4f9721-06dc-404f-aabf-3c3bd0b533bd'
|
||||
obj_id = 'submitted/2022/09/13/submitted_ba3ee771-c91c-4f50-9d6a-8558cdac7aeb.gz'
|
||||
# obj_id = 'tests/2021/01/01/credit_cards.gz'
|
||||
# obj_id = 'crawled/2020/07/20/circl.luc9301321-f1b1-4d91-9082-5eb452b946c5'
|
||||
obj_id = 'submitted/2019/09/22/97172282-e4c2-4a1e-b82c-c4fb9490a56e.gz'
|
||||
obj_id = 'submitted/2019/09/20/4fb7f02d-1241-4ef4-b17e-80ae76038835.gz'
|
||||
if str_obj not in matches:
|
||||
matches[str_obj] = {}
|
||||
ob_type, row_id = str_obj.split(':', 1)
|
||||
if ob_type == 'tag': # TODO put me in object class
|
||||
matches[str_obj]['subtype'] = 'tag'
|
||||
matches[str_obj]['id'] = row_id
|
||||
matches[str_obj]['icon'] = {'style': 'fas', 'icon': '\uf02b', 'color': '#28a745', 'radius': 5}
|
||||
matches[str_obj]['link'] = ''
|
||||
elif ob_type == 'tracker': # TODO put me in object class
|
||||
matches[str_obj]['subtype'] = 'tracker'
|
||||
matches[str_obj]['id'] = row_id
|
||||
matches[str_obj]['icon'] = {'style': 'fas', 'icon': '\uf05b', 'color': '#ffc107', 'radius': 5}
|
||||
matches[str_obj]['link'] = ''
|
||||
else:
|
||||
row_id = row_id.split(':', 1)
|
||||
if len(row_id) == 2:
|
||||
subtype = row_id[0]
|
||||
obj_id = row_id[1]
|
||||
else:
|
||||
subtype = ''
|
||||
obj_id = row_id[0]
|
||||
matches[str_obj]['subtype'] = subtype
|
||||
matches[str_obj]['id'] = obj_id
|
||||
matches[str_obj]['icon'] = ail_objects.get_object_svg(ob_type, subtype, obj_id)
|
||||
matches[str_obj]['link'] = ail_objects.get_object_link(ob_type, subtype, obj_id)
|
||||
|
||||
extract(obj_id)
|
||||
matches[str_obj]['matches'] = []
|
||||
|
||||
# get_obj_correl('cve', obj_id, content)
|
||||
# r = get_tracker_match(obj_id, content)
|
||||
# print(r)
|
||||
match = [start, end, value]
|
||||
matches[str_obj]['matches'].append(match)
|
||||
return matches
|
||||
|
||||
print(time.time() - t0)
|
||||
|
||||
# if __name__ == '__main__':
|
||||
# t0 = time.time()
|
||||
# obj_id = 'crawled/2022/09/15/circl.lu179c7903-5b21-452e-9f25-4b61d9934e2b'
|
||||
# obj_id = 'crawled/2022/09/15/circl.lu1e4f9721-06dc-404f-aabf-3c3bd0b533bd'
|
||||
# obj_id = 'submitted/2022/09/13/submitted_ba3ee771-c91c-4f50-9d6a-8558cdac7aeb.gz'
|
||||
# # obj_id = 'tests/2021/01/01/credit_cards.gz'
|
||||
# # obj_id = 'crawled/2020/07/20/circl.luc9301321-f1b1-4d91-9082-5eb452b946c5'
|
||||
# obj_id = 'submitted/2019/09/22/97172282-e4c2-4a1e-b82c-c4fb9490a56e.gz'
|
||||
# obj_id = 'submitted/2019/09/20/4fb7f02d-1241-4ef4-b17e-80ae76038835.gz'
|
||||
# obj_id = 'crawled/2023/02/21/circl.lu1c300acb-0cbe-480f-917e-9afe3ec958e8'
|
||||
#
|
||||
# extract(obj_id)
|
||||
#
|
||||
# # get_obj_correl('cve', obj_id, content)
|
||||
# # r = get_tracker_match(obj_id, content)
|
||||
# # print(r)
|
||||
#
|
||||
# print(time.time() - t0)
|
||||
|
||||
|
|
|
@ -64,7 +64,7 @@ class CreditCards(AbstractModule):
|
|||
for card in cards:
|
||||
start, end, value = card
|
||||
if self.get_valid_card(value):
|
||||
extracted.append(card)
|
||||
extracted.append([start, end, value, f'tag:{tag}'])
|
||||
return extracted
|
||||
|
||||
def compute(self, message, r_result=False):
|
||||
|
|
|
@ -69,8 +69,7 @@ class Iban(AbstractModule):
|
|||
start, end, value = iban
|
||||
value = ''.join(e for e in value if e.isalnum())
|
||||
if self.is_valid_iban(value):
|
||||
print(value)
|
||||
extracted.append(iban)
|
||||
extracted.append([start, end, value, f'tag:{tag}'])
|
||||
return extracted
|
||||
|
||||
def compute(self, message):
|
||||
|
|
|
@ -130,7 +130,7 @@ class Mail(AbstractModule):
|
|||
mxdomains[mxdomain].append(mail)
|
||||
for mx in self.check_mx_record(mxdomains.keys()):
|
||||
for row in mxdomains[mx]:
|
||||
extracted.append(row)
|
||||
extracted.append([row[0], row[1], row[2], f'tag:{tag}'])
|
||||
return extracted
|
||||
|
||||
# # TODO: sanitize mails
|
||||
|
|
|
@ -62,7 +62,7 @@ class Onion(AbstractModule):
|
|||
url_unpack = crawlers.unpack_url(value)
|
||||
domain = url_unpack['domain']
|
||||
if crawlers.is_valid_onion_domain(domain):
|
||||
extracted.append(onion)
|
||||
extracted.append([start, end, value, f'tag:{tag}'])
|
||||
return extracted
|
||||
|
||||
def compute(self, message):
|
||||
|
|
|
@ -409,8 +409,12 @@ class Tools(AbstractModule):
|
|||
return TOOLS.keys()
|
||||
|
||||
def extract(self, obj_id, content, tag):
|
||||
extracted = []
|
||||
tool_name = tag.rsplit('"', 2)[1][:-5]
|
||||
return self.regex_finditer(TOOLS[tool_name]['regex'], obj_id, content)
|
||||
tools = self.regex_finditer(TOOLS[tool_name]['regex'], obj_id, content)
|
||||
for tool in tools:
|
||||
extracted.append([tool[0], tool[1], tool[2], f'tag:{tag}'])
|
||||
return extracted
|
||||
|
||||
def compute(self, message):
|
||||
item = Item(message)
|
||||
|
|
|
@ -67,7 +67,7 @@ def showItem(): # # TODO: support post
|
|||
abort(404)
|
||||
|
||||
item = Item(item_id)
|
||||
meta = item.get_meta(options=['content', 'crawler', 'duplicates', 'lines', 'size'])
|
||||
meta = item.get_meta(options={'content', 'crawler', 'duplicates', 'lines', 'size'})
|
||||
|
||||
meta['name'] = meta['id'].replace('/', ' / ')
|
||||
meta['father'] = item_basic.get_item_parent(item_id)
|
||||
|
@ -76,11 +76,13 @@ def showItem(): # # TODO: support post
|
|||
meta['hive_case'] = Export.get_item_hive_cases(item_id)
|
||||
|
||||
extracted = module_extractor.extract(item.id, content=meta['content'])
|
||||
extracted_matches = module_extractor.get_extracted_by_match(extracted)
|
||||
|
||||
return render_template("show_item.html", bootstrap_label=bootstrap_label,
|
||||
modal_add_tags=Tag.get_modal_add_tags(meta['id'], object_type='item'),
|
||||
is_hive_connected=Export.get_item_hive_cases(item_id),
|
||||
meta=meta, extracted=extracted)
|
||||
meta=meta,
|
||||
extracted=extracted, extracted_matches=extracted_matches)
|
||||
|
||||
# kvrocks data
|
||||
|
||||
|
|
|
@ -20,6 +20,9 @@
|
|||
<script src="{{ url_for('static', filename='js/tags.js') }}"></script>
|
||||
|
||||
<style>
|
||||
.popover{
|
||||
max-width: 100%;
|
||||
}
|
||||
.rotate{
|
||||
-moz-transition: all 0.1s linear;
|
||||
-webkit-transition: all 0.1s linear;
|
||||
|
@ -349,10 +352,62 @@
|
|||
{% endif %}
|
||||
|
||||
|
||||
{% if extracted %}
|
||||
{% for row in extracted %}
|
||||
<div><a href="#{{ row[0] }}:{{ row[1] }}">{{ row[2] }}</a></div>
|
||||
{% if extracted_matches %}
|
||||
<div id="accordion_extracted" class="mb-3 mx-3">
|
||||
<div class="card">
|
||||
<div class="card-header py-1" id="heading_extracted">
|
||||
<div class="row">
|
||||
<div class="col-11">
|
||||
<div class="mt-2">
|
||||
<img id="misp-logo" src="{{ url_for('static', filename='image/ail-icon.png')}}" height="32"> Extracted
|
||||
<div class="badge badge-warning">{{extracted_matches|length}}</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-1">
|
||||
<button class="btn btn-link btn-lg py-2 float-right rotate down" data-toggle="collapse" data-target="#collapse_extracted" aria-expanded="true" aria-controls="collapseDecoded">
|
||||
<i class="fas fa-chevron-circle-down"></i>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div id="collapse_extracted" class="collapse show" aria-labelledby="heading_extracted" data-parent="#accordion_extracted">
|
||||
<div class="card-body">
|
||||
<table id="table_extracted" class="table table-striped">
|
||||
<thead class="thead-dark">
|
||||
<tr>
|
||||
<th>Type</th>
|
||||
<th>ID</th>
|
||||
<th>Extracted</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for match in extracted_matches %}
|
||||
<tr>
|
||||
<td>
|
||||
<svg height="26" width="26">
|
||||
<g class="nodes">
|
||||
<circle cx="13" cy="13" r="13" fill="{{ extracted_matches[match]['icon']['color'] }}"></circle>
|
||||
<text x="13" y="13" text-anchor="middle" dominant-baseline="central" class="graph_node_icon {{ extracted_matches[match]['icon']['style'] }}" font-size="16px">{{ extracted_matches[match]['icon']['icon'] }}</text>
|
||||
</g>
|
||||
</svg>
|
||||
{{ extracted_matches[match]['subtype'] }}
|
||||
</td>
|
||||
<td>{{ extracted_matches[match]['id'] }}</td>
|
||||
<td>
|
||||
{% for row in extracted_matches[match]['matches'] %}
|
||||
<a href="#{{ row[0] }}:{{row[1] }}">{{ row[2] }}</a><br>
|
||||
{% endfor %}
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
|
||||
|
@ -382,7 +437,7 @@
|
|||
{% if not extracted %}
|
||||
<p class="my-0"> <pre class="border">{{ meta['content'] }}</pre></p>
|
||||
{% else %}
|
||||
<p class="my-0"> <pre class="border">{{ meta['content'][:extracted[0][0]] }}{% for row in extracted %}<span class="hg-text" id="{{ row[0] }}:{{ row[1] }}">{{ meta['content'][row[0]:row[1]] }}</span>{% if loop.index + 1 > extracted|length %}{{ meta['content'][extracted[-1][1]:] }}{% else %}{{ meta['content'][row[1]:extracted[loop.index][0]] }}{% endif %}{% endfor %}</pre></p>
|
||||
<p class="my-0"> <pre class="border">{{ meta['content'][:extracted[0][0]] }}{% for row in extracted %}<span class="hg-text" data-toggle="popover" data-trigger="hover" data-html="true" title="<svg height="26" width="26"><g class="nodes"><circle cx="13" cy="13" r="13" fill="{{ extracted_matches[row[3]]['icon']['color'] }}"></circle><text x="13" y="13" text-anchor="middle" dominant-baseline="central" class="graph_node_icon {{ extracted_matches[row[3]]['icon']['style'] }}" font-size="16px">{{ extracted_matches[row[3]]['icon']['icon'] }}</text></g></svg> {{ extracted_matches[row[3]]['subtype'] }}" data-content="{{ extracted_matches[row[3]]['id'] }}" id="{{ row[0] }}:{{ row[1] }}">{{ meta['content'][row[0]:row[1]] }}</span>{% if loop.index + 1 > extracted|length %}{{ meta['content'][extracted[-1][1]:] }}{% else %}{{ meta['content'][row[1]:extracted[loop.index][0]] }}{% endif %}{% endfor %}</pre></p>
|
||||
{% endif %}
|
||||
</div>
|
||||
<div class="tab-pane fade" id="pills-html2text" role="tabpanel" aria-labelledby="pills-html2text-tab">
|
||||
|
@ -395,7 +450,7 @@
|
|||
</div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
<script>
|
||||
var ltags
|
||||
var ltagsgalaxies
|
||||
$(document).ready(function(){
|
||||
|
@ -405,8 +460,11 @@
|
|||
// "iDisplayLength": 5,
|
||||
// "order": [[ 1, "asc" ]]
|
||||
// });
|
||||
{% if extracted %}
|
||||
$('#table_extracted').DataTable();
|
||||
{% endif %}
|
||||
$(".rotate").click(function(){
|
||||
$(this).toggleClass("down") ;
|
||||
$(this).toggleClass("down");
|
||||
})
|
||||
});
|
||||
|
||||
|
@ -418,7 +476,7 @@
|
|||
|
||||
}
|
||||
});
|
||||
</script>
|
||||
</script>
|
||||
|
||||
{% if meta['crawler'] %}
|
||||
<script>
|
||||
|
@ -470,6 +528,9 @@
|
|||
}
|
||||
|
||||
blocks.addEventListener('change', pixelate, false);
|
||||
$(function () {
|
||||
$('[data-toggle="popover"]').popover()
|
||||
})
|
||||
</script>
|
||||
{% endif %}
|
||||
|
||||
|
|
Loading…
Reference in a new issue