diff --git a/bin/lib/module_extractor.py b/bin/lib/module_extractor.py
index be596621..fe81f9cb 100755
--- a/bin/lib/module_extractor.py
+++ b/bin/lib/module_extractor.py
@@ -1,17 +1,19 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
-
+import json
import os
import sys
import time
import yara
+from operator import itemgetter
+
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
-import lib.objects.ail_objects
+from lib.objects import ail_objects
from lib.objects.Items import Item
from lib import correlations_engine
from lib import regex_helper
@@ -25,23 +27,19 @@ from modules.Mail import Mail
from modules.Onion import Onion
from modules.Tools import Tools
-creditCards = CreditCards()
-ibans = Iban()
-mails = Mail()
-onions = Onion()
-tools = Tools()
-
config_loader = ConfigLoader()
r_cache = config_loader.get_redis_conn("Redis_Cache")
config_loader = None
r_key = regex_helper.generate_redis_cache_key('extractor')
+# TODO UI Link
+
MODULES = {
- 'infoleak:automatic-detection="credit-card"': creditCards,
- 'infoleak:automatic-detection="iban"': ibans,
- 'infoleak:automatic-detection="mail"': mails,
- 'infoleak:automatic-detection="onion"': onions,
+ 'infoleak:automatic-detection="credit-card"': CreditCards(),
+ 'infoleak:automatic-detection="iban"': Iban(),
+ 'infoleak:automatic-detection="mail"': Mail(),
+ 'infoleak:automatic-detection="onion"': Onion(),
# APIkey ???
# Credentials
# Zerobins
@@ -49,20 +47,28 @@ MODULES = {
# SQL Injetction / Libinjection ???
}
+tools = Tools()
for tool_name in tools.get_tools():
MODULES[f'infoleak:automatic-detection="{tool_name}-tool"'] = tools
-def get_correl_match(extract_type, obj_id, content, filter_subtypes=['']):
+def get_correl_match(extract_type, obj_id, content):
+ extracted = []
correl = correlations_engine.get_correlation_by_correl_type('item', '', obj_id, extract_type)
to_extract = []
+ map_subtype = {}
for c in correl:
subtype, value = c.split(':', 1)
- # if subtype in filter_subtypes:
+ map_subtype[value] = subtype
to_extract.append(value)
if to_extract:
- return regex_helper.regex_finditer(r_key, '|'.join(to_extract), obj_id, content)
- else:
- return []
+ objs = regex_helper.regex_finditer(r_key, '|'.join(to_extract), obj_id, content)
+ for obj in objs:
+ if map_subtype[obj[2]]:
+ subtype = map_subtype[obj[2]]
+ else:
+ subtype = ''
+ extracted.append([obj[0], obj[1], obj[2], f'{extract_type}:{subtype}:{obj[2]}'])
+ return extracted
def _get_yara_match(data):
for row in data.get('strings'):
@@ -73,14 +79,27 @@ def _get_yara_match(data):
r_cache.expire(f'extractor:yara:match:{r_key}', 300)
return yara.CALLBACK_CONTINUE
+def _get_word_regex(word):
+ return '(?:^|(?<=[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]))' + word + '(?:$|(?=[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]))'
+
# TODO RETRO HUNTS
+# TODO: display the tracker type in the UI
def get_tracker_match(obj_id, content):
+ cached = r_cache.get(f'extractor:cache:{obj_id}')
+ if cached:
+ r_cache.expire(f'extractor:cache:{obj_id}', 300)
+ return json.loads(cached)
+
+ extracted = []
trackers = Tracker.get_obj_all_trackers('item', '', obj_id)
for tracker_uuid in trackers:
tracker_type = Tracker.get_tracker_type(tracker_uuid)
+ print(tracker_type)
tracker = Tracker.get_tracker_by_uuid(tracker_uuid)
- if tracker_type == 'regex':
- return regex_helper.regex_finditer(r_key, tracker, obj_id, content)
+ if tracker_type == 'regex': # TODO Improve word detection -> word delimiter
+ regex_match = regex_helper.regex_finditer(r_key, tracker, obj_id, content)
+ for match in regex_match:
+ extracted.append([int(match[0]), int(match[1]), match[2], f'tracker:{tracker_uuid}'])
elif tracker_type == 'yara':
rule = Tracker.get_yara_rule_by_uuid(tracker_uuid)
rule.match(data=content, callback=_get_yara_match,
@@ -90,22 +109,39 @@ def get_tracker_match(obj_id, content):
extracted = []
for match in yara_match:
start, end, value = match.split(':', 2)
- extracted.append((int(start), int(end), value))
- return extracted
+ extracted.append([int(start), int(end), value, f'tracker:{tracker_uuid}'])
- # elif tracker_type == 'term': # TODO
- #
- # elif tracker_type == '':
- return []
+ elif tracker_type == 'word' or tracker_type == 'set':
+ if tracker_type == 'set':
+ tracker = tracker.rsplit(';', 1)[0]
+ words = tracker.split(',')
+ else:
+ words = [tracker]
+ for word in words:
+ regex = _get_word_regex(word)
+ regex_match = regex_helper.regex_finditer(r_key, regex, obj_id, content)
+ print(regex_match)
+ for match in regex_match:
+ extracted.append([int(match[0]), int(match[1]), match[2], f'tracker:{tracker_uuid}'])
+ # Save In Cache
+ if extracted:
+ extracted_dump = json.dumps(extracted)
+ r_cache.set(f'extractor:cache:{obj_id}', extracted_dump)
+ r_cache.expire(f'extractor:cache:{obj_id}', 300) # TODO Reduce CACHE ???????????????
+
+ return extracted
+
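+# Format of the 4th field of each extracted row:
+#   correlated object -> <type>:<subtype>:<id>, tag -> tag:<tag name>
+#   tracker -> tracker:<tracker uuid>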
def extract(obj_id, content=None):
item = Item(obj_id)
if not content:
content = item.get_content()
- extracted = []
- extracted = extracted + get_tracker_match(obj_id, content)
+ extracted = get_tracker_match(obj_id, content)
# print(item.get_tags())
for tag in item.get_tags():
@@ -116,33 +152,70 @@ def extract(obj_id, content=None):
if matches:
extracted = extracted + matches
- for obj_t in ['cve', 'cryptocurrency', 'username']: # Decoded, PGP->extract bloc
+ for obj_t in ['cve', 'cryptocurrency', 'username']: # Decoded, PGP->extract bloc
matches = get_correl_match(obj_t, obj_id, content)
if matches:
extracted = extracted + matches
- from operator import itemgetter
-
+ # SORT By Start Pos
extracted = sorted(extracted, key=itemgetter(0))
- print(extracted)
+ # print(extracted)
return extracted
+# TODO: add UI links ('link' is still an empty string for tags and trackers)
+def get_extracted_by_match(extracted):
+ matches = {}
+ for start, end, value, str_obj in extracted:
-if __name__ == '__main__':
- t0 = time.time()
- obj_id = 'crawled/2022/09/15/circl.lu179c7903-5b21-452e-9f25-4b61d9934e2b'
- obj_id = 'crawled/2022/09/15/circl.lu1e4f9721-06dc-404f-aabf-3c3bd0b533bd'
- obj_id = 'submitted/2022/09/13/submitted_ba3ee771-c91c-4f50-9d6a-8558cdac7aeb.gz'
- # obj_id = 'tests/2021/01/01/credit_cards.gz'
- # obj_id = 'crawled/2020/07/20/circl.luc9301321-f1b1-4d91-9082-5eb452b946c5'
- obj_id = 'submitted/2019/09/22/97172282-e4c2-4a1e-b82c-c4fb9490a56e.gz'
- obj_id = 'submitted/2019/09/20/4fb7f02d-1241-4ef4-b17e-80ae76038835.gz'
+ if str_obj not in matches:
+ matches[str_obj] = {}
+ ob_type, row_id = str_obj.split(':', 1)
+ if ob_type == 'tag': # TODO put me in object class
+ matches[str_obj]['subtype'] = 'tag'
+ matches[str_obj]['id'] = row_id
+ matches[str_obj]['icon'] = {'style': 'fas', 'icon': '\uf02b', 'color': '#28a745', 'radius': 5}
+ matches[str_obj]['link'] = ''
+ elif ob_type == 'tracker': # TODO put me in object class
+ matches[str_obj]['subtype'] = 'tracker'
+ matches[str_obj]['id'] = row_id
+ matches[str_obj]['icon'] = {'style': 'fas', 'icon': '\uf05b', 'color': '#ffc107', 'radius': 5}
+ matches[str_obj]['link'] = ''
+ else:
+ row_id = row_id.split(':', 1)
+ if len(row_id) == 2:
+ subtype = row_id[0]
+ obj_id = row_id[1]
+ else:
+ subtype = ''
+ obj_id = row_id[0]
+ matches[str_obj]['subtype'] = subtype
+ matches[str_obj]['id'] = obj_id
+ matches[str_obj]['icon'] = ail_objects.get_object_svg(ob_type, subtype, obj_id)
+ matches[str_obj]['link'] = ail_objects.get_object_link(ob_type, subtype, obj_id)
- extract(obj_id)
+ matches[str_obj]['matches'] = []
- # get_obj_correl('cve', obj_id, content)
- # r = get_tracker_match(obj_id, content)
- # print(r)
+ match = [start, end, value]
+ matches[str_obj]['matches'].append(match)
+ return matches
- print(time.time() - t0)
+
+# if __name__ == '__main__':
+# t0 = time.time()
+# obj_id = 'crawled/2022/09/15/circl.lu179c7903-5b21-452e-9f25-4b61d9934e2b'
+# obj_id = 'crawled/2022/09/15/circl.lu1e4f9721-06dc-404f-aabf-3c3bd0b533bd'
+# obj_id = 'submitted/2022/09/13/submitted_ba3ee771-c91c-4f50-9d6a-8558cdac7aeb.gz'
+# # obj_id = 'tests/2021/01/01/credit_cards.gz'
+# # obj_id = 'crawled/2020/07/20/circl.luc9301321-f1b1-4d91-9082-5eb452b946c5'
+# obj_id = 'submitted/2019/09/22/97172282-e4c2-4a1e-b82c-c4fb9490a56e.gz'
+# obj_id = 'submitted/2019/09/20/4fb7f02d-1241-4ef4-b17e-80ae76038835.gz'
+# obj_id = 'crawled/2023/02/21/circl.lu1c300acb-0cbe-480f-917e-9afe3ec958e8'
+#
+# extract(obj_id)
+#
+# # get_obj_correl('cve', obj_id, content)
+# # r = get_tracker_match(obj_id, content)
+# # print(r)
+#
+# print(time.time() - t0)
diff --git a/bin/modules/CreditCards.py b/bin/modules/CreditCards.py
index 6ea60f1e..33bdebe5 100755
--- a/bin/modules/CreditCards.py
+++ b/bin/modules/CreditCards.py
@@ -64,7 +64,7 @@ class CreditCards(AbstractModule):
for card in cards:
start, end, value = card
if self.get_valid_card(value):
- extracted.append(card)
+ extracted.append([start, end, value, f'tag:{tag}'])
return extracted
def compute(self, message, r_result=False):
diff --git a/bin/modules/Iban.py b/bin/modules/Iban.py
index 83ad6b12..0d7597f2 100755
--- a/bin/modules/Iban.py
+++ b/bin/modules/Iban.py
@@ -69,8 +69,7 @@ class Iban(AbstractModule):
start, end, value = iban
value = ''.join(e for e in value if e.isalnum())
if self.is_valid_iban(value):
- print(value)
- extracted.append(iban)
+ extracted.append([start, end, value, f'tag:{tag}'])
return extracted
def compute(self, message):
diff --git a/bin/modules/Mail.py b/bin/modules/Mail.py
index 9982f903..af704599 100755
--- a/bin/modules/Mail.py
+++ b/bin/modules/Mail.py
@@ -130,7 +130,7 @@ class Mail(AbstractModule):
mxdomains[mxdomain].append(mail)
for mx in self.check_mx_record(mxdomains.keys()):
for row in mxdomains[mx]:
- extracted.append(row)
+ extracted.append([row[0], row[1], row[2], f'tag:{tag}'])
return extracted
# # TODO: sanitize mails
diff --git a/bin/modules/Onion.py b/bin/modules/Onion.py
index 15120098..e8bb5c7a 100755
--- a/bin/modules/Onion.py
+++ b/bin/modules/Onion.py
@@ -62,7 +62,7 @@ class Onion(AbstractModule):
url_unpack = crawlers.unpack_url(value)
domain = url_unpack['domain']
if crawlers.is_valid_onion_domain(domain):
- extracted.append(onion)
+ extracted.append([start, end, value, f'tag:{tag}'])
return extracted
def compute(self, message):
diff --git a/bin/modules/Tools.py b/bin/modules/Tools.py
index 7af97301..0803ec2b 100755
--- a/bin/modules/Tools.py
+++ b/bin/modules/Tools.py
@@ -409,8 +409,12 @@ class Tools(AbstractModule):
return TOOLS.keys()
def extract(self, obj_id, content, tag):
+ extracted = []
tool_name = tag.rsplit('"', 2)[1][:-5]
- return self.regex_finditer(TOOLS[tool_name]['regex'], obj_id, content)
+ tools = self.regex_finditer(TOOLS[tool_name]['regex'], obj_id, content)
+ for tool in tools:
+ extracted.append([tool[0], tool[1], tool[2], f'tag:{tag}'])
+ return extracted
def compute(self, message):
item = Item(message)
diff --git a/var/www/blueprints/objects_item.py b/var/www/blueprints/objects_item.py
index 442f8caa..67b05648 100644
--- a/var/www/blueprints/objects_item.py
+++ b/var/www/blueprints/objects_item.py
@@ -67,7 +67,7 @@ def showItem(): # # TODO: support post
abort(404)
item = Item(item_id)
- meta = item.get_meta(options=['content', 'crawler', 'duplicates', 'lines', 'size'])
+ meta = item.get_meta(options={'content', 'crawler', 'duplicates', 'lines', 'size'})
meta['name'] = meta['id'].replace('/', ' / ')
meta['father'] = item_basic.get_item_parent(item_id)
@@ -76,11 +76,13 @@ def showItem(): # # TODO: support post
meta['hive_case'] = Export.get_item_hive_cases(item_id)
extracted = module_extractor.extract(item.id, content=meta['content'])
+ extracted_matches = module_extractor.get_extracted_by_match(extracted)
return render_template("show_item.html", bootstrap_label=bootstrap_label,
- modal_add_tags=Tag.get_modal_add_tags(meta['id'], object_type='item'),
- is_hive_connected=Export.get_item_hive_cases(item_id),
- meta=meta, extracted=extracted)
+ modal_add_tags=Tag.get_modal_add_tags(meta['id'], object_type='item'),
+ is_hive_connected=Export.get_item_hive_cases(item_id),
+ meta=meta,
+ extracted=extracted, extracted_matches=extracted_matches)
# kvrocks data
diff --git a/var/www/templates/objects/item/show_item.html b/var/www/templates/objects/item/show_item.html
index 99e5f23c..18499a78 100644
--- a/var/www/templates/objects/item/show_item.html
+++ b/var/www/templates/objects/item/show_item.html
@@ -20,6 +20,9 @@