mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-22 22:27:17 +00:00
chg: [core] merge master + fix object subtype correlation stats
This commit is contained in:
commit
c5cef5fd00
30 changed files with 415 additions and 191 deletions
|
@ -267,8 +267,8 @@ function launching_scripts {
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script_AIL" -X screen -t "LibInjection" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./LibInjection.py; read x"
|
screen -S "Script_AIL" -X screen -t "LibInjection" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./LibInjection.py; read x"
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script_AIL" -X screen -t "Zerobins" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Zerobins.py; read x"
|
# screen -S "Script_AIL" -X screen -t "Pasties" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Pasties.py; read x"
|
||||||
sleep 0.1
|
# sleep 0.1
|
||||||
|
|
||||||
screen -S "Script_AIL" -X screen -t "MISP_Thehive_Auto_Push" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./MISP_Thehive_Auto_Push.py; read x"
|
screen -S "Script_AIL" -X screen -t "MISP_Thehive_Auto_Push" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./MISP_Thehive_Auto_Push.py; read x"
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
|
|
|
@ -22,6 +22,7 @@ from lib.objects.Domains import Domain
|
||||||
from lib.objects.Items import Item
|
from lib.objects.Items import Item
|
||||||
from lib.objects import Screenshots
|
from lib.objects import Screenshots
|
||||||
from lib.objects import Titles
|
from lib.objects import Titles
|
||||||
|
from trackers.Tracker_Yara import Tracker_Yara
|
||||||
|
|
||||||
logging.config.dictConfig(ail_logger.get_config(name='crawlers'))
|
logging.config.dictConfig(ail_logger.get_config(name='crawlers'))
|
||||||
|
|
||||||
|
@ -35,6 +36,8 @@ class Crawler(AbstractModule):
|
||||||
# Waiting time in seconds between two processed messages
|
# Waiting time in seconds between two processed messages
|
||||||
self.pending_seconds = 1
|
self.pending_seconds = 1
|
||||||
|
|
||||||
|
self.tracker_yara = Tracker_Yara(queue=False)
|
||||||
|
|
||||||
config_loader = ConfigLoader()
|
config_loader = ConfigLoader()
|
||||||
|
|
||||||
self.default_har = config_loader.get_config_boolean('Crawler', 'default_har')
|
self.default_har = config_loader.get_config_boolean('Crawler', 'default_har')
|
||||||
|
@ -284,6 +287,12 @@ class Crawler(AbstractModule):
|
||||||
if title_content:
|
if title_content:
|
||||||
title = Titles.create_title(title_content)
|
title = Titles.create_title(title_content)
|
||||||
title.add(item.get_date(), item)
|
title.add(item.get_date(), item)
|
||||||
|
# Tracker
|
||||||
|
self.tracker_yara.compute_manual(title)
|
||||||
|
if not title.is_tags_safe():
|
||||||
|
unsafe_tag = 'dark-web:topic="pornography-child-exploitation"'
|
||||||
|
self.domain.add_tag(unsafe_tag)
|
||||||
|
item.add_tag(unsafe_tag)
|
||||||
|
|
||||||
# SCREENSHOT
|
# SCREENSHOT
|
||||||
if self.screenshot:
|
if self.screenshot:
|
||||||
|
|
|
@ -124,16 +124,27 @@ class MailExporterTracker(MailExporter):
|
||||||
def __init__(self, host=None, port=None, password=None, user='', sender=''):
|
def __init__(self, host=None, port=None, password=None, user='', sender=''):
|
||||||
super().__init__(host=host, port=port, password=password, user=user, sender=sender)
|
super().__init__(host=host, port=port, password=password, user=user, sender=sender)
|
||||||
|
|
||||||
def export(self, tracker, obj): # TODO match
|
def export(self, tracker, obj, matches=[]):
|
||||||
tracker_type = tracker.get_type()
|
tracker_type = tracker.get_type()
|
||||||
tracker_name = tracker.get_tracked()
|
tracker_name = tracker.get_tracked()
|
||||||
subject = f'AIL Framework Tracker: {tracker_name}' # TODO custom subject
|
description = tracker.get_description()
|
||||||
|
if not description:
|
||||||
|
description = tracker_name
|
||||||
|
|
||||||
|
subject = f'AIL Framework Tracker: {description}'
|
||||||
body = f"AIL Framework, New occurrence for {tracker_type} tracker: {tracker_name}\n"
|
body = f"AIL Framework, New occurrence for {tracker_type} tracker: {tracker_name}\n"
|
||||||
body += f'Item: {obj.id}\nurl:{obj.get_link()}'
|
body += f'Item: {obj.id}\nurl:{obj.get_link()}'
|
||||||
|
|
||||||
# TODO match option
|
if matches:
|
||||||
# if match:
|
body += '\n'
|
||||||
# body += f'Tracker Match:\n\n{escape(match)}'
|
nb = 1
|
||||||
|
for match in matches:
|
||||||
|
body += f'\nMatch {nb}: {match[0]}\nExtract:\n{match[1]}\n\n'
|
||||||
|
nb += 1
|
||||||
|
else:
|
||||||
|
body = f"AIL Framework, New occurrence for {tracker_type} tracker: {tracker_name}\n"
|
||||||
|
body += f'Item: {obj.id}\nurl:{obj.get_link()}'
|
||||||
|
|
||||||
|
# print(body)
|
||||||
for mail in tracker.get_mails():
|
for mail in tracker.get_mails():
|
||||||
self._export(mail, subject, body)
|
self._export(mail, subject, body)
|
||||||
|
|
|
@ -31,8 +31,12 @@ class DefaultFeeder:
|
||||||
Return feeder name. first part of the item_id and display in the UI
|
Return feeder name. first part of the item_id and display in the UI
|
||||||
"""
|
"""
|
||||||
if not self.name:
|
if not self.name:
|
||||||
return self.get_source()
|
name = self.get_source()
|
||||||
return self.name
|
else:
|
||||||
|
name = self.name
|
||||||
|
if not name:
|
||||||
|
name = 'default'
|
||||||
|
return name
|
||||||
|
|
||||||
def get_source(self):
|
def get_source(self):
|
||||||
return self.json_data.get('source')
|
return self.json_data.get('source')
|
||||||
|
|
|
@ -83,6 +83,7 @@ class ConfigLoader(object):
|
||||||
else:
|
else:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
||||||
# # # # Directory Config # # # #
|
# # # # Directory Config # # # #
|
||||||
|
|
||||||
config_loader = ConfigLoader()
|
config_loader = ConfigLoader()
|
||||||
|
|
|
@ -2,6 +2,8 @@
|
||||||
# -*-coding:UTF-8 -*
|
# -*-coding:UTF-8 -*
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
import logging
|
||||||
|
import logging.config
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
|
@ -24,11 +26,16 @@ sys.path.append(os.environ['AIL_BIN'])
|
||||||
##################################
|
##################################
|
||||||
from packages import Date
|
from packages import Date
|
||||||
from lib.ail_core import get_objects_tracked, get_object_all_subtypes, get_objects_retro_hunted
|
from lib.ail_core import get_objects_tracked, get_object_all_subtypes, get_objects_retro_hunted
|
||||||
|
from lib import ail_logger
|
||||||
from lib import ConfigLoader
|
from lib import ConfigLoader
|
||||||
from lib import item_basic
|
from lib import item_basic
|
||||||
from lib import Tag
|
from lib import Tag
|
||||||
from lib.Users import User
|
from lib.Users import User
|
||||||
|
|
||||||
|
# LOGS
|
||||||
|
logging.config.dictConfig(ail_logger.get_config(name='modules'))
|
||||||
|
logger = logging.getLogger()
|
||||||
|
|
||||||
config_loader = ConfigLoader.ConfigLoader()
|
config_loader = ConfigLoader.ConfigLoader()
|
||||||
r_cache = config_loader.get_redis_conn("Redis_Cache")
|
r_cache = config_loader.get_redis_conn("Redis_Cache")
|
||||||
|
|
||||||
|
@ -248,7 +255,8 @@ class Tracker:
|
||||||
return self._get_field('user_id')
|
return self._get_field('user_id')
|
||||||
|
|
||||||
def webhook_export(self):
|
def webhook_export(self):
|
||||||
return r_tracker.hexists(f'tracker:{self.uuid}', 'webhook')
|
webhook = self.get_webhook()
|
||||||
|
return webhook is not None and webhook
|
||||||
|
|
||||||
def get_webhook(self):
|
def get_webhook(self):
|
||||||
return r_tracker.hget(f'tracker:{self.uuid}', 'webhook')
|
return r_tracker.hget(f'tracker:{self.uuid}', 'webhook')
|
||||||
|
@ -560,8 +568,6 @@ class Tracker:
|
||||||
os.remove(filepath)
|
os.remove(filepath)
|
||||||
|
|
||||||
# Filters
|
# Filters
|
||||||
filters = self.get_filters()
|
|
||||||
if not filters:
|
|
||||||
filters = get_objects_tracked()
|
filters = get_objects_tracked()
|
||||||
for obj_type in filters:
|
for obj_type in filters:
|
||||||
r_tracker.srem(f'trackers:objs:{tracker_type}:{obj_type}', tracked)
|
r_tracker.srem(f'trackers:objs:{tracker_type}:{obj_type}', tracked)
|
||||||
|
@ -923,7 +929,7 @@ def api_add_tracker(dict_input, user_id):
|
||||||
# Filters # TODO MOVE ME
|
# Filters # TODO MOVE ME
|
||||||
filters = dict_input.get('filters', {})
|
filters = dict_input.get('filters', {})
|
||||||
if filters:
|
if filters:
|
||||||
if filters.keys() == {'decoded', 'item', 'pgp'} and set(filters['pgp'].get('subtypes', [])) == {'mail', 'name'}:
|
if filters.keys() == {'decoded', 'item', 'pgp', 'title'} and set(filters['pgp'].get('subtypes', [])) == {'mail', 'name'}:
|
||||||
filters = {}
|
filters = {}
|
||||||
for obj_type in filters:
|
for obj_type in filters:
|
||||||
if obj_type not in get_objects_tracked():
|
if obj_type not in get_objects_tracked():
|
||||||
|
@ -998,7 +1004,7 @@ def api_edit_tracker(dict_input, user_id):
|
||||||
# Filters # TODO MOVE ME
|
# Filters # TODO MOVE ME
|
||||||
filters = dict_input.get('filters', {})
|
filters = dict_input.get('filters', {})
|
||||||
if filters:
|
if filters:
|
||||||
if filters.keys() == {'decoded', 'item', 'pgp'} and set(filters['pgp'].get('subtypes', [])) == {'mail', 'name'}:
|
if filters.keys() == {'decoded', 'item', 'pgp', 'title'} and set(filters['pgp'].get('subtypes', [])) == {'mail', 'name'}:
|
||||||
if not filters['decoded'] and not filters['item']:
|
if not filters['decoded'] and not filters['item']:
|
||||||
filters = {}
|
filters = {}
|
||||||
for obj_type in filters:
|
for obj_type in filters:
|
||||||
|
@ -1151,7 +1157,11 @@ def get_tracked_yara_rules():
|
||||||
for obj_type in get_objects_tracked():
|
for obj_type in get_objects_tracked():
|
||||||
rules = {}
|
rules = {}
|
||||||
for tracked in _get_tracked_by_obj_type('yara', obj_type):
|
for tracked in _get_tracked_by_obj_type('yara', obj_type):
|
||||||
rules[tracked] = os.path.join(get_yara_rules_dir(), tracked)
|
rule = os.path.join(get_yara_rules_dir(), tracked)
|
||||||
|
if not os.path.exists(rule):
|
||||||
|
logger.critical(f"Yara rule don't exists {tracked} : {obj_type}")
|
||||||
|
else:
|
||||||
|
rules[tracked] = rule
|
||||||
to_track[obj_type] = yara.compile(filepaths=rules)
|
to_track[obj_type] = yara.compile(filepaths=rules)
|
||||||
print(to_track)
|
print(to_track)
|
||||||
return to_track
|
return to_track
|
||||||
|
|
|
@ -52,7 +52,7 @@ def get_object_all_subtypes(obj_type):
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def get_objects_tracked():
|
def get_objects_tracked():
|
||||||
return ['decoded', 'item', 'pgp']
|
return ['decoded', 'item', 'pgp', 'title']
|
||||||
|
|
||||||
def get_objects_retro_hunted():
|
def get_objects_retro_hunted():
|
||||||
return ['decoded', 'item']
|
return ['decoded', 'item']
|
||||||
|
|
|
@ -234,7 +234,9 @@ def extract_title_from_html(html):
|
||||||
soup = BeautifulSoup(html, 'html.parser')
|
soup = BeautifulSoup(html, 'html.parser')
|
||||||
title = soup.title
|
title = soup.title
|
||||||
if title:
|
if title:
|
||||||
return str(title.string)
|
title = title.string
|
||||||
|
if title:
|
||||||
|
return str(title)
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
def extract_description_from_html(html):
|
def extract_description_from_html(html):
|
||||||
|
@ -1690,6 +1692,19 @@ def api_add_crawler_task(data, user_id=None):
|
||||||
return {'error': 'The access to this cookiejar is restricted'}, 403
|
return {'error': 'The access to this cookiejar is restricted'}, 403
|
||||||
cookiejar_uuid = cookiejar.uuid
|
cookiejar_uuid = cookiejar.uuid
|
||||||
|
|
||||||
|
cookies = data.get('cookies', None)
|
||||||
|
if not cookiejar_uuid and cookies:
|
||||||
|
# Create new cookiejar
|
||||||
|
cookiejar_uuid = create_cookiejar(user_id, "single-shot cookiejar", 1, None)
|
||||||
|
cookiejar = Cookiejar(cookiejar_uuid)
|
||||||
|
for cookie in cookies:
|
||||||
|
try:
|
||||||
|
name = cookie.get('name')
|
||||||
|
value = cookie.get('value')
|
||||||
|
cookiejar.add_cookie(name, value, None, None, None, None, None)
|
||||||
|
except KeyError:
|
||||||
|
return {'error': 'Invalid cookie key, please submit a valid JSON', 'cookiejar_uuid': cookiejar_uuid}, 400
|
||||||
|
|
||||||
frequency = data.get('frequency', None)
|
frequency = data.get('frequency', None)
|
||||||
if frequency:
|
if frequency:
|
||||||
if frequency not in ['monthly', 'weekly', 'daily', 'hourly']:
|
if frequency not in ['monthly', 'weekly', 'daily', 'hourly']:
|
||||||
|
@ -2010,7 +2025,7 @@ def test_ail_crawlers():
|
||||||
# TODO MOVE ME IN CRAWLER OR FLASK
|
# TODO MOVE ME IN CRAWLER OR FLASK
|
||||||
load_blacklist()
|
load_blacklist()
|
||||||
|
|
||||||
if __name__ == '__main__':
|
# if __name__ == '__main__':
|
||||||
# delete_captures()
|
# delete_captures()
|
||||||
|
|
||||||
# item_id = 'crawled/2023/02/20/data.gz'
|
# item_id = 'crawled/2023/02/20/data.gz'
|
||||||
|
@ -2022,4 +2037,4 @@ if __name__ == '__main__':
|
||||||
# _reprocess_all_hars_cookie_name()
|
# _reprocess_all_hars_cookie_name()
|
||||||
# _reprocess_all_hars_etag()
|
# _reprocess_all_hars_etag()
|
||||||
# _gzip_all_hars()
|
# _gzip_all_hars()
|
||||||
_reprocess_all_hars_hhhashs()
|
# _reprocess_all_hars_hhhashs()
|
||||||
|
|
|
@ -204,15 +204,22 @@ def _get_dir_source_name(directory, source_name=None, l_sources_name=set(), filt
|
||||||
if not l_sources_name:
|
if not l_sources_name:
|
||||||
l_sources_name = set()
|
l_sources_name = set()
|
||||||
if source_name:
|
if source_name:
|
||||||
|
path = os.path.join(directory, source_name)
|
||||||
|
if os.path.isdir(path):
|
||||||
l_dir = os.listdir(os.path.join(directory, source_name))
|
l_dir = os.listdir(os.path.join(directory, source_name))
|
||||||
|
else:
|
||||||
|
l_dir = []
|
||||||
else:
|
else:
|
||||||
l_dir = os.listdir(directory)
|
l_dir = os.listdir(directory)
|
||||||
# empty directory
|
# empty directory
|
||||||
if not l_dir:
|
if not l_dir:
|
||||||
|
if source_name:
|
||||||
return l_sources_name.add(source_name)
|
return l_sources_name.add(source_name)
|
||||||
|
else:
|
||||||
|
return l_sources_name
|
||||||
else:
|
else:
|
||||||
for src_name in l_dir:
|
for src_name in l_dir:
|
||||||
if len(src_name) == 4:
|
if len(src_name) == 4 and source_name:
|
||||||
# try:
|
# try:
|
||||||
int(src_name)
|
int(src_name)
|
||||||
to_add = os.path.join(source_name)
|
to_add = os.path.join(source_name)
|
||||||
|
|
|
@ -85,9 +85,6 @@ class CookieName(AbstractDaterangeObject):
|
||||||
meta['content'] = self.get_content()
|
meta['content'] = self.get_content()
|
||||||
return meta
|
return meta
|
||||||
|
|
||||||
def add(self, date, obj_id): # date = HAR Date
|
|
||||||
self._add(date, 'domain', '', obj_id)
|
|
||||||
|
|
||||||
def create(self, content, _first_seen=None, _last_seen=None):
|
def create(self, content, _first_seen=None, _last_seen=None):
|
||||||
if not isinstance(content, str):
|
if not isinstance(content, str):
|
||||||
content = content.decode()
|
content = content.decode()
|
||||||
|
|
|
@ -79,9 +79,6 @@ class Cve(AbstractDaterangeObject):
|
||||||
meta['tags'] = self.get_tags(r_list=True)
|
meta['tags'] = self.get_tags(r_list=True)
|
||||||
return meta
|
return meta
|
||||||
|
|
||||||
def add(self, date, item_id):
|
|
||||||
self._add(date, 'item', '', item_id)
|
|
||||||
|
|
||||||
def get_cve_search(self):
|
def get_cve_search(self):
|
||||||
try:
|
try:
|
||||||
response = requests.get(f'https://cvepremium.circl.lu/api/cve/{self.id}', timeout=10)
|
response = requests.get(f'https://cvepremium.circl.lu/api/cve/{self.id}', timeout=10)
|
||||||
|
|
|
@ -239,8 +239,8 @@ class Decoded(AbstractDaterangeObject):
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def add(self, algo_name, date, obj_id, mimetype=None):
|
def add(self, date, obj, algo_name, mimetype=None):
|
||||||
self._add(date, 'item', '', obj_id)
|
self._add(date, obj)
|
||||||
if not mimetype:
|
if not mimetype:
|
||||||
mimetype = self.get_mimetype()
|
mimetype = self.get_mimetype()
|
||||||
|
|
||||||
|
@ -460,7 +460,7 @@ def get_all_decodeds_objects(filters={}):
|
||||||
############################################################################
|
############################################################################
|
||||||
|
|
||||||
def sanityze_decoder_names(decoder_name):
|
def sanityze_decoder_names(decoder_name):
|
||||||
if decoder_name not in Decodeds.get_algos():
|
if decoder_name not in get_algos():
|
||||||
return None
|
return None
|
||||||
else:
|
else:
|
||||||
return decoder_name
|
return decoder_name
|
||||||
|
|
|
@ -85,9 +85,6 @@ class Etag(AbstractDaterangeObject):
|
||||||
meta['content'] = self.get_content()
|
meta['content'] = self.get_content()
|
||||||
return meta
|
return meta
|
||||||
|
|
||||||
def add(self, date, obj_id): # date = HAR Date
|
|
||||||
self._add(date, 'domain', '', obj_id)
|
|
||||||
|
|
||||||
def create(self, content, _first_seen=None, _last_seen=None):
|
def create(self, content, _first_seen=None, _last_seen=None):
|
||||||
if not isinstance(content, str):
|
if not isinstance(content, str):
|
||||||
content = content.decode()
|
content = content.decode()
|
||||||
|
|
|
@ -86,9 +86,6 @@ class Favicon(AbstractDaterangeObject):
|
||||||
# def get_links(self):
|
# def get_links(self):
|
||||||
# # TODO GET ALL URLS FROM CORRELATED ITEMS
|
# # TODO GET ALL URLS FROM CORRELATED ITEMS
|
||||||
|
|
||||||
def add(self, date, obj_id): # TODO correlation base 64 -> calc md5
|
|
||||||
self._add(date, 'domain', '', obj_id)
|
|
||||||
|
|
||||||
def create(self, content, _first_seen=None, _last_seen=None):
|
def create(self, content, _first_seen=None, _last_seen=None):
|
||||||
if not isinstance(content, str):
|
if not isinstance(content, str):
|
||||||
content = content.decode()
|
content = content.decode()
|
||||||
|
|
|
@ -86,9 +86,6 @@ class HHHash(AbstractDaterangeObject):
|
||||||
meta['content'] = self.get_content()
|
meta['content'] = self.get_content()
|
||||||
return meta
|
return meta
|
||||||
|
|
||||||
def add(self, date, obj_id): # date = HAR Date
|
|
||||||
self._add(date, 'domain', '', obj_id)
|
|
||||||
|
|
||||||
def create(self, hhhash_header, _first_seen=None, _last_seen=None): # TODO CREATE ADD FUNCTION -> urls set
|
def create(self, hhhash_header, _first_seen=None, _last_seen=None): # TODO CREATE ADD FUNCTION -> urls set
|
||||||
self._set_field('content', hhhash_header)
|
self._set_field('content', hhhash_header)
|
||||||
self._create()
|
self._create()
|
||||||
|
|
|
@ -175,7 +175,7 @@ class Message(AbstractObject):
|
||||||
if options is None:
|
if options is None:
|
||||||
options = set()
|
options = set()
|
||||||
meta = self.get_default_meta(tags=True)
|
meta = self.get_default_meta(tags=True)
|
||||||
meta['date'] = self.get_date() # TODO replace me by timestamp ??????
|
meta['date'] = self.get_date()
|
||||||
meta['source'] = self.get_source()
|
meta['source'] = self.get_source()
|
||||||
# optional meta fields
|
# optional meta fields
|
||||||
if 'content' in options:
|
if 'content' in options:
|
||||||
|
|
|
@ -45,6 +45,8 @@ class Title(AbstractDaterangeObject):
|
||||||
def get_content(self, r_type='str'):
|
def get_content(self, r_type='str'):
|
||||||
if r_type == 'str':
|
if r_type == 'str':
|
||||||
return self._get_field('content')
|
return self._get_field('content')
|
||||||
|
elif r_type == 'bytes':
|
||||||
|
return self._get_field('content').encode()
|
||||||
|
|
||||||
def get_link(self, flask_context=False):
|
def get_link(self, flask_context=False):
|
||||||
if flask_context:
|
if flask_context:
|
||||||
|
@ -82,9 +84,6 @@ class Title(AbstractDaterangeObject):
|
||||||
meta['content'] = self.get_content()
|
meta['content'] = self.get_content()
|
||||||
return meta
|
return meta
|
||||||
|
|
||||||
def add(self, date, item_id):
|
|
||||||
self._add(date, 'item', '', item_id)
|
|
||||||
|
|
||||||
def create(self, content, _first_seen=None, _last_seen=None):
|
def create(self, content, _first_seen=None, _last_seen=None):
|
||||||
self._set_field('content', content)
|
self._set_field('content', content)
|
||||||
self._create()
|
self._create()
|
||||||
|
@ -122,4 +121,3 @@ class Titles(AbstractDaterangeObjects):
|
||||||
# # print(r)
|
# # print(r)
|
||||||
# r = titles.search_by_id('f7d57B', r_pos=True, case_sensitive=False)
|
# r = titles.search_by_id('f7d57B', r_pos=True, case_sensitive=False)
|
||||||
# print(r)
|
# print(r)
|
||||||
|
|
||||||
|
|
|
@ -125,9 +125,7 @@ class AbstractDaterangeObject(AbstractObject, ABC):
|
||||||
def _add_create(self):
|
def _add_create(self):
|
||||||
r_object.sadd(f'{self.type}:all', self.id)
|
r_object.sadd(f'{self.type}:all', self.id)
|
||||||
|
|
||||||
# TODO don't increase nb if same hash in item with different encoding
|
def _add(self, date, obj):
|
||||||
# if hash already in item
|
|
||||||
def _add(self, date, obj_type, subtype, obj_id):
|
|
||||||
if not self.exists():
|
if not self.exists():
|
||||||
self._add_create()
|
self._add_create()
|
||||||
self.set_first_seen(date)
|
self.set_first_seen(date)
|
||||||
|
@ -136,26 +134,22 @@ class AbstractDaterangeObject(AbstractObject, ABC):
|
||||||
self.update_daterange(date)
|
self.update_daterange(date)
|
||||||
update_obj_date(date, self.type)
|
update_obj_date(date, self.type)
|
||||||
|
|
||||||
if obj_type == 'item':
|
if obj:
|
||||||
# NB Object seen by day TODO
|
# Correlations
|
||||||
if not self.is_correlated(obj_type, subtype, obj_id): # nb seen by day
|
self.add_correlation(obj.type, obj.get_subtype(r_str=True), obj.get_id())
|
||||||
|
|
||||||
|
# Stats NB by day: # TODO Don't increase on reprocess
|
||||||
r_object.zincrby(f'{self.type}:date:{date}', 1, self.id)
|
r_object.zincrby(f'{self.type}:date:{date}', 1, self.id)
|
||||||
|
|
||||||
# Correlations
|
if obj.type == 'item':
|
||||||
self.add_correlation(obj_type, subtype, obj_id)
|
item_id = obj.get_id()
|
||||||
|
# domain
|
||||||
if is_crawled(obj_id): # Domain
|
if is_crawled(item_id):
|
||||||
domain = get_item_domain(obj_id)
|
domain = get_item_domain(item_id)
|
||||||
self.add_correlation('domain', '', domain)
|
self.add_correlation('domain', '', domain)
|
||||||
else:
|
|
||||||
# Correlations
|
|
||||||
self.add_correlation(obj_type, subtype, obj_id)
|
|
||||||
|
|
||||||
# TODO Don't increase on reprocess
|
def add(self, date, obj):
|
||||||
r_object.zincrby(f'{self.type}:date:{date}', 1, self.id)
|
self._add(date, obj)
|
||||||
# r_object.zincrby(f'{self.type}:obj:{obj_type}', 1, self.id)
|
|
||||||
# 1 Domain by day / 1 HAR by day
|
|
||||||
# Domain check / file created -> issue with scheduler
|
|
||||||
|
|
||||||
# TODO:ADD objects + Stats
|
# TODO:ADD objects + Stats
|
||||||
def _create(self, first_seen=None, last_seen=None):
|
def _create(self, first_seen=None, last_seen=None):
|
||||||
|
|
|
@ -113,6 +113,34 @@ def regex_finditer(r_key, regex, item_id, content, max_time=30):
|
||||||
proc.terminate()
|
proc.terminate()
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
|
def _regex_match(r_key, regex, content):
    """Worker body: flag ``r_key`` in the cache when ``regex`` matches ``content``.

    Uses ``re.match``, so the pattern is only tested at the beginning of
    ``content``. On a match the cache key is set to ``1`` and given a
    360-second expiry; otherwise nothing is written.
    """
    if not re.match(regex, content):
        return
    # The caller reads this key back to learn the outcome.
    r_serv_cache.set(r_key, 1)
    r_serv_cache.expire(r_key, 360)
|
||||||
|
|
||||||
|
def regex_match(r_key, regex, item_id, content, max_time=30):
    """Run ``re.match`` on ``content`` in a separate worker, bounded by ``max_time``.

    The worker (``_regex_match``) reports a match by setting ``r_key`` in the
    cache; this function reads the key back, removes it and returns the result.

    :param r_key: cache key used to pass the result back from the worker
    :param regex: pattern handed to ``re.match``
    :param item_id: identifier used only in the timeout log message
    :param content: text the pattern is tested against
    :param max_time: seconds to wait for the worker before terminating it
    :return: True if the worker flagged a match, False on no match or timeout
    """
    # NOTE(review): Proc is presumably multiprocessing.Process - confirm against the file imports.
    worker = Proc(target=_regex_match, args=(r_key, regex, content))
    try:
        worker.start()
        worker.join(max_time)

        if not worker.is_alive():
            # Worker finished in time: read the flag, then always clear it.
            found = r_serv_cache.exists(r_key)
            r_serv_cache.delete(r_key)
            return True if found else False

        # Still running after max_time: kill it and report a timeout.
        worker.terminate()
        # Statistics.incr_module_timeout_statistic(r_key)
        timeout_message = f"{r_key}: processing timeout: {item_id}"
        logger.info(timeout_message)
        return False
    except KeyboardInterrupt:
        print("Caught KeyboardInterrupt, terminating regex worker")
        worker.terminate()
        sys.exit(0)
|
||||||
|
|
||||||
def _regex_search(r_key, regex, content):
|
def _regex_search(r_key, regex, content):
|
||||||
if re.search(regex, content):
|
if re.search(regex, content):
|
||||||
r_serv_cache.set(r_key, 1)
|
r_serv_cache.set(r_key, 1)
|
||||||
|
|
|
@ -54,7 +54,7 @@ class CveModule(AbstractModule):
|
||||||
date = item.get_date()
|
date = item.get_date()
|
||||||
for cve_id in cves:
|
for cve_id in cves:
|
||||||
cve = Cves.Cve(cve_id)
|
cve = Cves.Cve(cve_id)
|
||||||
cve.add(date, item_id)
|
cve.add(date, item)
|
||||||
|
|
||||||
warning = f'{item_id} contains CVEs {cves}'
|
warning = f'{item_id} contains CVEs {cves}'
|
||||||
print(warning)
|
print(warning)
|
||||||
|
|
|
@ -21,7 +21,6 @@ sys.path.append(os.environ['AIL_BIN'])
|
||||||
##################################
|
##################################
|
||||||
from modules.abstract_module import AbstractModule
|
from modules.abstract_module import AbstractModule
|
||||||
from lib.ConfigLoader import ConfigLoader
|
from lib.ConfigLoader import ConfigLoader
|
||||||
from lib.objects.Items import Item
|
|
||||||
from lib.objects.Decodeds import Decoded
|
from lib.objects.Decodeds import Decoded
|
||||||
from trackers.Tracker_Term import Tracker_Term
|
from trackers.Tracker_Term import Tracker_Term
|
||||||
from trackers.Tracker_Regex import Tracker_Regex
|
from trackers.Tracker_Regex import Tracker_Regex
|
||||||
|
@ -87,17 +86,16 @@ class Decoder(AbstractModule):
|
||||||
self.logger.info(f'Module {self.module_name} initialized')
|
self.logger.info(f'Module {self.module_name} initialized')
|
||||||
|
|
||||||
def compute(self, message):
|
def compute(self, message):
|
||||||
item = self.get_obj()
|
content = self.obj.get_content()
|
||||||
content = item.get_content()
|
date = self.obj.get_date()
|
||||||
date = item.get_date()
|
|
||||||
new_decodeds = []
|
new_decodeds = []
|
||||||
|
|
||||||
for decoder in self.decoder_order:
|
for decoder in self.decoder_order:
|
||||||
find = False
|
find = False
|
||||||
dname = decoder['name']
|
dname = decoder['name']
|
||||||
|
|
||||||
encodeds = self.regex_findall(decoder['regex'], item.id, content)
|
encodeds = self.regex_findall(decoder['regex'], self.obj.id, content)
|
||||||
# PERF remove encoded from item content
|
# PERF remove encoded from obj content
|
||||||
for encoded in encodeds:
|
for encoded in encodeds:
|
||||||
content = content.replace(encoded, '', 1)
|
content = content.replace(encoded, '', 1)
|
||||||
encodeds = set(encodeds)
|
encodeds = set(encodeds)
|
||||||
|
@ -113,19 +111,19 @@ class Decoder(AbstractModule):
|
||||||
if not decoded.exists():
|
if not decoded.exists():
|
||||||
mimetype = decoded.guess_mimetype(decoded_file)
|
mimetype = decoded.guess_mimetype(decoded_file)
|
||||||
if not mimetype:
|
if not mimetype:
|
||||||
print(sha1_string, item.id)
|
print(sha1_string, self.obj.id)
|
||||||
raise Exception(f'Invalid mimetype: {decoded.id} {item.id}')
|
raise Exception(f'Invalid mimetype: {decoded.id} {self.obj.id}')
|
||||||
decoded.save_file(decoded_file, mimetype)
|
decoded.save_file(decoded_file, mimetype)
|
||||||
new_decodeds.append(decoded.id)
|
new_decodeds.append(decoded.id)
|
||||||
else:
|
else:
|
||||||
mimetype = decoded.get_mimetype()
|
mimetype = decoded.get_mimetype()
|
||||||
decoded.add(dname, date, item.id, mimetype=mimetype)
|
decoded.add(date, self.obj, dname, mimetype=mimetype)
|
||||||
|
|
||||||
# new_decodeds.append(decoded.id)
|
# new_decodeds.append(decoded.id)
|
||||||
self.logger.info(f'{item.id} : {dname} - {decoded.id} - {mimetype}')
|
self.logger.info(f'{self.obj.id} : {dname} - {decoded.id} - {mimetype}')
|
||||||
|
|
||||||
if find:
|
if find:
|
||||||
self.logger.info(f'{item.id} - {dname}')
|
self.logger.info(f'{self.obj.id} - {dname}')
|
||||||
|
|
||||||
# Send to Tags
|
# Send to Tags
|
||||||
tag = f'infoleak:automatic-detection="{dname}"'
|
tag = f'infoleak:automatic-detection="{dname}"'
|
||||||
|
@ -134,12 +132,13 @@ class Decoder(AbstractModule):
|
||||||
####################
|
####################
|
||||||
# TRACKERS DECODED
|
# TRACKERS DECODED
|
||||||
for decoded_id in new_decodeds:
|
for decoded_id in new_decodeds:
|
||||||
|
decoded = Decoded(decoded_id)
|
||||||
try:
|
try:
|
||||||
self.tracker_term.compute(decoded_id, obj_type='decoded')
|
self.tracker_term.compute_manual(decoded)
|
||||||
self.tracker_regex.compute(decoded_id, obj_type='decoded')
|
self.tracker_regex.compute_manual(decoded)
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
pass
|
pass
|
||||||
self.tracker_yara.compute(decoded_id, obj_type='decoded')
|
self.tracker_yara.compute_manual(decoded)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
144
bin/modules/Pasties.py
Executable file
144
bin/modules/Pasties.py
Executable file
|
@ -0,0 +1,144 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*-coding:UTF-8 -*
|
||||||
|
"""
|
||||||
|
The Pasties Module
|
||||||
|
======================
|
||||||
|
This module spots domain-pasties services for further processing
|
||||||
|
"""
|
||||||
|
|
||||||
|
##################################
|
||||||
|
# Import External packages
|
||||||
|
##################################
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
|
from pyfaup.faup import Faup
|
||||||
|
|
||||||
|
sys.path.append(os.environ['AIL_BIN'])
|
||||||
|
##################################
|
||||||
|
# Import Project packages
|
||||||
|
##################################
|
||||||
|
from modules.abstract_module import AbstractModule
|
||||||
|
from lib.ConfigLoader import ConfigLoader
|
||||||
|
from lib import crawlers
|
||||||
|
|
||||||
|
# TODO add url validator
|
||||||
|
|
||||||
|
pasties_blocklist_urls = set()
|
||||||
|
pasties_domains = {}
|
||||||
|
|
||||||
|
class Pasties(AbstractModule):
|
||||||
|
"""
|
||||||
|
Pasties module for AIL framework
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
super(Pasties, self).__init__()
|
||||||
|
self.faup = Faup()
|
||||||
|
|
||||||
|
config_loader = ConfigLoader()
|
||||||
|
self.r_cache = config_loader.get_redis_conn("Redis_Cache")
|
||||||
|
|
||||||
|
self.pasties = {}
|
||||||
|
self.urls_blocklist = set()
|
||||||
|
self.load_pasties_domains()
|
||||||
|
|
||||||
|
# Send module state to logs
|
||||||
|
self.logger.info(f'Module {self.module_name} initialized')
|
||||||
|
|
||||||
|
def load_pasties_domains(self):
|
||||||
|
self.pasties = {}
|
||||||
|
self.urls_blocklist = set()
|
||||||
|
|
||||||
|
domains_pasties = os.path.join(os.environ['AIL_HOME'], 'files/domains_pasties')
|
||||||
|
if os.path.exists(domains_pasties):
|
||||||
|
with open(domains_pasties) as f:
|
||||||
|
for line in f:
|
||||||
|
url = line.strip()
|
||||||
|
if url: # TODO validate line
|
||||||
|
self.faup.decode(url)
|
||||||
|
url_decoded = self.faup.get()
|
||||||
|
host = url_decoded['host']
|
||||||
|
# if url_decoded.get('port', ''):
|
||||||
|
# host = f'{host}:{url_decoded["port"]}'
|
||||||
|
path = url_decoded.get('resource_path', '')
|
||||||
|
# print(url_decoded)
|
||||||
|
if path and path != '/':
|
||||||
|
if path[-1] != '/':
|
||||||
|
path = f'{path}/'
|
||||||
|
else:
|
||||||
|
path = None
|
||||||
|
|
||||||
|
if host in self.pasties:
|
||||||
|
if path:
|
||||||
|
self.pasties[host].add(path)
|
||||||
|
else:
|
||||||
|
if path:
|
||||||
|
self.pasties[host] = {path}
|
||||||
|
else:
|
||||||
|
self.pasties[host] = set()
|
||||||
|
|
||||||
|
url_blocklist = os.path.join(os.environ['AIL_HOME'], 'files/domains_pasties_blacklist')
|
||||||
|
if os.path.exists(url_blocklist):
|
||||||
|
with open(url_blocklist) as f:
|
||||||
|
for line in f:
|
||||||
|
url = line.strip()
|
||||||
|
self.faup.decode(url)
|
||||||
|
url_decoded = self.faup.get()
|
||||||
|
host = url_decoded['host']
|
||||||
|
# if url_decoded.get('port', ''):
|
||||||
|
# host = f'{host}:{url_decoded["port"]}'
|
||||||
|
path = url_decoded.get('resource_path', '')
|
||||||
|
url = f'{host}{path}'
|
||||||
|
if url_decoded['query_string']:
|
||||||
|
url = url + url_decoded['query_string']
|
||||||
|
self.urls_blocklist.add(url)
|
||||||
|
|
||||||
|
def send_to_crawler(self, url, obj_id):
    """Create a crawler task for ``url`` unless one was created recently.

    A per-URL key in ``self.r_cache`` acts as a 24-hour de-duplication
    marker: while the key exists, nothing is done.

    :param url: URL to crawl.
    :param obj_id: id of the object the URL was found in, passed to the
        crawler task as its ``parent``.
    """
    cache_key = f'{self.module_name}:url:{url}'
    if self.r_cache.exists(cache_key):
        return

    # Write the value and its TTL in a single command so the key can never
    # be left behind without an expiry (assumes a redis-py style client).
    self.r_cache.set(cache_key, int(time.time()), ex=86400)
    crawlers.create_task(url, depth=0, har=False, screenshot=False, proxy='force_tor', priority=60, parent=obj_id)
|
||||||
|
|
||||||
|
def compute(self, message):
    """Send a pasted-content URL to the crawler when its host is a known pastie.

    ``message`` is the URL to check. It is ignored unless its host is in
    ``self.pasties`` and the URL (without scheme) is not in
    ``self.urls_blocklist``.

    * Host registered without path prefixes: the URL is crawled as soon as
      it has a non-root path.
    * Host registered with path prefixes: the URL is crawled when its path
      starts with one of the prefixes but is not the prefix itself.
    """
    # The message is a single URL: keep it as a string (a list would break
    # the decoding and the ``startswith`` checks below).
    url = message.strip()

    self.faup.decode(url)
    url_decoded = self.faup.get()
    url_host = url_decoded['host']
    # NOTE: the port is deliberately not appended to the host.
    path = url_decoded.get('resource_path') or ''

    if url_host not in self.pasties:
        return None

    # Blocklist entries are stored without the scheme.
    if url.startswith('http://'):
        url_no_scheme = url[7:]
    elif url.startswith('https://'):
        url_no_scheme = url[8:]
    else:
        url_no_scheme = url
    if url_no_scheme in self.urls_blocklist:
        return None

    host_paths = self.pasties[url_host]
    if not host_paths:
        # Whole host is a pastie: any non-root path is a paste.
        if path and path != '/':
            print('send to crawler', url_host, url)
            self.send_to_crawler(url, self.obj.id)
        return None

    # Variant of the path with the trailing slash toggled, so that a URL
    # pointing at the prefix itself is not crawled.
    if path.endswith('/'):
        path_end = path[:-1]
    else:
        path_end = f'{path}/'

    for url_path in host_paths:
        if path.startswith(url_path):
            if url_path != path and url_path != path_end:
                print('send to crawler', url_path, url)
                self.send_to_crawler(url, self.obj.id)
                break
    return None
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Script entry point: build the Pasties module and start its run loop.
    Pasties().run()
|
|
@ -24,7 +24,6 @@ sys.path.append(os.environ['AIL_BIN'])
|
||||||
##################################
|
##################################
|
||||||
from modules.abstract_module import AbstractModule
|
from modules.abstract_module import AbstractModule
|
||||||
from lib.objects import Pgps
|
from lib.objects import Pgps
|
||||||
from lib.objects.Items import Item
|
|
||||||
from trackers.Tracker_Term import Tracker_Term
|
from trackers.Tracker_Term import Tracker_Term
|
||||||
from trackers.Tracker_Regex import Tracker_Regex
|
from trackers.Tracker_Regex import Tracker_Regex
|
||||||
from trackers.Tracker_Yara import Tracker_Yara
|
from trackers.Tracker_Yara import Tracker_Yara
|
||||||
|
@ -61,7 +60,6 @@ class PgpDump(AbstractModule):
|
||||||
self.tracker_yara = Tracker_Yara(queue=False)
|
self.tracker_yara = Tracker_Yara(queue=False)
|
||||||
|
|
||||||
# init
|
# init
|
||||||
self.item_id = None
|
|
||||||
self.keys = set()
|
self.keys = set()
|
||||||
self.private_keys = set()
|
self.private_keys = set()
|
||||||
self.names = set()
|
self.names = set()
|
||||||
|
@ -93,11 +91,11 @@ class PgpDump(AbstractModule):
|
||||||
print()
|
print()
|
||||||
pgp_block = self.remove_html(pgp_block)
|
pgp_block = self.remove_html(pgp_block)
|
||||||
# Remove Version
|
# Remove Version
|
||||||
versions = self.regex_findall(self.reg_tool_version, self.item_id, pgp_block)
|
versions = self.regex_findall(self.reg_tool_version, self.obj.id, pgp_block)
|
||||||
for version in versions:
|
for version in versions:
|
||||||
pgp_block = pgp_block.replace(version, '')
|
pgp_block = pgp_block.replace(version, '')
|
||||||
# Remove Comment
|
# Remove Comment
|
||||||
comments = self.regex_findall(self.reg_block_comment, self.item_id, pgp_block)
|
comments = self.regex_findall(self.reg_block_comment, self.obj.id, pgp_block)
|
||||||
for comment in comments:
|
for comment in comments:
|
||||||
pgp_block = pgp_block.replace(comment, '')
|
pgp_block = pgp_block.replace(comment, '')
|
||||||
# Remove Empty Lines
|
# Remove Empty Lines
|
||||||
|
@ -130,7 +128,7 @@ class PgpDump(AbstractModule):
|
||||||
try:
|
try:
|
||||||
output = output.decode()
|
output = output.decode()
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
self.logger.error(f'Error PgpDump UnicodeDecodeError: {self.item_id}')
|
self.logger.error(f'Error PgpDump UnicodeDecodeError: {self.obj.id}')
|
||||||
output = ''
|
output = ''
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
@ -145,7 +143,7 @@ class PgpDump(AbstractModule):
|
||||||
private = True
|
private = True
|
||||||
else:
|
else:
|
||||||
private = False
|
private = False
|
||||||
users = self.regex_findall(self.reg_user_id, self.item_id, pgpdump_output)
|
users = self.regex_findall(self.reg_user_id, self.obj.id, pgpdump_output)
|
||||||
for user in users:
|
for user in users:
|
||||||
# avoid key injection in user_id:
|
# avoid key injection in user_id:
|
||||||
pgpdump_output.replace(user, '', 1)
|
pgpdump_output.replace(user, '', 1)
|
||||||
|
@ -159,7 +157,7 @@ class PgpDump(AbstractModule):
|
||||||
name = user
|
name = user
|
||||||
self.names.add(name)
|
self.names.add(name)
|
||||||
|
|
||||||
keys = self.regex_findall(self.reg_key_id, self.item_id, pgpdump_output)
|
keys = self.regex_findall(self.reg_key_id, self.obj.id, pgpdump_output)
|
||||||
for key_id in keys:
|
for key_id in keys:
|
||||||
key_id = key_id.replace('Key ID - ', '', 1)
|
key_id = key_id.replace('Key ID - ', '', 1)
|
||||||
if key_id != '0x0000000000000000':
|
if key_id != '0x0000000000000000':
|
||||||
|
@ -171,28 +169,26 @@ class PgpDump(AbstractModule):
|
||||||
print('symmetrically encrypted')
|
print('symmetrically encrypted')
|
||||||
|
|
||||||
def compute(self, message):
|
def compute(self, message):
|
||||||
item = self.get_obj()
|
content = self.obj.get_content()
|
||||||
self.item_id = item.get_id()
|
|
||||||
content = item.get_content()
|
|
||||||
|
|
||||||
pgp_blocks = []
|
pgp_blocks = []
|
||||||
# Public Block
|
# Public Block
|
||||||
for pgp_block in self.regex_findall(self.reg_pgp_public_blocs, self.item_id, content):
|
for pgp_block in self.regex_findall(self.reg_pgp_public_blocs, self.obj.id, content):
|
||||||
# content = content.replace(pgp_block, '')
|
# content = content.replace(pgp_block, '')
|
||||||
pgp_block = self.sanitize_pgp_block(pgp_block)
|
pgp_block = self.sanitize_pgp_block(pgp_block)
|
||||||
pgp_blocks.append(pgp_block)
|
pgp_blocks.append(pgp_block)
|
||||||
# Private Block
|
# Private Block
|
||||||
for pgp_block in self.regex_findall(self.reg_pgp_private_blocs, self.item_id, content):
|
for pgp_block in self.regex_findall(self.reg_pgp_private_blocs, self.obj.id, content):
|
||||||
# content = content.replace(pgp_block, '')
|
# content = content.replace(pgp_block, '')
|
||||||
pgp_block = self.sanitize_pgp_block(pgp_block)
|
pgp_block = self.sanitize_pgp_block(pgp_block)
|
||||||
pgp_blocks.append(pgp_block)
|
pgp_blocks.append(pgp_block)
|
||||||
# Signature
|
# Signature
|
||||||
for pgp_block in self.regex_findall(self.reg_pgp_signature, self.item_id, content):
|
for pgp_block in self.regex_findall(self.reg_pgp_signature, self.obj.id, content):
|
||||||
# content = content.replace(pgp_block, '')
|
# content = content.replace(pgp_block, '')
|
||||||
pgp_block = self.sanitize_pgp_block(pgp_block)
|
pgp_block = self.sanitize_pgp_block(pgp_block)
|
||||||
pgp_blocks.append(pgp_block)
|
pgp_blocks.append(pgp_block)
|
||||||
# Message
|
# Message
|
||||||
for pgp_block in self.regex_findall(self.reg_pgp_message, self.item_id, content):
|
for pgp_block in self.regex_findall(self.reg_pgp_message, self.obj.id, content):
|
||||||
pgp_block = self.sanitize_pgp_block(pgp_block)
|
pgp_block = self.sanitize_pgp_block(pgp_block)
|
||||||
pgp_blocks.append(pgp_block)
|
pgp_blocks.append(pgp_block)
|
||||||
|
|
||||||
|
@ -206,26 +202,26 @@ class PgpDump(AbstractModule):
|
||||||
self.extract_id_from_pgpdump_output(pgpdump_output)
|
self.extract_id_from_pgpdump_output(pgpdump_output)
|
||||||
|
|
||||||
if self.keys or self.names or self.mails:
|
if self.keys or self.names or self.mails:
|
||||||
print(self.item_id)
|
print(self.obj.id)
|
||||||
date = item.get_date()
|
date = self.obj.get_date()
|
||||||
for key in self.keys:
|
for key in self.keys:
|
||||||
pgp = Pgps.Pgp(key, 'key')
|
pgp = Pgps.Pgp(key, 'key')
|
||||||
pgp.add(date, item)
|
pgp.add(date, self.obj)
|
||||||
print(f' key: {key}')
|
print(f' key: {key}')
|
||||||
for name in self.names:
|
for name in self.names:
|
||||||
pgp = Pgps.Pgp(name, 'name')
|
pgp = Pgps.Pgp(name, 'name')
|
||||||
pgp.add(date, item)
|
pgp.add(date, self.obj)
|
||||||
print(f' name: {name}')
|
print(f' name: {name}')
|
||||||
self.tracker_term.compute(name, obj_type='pgp', subtype='name')
|
self.tracker_term.compute_manual(pgp)
|
||||||
self.tracker_regex.compute(name, obj_type='pgp', subtype='name')
|
self.tracker_regex.compute_manual(pgp)
|
||||||
self.tracker_yara.compute(name, obj_type='pgp', subtype='name')
|
self.tracker_yara.compute_manual(pgp)
|
||||||
for mail in self.mails:
|
for mail in self.mails:
|
||||||
pgp = Pgps.Pgp(mail, 'mail')
|
pgp = Pgps.Pgp(mail, 'mail')
|
||||||
pgp.add(date, item)
|
pgp.add(date, self.obj)
|
||||||
print(f' mail: {mail}')
|
print(f' mail: {mail}')
|
||||||
self.tracker_term.compute(mail, obj_type='pgp', subtype='mail')
|
self.tracker_term.compute_manual(pgp)
|
||||||
self.tracker_regex.compute(mail, obj_type='pgp', subtype='mail')
|
self.tracker_regex.compute_manual(pgp)
|
||||||
self.tracker_yara.compute(mail, obj_type='pgp', subtype='mail')
|
self.tracker_yara.compute_manual(pgp)
|
||||||
|
|
||||||
# Keys extracted from PGP PRIVATE KEY BLOCK
|
# Keys extracted from PGP PRIVATE KEY BLOCK
|
||||||
for key in self.private_keys:
|
for key in self.private_keys:
|
||||||
|
@ -241,4 +237,3 @@ class PgpDump(AbstractModule):
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
module = PgpDump()
|
module = PgpDump()
|
||||||
module.run()
|
module.run()
|
||||||
|
|
||||||
|
|
|
@ -1,72 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
"""
|
|
||||||
The Zerobins Module
|
|
||||||
======================
|
|
||||||
This module spots zerobins-like services for further processing
|
|
||||||
"""
|
|
||||||
|
|
||||||
##################################
|
|
||||||
# Import External packages
|
|
||||||
##################################
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
import sys
|
|
||||||
|
|
||||||
sys.path.append(os.environ['AIL_BIN'])
|
|
||||||
##################################
|
|
||||||
# Import Project packages
|
|
||||||
##################################
|
|
||||||
from modules.abstract_module import AbstractModule
|
|
||||||
from lib import crawlers
|
|
||||||
|
|
||||||
|
|
||||||
class Zerobins(AbstractModule):
    """
    Zerobins module for AIL framework

    Spots URLs of zerobin-like services and creates a crawler task for each.
    """

    def __init__(self):
        super(Zerobins, self).__init__()

        # One pattern per supported service family. The group is
        # non-capturing so a match yields the whole URL, and it has no empty
        # alternative (which would accept any "https://.<something>" URL).
        binz = [
            r'^https:\/\/(?:zerobin|privatebin)\..*$',  # historical ones
        ]

        self.regex = re.compile('|'.join(binz))

        # Pending time between two computations (computeNone) in seconds
        self.pending_seconds = 10

        # Send module state to logs
        self.logger.info(f'Module {self.module_name} initialized')

    def computeNone(self):
        """
        Compute when no message in queue
        """
        self.logger.debug("No message in queue")

    def compute(self, message):
        """
        Compute a message in queue

        ``message`` is the URL to test against the zerobin patterns.
        """
        url = message
        item = self.get_obj()

        # Extract zerobins addresses
        matching_binz = self.regex_findall(self.regex, item.get_id(), url)

        for bin_url in matching_binz:
            print(f'send {bin_url} to crawler')
            # TODO Change priority ???
            crawlers.create_task(bin_url, depth=0, har=False, screenshot=False, proxy='force_tor',
                                 parent='manual', priority=60)

        self.logger.debug("Compute message in queue")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Script entry point: build the Zerobins module and start its run loop.
    Zerobins().run()
|
|
|
@ -117,6 +117,9 @@ class AbstractModule(ABC):
|
||||||
def get_available_queues(self):
|
def get_available_queues(self):
|
||||||
return self.queue.get_out_queues()
|
return self.queue.get_out_queues()
|
||||||
|
|
||||||
|
def regex_match(self, regex, obj_id, content):
    """Delegate to ``regex_helper.regex_match`` using this module's cache key
    and ``max_execution_time`` as the time limit, and return its result."""
    time_limit = self.max_execution_time
    return regex_helper.regex_match(self.r_cache_key, regex, obj_id, content, max_time=time_limit)
|
||||||
|
|
||||||
def regex_search(self, regex, obj_id, content):
|
def regex_search(self, regex, obj_id, content):
|
||||||
return regex_helper.regex_search(self.r_cache_key, regex, obj_id, content, max_time=self.max_execution_time)
|
return regex_helper.regex_search(self.r_cache_key, regex, obj_id, content, max_time=self.max_execution_time)
|
||||||
|
|
||||||
|
@ -201,6 +204,10 @@ class AbstractModule(ABC):
|
||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def compute_manual(self, obj, message=None):
    """Run ``compute`` on an explicitly supplied object.

    ``obj`` is stored as ``self.obj`` before ``compute(message)`` is called;
    the value returned by ``compute`` is passed back to the caller.
    """
    self.obj = obj
    result = self.compute(message)
    return result
|
||||||
|
|
||||||
def computeNone(self):
|
def computeNone(self):
|
||||||
"""
|
"""
|
||||||
Method of the Module when there is no message
|
Method of the Module when there is no message
|
||||||
|
|
|
@ -41,6 +41,8 @@ class Tracker_Regex(AbstractModule):
|
||||||
self.tracked_regexs = Tracker.get_tracked_regexs()
|
self.tracked_regexs = Tracker.get_tracked_regexs()
|
||||||
self.last_refresh = time.time()
|
self.last_refresh = time.time()
|
||||||
|
|
||||||
|
self.obj = None
|
||||||
|
|
||||||
# Exporter
|
# Exporter
|
||||||
self.exporters = {'mail': MailExporterTracker(),
|
self.exporters = {'mail': MailExporterTracker(),
|
||||||
'webhook': WebHookExporterTracker()}
|
'webhook': WebHookExporterTracker()}
|
||||||
|
@ -66,12 +68,46 @@ class Tracker_Regex(AbstractModule):
|
||||||
content = obj.get_content()
|
content = obj.get_content()
|
||||||
|
|
||||||
for dict_regex in self.tracked_regexs[obj_type]:
|
for dict_regex in self.tracked_regexs[obj_type]:
|
||||||
matched = self.regex_findall(dict_regex['regex'], obj_id, content)
|
matches = self.regex_finditer(dict_regex['regex'], obj_id, content)
|
||||||
if matched:
|
if matches:
|
||||||
self.new_tracker_found(dict_regex['tracked'], 'regex', obj)
|
self.new_tracker_found(dict_regex['tracked'], 'regex', obj, matches)
|
||||||
|
|
||||||
def new_tracker_found(self, tracker_name, tracker_type, obj):
|
def extract_matches(self, re_matches, limit=500, lines=5):
    """Build a short context excerpt around every regex match.

    :param re_matches: iterable of matches indexed as
        ``(start, end, value)``, offsets being positions in the content of
        ``self.obj``.
    :param limit: maximum number of characters of context taken on each
        side of a match.
    :param lines: threshold on the number of context lines; when a side
        spans more lines than this, it is trimmed to the lines closest to
        the match.
    :return: list of ``(value, excerpt)`` tuples where ``excerpt`` is
        ``context_before + value + context_after``.
    """
    matches = []
    content = self.obj.get_content()
    l_content = len(content)
    for match in re_matches:
        start = match[0]
        value = match[2]
        end = match[1]

        # Context before the match
        i_start = max(start - limit, 0)
        str_start = content[i_start:start]
        # keepends=True keeps the line breaks, so the line preceding the
        # match is never glued onto the matched value.
        start_lines = str_start.splitlines(keepends=True)
        if len(start_lines) > lines:
            str_start = ''.join(start_lines[len(start_lines) - lines + 1:])

        # Context after the match
        i_end = min(end + limit, l_content)
        str_end = content[end:i_end]
        end_lines = str_end.splitlines(keepends=True)
        if len(end_lines) > lines:
            str_end = ''.join(end_lines[:lines + 1])

        matches.append((value, f'{str_start}{value}{str_end}'))
    return matches
|
||||||
|
|
||||||
|
def new_tracker_found(self, tracker_name, tracker_type, obj, re_matches):
|
||||||
obj_id = obj.get_id()
|
obj_id = obj.get_id()
|
||||||
|
matches = None
|
||||||
for tracker_uuid in Tracker.get_trackers_by_tracked_obj_type(tracker_type, obj.get_type(), tracker_name):
|
for tracker_uuid in Tracker.get_trackers_by_tracked_obj_type(tracker_type, obj.get_type(), tracker_name):
|
||||||
tracker = Tracker.Tracker(tracker_uuid)
|
tracker = Tracker.Tracker(tracker_uuid)
|
||||||
|
|
||||||
|
@ -92,8 +128,9 @@ class Tracker_Regex(AbstractModule):
|
||||||
obj.add_tag(tag)
|
obj.add_tag(tag)
|
||||||
|
|
||||||
if tracker.mail_export():
|
if tracker.mail_export():
|
||||||
# TODO add matches + custom subjects
|
if not matches:
|
||||||
self.exporters['mail'].export(tracker, obj)
|
matches = self.extract_matches(re_matches)
|
||||||
|
self.exporters['mail'].export(tracker, obj, matches)
|
||||||
|
|
||||||
if tracker.webhook_export():
|
if tracker.webhook_export():
|
||||||
self.exporters['webhook'].export(tracker, obj)
|
self.exporters['webhook'].export(tracker, obj)
|
||||||
|
@ -102,4 +139,3 @@ class Tracker_Regex(AbstractModule):
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
module = Tracker_Regex()
|
module = Tracker_Regex()
|
||||||
module.run()
|
module.run()
|
||||||
# module.compute('submitted/2023/05/02/submitted_b1e518f1-703b-40f6-8238-d1c22888197e.gz')
|
|
||||||
|
|
|
@ -73,8 +73,56 @@ class Tracker_Yara(AbstractModule):
|
||||||
print(f'{self.obj.get_id()}: yara scanning timed out')
|
print(f'{self.obj.get_id()}: yara scanning timed out')
|
||||||
self.redis_logger.info(f'{self.obj.get_id()}: yara scanning timed out')
|
self.redis_logger.info(f'{self.obj.get_id()}: yara scanning timed out')
|
||||||
|
|
||||||
|
def convert_byte_offset_to_string(self, b_content, offset):
    """Convert a byte offset in ``b_content`` into a character offset.

    :param b_content: UTF-8 encoded content.
    :param offset: offset in bytes.
    :return: number of complete characters encoded before ``offset``, i.e.
        the index of the character at that byte position in the decoded
        string. An offset falling inside a multi-byte character maps to the
        index of that character; an offset equal to ``len(b_content)`` maps
        to the length of the decoded string.
    """
    if offset <= 0:
        return 0
    # Decode only the bytes located before the offset. errors='ignore'
    # drops a trailing partial character when the offset is not on a
    # character boundary, so no retry on UnicodeDecodeError is needed.
    return len(b_content[:offset].decode(errors='ignore'))
|
||||||
|
|
||||||
|
def extract_matches(self, data, limit=500, lines=5):
    """Build a short context excerpt around every matched YARA string.

    :param data: YARA match data; ``data['strings']`` is iterated and each
        entry exposes ``instances`` with ``offset``, ``matched_data`` and
        ``matched_length`` (byte based).
    :param limit: maximum number of characters of context taken on each
        side of a match.
    :param lines: threshold on the number of context lines; when a side
        spans more lines than this, it is trimmed to the lines closest to
        the match.
    :return: list of ``(value, excerpt)`` tuples where ``excerpt`` is
        ``context_before + value + context_after``.
    """
    matches = []
    content = self.obj.get_content()
    l_content = len(content)
    b_content = content.encode()
    for string_match in data.get('strings') or ():
        for string_match_instance in string_match.instances:
            start = string_match_instance.offset
            value = string_match_instance.matched_data.decode()
            end = start + string_match_instance.matched_length
            # YARA offsets are expressed in bytes: translate them into
            # character offsets of the decoded content.
            start = self.convert_byte_offset_to_string(b_content, start)
            end = self.convert_byte_offset_to_string(b_content, end)

            # Context before the match
            i_start = max(start - limit, 0)
            str_start = content[i_start:start]
            # keepends=True keeps the line breaks, so the line preceding
            # the match is never glued onto the matched value.
            start_lines = str_start.splitlines(keepends=True)
            if len(start_lines) > lines:
                str_start = ''.join(start_lines[len(start_lines) - lines + 1:])

            # Context after the match
            i_end = min(end + limit, l_content)
            str_end = content[end:i_end]
            end_lines = str_end.splitlines(keepends=True)
            if len(end_lines) > lines:
                str_end = ''.join(end_lines[:lines + 1])

            matches.append((value, f'{str_start}{value}{str_end}'))
    return matches
|
||||||
|
|
||||||
def yara_rules_match(self, data):
|
def yara_rules_match(self, data):
|
||||||
tracker_name = data['namespace']
|
tracker_name = data['namespace']
|
||||||
|
matches = None
|
||||||
obj_id = self.obj.get_id()
|
obj_id = self.obj.get_id()
|
||||||
for tracker_uuid in Tracker.get_trackers_by_tracked_obj_type('yara', self.obj.get_type(), tracker_name):
|
for tracker_uuid in Tracker.get_trackers_by_tracked_obj_type('yara', self.obj.get_type(), tracker_name):
|
||||||
tracker = Tracker.Tracker(tracker_uuid)
|
tracker = Tracker.Tracker(tracker_uuid)
|
||||||
|
@ -95,8 +143,9 @@ class Tracker_Yara(AbstractModule):
|
||||||
|
|
||||||
# Mails
|
# Mails
|
||||||
if tracker.mail_export():
|
if tracker.mail_export():
|
||||||
# TODO add matches + custom subjects
|
if not matches:
|
||||||
self.exporters['mail'].export(tracker, self.obj)
|
matches = self.extract_matches(data)
|
||||||
|
self.exporters['mail'].export(tracker, self.obj, matches)
|
||||||
|
|
||||||
# Webhook
|
# Webhook
|
||||||
if tracker.webhook_export():
|
if tracker.webhook_export():
|
||||||
|
|
|
@ -158,8 +158,8 @@ publish = Importers,Tags
|
||||||
subscribe = Item
|
subscribe = Item
|
||||||
publish = Tags
|
publish = Tags
|
||||||
|
|
||||||
[Zerobins]
|
#[Pasties]
|
||||||
subscribe = Url
|
#subscribe = Url
|
||||||
|
|
||||||
#[Sync_module]
|
#[Sync_module]
|
||||||
#publish = Sync
|
#publish = Sync
|
||||||
|
|
|
@ -68,7 +68,7 @@ pylibinjection>=0.2.4
|
||||||
phonenumbers>8.12.1
|
phonenumbers>8.12.1
|
||||||
|
|
||||||
# Web
|
# Web
|
||||||
flask>=1.1.4
|
flask==2.3.3
|
||||||
flask-login
|
flask-login
|
||||||
bcrypt>3.1.6
|
bcrypt>3.1.6
|
||||||
|
|
||||||
|
|
|
@ -132,6 +132,10 @@
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
<div class="custom-control custom-switch mt-1">
|
||||||
|
<input class="custom-control-input" type="checkbox" name="title_obj" id="title_obj" checked="">
|
||||||
|
<label class="custom-control-label" for="title_obj"><i class="fas fa-heading"></i> Decoded <i class="fas fa-info-circle text-info" data-toggle="tooltip" data-placement="right" title="Title that has been extracted from a HTML page"></i></label>
|
||||||
|
</div>
|
||||||
|
|
||||||
{# <div class="custom-control custom-switch mt-1">#}
|
{# <div class="custom-control custom-switch mt-1">#}
|
||||||
{# <input class="custom-control-input" type="checkbox" name="level" id="screenshot_obj" checked="">#}
|
{# <input class="custom-control-input" type="checkbox" name="level" id="screenshot_obj" checked="">#}
|
||||||
|
|
Loading…
Reference in a new issue