ail-framework/bin/lib/correlations_engine.py

233 lines
9.9 KiB
Python
Raw Normal View History

2022-08-19 16:53:31 +02:00
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.ConfigLoader import ConfigLoader
config_loader = ConfigLoader()
r_metadata = config_loader.get_db_conn("Kvrocks_Correlations")
config_loader = None
##################################
# CORRELATION MIGRATION
##################################
#
# MIGRATE TO KVROCKS + Rename correlation Keys
# => Add support for correlations between subtypes
# => Common correlation engine for each objects
#
# Objects Iterations: -screenshot
# -decoded
# -subtypes
# -domains
#
# /!\ Handle reinsertion /!\
#
#
# CORRELATION DB ????? => purge if needed
#
#
#
#
#
##################################
# CORRELATION MIGRATION
##################################
CORRELATION_TYPES_BY_OBJ = {
"barcode": ["chat", "cve", "cryptocurrency", "decoded", "domain", "image", "message", "screenshot"],
"chat": ["barcode", "chat-subchannel", "chat-thread", "image", "message", "ocr", "user-account"], # message or direct correlation like cve, bitcoin, ... ???
"chat-subchannel": ["chat", "chat-thread", "image", "message", "ocr", "user-account"],
"chat-thread": ["chat", "chat-subchannel", "image", "message", "ocr", "user-account"],
"cookie-name": ["domain"],
"cryptocurrency": ["barcode", "domain", "item", "message", "ocr", "qrcode"],
"cve": ["barcode", "domain", "item", "message", "ocr", "qrcode"],
"decoded": ["barcode", "domain", "item", "message", "ocr", "qrcode"],
"domain": ["barcode", "cve", "cookie-name", "cryptocurrency", "dom-hash", "decoded", "etag", "favicon", "hhhash", "item", "pgp", "title", "screenshot", "username"],
"dom-hash": ["domain", "item"],
2023-07-06 11:26:32 +02:00
"etag": ["domain"],
"favicon": ["domain", "item"], # TODO Decoded
"file-name": ["chat", "message"],
"hhhash": ["domain"],
"image": ["barcode", "chat", "chat-subchannel", "chat-thread", "message", "ocr", "qrcode", "user-account"], # TODO subchannel + threads ????
"item": ["cve", "cryptocurrency", "decoded", "domain", "dom-hash", "favicon", "pgp", "screenshot", "title", "username"], # chat ???
"message": ["barcode", "chat", "chat-subchannel", "chat-thread", "cve", "cryptocurrency", "decoded", "file-name", "image", "ocr", "pgp", "user-account"],
"ocr": ["chat", "chat-subchannel", "chat-thread", "cve", "cryptocurrency", "decoded", "image", "message", "pgp", "user-account"],
"pgp": ["domain", "item", "message", "ocr"],
"qrcode": ["chat", "cve", "cryptocurrency", "decoded", "domain", "image", "message", "screenshot"], # "chat-subchannel", "chat-thread" ?????
"screenshot": ["barcode", "domain", "item", "qrcode"],
"title": ["domain", "item"],
"user-account": ["chat", "chat-subchannel", "chat-thread", "image", "message", "ocr", "username"],
"username": ["domain", "item", "message", "user-account"],
2022-08-19 16:53:31 +02:00
}
def get_obj_correl_types(obj_type):
return CORRELATION_TYPES_BY_OBJ.get(obj_type)
def sanityze_obj_correl_types(obj_type, correl_types, sanityze=True):
if not sanityze:
return correl_types
2022-08-19 16:53:31 +02:00
obj_correl_types = get_obj_correl_types(obj_type)
if correl_types:
correl_types = set(correl_types).intersection(obj_correl_types)
if not correl_types:
correl_types = obj_correl_types
if not correl_types:
return []
2022-08-19 16:53:31 +02:00
return correl_types
def get_nb_correlation_by_correl_type(obj_type, subtype, obj_id, correl_type):
return r_metadata.scard(f'correlation:obj:{obj_type}:{subtype}:{correl_type}:{obj_id}')
def get_nb_correlations(obj_type, subtype, obj_id, filter_types=[]):
if subtype is None:
subtype = ''
2022-11-22 10:47:15 +01:00
obj_correlations = {}
2022-08-19 16:53:31 +02:00
filter_types = sanityze_obj_correl_types(obj_type, filter_types)
for correl_type in filter_types:
2022-11-22 10:47:15 +01:00
obj_correlations[correl_type] = get_nb_correlation_by_correl_type(obj_type, subtype, obj_id, correl_type)
2022-08-19 16:53:31 +02:00
return obj_correlations
def get_correlation_by_correl_type(obj_type, subtype, obj_id, correl_type, unpack=False):
correl = r_metadata.smembers(f'correlation:obj:{obj_type}:{subtype}:{correl_type}:{obj_id}')
if unpack:
unpacked = []
for str_correl in correl:
unpacked.append(str_correl.split(':', 1))
return unpacked
else:
return correl
2022-08-19 16:53:31 +02:00
def get_correlations(obj_type, subtype, obj_id, filter_types=[], unpack=False, sanityze=True):
2022-08-19 16:53:31 +02:00
if subtype is None:
subtype = ''
obj_correlations = {}
filter_types = sanityze_obj_correl_types(obj_type, filter_types, sanityze=sanityze)
2022-08-19 16:53:31 +02:00
for correl_type in filter_types:
obj_correlations[correl_type] = get_correlation_by_correl_type(obj_type, subtype, obj_id, correl_type,
unpack=unpack)
2022-08-19 16:53:31 +02:00
return obj_correlations
def exists_obj_correlation(obj_type, subtype, obj_id, obj2_type):
if subtype is None:
subtype = ''
return r_metadata.exists(f'correlation:obj:{obj_type}:{subtype}:{obj2_type}:{obj_id}')
def is_obj_correlated(obj_type, subtype, obj_id, obj2_type, subtype2, obj2_id):
if subtype is None:
subtype = ''
if subtype2 is None:
subtype2 = ''
try:
return r_metadata.sismember(f'correlation:obj:{obj_type}:{subtype}:{obj2_type}:{obj_id}', f'{subtype2}:{obj2_id}')
except:
return False
2022-08-19 16:53:31 +02:00
def get_obj_inter_correlation(obj_type1, subtype1, obj_id1, obj_type2, subtype2, obj_id2, correl_type):
return r_metadata.sinter(f'correlation:obj:{obj_type1}:{subtype1}:{correl_type}:{obj_id1}', f'correlation:obj:{obj_type2}:{subtype2}:{correl_type}:{obj_id2}')
2022-08-19 16:53:31 +02:00
def add_obj_correlation(obj1_type, subtype1, obj1_id, obj2_type, subtype2, obj2_id):
if subtype1 is None:
subtype1 = ''
if subtype2 is None:
subtype2 = ''
r_metadata.sadd(f'correlation:obj:{obj1_type}:{subtype1}:{obj2_type}:{obj1_id}', f'{subtype2}:{obj2_id}')
r_metadata.sadd(f'correlation:obj:{obj2_type}:{subtype2}:{obj1_type}:{obj2_id}', f'{subtype1}:{obj1_id}')
def delete_obj_correlation(obj1_type, subtype1, obj1_id, obj2_type, subtype2, obj2_id):
if subtype1 is None:
subtype1 = ''
if subtype2 is None:
subtype2 = ''
r_metadata.srem(f'correlation:obj:{obj1_type}:{subtype1}:{obj2_type}:{obj1_id}', f'{subtype2}:{obj2_id}')
r_metadata.srem(f'correlation:obj:{obj2_type}:{subtype2}:{obj1_type}:{obj2_id}', f'{subtype1}:{obj1_id}')
2022-08-19 16:53:31 +02:00
def delete_obj_correlations(obj_type, subtype, obj_id):
obj_correlations = get_correlations(obj_type, subtype, obj_id)
for correl_type in obj_correlations:
for str_obj in obj_correlations[correl_type]:
subtype2, obj2_id = str_obj.split(':', 1)
delete_obj_correlation(obj_type, subtype, obj_id, correl_type, subtype2, obj2_id)
2023-04-05 16:09:06 +02:00
# # bypass max result/objects ???
# def get_correlation_depht(obj_type, subtype, obj_id, filter_types=[], level=1, nb_max=300):
# objs = set()
# _get_correlation_depht(objs, obj_type, subtype, obj_id, filter_types, level, nb_max)
# return objs
#
# def _get_correlation_depht(objs, obj_type, subtype, obj_id, filter_types, level, nb_max, previous_str_obj=''):
# obj_str_id = get_obj_str_id(obj_type, subtype, obj_id)
# objs.add(obj_str_id)
#
# obj_correlations = get_correlations(obj_type, subtype, obj_id, filter_types=filter_types)
# for correl_type in obj_correlations:
# for str_obj in obj_correlations[correl_type]:
# subtype2, obj2_id = str_obj.split(':', 1)
# obj2_str_id = get_obj_str_id(correl_type, subtype2, obj2_id)
#
# if obj2_str_id == previous_str_obj:
# continue
#
# if len(nodes) > nb_max:
# break
# objs.add(obj2_str_id)
#
# if level > 0:
# next_level = level - 1
# _get_correlation_depht(objs, correl_type, subtype2, obj2_id, filter_types, next_level, nb_max,
# previous_str_obj=obj_str_id)
2022-08-19 16:53:31 +02:00
def get_obj_str_id(obj_type, subtype, obj_id):
2022-08-19 16:53:31 +02:00
if subtype is None:
subtype = ''
return f'{obj_type}:{subtype}:{obj_id}'
2022-08-19 16:53:31 +02:00
def get_correlations_graph_nodes_links(obj_type, subtype, obj_id, filter_types=[], max_nodes=300, level=1, objs_hidden=set(), flask_context=False):
2022-08-19 16:53:31 +02:00
links = set()
nodes = set()
meta = {'complete': True, 'objs': set()}
2022-08-19 16:53:31 +02:00
obj_str_id = get_obj_str_id(obj_type, subtype, obj_id)
_get_correlations_graph_node(links, nodes, meta, obj_type, subtype, obj_id, level, max_nodes, filter_types=filter_types, objs_hidden=objs_hidden, previous_str_obj='')
return obj_str_id, nodes, links, meta
2022-08-19 16:53:31 +02:00
def _get_correlations_graph_node(links, nodes, meta, obj_type, subtype, obj_id, level, max_nodes, filter_types=[], objs_hidden=set(), previous_str_obj=''):
2022-08-19 16:53:31 +02:00
obj_str_id = get_obj_str_id(obj_type, subtype, obj_id)
meta['objs'].add(obj_str_id)
2022-08-19 16:53:31 +02:00
nodes.add(obj_str_id)
obj_correlations = get_correlations(obj_type, subtype, obj_id, filter_types=filter_types)
# print(obj_correlations)
2022-08-19 16:53:31 +02:00
for correl_type in obj_correlations:
for str_obj in obj_correlations[correl_type]:
subtype2, obj2_id = str_obj.split(':', 1)
obj2_str_id = get_obj_str_id(correl_type, subtype2, obj2_id)
# filter objects to hide
if obj2_str_id in objs_hidden:
continue
meta['objs'].add(obj2_str_id)
2022-08-19 16:53:31 +02:00
if obj2_str_id == previous_str_obj:
continue
if len(nodes) > max_nodes != 0:
meta['complete'] = False
2022-08-19 16:53:31 +02:00
break
nodes.add(obj2_str_id)
links.add((obj_str_id, obj2_str_id))
if level > 0:
next_level = level - 1
_get_correlations_graph_node(links, nodes, meta, correl_type, subtype2, obj2_id, next_level, max_nodes, filter_types=filter_types, objs_hidden=objs_hidden, previous_str_obj=obj_str_id)