2022-08-19 14:53:31 +00:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
# -*-coding:UTF-8 -*
|
|
|
|
|
|
|
|
import os
|
|
|
|
import sys
|
|
|
|
|
|
|
|
sys.path.append(os.environ['AIL_BIN'])
|
|
|
|
##################################
|
|
|
|
# Import Project packages
|
|
|
|
##################################
|
|
|
|
from lib.ConfigLoader import ConfigLoader
|
|
|
|
|
|
|
|
# Build the loader only long enough to obtain the "Kvrocks_Correlations" DB handle.
config_loader = ConfigLoader()
# Module-wide connection used by every correlation function in this file
# (called with set-style commands: sadd / srem / smembers / scard / sinter ...).
r_metadata = config_loader.get_db_conn("Kvrocks_Correlations")
# Drop the loader reference; only the connection is kept at module level.
config_loader = None
|
|
|
|
|
|
|
|
##################################
|
|
|
|
# CORRELATION MIGRATION
|
|
|
|
##################################
|
|
|
|
#
|
|
|
|
# MIGRATE TO KVROCKS + Rename correlation Keys
|
|
|
|
# => Add support for correlations between subtypes
|
|
|
|
#         => Common correlation engine shared by all objects
|
|
|
|
#
|
|
|
|
# Objects Iterations: -screenshot
|
|
|
|
# -decoded
|
|
|
|
# -subtypes
|
|
|
|
# -domains
|
|
|
|
#
|
|
|
|
# /!\ Handle reinsertion /!\
|
|
|
|
#
|
|
|
|
#
|
|
|
|
# CORRELATION DB ????? => purge if needed
|
|
|
|
#
|
|
|
|
#
|
|
|
|
#
|
|
|
|
#
|
|
|
|
#
|
|
|
|
##################################
|
|
|
|
# CORRELATION MIGRATION
|
|
|
|
##################################
|
|
|
|
|
|
|
|
# Object type -> list of object types it can be correlated with.
# Looked up through get_obj_correl_types() to validate correlation filters.
CORRELATION_TYPES_BY_OBJ = {
    # message or direct correlation like cve, bitcoin, ... ???
    "chat": [
        "chat-subchannel", "chat-thread", "image", "message", "ocr", "user-account",
    ],
    "chat-subchannel": [
        "chat", "chat-thread", "image", "message", "ocr", "user-account",
    ],
    # TODO user account
    "chat-thread": [
        "chat", "chat-subchannel", "image", "message", "ocr", "user-account",
    ],
    "cookie-name": ["domain"],
    "cryptocurrency": ["domain", "item", "message", "ocr"],
    "cve": ["domain", "item", "message", "ocr"],
    "decoded": ["domain", "item", "message", "ocr"],
    "domain": [
        "cve", "cookie-name", "cryptocurrency", "decoded", "etag", "favicon",
        "hhhash", "item", "pgp", "title", "screenshot", "username",
    ],
    "etag": ["domain"],
    # TODO Decoded
    "favicon": ["domain", "item"],
    "file-name": ["chat", "message"],
    "hhhash": ["domain"],
    # TODO subchannel + threads ????
    "image": [
        "chat", "chat-subchannel", "chat-thread", "message", "ocr", "user-account",
    ],
    # chat ???
    "item": [
        "cve", "cryptocurrency", "decoded", "domain", "favicon",
        "pgp", "screenshot", "title", "username",
    ],
    "message": [
        "chat", "chat-subchannel", "chat-thread", "cve", "cryptocurrency", "decoded",
        "file-name", "image", "ocr", "pgp", "user-account",
    ],
    "ocr": [
        "chat", "chat-subchannel", "chat-thread", "cve", "cryptocurrency", "decoded",
        "image", "message", "pgp", "user-account",
    ],
    "pgp": ["domain", "item", "message", "ocr"],
    "screenshot": ["domain", "item"],
    "title": ["domain", "item"],
    "user-account": [
        "chat", "chat-subchannel", "chat-thread", "image", "message", "ocr", "username",
    ],
    "username": ["domain", "item", "message", "user-account"],
}
|
|
|
|
|
|
|
|
def get_obj_correl_types(obj_type):
    """Return the correlation types registered for *obj_type*.

    The value comes straight from CORRELATION_TYPES_BY_OBJ; ``None`` is
    returned when the object type has no entry in that table.
    """
    registered_types = CORRELATION_TYPES_BY_OBJ.get(obj_type)
    return registered_types
|
|
|
|
|
|
|
|
def sanityze_obj_correl_types(obj_type, correl_types):
    """Restrict *correl_types* to the types that are valid for *obj_type*.

    :param obj_type: object type whose allowed correlation types are looked up
    :param correl_types: iterable of requested correlation types (may be
        empty or None, meaning "no filter")
    :return: - ``[]`` when *obj_type* has no registered correlation types
             - the set of requested types that are valid for *obj_type*
             - the full list of valid types when no filter is given or when
               none of the requested types is valid
    """
    obj_correl_types = get_obj_correl_types(obj_type)
    # Unknown object type: nothing can be correlated. Returning early also
    # avoids set.intersection(None), which raises TypeError when a filter is given.
    if not obj_correl_types:
        return []
    if correl_types:
        correl_types = set(correl_types).intersection(obj_correl_types)
    # Empty filter, or a filter with no valid entry: fall back to every allowed type.
    if not correl_types:
        correl_types = obj_correl_types
    return correl_types
|
|
|
|
|
|
|
|
def get_nb_correlation_by_correl_type(obj_type, subtype, obj_id, correl_type):
    """Return the cardinality (``scard``) of one correlation set.

    The set is the one stored under
    ``correlation:obj:<obj_type>:<subtype>:<correl_type>:<obj_id>``.
    *subtype* is inserted as-is; callers are expected to pass ``''``
    rather than ``None``.
    """
    correl_key = f'correlation:obj:{obj_type}:{subtype}:{correl_type}:{obj_id}'
    return r_metadata.scard(correl_key)
|
|
|
|
|
|
|
|
def get_nb_correlations(obj_type, subtype, obj_id, filter_types=[]):
    """Count the correlations of an object, grouped by correlation type.

    :param subtype: object subtype; ``None`` is treated as ``''``
    :param filter_types: correlation types to report; sanitized through
        sanityze_obj_correl_types() before use
    :return: dict ``{correl_type: number of correlated objects}``
    """
    subtype = '' if subtype is None else subtype
    return {
        correl_type: get_nb_correlation_by_correl_type(obj_type, subtype, obj_id, correl_type)
        for correl_type in sanityze_obj_correl_types(obj_type, filter_types)
    }
|
|
|
|
|
2023-04-21 08:26:14 +00:00
|
|
|
def get_correlation_by_correl_type(obj_type, subtype, obj_id, correl_type, unpack=False):
    """Fetch the members of one correlation set.

    :param unpack: when true, each ``'<subtype>:<id>'`` member is split on
        its first ``':'`` and a list of ``[subtype, id]`` pairs is returned
    :return: the raw ``smembers`` result, or the unpacked list
    """
    correl_key = f'correlation:obj:{obj_type}:{subtype}:{correl_type}:{obj_id}'
    members = r_metadata.smembers(correl_key)
    if not unpack:
        return members
    return [member.split(':', 1) for member in members]
|
2022-08-19 14:53:31 +00:00
|
|
|
|
2023-04-21 08:26:14 +00:00
|
|
|
def get_correlations(obj_type, subtype, obj_id, filter_types=[], unpack=False):
    """Return the correlations of an object, grouped by correlation type.

    :param subtype: object subtype; ``None`` is treated as ``''``
    :param filter_types: correlation types to fetch; sanitized through
        sanityze_obj_correl_types() before use
    :param unpack: forwarded to get_correlation_by_correl_type()
    :return: dict ``{correl_type: correlated objects}``
    """
    subtype = '' if subtype is None else subtype
    allowed_types = sanityze_obj_correl_types(obj_type, filter_types)
    return {
        correl_type: get_correlation_by_correl_type(
            obj_type, subtype, obj_id, correl_type, unpack=unpack)
        for correl_type in allowed_types
    }
|
|
|
|
|
|
|
|
def exists_obj_correlation(obj_type, subtype, obj_id, obj2_type):
    """Tell whether the object has a stored correlation key towards *obj2_type*.

    ``None`` as *subtype* is treated as ``''``. The result of
    ``r_metadata.exists`` on the correlation key is returned unchanged.
    """
    subtype = '' if subtype is None else subtype
    correl_key = f'correlation:obj:{obj_type}:{subtype}:{obj2_type}:{obj_id}'
    return r_metadata.exists(correl_key)
|
|
|
|
|
|
|
|
def is_obj_correlated(obj_type, subtype, obj_id, obj2_type, subtype2, obj2_id):
    """Check whether object 2 is recorded as a correlation of object 1.

    ``None`` subtypes are treated as ``''``.

    :return: the ``sismember`` result for ``'<subtype2>:<obj2_id>'`` in the
        correlation set of object 1 towards *obj2_type*; ``False`` if the
        lookup raises
    """
    if subtype is None:
        subtype = ''
    if subtype2 is None:
        subtype2 = ''
    try:
        return r_metadata.sismember(f'correlation:obj:{obj_type}:{subtype}:{obj2_type}:{obj_id}', f'{subtype2}:{obj2_id}')
    # Best-effort lookup: any DB/argument error means "not correlated".
    # Exception (not a bare except) so KeyboardInterrupt/SystemExit still propagate.
    except Exception:
        return False
|
2022-08-19 14:53:31 +00:00
|
|
|
|
2024-01-02 16:15:45 +00:00
|
|
|
def get_obj_inter_correlation(obj_type1, subtype1, obj_id1, obj_type2, subtype2, obj_id2, correl_type):
    """Return the ``sinter`` of two objects' correlation sets for *correl_type*.

    ``None`` subtypes are treated as ``''``, matching how
    add_obj_correlation() builds the keys it writes; without this a ``None``
    subtype would produce a key containing the literal text ``None``.

    :return: result of ``r_metadata.sinter`` on both correlation keys
        (presumably the ``'<subtype>:<id>'`` members common to both objects)
    """
    if subtype1 is None:
        subtype1 = ''
    if subtype2 is None:
        subtype2 = ''
    return r_metadata.sinter(f'correlation:obj:{obj_type1}:{subtype1}:{correl_type}:{obj_id1}',
                             f'correlation:obj:{obj_type2}:{subtype2}:{correl_type}:{obj_id2}')
|
|
|
|
|
2022-08-19 14:53:31 +00:00
|
|
|
def add_obj_correlation(obj1_type, subtype1, obj1_id, obj2_type, subtype2, obj2_id):
    """Record a correlation between two objects, in both directions.

    ``None`` subtypes are treated as ``''``. Each object's
    ``'<subtype>:<id>'`` string is added to the other object's correlation set.
    """
    subtype1 = '' if subtype1 is None else subtype1
    subtype2 = '' if subtype2 is None else subtype2

    # object 1 -> object 2
    key_obj1 = f'correlation:obj:{obj1_type}:{subtype1}:{obj2_type}:{obj1_id}'
    r_metadata.sadd(key_obj1, f'{subtype2}:{obj2_id}')
    # object 2 -> object 1
    key_obj2 = f'correlation:obj:{obj2_type}:{subtype2}:{obj1_type}:{obj2_id}'
    r_metadata.sadd(key_obj2, f'{subtype1}:{obj1_id}')
|
|
|
|
|
|
|
|
|
|
|
|
def delete_obj_correlation(obj1_type, subtype1, obj1_id, obj2_type, subtype2, obj2_id):
    """Remove the correlation between two objects, in both directions.

    ``None`` subtypes are treated as ``''``. Each object's
    ``'<subtype>:<id>'`` string is removed from the other object's correlation set.
    """
    subtype1 = '' if subtype1 is None else subtype1
    subtype2 = '' if subtype2 is None else subtype2

    # object 1 -> object 2
    key_obj1 = f'correlation:obj:{obj1_type}:{subtype1}:{obj2_type}:{obj1_id}'
    r_metadata.srem(key_obj1, f'{subtype2}:{obj2_id}')
    # object 2 -> object 1
    key_obj2 = f'correlation:obj:{obj2_type}:{subtype2}:{obj1_type}:{obj2_id}'
    r_metadata.srem(key_obj2, f'{subtype1}:{obj1_id}')
|
2022-08-19 14:53:31 +00:00
|
|
|
|
2023-04-06 13:13:27 +00:00
|
|
|
def delete_obj_correlations(obj_type, subtype, obj_id):
    """Remove every correlation of an object, in both directions.

    All correlations returned by get_correlations() are walked and each
    pair is removed with delete_obj_correlation().
    """
    for correl_type, correlated in get_correlations(obj_type, subtype, obj_id).items():
        for packed_obj in correlated:
            # Members are stored as '<subtype>:<id>'; the id may itself contain ':'.
            other_subtype, other_id = packed_obj.split(':', 1)
            delete_obj_correlation(obj_type, subtype, obj_id, correl_type, other_subtype, other_id)
|
|
|
|
|
2023-04-05 14:09:06 +00:00
|
|
|
# # bypass max result/objects ???
|
|
|
|
# def get_correlation_depht(obj_type, subtype, obj_id, filter_types=[], level=1, nb_max=300):
|
|
|
|
# objs = set()
|
|
|
|
# _get_correlation_depht(objs, obj_type, subtype, obj_id, filter_types, level, nb_max)
|
|
|
|
# return objs
|
|
|
|
#
|
|
|
|
# def _get_correlation_depht(objs, obj_type, subtype, obj_id, filter_types, level, nb_max, previous_str_obj=''):
|
|
|
|
# obj_str_id = get_obj_str_id(obj_type, subtype, obj_id)
|
|
|
|
# objs.add(obj_str_id)
|
|
|
|
#
|
|
|
|
# obj_correlations = get_correlations(obj_type, subtype, obj_id, filter_types=filter_types)
|
|
|
|
# for correl_type in obj_correlations:
|
|
|
|
# for str_obj in obj_correlations[correl_type]:
|
|
|
|
# subtype2, obj2_id = str_obj.split(':', 1)
|
|
|
|
# obj2_str_id = get_obj_str_id(correl_type, subtype2, obj2_id)
|
|
|
|
#
|
|
|
|
# if obj2_str_id == previous_str_obj:
|
|
|
|
# continue
|
|
|
|
#
|
|
|
|
# if len(nodes) > nb_max:
|
|
|
|
# break
|
|
|
|
# objs.add(obj2_str_id)
|
|
|
|
#
|
|
|
|
# if level > 0:
|
|
|
|
# next_level = level - 1
|
|
|
|
# _get_correlation_depht(objs, correl_type, subtype2, obj2_id, filter_types, next_level, nb_max,
|
|
|
|
# previous_str_obj=obj_str_id)
|
2022-08-19 14:53:31 +00:00
|
|
|
|
2022-11-28 14:01:40 +00:00
|
|
|
def get_obj_str_id(obj_type, subtype, obj_id):
    """Build the ``'<obj_type>:<subtype>:<obj_id>'`` string id of an object.

    A ``None`` subtype is rendered as an empty string, so the two colons
    are always present.
    """
    normalized_subtype = '' if subtype is None else subtype
    return f'{obj_type}:{normalized_subtype}:{obj_id}'
|
2022-08-19 14:53:31 +00:00
|
|
|
|
2023-07-07 14:29:32 +00:00
|
|
|
def get_correlations_graph_nodes_links(obj_type, subtype, obj_id, filter_types=[], max_nodes=300, level=1, objs_hidden=set(), flask_context=False):
    """Build the correlation graph rooted at the given object.

    :param filter_types: correlation types to follow
    :param max_nodes: node limit handed to the traversal (0 disables the limit)
    :param level: recursion depth handed to the traversal
    :param objs_hidden: object string ids that must be left out of the graph
    :param flask_context: accepted but not used by this function
    :return: ``(root_str_id, nodes, links, meta)`` where *nodes* is a set of
        object string ids, *links* a set of ``(source, target)`` id pairs and
        *meta* a dict with the keys ``'complete'`` and ``'objs'``
    """
    graph_links, graph_nodes = set(), set()
    graph_meta = {'complete': True, 'objs': set()}

    root_str_id = get_obj_str_id(obj_type, subtype, obj_id)

    # The helper fills the three containers in place.
    _get_correlations_graph_node(
        graph_links, graph_nodes, graph_meta,
        obj_type, subtype, obj_id, level, max_nodes,
        filter_types=filter_types, objs_hidden=objs_hidden, previous_str_obj='',
    )
    return root_str_id, graph_nodes, graph_links, graph_meta
|
2022-08-19 14:53:31 +00:00
|
|
|
|
|
|
|
|
2023-07-07 14:29:32 +00:00
|
|
|
def _get_correlations_graph_node(links, nodes, meta, obj_type, subtype, obj_id, level, max_nodes, filter_types=[], objs_hidden=set(), previous_str_obj=''):
    """Recursively add an object and its correlations to the graph containers.

    *links*, *nodes* and *meta* are mutated in place.

    :param links: set receiving ``(source_str_id, target_str_id)`` pairs
    :param nodes: set receiving object string ids
    :param meta: dict; ``meta['objs']`` collects every non-hidden object seen,
        ``meta['complete']`` is set to False when the node limit stops a walk
    :param level: remaining recursion depth; neighbours are expanded while > 0
    :param max_nodes: node limit; 0 disables the limit
    :param objs_hidden: object string ids to skip entirely
    :param previous_str_obj: string id of the object we arrived from, so the
        walk does not link straight back to it
    """
    current_str_id = get_obj_str_id(obj_type, subtype, obj_id)
    meta['objs'].add(current_str_id)
    nodes.add(current_str_id)

    correlations = get_correlations(obj_type, subtype, obj_id, filter_types=filter_types)
    for neighbor_type, neighbors in correlations.items():
        for packed_neighbor in neighbors:
            neighbor_subtype, neighbor_id = packed_neighbor.split(':', 1)
            neighbor_str_id = get_obj_str_id(neighbor_type, neighbor_subtype, neighbor_id)

            # Hidden objects are ignored completely (not even listed in meta).
            if neighbor_str_id in objs_hidden:
                continue

            meta['objs'].add(neighbor_str_id)

            # Do not walk back to the object this call came from.
            if neighbor_str_id == previous_str_obj:
                continue

            # Node limit reached: flag the graph as partial and stop this correlation type.
            if len(nodes) > max_nodes and max_nodes != 0:
                meta['complete'] = False
                break

            nodes.add(neighbor_str_id)
            links.add((current_str_id, neighbor_str_id))

            if level > 0:
                _get_correlations_graph_node(
                    links, nodes, meta,
                    neighbor_type, neighbor_subtype, neighbor_id, level - 1, max_nodes,
                    filter_types=filter_types, objs_hidden=objs_hidden,
                    previous_str_obj=current_str_id,
                )
|
2023-06-20 09:23:58 +00:00
|
|
|
|