ail-framework/bin/lib/correlations_engine.py

723 lines
25 KiB
Python
Raw Normal View History

2022-08-19 14:53:31 +00:00
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import redis
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.ConfigLoader import ConfigLoader
config_loader = ConfigLoader()
r_metadata = config_loader.get_db_conn("Kvrocks_Correlations")
config_loader = None
##################################
# CORRELATION MIGRATION
##################################
#
# MIGRATE TO KVROCKS + Rename correlation Keys
# => Add support for correlations between subtypes
# => Common correlation engine for each objects
#
# Objects Iterations: -screenshot
# -decoded
# -subtypes
# -domains
#
# /!\ Handle reinsertion /!\
#
#
# CORRELATION DB ????? => purge if needed
#
#
#
#
#
##################################
# CORRELATION MIGRATION
##################################
CORRELATION_TYPES_BY_OBJ = {
"cryptocurrency" : ["domain", "item"],
"decoded" : ["domain", "item"],
"domain": ["cryptocurrency", "decoded", "item", "pgp", "username", "screenshot"],
"item": ["cryptocurrency", "decoded", "domain", "pgp", "username", "screenshot"],
"pgp" : ["domain", "item"],
"username" : ["domain", "item"],
"screenshot" : ["domain", "item"],
}
def get_obj_correl_types(obj_type):
return CORRELATION_TYPES_BY_OBJ.get(obj_type)
def sanityze_obj_correl_types(obj_type, correl_types):
obj_correl_types = get_obj_correl_types(obj_type)
if correl_types:
correl_types = set(correl_types).intersection(obj_correl_types)
if not correl_types:
correl_types = obj_correl_types
return correl_types
def get_nb_correlation_by_correl_type(obj_type, subtype, obj_id, correl_type):
return r_metadata.scard(f'correlation:obj:{obj_type}:{subtype}:{correl_type}:{obj_id}')
def get_nb_correlations(obj_type, subtype, obj_id, filter_types=[]):
if subtype is None:
subtype = ''
nb_correlations = 0
filter_types = sanityze_obj_correl_types(obj_type, filter_types)
for correl_type in filter_types:
obj_correlations += get_nb_correlation_by_correl_type(obj_type, subtype, obj_id, correl_type)
return obj_correlations
def get_correlation_by_correl_type(obj_type, subtype, obj_id, correl_type):
return r_metadata.smembers(f'correlation:obj:{obj_type}:{subtype}:{correl_type}:{obj_id}')
def get_correlations(obj_type, subtype, obj_id, filter_types=[]):
if subtype is None:
subtype = ''
obj_correlations = {}
filter_types = sanityze_obj_correl_types(obj_type, filter_types)
for correl_type in filter_types:
obj_correlations[correl_type] = get_correlation_by_correl_type(obj_type, subtype, obj_id, correl_type)
return obj_correlations
def exists_obj_correlation(obj_type, subtype, obj_id, obj2_type):
if subtype is None:
subtype = ''
return r_metadata.exists(f'correlation:obj:{obj_type}:{subtype}:{obj2_type}:{obj_id}')
def is_obj_correlated(obj_type, subtype, obj_id, obj2_type, subtype2, obj2_id):
if subtype is None:
subtype = ''
if subtype2 is None:
subtype2 = ''
return r_metadata.sismember(f'correlation:obj:{obj_type}:{subtype}:{obj2_type}:{obj_id}', '{subtype2}:{obj2_id}')
def add_obj_correlation(obj1_type, subtype1, obj1_id, obj2_type, subtype2, obj2_id):
if subtype1 is None:
subtype1 = ''
if subtype2 is None:
subtype2 = ''
r_metadata.sadd(f'correlation:obj:{obj1_type}:{subtype1}:{obj2_type}:{obj1_id}', f'{subtype2}:{obj2_id}')
r_metadata.sadd(f'correlation:obj:{obj2_type}:{subtype2}:{obj1_type}:{obj2_id}', f'{subtype1}:{obj1_id}')
def delete_obj_correlation(obj1_type, subtype1, obj1_id, obj2_type, subtype2, obj2_id):
if subtype1 is None:
subtype1 = ''
if subtype2 is None:
subtype2 = ''
r_metadata.srem(f'correlation:obj:{obj1_type}:{subtype}:{obj2_type}:{obj_id}', f'{subtype2}:{obj2_id}')
r_metadata.srem(f'correlation:obj:{obj2_type}:{subtype2}:{obj1_type}:{obj2_id}', f'{subtype}:{obj_id}')
# # TODO: CORRELATION GRAPH
def get_obj_str_id(obj_type, subtype, obj_id): ################ REPLACE BY : ?????????????????????????
if subtype is None:
subtype = ''
return f'{obj_type};{subtype};{obj_id}'
def get_correlations_graph_nodes_links(obj_type, subtype, obj_id, filter_types=[], max_nodes=300, level=1, flask_context=False):
links = set()
nodes = set()
obj_str_id = get_obj_str_id(obj_type, subtype, obj_id)
_get_correlations_graph_node(links, nodes, obj_type, subtype, obj_id, level, max_nodes, filter_types=filter_types, previous_str_obj='')
2022-08-19 14:53:31 +00:00
return obj_str_id, nodes, links
def _get_correlations_graph_node(links, nodes, obj_type, subtype, obj_id, level, max_nodes, filter_types=[], previous_str_obj=''):
obj_str_id = get_obj_str_id(obj_type, subtype, obj_id)
nodes.add(obj_str_id)
obj_correlations = get_correlations(obj_type, subtype, obj_id, filter_types=filter_types)
#print(obj_correlations)
2022-08-19 14:53:31 +00:00
for correl_type in obj_correlations:
for str_obj in obj_correlations[correl_type]:
subtype2, obj2_id = str_obj.split(':', 1)
obj2_str_id = get_obj_str_id(correl_type, subtype2, obj2_id)
if obj2_str_id == previous_str_obj:
continue
if len(nodes) > max_nodes:
break
nodes.add(obj2_str_id)
links.add((obj_str_id, obj2_str_id))
if level > 0:
next_level = level - 1
_get_correlations_graph_node(links, nodes, correl_type, subtype2, obj2_id, next_level, max_nodes, filter_types=filter_types, previous_str_obj=obj_str_id)
##########################################################
##########################################################
##########################################################
##########################################################
##########################################################
##########################################################
# get_correlations_fcts = {
# "cryptocurrency" : ["domain", "item"],
# "decoded" : ["domain", "item"],
# "domain": ["cryptocurrency", "decoded", "item", "pgp", "username", "screenshot"],
# "item": ["cryptocurrency", "decoded", "domain", "pgp", "username", "screenshot"],
# "pgp" : ["domain", "item"],
# "username" : ["domain", "item"],
# "screenshot" :{
# "domain": get_correl_screenshot_domain,
# "item": get_correl_screenshot_item,
# },
# }
# }
#
# def build_lsets_obj_types(obj1_type, obj_types):
# return [set(obj1_type, x) for x in subtypes_obj]
#
# ##########################
# subtypes_obj = ['cryptocurrency', 'pgp', 'username']
# lsets_subtypes_obj_domain = build_lsets_obj_types('domain', subtypes_obj)
# lsets_subtypes_obj_item = build_lsets_obj_types('item', subtypes_obj)
# ##########################
# TODO HANDLE CRAWLED ITEMS
def add_correlation(obj1_type, obj1_subtype, obj1_id, obj2_type, obj2_subtype, obj2_id):
set_type = set(ob1_type, ob2_type)
# domain - subtypes objs
if set_type in lsets_subtypes_obj_domain:
if ob1_type == 'domain':
domain = obj1_id
obj_type = obj2_type
obj_subtype = obj2_subtype
obj_id = obj2_id
else:
domain = obj2_id
obj_type = obj1_type
obj_subtype = obj1_subtype
obj_id = obj1_id
r_metadata.sadd(f'domain_{obj_type}_{obj_subtype}:{domain}', obj_id)
r_metadata.sadd(f'set_domain_{obj_type}_{obj_subtype}:{obj_id}', domain)
# TODO HANDLE CRAWLED ITEMS
# item - subtypes objs
elif set_type in lsets_subtypes_obj_item:
if ob1_type == 'item':
item_id = obj1_id
obj_type = obj2_type
obj_subtype = obj2_subtype
obj_id = obj2_id
else:
item_id = obj2_id
obj_type = obj1_type
obj_subtype = obj1_subtype
obj_id = obj1_id
r_metadata.sadd(f'set_{obj_type}_{obj_subtype}:{obj_id}', item_id)
r_metadata.sadd(f'item_{obj_type}_{obj_subtype}:{item_id}', obj_id)
# domain - decoded
elif set_type == set('domain', 'decoded'):
if ob1_type == 'decoded':
decoded_id = ob1_id
domain = obj2_id
else:
decoded_id = obj2_id
domain = ob1_id
r_metadata.sadd(f'hash_domain:{domain}', decoded_id) # domain - hash map
r_metadata.sadd(f'domain_hash:{decoded_id}', domain) # hash - domain map
# item - decoded
elif set_type == set('item', 'decoded'):
if ob1_type == 'decoded':
decoded_id = ob1_id
item_id = obj2_id
else:
decoded_id = obj2_id
item_id = ob1_id
############################################################
# domain - screenshot
elif set_type == set('domain', 'screenshot'):
if ob1_type == 'screenshot':
screenshot_id = ob1_id
domain = obj2_id
else:
screenshot_id = obj2_id
domain = ob1_id
r_crawler.sadd(f'domain_screenshot:{domain}', screenshot_id)
r_crawler.sadd(f'screenshot_domain:{screenshot_id}', domain)
# item - screenshot
elif set_type == set('item', 'screenshot'):
if ob1_type == 'screenshot':
screenshot_id = ob1_id
item_id = obj2_id
else:
screenshot_id = obj2_id
item_id = ob1_id
r_metadata.hset(f'paste_metadata:{item_id}', 'screenshot', screenshot_id)
r_crawler.sadd(f'screenshot:{screenshot_id}', item_id)
# domain - item
elif set_type == set('domain', 'item'):
if ob1_type == 'item':
item_id = ob1_id
domain = obj2_id
else:
item_id = obj2_id
domain = ob1_id
############################################################
# TODO ADD COMPLETE DELETE
# TODO: Handle items crawled
def delete_correlation(obj1_type, obj1_subtype, obj1_id, obj2_type, obj2_subtype, obj2_id):
set_type = set(ob1_type, ob2_type)
# domain - subtypes objs
if set_type in lsets_subtypes_obj_domain:
if ob1_type == 'domain':
domain = obj1_id
obj_type = obj2_type
obj_subtype = obj2_subtype
obj_id = obj2_id
else:
domain = obj2_id
obj_type = obj1_type
obj_subtype = obj1_subtype
obj_id = obj1_id
r_metadata.srem(f'domain_{obj_type}_{obj_subtype}:{domain}', obj_id)
r_metadata.srem(f'set_domain_{obj_type}_{obj_subtype}:{obj_id}', domain)
# TODO ADD COMPLETE DELETE
# item - subtypes objs
elif set_type in lsets_subtypes_obj_item:
if ob1_type == 'item':
item_id = obj1_id
obj_type = obj2_type
obj_subtype = obj2_subtype
obj_id = obj2_id
else:
item_id = obj2_id
obj_type = obj1_type
obj_subtype = obj1_subtype
obj_id = obj1_id
# TODO ADD COMPLETE DELETE
r_metadata.srem(f'set_{obj_type}_{subtype}:{obj_id}', item_id)
r_metadata.srem(f'item_{obj_type}_{subtype}:{item_id}', obj_id)
# TODO ADD COMPLETE DELETE
# domain - decoded
elif set_type == set('domain', 'decoded'):
if ob1_type == 'decoded':
decoded_id = ob1_id
domain = obj2_id
else:
decoded_id = obj2_id
domain = ob1_id
r_metadata.srem(f'hash_domain:{domain}', decoded_id)
r_metadata.srem(f'domain_hash:{decoded_id}', domain)
# item - decoded
elif set_type == set('item', 'decoded'):
if ob1_type == 'decoded':
decoded_id = ob1_id
item_id = obj2_id
else:
decoded_id = obj2_id
item_id = ob1_id
####################################################################
# domain - screenshot
elif set_type == set('domain', 'screenshot'):
if ob1_type == 'screenshot':
screenshot_id = ob1_id
domain = obj2_id
else:
screenshot_id = obj2_id
domain = ob1_id
r_crawler.srem(f'domain_screenshot:{domain}', screenshot_id)
r_crawler.srem(f'screenshot_domain:{screenshot_id}', domain)
# item - screenshot
elif set_type == set('item', 'screenshot'):
if ob1_type == 'screenshot':
screenshot_id = ob1_id
item_id = obj2_id
else:
screenshot_id = obj2_id
item_id = ob1_id
r_metadata.hdel(f'paste_metadata:{item_id}', 'screenshot', screenshot_id)
r_crawler.srem(f'screenshot:{screenshot_id}', item_id)
# domain - item
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
## Subtypes - Cryptocurrency Pgp Username ##
def get_correl_subtypes_obj_domain(obj_type, obj_subtype, obj_id):
r_serv_metadata.smembers(f'set_domain_{obj_type}_{obj_subtype}:{obj_id}')
def get_correl_subtypes_obj_item():
pass
def delete_subtype_domain_correlation(domain, obj_type, subtype, obj_id):
r_metadata.srem(f'domain_{obj_type}_{subtype}:{domain}', obj_id)
r_metadata.srem(f'set_domain_{obj_type}_{subtype}:{obj_id}', domain)
# TODO ADD COMPLETE DELETE
def delete_subtype_item_correlation(obj_type, subtype, obj_id, item_id, item_date):
#self.update_correlation_daterange(subtype, obj_id, item_date) update daterange ! # # TODO:
r_metadata.srem(f'set_{obj_type}_{subtype}:{obj_id}', item_id)
r_metadata.srem(f'item_{obj_type}_{subtype}:{item_id}', obj_id)
# # TODO: FIXME HANDLE SUB Objects Metadata # WARNING:
# res = r_serv_metadata.hincrby('{}:{}:{}'.format(self.correlation_name, subtype, item_date), obj_id, -1)
# if int(res) < 0: # remove last
# r_serv_metadata.hdel('{}:{}:{}'.format(self.correlation_name, subtype, item_date), obj_id)
#
# res = r_serv_metadata.zscore('{}_all:{}'.format(self.correlation_name, subtype), obj_id)
# if int(res) > 0:
# r_serv_metadata.zincrby('{}_all:{}'.format(self.correlation_name, subtype), obj_id, -1)
## Screenshot ##
##-- Screenshot - Domain --##
def add_correl_screenshot_domain(screenshot_id, domain):
r_crawler.sadd(f'domain_screenshot:{domain}', screenshot_id)
r_crawler.sadd(f'screenshot_domain:{screenshot_id}', domain)
def get_correl_screenshot_domain(screenshot_id):
return r_crawler.smembers(f'screenshot_domain:{screenshot_id}')
# def delete_correl_screenshot_domain(screenshot_id, domain):
# r_crawler.srem(f'domain_screenshot:{domain}', screenshot_id)
# r_crawler.srem(f'screenshot_domain:{screenshot_id}', domain)
##-- Screenshot - Item --##
def add_correl_screenshot_item(screenshot_id, item_id):
r_metadata.hset(f'paste_metadata:{item_id}', 'screenshot', screenshot_id)
r_crawler.sadd(f'screenshot:{screenshot_id}', item_id)
def get_correl_screenshot_item(screenshot_id):
r_crawler.smembers(f'screenshot:{screenshot_id}')
# def delete_correl_screenshot_item(screenshot_id, item_id):
# r_metadata.hdel(f'paste_metadata:{item_id}', 'screenshot', screenshot_id)
# r_crawler.srem(f'screenshot:{screenshot_id}', item_id)
## -- ##
def get_correl_item_screenshot(item_id):
res = r_metadata.hget(f'paste_metadata:{item_id}', 'screenshot')
if res:
return set(res)
else:
return set()
## Domain ##
def get_correl_domain_subtypes_obj(domain_id, obj_type, obj_subtype):
return r_serv_metadata.smembers(f'domain_{obj_type}_{obj_subtype}:{domain_id}')
## -- ##
## Item ##
def get_correl_item_subtypes_obj():
pass
## -- ## war game stinger - stranger thing
def _get_object_correlations(obj_type, obj_subtype, obj_id, filter_types=[]): # # TODO: , filter_subtypes=[]
obj_relationships = get_obj_relationships(obj_type)
correlations = []
for correlation_fct in obj_relationship_fcts[obj_type]:
correlations
def get_object_correlations(filter_types, filter_subtypes, lvl=0):
pass
####################################################################
####################################################################
####################################################################
####################################################################
####################################################################
####################################################################
def get_object_correlation(object_type, value, correlation_names=None, correlation_objects=None, requested_correl_type=None):
if object_type == 'domain':
return Domain.get_domain_all_correlation(value, correlation_names=correlation_names)
elif object_type == 'paste' or object_type == 'item':
return Item.get_item_all_correlation(value, correlation_names=correlation_names)
elif object_type == 'decoded':
return Decoded.get_decoded_correlated_object(value, correlation_objects=correlation_objects)
elif object_type == 'pgp':
return Pgp.pgp.get_correlation_all_object(requested_correl_type, value, correlation_objects=correlation_objects)
elif object_type == 'cryptocurrency':
return Cryptocurrency.cryptocurrency.get_correlation_all_object(requested_correl_type, value, correlation_objects=correlation_objects)
elif object_type == 'username':
return Username.correlation.get_correlation_all_object(requested_correl_type, value, correlation_objects=correlation_objects)
elif object_type == 'screenshot' or object_type == 'image':
return Screenshot.get_screenshot_correlated_object(value, correlation_objects=correlation_objects)
return {}
def get_obj_tag_table_keys(object_type):
'''
Warning: use only in flask (dynamic templates)
'''
if object_type=="domain":
return ['id', 'first_seen', 'last_check', 'status'] # # TODO: add root screenshot
def create_obj_relationship(obj1_type, obj1_id, obj2_type, obj2_id, obj1_subtype=None, obj2_subtype=None):
if obj1_type == 'domain':
pass
elif obj1_type == 'item':
pass # son/father + duplicate + domain
elif obj1_type == 'pgp':
Pgp.pgp.save_obj_relationship(obj1_subtype, obj1_id, obj2_type, obj2_id)
elif obj1_type == 'cryptocurrency':
Cryptocurrency.cryptocurrency.save_obj_relationship(obj1_subtype, obj1_type, obj2_type, obj2_id)
elif obj1_type == 'decoded':
Decoded.save_obj_relationship(obj1_id, obj2_type, obj2_id)
elif obj1_type == 'image':
Screenshot.save_obj_relationship(obj1_id, obj2_type, obj2_id)
def delete_obj_relationship(obj1_type, obj1_id, obj2_type, obj2_id, obj1_subtype=None, obj2_subtype=None):
if obj1_type == 'domain':
pass
elif obj1_type == 'item':
pass # son/father + duplicate + domain
elif obj1_type == 'pgp':
Pgp.pgp.delete_obj_relationship(obj1_subtype, obj1_id, obj2_type, obj2_id)
elif obj1_type == 'cryptocurrency':
Cryptocurrency.cryptocurrency.delete_obj_relationship(obj1_subtype, obj1_type, obj2_type, obj2_id)
elif obj1_type == 'decoded':
Decoded.delete_obj_relationship(obj1_id, obj2_type, obj2_id)
elif obj1_type == 'image':
Screenshot.delete_obj_relationship(obj1_id, obj2_type, obj2_id)
def create_graph_links(links_set):
graph_links_list = []
for link in links_set:
graph_links_list.append({"source": link[0], "target": link[1]})
return graph_links_list
def create_graph_nodes(nodes_set, root_node_id, flask_context=True):
graph_nodes_list = []
for node_id in nodes_set:
correlation_name, correlation_type, value = node_id.split(';', 3)
dict_node = {"id": node_id}
dict_node['style'] = get_correlation_node_icon(correlation_name, correlation_type, value)
dict_node['text'] = value
if node_id == root_node_id:
dict_node["style"]["node_color"] = 'orange'
dict_node["style"]["node_radius"] = 7
dict_node['url'] = get_item_url(correlation_name, value, correlation_type, flask_context=flask_context)
graph_nodes_list.append(dict_node)
return graph_nodes_list
def create_node_id(correlation_name, value, correlation_type=''):
if correlation_type is None:
correlation_type = ''
return '{};{};{}'.format(correlation_name, correlation_type, value)
# # TODO: filter by correlation type => bitcoin, mail, ...
def get_graph_node_object_correlation(object_type, root_value, mode, correlation_names, correlation_objects, max_nodes=300, requested_correl_type=None, flask_context=True):
links = set()
nodes = set()
root_node_id = create_node_id(object_type, root_value, requested_correl_type)
nodes.add(root_node_id)
root_correlation = get_object_correlation(object_type, root_value, correlation_names, correlation_objects, requested_correl_type=requested_correl_type)
for correl in root_correlation:
if correl in ('pgp', 'cryptocurrency', 'username'):
for correl_type in root_correlation[correl]:
for correl_val in root_correlation[correl][correl_type]:
# add correlation
correl_node_id = create_node_id(correl, correl_val, correl_type)
if mode=="union":
if len(nodes) > max_nodes:
break
nodes.add(correl_node_id)
links.add((root_node_id, correl_node_id))
# get second correlation
res = get_object_correlation(correl, correl_val, correlation_names, correlation_objects, requested_correl_type=correl_type)
if res:
for corr_obj in res:
for correl_key_val in res[corr_obj]:
#filter root value
if correl_key_val == root_value:
continue
if len(nodes) > max_nodes:
break
new_corel_1 = create_node_id(corr_obj, correl_key_val)
new_corel_2 = create_node_id(correl, correl_val, correl_type)
nodes.add(new_corel_1)
nodes.add(new_corel_2)
links.add((new_corel_1, new_corel_2))
if mode=="inter":
nodes.add(correl_node_id)
links.add((root_node_id, correl_node_id))
if correl in ('decoded', 'screenshot', 'domain', 'paste'):
for correl_val in root_correlation[correl]:
correl_node_id = create_node_id(correl, correl_val)
if mode=="union":
if len(nodes) > max_nodes:
break
nodes.add(correl_node_id)
links.add((root_node_id, correl_node_id))
res = get_object_correlation(correl, correl_val, correlation_names, correlation_objects)
if res:
for corr_obj in res:
if corr_obj in ('decoded', 'domain', 'paste', 'screenshot'):
for correl_key_val in res[corr_obj]:
#filter root value
if correl_key_val == root_value:
continue
if len(nodes) > max_nodes:
break
new_corel_1 = create_node_id(corr_obj, correl_key_val)
new_corel_2 = create_node_id(correl, correl_val)
nodes.add(new_corel_1)
nodes.add(new_corel_2)
links.add((new_corel_1, new_corel_2))
if mode=="inter":
nodes.add(correl_node_id)
links.add((root_node_id, correl_node_id))
if corr_obj in ('pgp', 'cryptocurrency', 'username'):
for correl_key_type in res[corr_obj]:
for correl_key_val in res[corr_obj][correl_key_type]:
#filter root value
if correl_key_val == root_value:
continue
if len(nodes) > max_nodes:
break
new_corel_1 = create_node_id(corr_obj, correl_key_val, correl_key_type)
new_corel_2 = create_node_id(correl, correl_val)
nodes.add(new_corel_1)
nodes.add(new_corel_2)
links.add((new_corel_1, new_corel_2))
if mode=="inter":
nodes.add(correl_node_id)
links.add((root_node_id, correl_node_id))
return {"nodes": create_graph_nodes(nodes, root_node_id, flask_context=flask_context), "links": create_graph_links(links)}
#######################################################################################3