Add delete-side counterparts for object relationships and split decoder
statistics out of the item relationship bookkeeping.

Summary of the patch:
  * bin/Decoder.py: call Decoded.save_item_relationship() without a decoder
    type and record per-decoder statistics via Decoded.create_decoder_matadata().
  * bin/export/MispImport.py: drop redundant `pass` statements and debug noise.
  * bin/lib/Correlate_object.py: add delete_obj_relationship(), the dispatcher
    mirroring create_obj_relationship().
  * bin/lib/Decoded.py, bin/lib/Screenshot.py, bin/packages/Correlation.py:
    add delete_* functions mirroring the existing save_* functions.
  * bin/packages/Item.py: delete_item() now walks the item's correlations and
    removes each relationship before returning.

Review notes (only added lines were edited, one-for-one, so hunk sizes are
unchanged; blob hashes on the `index` lines no longer match the post-image):
  * Correlate_object.delete_obj_relationship: the cryptocurrency branch passed
    obj1_type where the object id is expected; it now passes obj1_id.
  * Decoded.delete_item_relationship: nb_seen_hash is decremented (was +1), and
    a missing nb_seen_in_all_pastes field no longer raises on int(None).
  * Screenshot.delete_domain_relationship: second call is srem (was sadd).
  * Screenshot.delete_item_relationship: hdel removes only the 'screenshot'
    field (the extra obj_id argument was treated as a second field name).
  * Correlation.delete_item_correlation: the per-date counter is removed when it
    reaches zero (was `< 0`), and a missing zscore no longer raises.
  * MispImport.create_obj_relationships: the debug guard compared a dict to a
    string and was always false; it now tests obj_meta_src['type'].

NOTE(review): indentation and blank context lines were reconstructed from the
hunk line counts; verify against the target tree before applying.

diff --git a/bin/Decoder.py b/bin/Decoder.py
index 2dd0abd2..c841876f 100755
--- a/bin/Decoder.py
+++ b/bin/Decoder.py
@@ -53,7 +53,8 @@ def decode_string(content, item_id, item_date, encoded_list, decoder_name, encod
             sha1_string = sha1(decoded_file).hexdigest()
             mimetype = Decoded.get_file_mimetype(file_content)
             Decoded.save_decoded_file_content(sha1_string, decoded_file, item_date, mimetype=mimetype)
-            Decoded.save_item_relationship(sha1_string, item_id, decoder_type=decoder_name)
+            Decoded.save_item_relationship(sha1_string, item_id)
+            Decoded.create_decoder_matadata(sha1_string, item_id, decoder_name)
 
             #remove encoded from paste content
             content = content.replace(encoded, '', 1)
diff --git a/bin/export/MispImport.py b/bin/export/MispImport.py
index 11191a54..d3b328e6 100755
--- a/bin/export/MispImport.py
+++ b/bin/export/MispImport.py
@@ -75,7 +75,6 @@ def unpack_item_obj(map_uuid_global_id, misp_obj):
 
     if obj_id and io_content:
         res = Item.create_item(obj_id, obj_meta, io_content)
-        #print(res)
         map_uuid_global_id[misp_obj.uuid] = get_global_id('item', obj_id)
 
 
@@ -167,18 +166,14 @@ def unpack_file(map_uuid_global_id, misp_obj):
 def get_misp_import_fct(map_uuid_global_id, misp_obj):
     if misp_obj.name == 'ail-leak':
         unpack_item_obj(map_uuid_global_id, misp_obj)
-        pass
     elif misp_obj.name == 'domain-ip':
         pass
     elif misp_obj.name == 'pgp-meta':
         unpack_obj_pgp(map_uuid_global_id, misp_obj)
-        pass
     elif misp_obj.name == 'coin-address':
         unpack_obj_cryptocurrency(map_uuid_global_id, misp_obj)
-        pass
     elif misp_obj.name == 'file':
         unpack_file(map_uuid_global_id, misp_obj)
-        pass
 
 # import relationship between objects
 def create_obj_relationships(map_uuid_global_id, misp_obj):
@@ -188,10 +183,11 @@ def create_obj_relationships(map_uuid_global_id, misp_obj):
             obj_meta_src = get_global_id_from_id(map_uuid_global_id[relationship.object_uuid])
             obj_meta_target = get_global_id_from_id(map_uuid_global_id[relationship.referenced_uuid])
 
-            print('000000')
-            print(obj_meta_src)
-            print(obj_meta_target)
-            print('111111')
+            if obj_meta_src['type'] in ('decoded', 'item'):  # obj_meta_src is a dict, compare its type
+                print('000000')
+                print(obj_meta_src)
+                print(obj_meta_target)
+                print('111111')
 
             Correlate_object.create_obj_relationship(obj_meta_src['type'], obj_meta_src['id'], obj_meta_target['type'], obj_meta_target['id'],
                                                      obj1_subtype=obj_meta_src['subtype'], obj2_subtype=obj_meta_target['subtype'])
@@ -216,9 +212,11 @@ if __name__ == '__main__':
 
     # misp = PyMISP('https://127.0.0.1:8443/', 'uXgcN42b7xuL88XqK5hubwD8Q8596VrrBvkHQzB0', False)
 
-    import_objs_from_file('test_import_item.json')
+    #import_objs_from_file('test_import_item.json')
 
     #Screenshot.delete_screenshot('a92d459f70c4dea8a14688f585a5e2364be8b91fbf924290ead361d9b909dcf1')
-
-    #Decoded.delete_decoded('bfd5f1d89e55b10a8b122a9d7ce31667ec1d086a')
+    #Decoded.delete_decoded('d59a110ab233fe87cefaa0cf5603b047b432ee07')
     #Pgp.pgp.delete_correlation('key', '0xA4BB02A75E6AF448')
+
+    #Item.delete_item('submitted/2020/02/10/b2485894-4325-469b-bc8f-6ad1c2dbb202.gz')
+    Item.delete_item('archive/pastebin.com_pro/2020/02/10/K2cerjP4.gz')
diff --git a/bin/lib/Correlate_object.py b/bin/lib/Correlate_object.py
index d9db9c4e..b60cebae 100755
--- a/bin/lib/Correlate_object.py
+++ b/bin/lib/Correlate_object.py
@@ -207,7 +207,7 @@ def create_obj_relationship(obj1_type, obj1_id, obj2_type, obj2_id, obj1_subtype
     if obj1_type == 'domain':
         pass
     elif obj1_type == 'item':
-        pass # son/father + duplicate
+        pass # son/father + duplicate + domain
     elif obj1_type == 'pgp':
         Pgp.pgp.save_obj_relationship(obj1_subtype, obj1_id, obj2_type, obj2_id)
     elif obj1_type == 'cryptocurrency':
@@ -217,6 +217,19 @@ def create_obj_relationship(obj1_type, obj1_id, obj2_type, obj2_id, obj1_subtype
     elif obj1_type == 'image':
         Screenshot.save_obj_relationship(obj1_id, obj2_type, obj2_id)
 
+def delete_obj_relationship(obj1_type, obj1_id, obj2_type, obj2_id, obj1_subtype=None, obj2_subtype=None):
+    if obj1_type == 'domain':
+        pass
+    elif obj1_type == 'item':
+        pass # son/father + duplicate + domain
+    elif obj1_type == 'pgp':
+        Pgp.pgp.delete_obj_relationship(obj1_subtype, obj1_id, obj2_type, obj2_id)
+    elif obj1_type == 'cryptocurrency':
+        Cryptocurrency.cryptocurrency.delete_obj_relationship(obj1_subtype, obj1_id, obj2_type, obj2_id)
+    elif obj1_type == 'decoded':
+        Decoded.delete_obj_relationship(obj1_id, obj2_type, obj2_id)
+    elif obj1_type == 'image':
+        Screenshot.delete_obj_relationship(obj1_id, obj2_type, obj2_id)
 
 def create_graph_links(links_set):
     graph_links_list = []
diff --git a/bin/lib/Decoded.py b/bin/lib/Decoded.py
index 03b80aa0..d00d14a5 100755
--- a/bin/lib/Decoded.py
+++ b/bin/lib/Decoded.py
@@ -202,26 +202,18 @@ def get_decoded_correlated_object(sha1_string, correlation_objects=[]):
             decoded_correlation[correlation_object] = res
     return decoded_correlation
 
-# # # TODO: check if item and decoded exist
-def save_item_relationship(sha1_string, item_id, decoder_type='base64'):
+# # TODO: add delete
+# delete stats
+def create_decoder_matadata(sha1_string, item_id, decoder_type):
     estimated_type = get_decoded_item_type(sha1_string)
     if not estimated_type:
         print('error, unknow sha1_string')
-
     decoder_type = sanitize_decoder_name(decoder_type)
     item_date = Item.get_item_date(item_id)
 
     r_serv_metadata.incrby('{}_decoded:{}'.format(decoder_type, item_date), 1)
-    r_serv_metadata.zincrby('hash_date:{}'.format(item_date), sha1_string, 1)
     r_serv_metadata.zincrby('{}_date:{}'.format(decoder_type, item_date), sha1_string, 1)
 
-    update_decoded_daterange(sha1_string, item_date)
-
-    # first time we see this hash (all encoding) on this item
-    if r_serv_metadata.zscore('nb_seen_hash:{}'.format(sha1_string), item_id) is None:
-        r_serv_metadata.hincrby('metadata_hash:{}'.format(sha1_string), 'nb_seen_in_all_pastes', 1)
-        r_serv_metadata.sadd('hash_paste:{}'.format(item_id), sha1_string) # item - hash map
-
     # first time we see this hash encoding on this item
     if r_serv_metadata.zscore('{}_hash:{}'.format(decoder_type, sha1_string), item_id) is None:
 
@@ -235,19 +227,59 @@ def save_item_relationship(sha1_string, item_id, decoder_type='base64'):
             r_serv_metadata.hincrby('metadata_hash:{}'.format(sha1_string), '{}_decoder'.format(decoder_type), 1)
             r_serv_metadata.zincrby('{}_type:{}'.format(decoder_type, estimated_type), item_date, 1) # # TODO: # DUP1
 
-    r_serv_metadata.zincrby('nb_seen_hash:{}'.format(sha1_string), item_id, 1)# hash - paste map
     r_serv_metadata.zincrby('{}_hash:{}'.format(decoder_type, sha1_string), item_id, 1) # number of b64 on this paste
 
+# # # TODO: check if item and decoded exist
+def save_item_relationship(sha1_string, item_id):
+    estimated_type = get_decoded_item_type(sha1_string)
+    if not estimated_type:
+        print('error, unknow sha1_string')
+
+    item_date = Item.get_item_date(item_id)
+
+    r_serv_metadata.zincrby('hash_date:{}'.format(item_date), sha1_string, 1)
+
+    update_decoded_daterange(sha1_string, item_date)
+
+    # first time we see this hash (all encoding) on this item
+    if r_serv_metadata.zscore('nb_seen_hash:{}'.format(sha1_string), item_id) is None:
+        r_serv_metadata.hincrby('metadata_hash:{}'.format(sha1_string), 'nb_seen_in_all_pastes', 1) #### MOVE IT ????
+
+    # # FIXME:
+    r_serv_metadata.zincrby('nb_seen_hash:{}'.format(sha1_string), item_id, 1)# hash - paste map
+    r_serv_metadata.sadd('hash_paste:{}'.format(item_id), sha1_string) # item - hash map
+
     # domain
     if Item.is_crawled(item_id):
         domain = Item.get_item_domain(item_id)
         save_domain_relationship(domain, sha1_string)
-    pass
+
+def delete_item_relationship(sha1_string, item_id):
+    item_date = Item.get_item_date(item_id)
+
+    #update_decoded_daterange(sha1_string, item_date) 3 # TODO:
+    r_serv_metadata.srem('hash_paste:{}'.format(item_id), sha1_string) # item - hash map
+
+    res = r_serv_metadata.zincrby('hash_date:{}'.format(item_date), sha1_string, -1)
+    if int(res) < 1:
+        r_serv_metadata.zrem('hash_date:{}'.format(item_date), sha1_string)
+
+    res = r_serv_metadata.hget('metadata_hash:{}'.format(sha1_string), 'nb_seen_in_all_pastes')
+    if res is not None and int(res) > 0: # field may be missing for an unknown hash
+        r_serv_metadata.hincrby('metadata_hash:{}'.format(sha1_string), 'nb_seen_in_all_pastes', -1)
+
+    res = r_serv_metadata.zincrby('nb_seen_hash:{}'.format(sha1_string), item_id, -1)# hash - paste map
+    if int(res) < 1:
+        r_serv_metadata.zrem('nb_seen_hash:{}'.format(sha1_string), item_id)
 
 def save_domain_relationship(domain, sha1_string):
     r_serv_metadata.sadd('hash_domain:{}'.format(domain), sha1_string) # domain - hash map
     r_serv_metadata.sadd('domain_hash:{}'.format(sha1_string), domain) # hash - domain map
 
+def delete_domain_relationship(domain, sha1_string):
+    r_serv_metadata.srem('hash_domain:{}'.format(domain), sha1_string) # domain - hash map
+    r_serv_metadata.srem('domain_hash:{}'.format(sha1_string), domain) # hash - domain map
+
 def update_decoded_daterange(obj_id, new_date):
     new_date = int(new_date)
     new_date_str = str(new_date)
@@ -268,7 +300,13 @@ def save_obj_relationship(obj_id, referenced_obj_type, referenced_obj_id):
     if referenced_obj_type == 'domain':
         save_domain_relationship(referenced_obj_id, obj_id)
     elif referenced_obj_type == 'item':
-        save_item_relationship(obj_id, referenced_obj_id, decoder_type='base64') # # TODO: handle decoder type
+        save_item_relationship(obj_id, referenced_obj_id)
+
+def delete_obj_relationship(obj_id, referenced_obj_type, referenced_obj_id):
+    if referenced_obj_type == 'domain':
+        delete_domain_relationship(referenced_obj_id, obj_id)
+    elif referenced_obj_type == 'item':
+        delete_item_relationship(obj_id, referenced_obj_id)
 
 def get_decoded_file_content(sha1_string, mimetype=None):
     filepath = get_decoded_filepath(sha1_string, mimetype=mimetype)
diff --git a/bin/lib/Screenshot.py b/bin/lib/Screenshot.py
index c4e19e68..def97ecd 100755
--- a/bin/lib/Screenshot.py
+++ b/bin/lib/Screenshot.py
@@ -46,7 +46,6 @@ def get_metadata(sha256_string):
 def get_screenshot_tags(sha256_string):
     return Tag.get_obj_tag(sha256_string)
 
-
 def get_screenshot_items_list(sha256_string):
     res = r_serv_onion.smembers('screenshot:{}'.format(sha256_string))
     if res:
@@ -131,23 +130,34 @@ def get_screenshot_correlated_object(sha256_string, correlation_objects=[]):
 def save_item_relationship(obj_id, item_id):
     r_serv_metadata.hset('paste_metadata:{}'.format(item_id), 'screenshot', obj_id)
     r_serv_onion.sadd('screenshot:{}'.format(obj_id), item_id)
-    print('---')
-    print(item_id)
     if Item.is_crawled(item_id):
         domain = Item.get_item_domain(item_id)
-        print(domain)
         save_domain_relationship(obj_id, domain)
 
+def delete_item_relationship(obj_id, item_id):
+    r_serv_metadata.hdel('paste_metadata:{}'.format(item_id), 'screenshot') # only the 'screenshot' field was set
+    r_serv_onion.srem('screenshot:{}'.format(obj_id), item_id)
+
 def save_domain_relationship(obj_id, domain):
     r_serv_onion.sadd('domain_screenshot:{}'.format(domain), obj_id)
     r_serv_onion.sadd('screenshot_domain:{}'.format(obj_id), domain)
 
+def delete_domain_relationship(obj_id, domain):
+    r_serv_onion.srem('domain_screenshot:{}'.format(domain), obj_id)
+    r_serv_onion.srem('screenshot_domain:{}'.format(obj_id), domain)
+
 def save_obj_relationship(obj_id, obj2_type, obj2_id):
     if obj2_type == 'domain':
         save_domain_relationship(obj_id, obj2_id)
     elif obj2_type == 'item':
         save_item_relationship(obj_id, obj2_id)
 
+def delete_obj_relationship(obj_id, obj2_type, obj2_id):
+    if obj2_type == 'domain':
+        delete_domain_relationship(obj_id, obj2_id)
+    elif obj2_type == 'item':
+        delete_item_relationship(obj_id, obj2_id)
+
 def get_screenshot_file_content(sha256_string):
     filepath = get_screenshot_filepath(sha256_string)
     with open(filepath, 'rb') as f:
diff --git a/bin/packages/Correlation.py b/bin/packages/Correlation.py
index 5067df5b..d7bbe941 100755
--- a/bin/packages/Correlation.py
+++ b/bin/packages/Correlation.py
@@ -333,10 +333,26 @@ class Correlation(object):
             domain = Item.get_item_domain(item_id)
             self.save_domain_correlation(domain, subtype, obj_id)
 
+    def delete_item_correlation(self, subtype, obj_id, item_id, item_date):
+        #self.update_correlation_daterange(subtype, obj_id, item_date) update daterange ! # # TODO:
+        r_serv_metadata.srem('set_{}_{}:{}'.format(self.correlation_name, subtype, obj_id), item_id)
+        r_serv_metadata.srem('item_{}_{}:{}'.format(self.correlation_name, subtype, item_id), obj_id)
+
+        res = r_serv_metadata.hincrby('{}:{}:{}'.format(self.correlation_name, subtype, item_date), obj_id, -1)
+        if int(res) < 1: # remove last
+            r_serv_metadata.hdel('{}:{}:{}'.format(self.correlation_name, subtype, item_date), obj_id)
+
+        res = r_serv_metadata.zscore('{}_all:{}'.format(self.correlation_name, subtype), obj_id)
+        if res is not None and int(res) > 0: # zscore returns None for an unknown member
+            r_serv_metadata.zincrby('{}_all:{}'.format(self.correlation_name, subtype), obj_id, -1)
+
     def save_domain_correlation(self, domain, subtype, obj_id):
         r_serv_metadata.sadd('domain_{}_{}:{}'.format(self.correlation_name, subtype, domain), obj_id)
         r_serv_metadata.sadd('set_domain_{}_{}:{}'.format(self.correlation_name, subtype, obj_id), domain)
 
+    def delete_domain_correlation(self, domain, subtype, obj_id):
+        r_serv_metadata.srem('domain_{}_{}:{}'.format(self.correlation_name, subtype, domain), obj_id)
+        r_serv_metadata.srem('set_domain_{}_{}:{}'.format(self.correlation_name, subtype, obj_id), domain)
 
     def save_correlation(self, subtype, obj_id, date_range):
         r_serv_metadata.zincrby('{}_all:{}'.format(self.correlation_name, subtype), obj_id, 0)
@@ -351,6 +367,12 @@ class Correlation(object):
         elif obj2_type == 'item':
             self.save_item_correlation(subtype, obj_id, obj2_id, Item.get_item_date(obj2_id))
 
+    def delete_obj_relationship(self, subtype, obj_id, obj2_type, obj2_id):
+        if obj2_type == 'domain':
+            self.delete_domain_correlation(obj2_id, subtype, obj_id)
+        elif obj2_type == 'item':
+            self.delete_item_correlation(subtype, obj_id, obj2_id, Item.get_item_date(obj2_id))
+
     def create_correlation(self, subtype, obj_id, obj_meta):
         res = self.sanythise_correlation_types([subtype], r_boolean=True)
         if not res:
diff --git a/bin/packages/Item.py b/bin/packages/Item.py
index 9c113571..cff8fc8c 100755
--- a/bin/packages/Item.py
+++ b/bin/packages/Item.py
@@ -395,14 +395,23 @@ def delete_item(obj_id):
     r_serv_metadata.delete('hive_cases:{}'.format(obj_id))
 
     os.remove(get_item_filename(obj_id))
-    return True
 
-    # get all correlation
-    # delete them
+    # get all correlation
+    obj_correlations = get_item_all_correlation(obj_id)
+    for correlation in obj_correlations:
+        if correlation=='cryptocurrency' or correlation=='pgp':
+            for obj2_subtype in obj_correlations[correlation]:
+                for obj2_id in obj_correlations[correlation][obj2_subtype]:
+                    Correlate_object.delete_obj_relationship(correlation, obj2_id, 'item', obj_id,
+                                                            obj1_subtype=obj2_subtype)
+        else:
+            for obj2_id in obj_correlations[correlation]:
+                Correlate_object.delete_obj_relationship(correlation, obj2_id, 'item', obj_id)
+    return True
 
     ### REQUIRE MORE WORK
     # delete child/son !!!
-    ### TODO in inport V2
+    ### TODO in inport V2
     # delete from tracked items
     # delete from queue
     ###