From 25e3022eab56735bd6511a693ddcb517304e9c09 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Tue, 11 Feb 2020 15:48:30 +0100 Subject: [PATCH] chg: [MISP import] import files obj: decoded + screenshot --- bin/Decoder.py | 1 - bin/export/MispExport.py | 9 +- bin/export/MispImport.py | 69 ++++++- bin/lib/Correlate_object.py | 2 +- bin/lib/Decoded.py | 64 +++++- bin/lib/Screenshot.py | 2 +- bin/packages/Cryptocurrency.py | 19 +- bin/packages/Item.py | 1 + bin/packages/Pgp.py | 5 +- bin/packages/Tag.py | 20 +- bin/packages/correlation.py | 366 --------------------------------- 11 files changed, 154 insertions(+), 404 deletions(-) delete mode 100755 bin/packages/correlation.py diff --git a/bin/Decoder.py b/bin/Decoder.py index 82133de7..c309a035 100755 --- a/bin/Decoder.py +++ b/bin/Decoder.py @@ -144,7 +144,6 @@ def save_hash_on_disk(decode, type, hash, json_data): f.write(decode) # create hash metadata - serv_metadata.hset('metadata_hash:'+hash, 'saved_path', local_filename_hash) serv_metadata.hset('metadata_hash:'+hash, 'size', os.path.getsize(filename_hash)) with open(filename_json, 'w') as f: diff --git a/bin/export/MispExport.py b/bin/export/MispExport.py index 154cddb4..abefa86a 100755 --- a/bin/export/MispExport.py +++ b/bin/export/MispExport.py @@ -316,11 +316,12 @@ def get_relationship_between_global_obj(obj_global_id_1, obj_global_id_2): if __name__ == '__main__': - l_obj = [#{'id': 'crawled/2019/11/08/6d3zimnpbwbzdgnp.onionf58258c8-c990-4707-b236-762a2b881183', 'type': 'item', 'lvl': 3}, - #{'id': '6d3zimnpbwbzdgnp.onion', 'type': 'domain', 'lvl': 0}, + l_obj = [{'id': 'crawled/2019/11/08/6d3zimnpbwbzdgnp.onionf58258c8-c990-4707-b236-762a2b881183', 'type': 'item', 'lvl': 3}, + {'id': '6d3zimnpbwbzdgnp.onion', 'type': 'domain', 'lvl': 0}, + {'id': 'bfd5f1d89e55b10a8b122a9d7ce31667ec1d086a', 'type': 'decoded', 'lvl': 2}, #{'id': 'a92d459f70c4dea8a14688f585a5e2364be8b91fbf924290ead361d9b909dcf1', 'type': 'image', 'lvl': 3}, - #{'id': 'archive/pastebin.com_pro/2020/01/27/iHjcWhkD.gz', 'type': 'item', 'lvl': 3}, - {'id': '15efuhpw5V9B1opHAgNXKPBPqdYALXP4hc', 'type': 'cryptocurrency', 'subtype': 'bitcoin', 'lvl': 0} + {'id': 'archive/pastebin.com_pro/2020/01/27/iHjcWhkD.gz', 'type': 'item', 'lvl': 1}, + {'id': '15efuhpw5V9B1opHAgNXKPBPqdYALXP4hc', 'type': 'cryptocurrency', 'subtype': 'bitcoin', 'lvl': 1} ] create_list_of_objs_to_export(l_obj, mode='union') diff --git a/bin/export/MispImport.py b/bin/export/MispImport.py index fe335cc5..521e5f1c 100755 --- a/bin/export/MispImport.py +++ b/bin/export/MispImport.py @@ -69,10 +69,6 @@ def unpack_item_obj(map_uuid_global_id, misp_obj): map_uuid_global_id[misp_obj.uuid] = get_global_id('item', obj_id) -def get_obj_relationship(misp_obj): - for item in misp_obj.ObjectReference: - print(item.to_json()) - ## TODO: handle multiple pgp in the same object @@ -118,7 +114,57 @@ def unpack_obj_cryptocurrency(map_uuid_global_id, misp_obj): map_uuid_global_id[misp_obj.uuid] = get_global_id('pgp', obj_id, obj_subtype=obj_subtype) - get_obj_relationship(misp_obj) + #get_obj_relationship(misp_obj) + +def get_obj_type_from_relationship(misp_obj): + obj_uuid = misp_obj.uuid + obj_type = None + + for relation in misp_obj.ObjectReference: + if relation.object_uuid == obj_uuid: + if relation.relationship_type == "screenshot-of": + return 'screenshot' + if relation.relationship_type == "included-in": + obj_type = 'decoded' + return obj_type + +def get_obj_relationship(misp_obj): + for item in misp_obj.ObjectReference: + print(item.to_json()) + + +# # TODO: covert md5 
and sha1 to expected +def unpack_file(map_uuid_global_id, misp_obj): + + obj_type = get_obj_type_from_relationship(misp_obj) + if obj_type: + obj_id = None + io_content = None + for attribute in misp_obj.attributes: + # get file content + if attribute.object_relation == 'attachment': + io_content = attribute.data + elif attribute.object_relation == 'malware-sample': + io_content = attribute.data + + # # TODO: use/verify specified mimetype + elif attribute.object_relation == 'mimetype': + print(attribute.value) + + # # TODO: support more + elif attribute.object_relation == 'sha1' and obj_type == 'decoded': + obj_id = attribute.value + elif attribute.object_relation == 'sha256' and obj_type == 'screenshot': + obj_id = attribute.value + + if obj_id and io_content: + print(obj_type) + obj_meta = get_object_metadata(misp_obj) + if obj_type == 'screenshot': + #Screenshot.create_screenshot(obj_id, obj_meta, io_content) + pass + else: #decoded + Decoded.create_decoded(obj_id, obj_meta, io_content) def get_misp_import_fct(map_uuid_global_id, misp_obj): #print(misp_obj.ObjectReference) @@ -129,17 +175,22 @@ def get_misp_import_fct(map_uuid_global_id, misp_obj): #print(misp_obj.name) if misp_obj.name == 'ail-leak': - unpack_item_obj(map_uuid_global_id, misp_obj) + #unpack_item_obj(map_uuid_global_id, misp_obj) #print(misp_obj.to_json()) pass elif misp_obj.name == 'domain-ip': pass elif misp_obj.name == 'pgp-meta': - unpack_obj_pgp(map_uuid_global_id, misp_obj) + #unpack_obj_pgp(map_uuid_global_id, misp_obj) + pass elif misp_obj.name == 'coin-address': + #unpack_obj_cryptocurrency(map_uuid_global_id, misp_obj) pass elif misp_obj.name == 'file': - + unpack_file(map_uuid_global_id, misp_obj) + print() + print('---') + print() #unpack_item_obj(map_uuid_global_id, misp_obj) pass @@ -160,4 +211,4 @@ if __name__ == '__main__': # misp = PyMISP('https://127.0.0.1:8443/', 'uXgcN42b7xuL88XqK5hubwD8Q8596VrrBvkHQzB0', False) #import_objs_from_file('test_import_item.json') - import_objs_from_file('test_export.json') + import_objs_from_file('test_import_item.json') diff --git a/bin/lib/Correlate_object.py b/bin/lib/Correlate_object.py index a7c989c7..fe5599d3 100755 --- a/bin/lib/Correlate_object.py +++ b/bin/lib/Correlate_object.py @@ -44,7 +44,7 @@ def get_all_correlation_objects(): ''' return ['domain', 'paste'] -def exist_object(object_type, correlation_id, type_id=None): +def exist_object(object_type, correlation_id, type_id=None): # => work on object level if object_type == 'domain': return Domain.verify_if_domain_exist(correlation_id) elif object_type == 'paste' or object_type == 'item': diff --git a/bin/lib/Decoded.py b/bin/lib/Decoded.py index b47b7038..dbfcdb78 100755 --- a/bin/lib/Decoded.py +++ b/bin/lib/Decoded.py @@ -2,6 +2,7 @@ # -*-coding:UTF-8 -* import os +import magic import sys import redis @@ -13,7 +14,6 @@ import Date import Tag sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib')) -import correlation import ConfigLoader @@ -23,6 +23,13 @@ r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") HASH_DIR = config_loader.get_config_str('Directories', 'hash') config_loader = None +# # TODO: move me in another file +def get_all_correlation_objects(): + ''' + Return a list of all correllated objects + ''' + return ['domain', 'paste'] + def get_decoded_item_type(sha1_string): ''' Retun the estimed type of a given decoded item. 
@@ -31,6 +38,9 @@ def get_decoded_item_type(sha1_string):
     '''
     return r_serv_metadata.hget('metadata_hash:{}'.format(sha1_string), 'estimated_type')
 
+def get_file_mimetype(bytes_content):
+    return magic.from_buffer(bytes_content, mime=True)
+
 def nb_decoded_seen_in_item(sha1_string):
     nb = r_serv_metadata.hget('metadata_hash:{}'.format(sha1_string), 'nb_seen_in_all_pastes')
     if nb is None:
@@ -147,7 +157,7 @@ def get_decoded_correlated_object(sha1_string, correlation_objects=[]):
     :rtype: dict
     '''
     if correlation_objects is None:
-        correlation_objects = correlation.get_all_correlation_objects()
+        correlation_objects = get_all_correlation_objects()
     decoded_correlation = {}
     for correlation_object in correlation_objects:
         if correlation_object == 'paste':
@@ -170,3 +180,53 @@ def get_decoded_file_content(sha1_string, mimetype=None):
     with open(filepath, 'rb') as f:
         file_content = BytesIO(f.read())
     return file_content
+
+# # TODO: check file format
+def save_decoded_file_content(sha1_string, io_content, date_range, mimetype=None):
+    if not mimetype:
+        if exist_decoded(sha1_string):
+            mimetype = get_decoded_item_type(sha1_string)
+        else:
+            mimetype = get_file_mimetype(io_content.getvalue())
+
+
+
+    filepath = get_decoded_filepath(sha1_string, mimetype=mimetype)
+    if os.path.isfile(filepath):
+        print('File already exists')
+        return False
+
+    # create dir
+    dirname = os.path.dirname(filepath)
+    if not os.path.exists(dirname):
+        os.makedirs(dirname)
+
+    with open(filepath, 'wb') as f:
+        f.write(io_content.getvalue())
+
+    # create hash metadata
+    r_serv_metadata.hset('metadata_hash:{}'.format(sha1_string), 'size', os.path.getsize(filepath))
+
+    r_serv_metadata.hset('metadata_hash:{}'.format(sha1_string), 'first_seen', date_range['date_from'])
+    r_serv_metadata.hset('metadata_hash:{}'.format(sha1_string), 'last_seen', date_range['date_to'])
+
+    return True
+
+def delete_decoded_file(obj_id, io_content):
+    # check if item exists
+    if not exist_decoded(obj_id):
+        return False
+    else:
+        Tag.delete_obj_tags(obj_id, 'decoded', Tag.get_obj_tag(obj_id))
+        os.remove(get_decoded_filepath(obj_id))
+        r_serv_metadata.delete('metadata_hash:{}'.format(obj_id))
+        return True
+
+def create_decoded(obj_id, obj_meta, io_content):
+    first_seen = obj_meta.get('first_seen', None)
+    last_seen = obj_meta.get('last_seen', None)
+    date_range = Date.sanitise_date_range(first_seen, last_seen, separator='', date_type='datetime')
+
+    res = save_decoded_file_content(obj_id, io_content, date_range, mimetype=None)
+    if res and 'tags' in obj_meta:
+        Tag.api_add_obj_tags(tags=obj_meta['tags'], object_id=obj_id, object_type="decoded")
diff --git a/bin/lib/Screenshot.py b/bin/lib/Screenshot.py
index d96c1a15..326e5368 100755
--- a/bin/lib/Screenshot.py
+++ b/bin/lib/Screenshot.py
@@ -145,7 +145,7 @@ def save_screenshot_file(sha256_string, io_content):
         f.write(io_content.getvalue())
     return True
 
-def create_screenshot(sha256_string, io_content):
+def create_screenshot(sha256_string, obj_meta, io_content):
     # check if sha256
     res = save_screenshot_file(sha256_string, io_content)
     if res:
diff --git a/bin/packages/Cryptocurrency.py b/bin/packages/Cryptocurrency.py
index 7e69cbb2..ec1636c3 100755
--- a/bin/packages/Cryptocurrency.py
+++ b/bin/packages/Cryptocurrency.py
@@ -8,7 +8,8 @@ import redis
 from hashlib import sha256
 
 sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
-import correlation
+import Correlation
+import Item
 
 sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
 import ConfigLoader
@@ -19,7 +20,7 @@
config_loader = None digits58 = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz' -cryptocurrency = correlation.Correlation('cryptocurrency', ['bitcoin', 'ethereum', 'bitcoin-cash', 'litecoin', 'monero', 'zcash', 'dash']) +cryptocurrency = Correlation.Correlation('cryptocurrency', ['bitcoin', 'ethereum', 'bitcoin-cash', 'litecoin', 'monero', 'zcash', 'dash']) # http://rosettacode.org/wiki/Bitcoin/address_validation#Python def decode_base58(bc, length): @@ -74,18 +75,18 @@ def get_cryptocurrency_symbol(crypto_type): return None def get_cryptocurrency_type(crypto_symbol): - if crypto_type=='BTC': + if crypto_symbol=='BTC': return 'bitcoin' - elif crypto_type=='ETH': + elif crypto_symbol=='ETH': return 'ethereum' - elif crypto_type=='BCH': + elif crypto_symbol=='BCH': return 'bitcoin-cash' - elif crypto_type=='LTC': + elif crypto_symbol=='LTC': return 'litecoin' - elif crypto_type=='XMR': + elif crypto_symbol=='XMR': return 'monero' - elif crypto_type=='ZEC': + elif crypto_symbol=='ZEC': return 'zcash' - elif crypto_type=='DASH': + elif crypto_symbol=='DASH': return 'dash' return None diff --git a/bin/packages/Item.py b/bin/packages/Item.py index b14052a5..9c113571 100755 --- a/bin/packages/Item.py +++ b/bin/packages/Item.py @@ -402,6 +402,7 @@ def delete_item(obj_id): ### REQUIRE MORE WORK # delete child/son !!! + ### TODO in inport V2 # delete from tracked items # delete from queue ### diff --git a/bin/packages/Pgp.py b/bin/packages/Pgp.py index ef3e558b..4edef07c 100755 --- a/bin/packages/Pgp.py +++ b/bin/packages/Pgp.py @@ -6,7 +6,8 @@ import sys import redis sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages')) -import correlation +import Correlation +import Item sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) import ConfigLoader @@ -15,7 +16,7 @@ config_loader = ConfigLoader.ConfigLoader() serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") config_loader = None -pgp = correlation.Correlation('pgpdump', ['key', 'mail', 'name']) +pgp = Correlation.Correlation('pgpdump', ['key', 'mail', 'name']) def get_pgp(request_dict, pgp_type): # basic verification diff --git a/bin/packages/Tag.py b/bin/packages/Tag.py index b8394b34..a062da5e 100755 --- a/bin/packages/Tag.py +++ b/bin/packages/Tag.py @@ -12,7 +12,6 @@ import Item sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) import ConfigLoader -import Correlate_object from pytaxonomies import Taxonomies from pymispgalaxies import Galaxies, Clusters @@ -311,12 +310,15 @@ def update_tag_global_by_obj_type(object_type, tag): # update object global tags r_serv_tags.srem('list_tags:{}'.format(object_type), tag) # update global tags - for obj_type in Correlate_object.get_all_objects(): + for obj_type in get_all_objects(): if r_serv_tags.exists('{}:{}'.format(obj_type, tag)): tag_deleted = False if tag_deleted: r_serv_tags.srem('list_tags', tag) +def get_all_objects(): + return ['domain', 'item', 'pgp', 'cryptocurrency', 'decoded', 'image'] + def add_global_tag(tag, object_type=None): ''' Create a set of all tags used in AIL (all + by object) @@ -422,6 +424,13 @@ def delete_tag(object_type, tag, object_id, obj_date=None): else: return ({'status': 'error', 'reason': 'object id or tag not found', 'value': tag}, 400) +# # TODO: move me +def get_obj_date(object_type, object_id): + if object_type == "item": + return int(Item.get_item_date(object_id)) + else: + return None + # API QUERY def api_delete_obj_tags(tags=[], object_id=None, object_type="item"): if not object_id: @@ -559,10 +568,3 @@ def 
get_obj_by_tags(object_type, l_tags, date_from=None, date_to=None, nb_obj=50 l_tagged_obj = list(l_tagged_obj) return {"tagged_obj":l_tagged_obj, "page":page, "nb_pages":nb_pages, "nb_first_elem":start+1, "nb_last_elem":stop, "nb_all_elem":nb_all_elem} - - -def get_obj_date(object_type, object_id): # # TODO: move me in another file - if object_type == "item": - return int(Item.get_item_date(object_id)) - else: - return None diff --git a/bin/packages/correlation.py b/bin/packages/correlation.py deleted file mode 100755 index 8adce6ac..00000000 --- a/bin/packages/correlation.py +++ /dev/null @@ -1,366 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import os -import sys -import redis - -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) -import ConfigLoader - -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) -import Date -import Item -import Tag - -config_loader = ConfigLoader.ConfigLoader() -r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") -config_loader = None - -def get_all_correlation_objects(): - ''' - Return a list of all correllated objects - ''' - return ['domain', 'paste'] - -class Correlation(object): - - def __init__(self, correlation_name, all_correlation_types): - self.correlation_name = correlation_name - self.all_correlation_types = all_correlation_types - - def _exist_corelation_field(self, correlation_type, field_name, item_type='paste'): - if item_type=='paste': - return r_serv_metadata.exists('set_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name)) - else: - return r_serv_metadata.exists('set_domain_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name)) - - def exist_correlation(self, subtype, obj_id): - res = r_serv_metadata.zscore('{}_all:{}'.format(self.correlation_name, subtype), obj_id) - if res: - return True - else: - return False - - def _get_items(self, correlation_type, field_name): - res = r_serv_metadata.smembers('set_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name)) - if res: - return list(res) - else: - return [] - - def get_correlation_first_seen(self, subtype, obj_id, r_int=False): - res = r_serv_metadata.hget('{}_metadata_{}:{}'.format(self.correlation_name, subtype, obj_id), 'first_seen') - if r_int: - if res: - return int(res) - else: - return 99999999 - else: - return res - - def get_correlation_last_seen(self, subtype, obj_id, r_int=False): - res = r_serv_metadata.hget('{}_metadata_{}:{}'.format(self.correlation_name, subtype, obj_id), 'last_seen') - if r_int: - if res: - return int(res) - else: - return 0 - else: - return res - - def _get_metadata(self, subtype, obj_id): - meta_dict = {} - meta_dict['first_seen'] = self.get_correlation_first_seen(subtype, obj_id) - meta_dict['last_seen'] = self.get_correlation_last_seen(subtype, obj_id) - meta_dict['nb_seen'] = r_serv_metadata.scard('set_{}_{}:{}'.format(self.correlation_name, subtype, obj_id)) - return meta_dict - - def get_metadata(self, correlation_type, field_name, date_format='str_date'): - meta_dict = self._get_metadata(correlation_type, field_name) - if date_format == "str_date": - if meta_dict['first_seen']: - meta_dict['first_seen'] = '{}/{}/{}'.format(meta_dict['first_seen'][0:4], meta_dict['first_seen'][4:6], meta_dict['first_seen'][6:8]) - if meta_dict['last_seen']: - meta_dict['last_seen'] = '{}/{}/{}'.format(meta_dict['last_seen'][0:4], meta_dict['last_seen'][4:6], meta_dict['last_seen'][6:8]) - return meta_dict - - def get_nb_object_seen_by_date(self, correlation_type, field_name, 
date_day): - nb = r_serv_metadata.hget('{}:{}:{}'.format(self.correlation_name, correlation_type, date_day), field_name) - if nb is None: - return 0 - else: - return int(nb) - - def get_list_nb_previous_correlation_object(self, correlation_type, field_name, numDay): - nb_previous_correlation = [] - for date_day in Date.get_previous_date_list(numDay): - nb_previous_correlation.append(self.get_nb_object_seen_by_date(correlation_type, field_name, date_day)) - return nb_previous_correlation - - def _get_correlation_by_date(self, correlation_type, date): - return r_serv_metadata.hkeys('{}:{}:{}'.format(self.correlation_name, correlation_type, date)) - - def verify_correlation_field_request(self, request_dict, correlation_type, item_type='paste'): - if not request_dict: - return ({'status': 'error', 'reason': 'Malformed JSON'}, 400) - - field_name = request_dict.get(correlation_type, None) - if not field_name: - return ( {'status': 'error', 'reason': 'Mandatory parameter(s) not provided'}, 400 ) - if not self._exist_corelation_field(correlation_type, field_name, item_type=item_type): - return ( {'status': 'error', 'reason': 'Item not found'}, 404 ) - - def get_correlation(self, request_dict, correlation_type, field_name): - dict_resp = {} - - if request_dict.get('items'): - dict_resp['items'] = self._get_items(correlation_type, field_name) - - if request_dict.get('metadata'): - dict_resp['metadata'] = self._get_metadata(correlation_type, field_name) - - dict_resp[correlation_type] = field_name - - return (dict_resp, 200) - - def get_all_correlation_types(self): - ''' - Gel all correlation types - - :return: A list of all the correlation types - :rtype: list - ''' - return self.all_correlation_types - - def get_correlation_obj_type(self): - if self.correlation_name=='pgpdump': - return 'pgp' - else: - return 'cryptocurrency' - - def sanythise_correlation_types(self, correlation_types, r_boolean=False): - ''' - Check if all correlation types in the list are valid. - - :param correlation_types: list of correlation type - :type currency_type: list - - :return: If a type is invalid, return the full list of correlation types else return the provided list - :rtype: list - ''' - if correlation_types is None: - if r_boolean: - return False - else: - return self.get_all_correlation_types() - for correl in correlation_types: # # TODO: # OPTIMIZE: - if correl not in self.get_all_correlation_types(): - if r_boolean: - return False - else: - return self.get_all_correlation_types() - if r_boolean: - return True - else: - return correlation_types - - - def _get_domain_correlation_obj(self, domain, correlation_type): - ''' - Return correlation of a given domain. - - :param domain: crawled domain - :type domain: str - :param correlation_type: correlation type - :type correlation_type: str - - :return: a list of correlation - :rtype: list - ''' - res = r_serv_metadata.smembers('domain_{}_{}:{}'.format(self.correlation_name, correlation_type, domain)) - if res: - return list(res) - else: - return [] - - def get_domain_correlation_dict(self, domain, correlation_type=None, get_nb=False): - ''' - Return all correlation of a given domain. 
- - :param domain: crawled domain - :param correlation_type: list of correlation types - :type correlation_type: list, optional - - :return: a dictionnary of all the requested correlations - :rtype: dict - ''' - correlation_type = self.sanythise_correlation_types(correlation_type) - dict_correlation = {} - for correl in correlation_type: - res = self._get_domain_correlation_obj(domain, correl) - if res: - dict_correlation[correl] = res - if get_nb: - dict_correlation['nb'] = dict_correlation.get('nb', 0) + len(dict_correlation[correl]) - return dict_correlation - - def _get_correlation_obj_domain(self, field_name, correlation_type): - ''' - Return all domains that contain this correlation. - - :param domain: field name - :type domain: str - :param correlation_type: correlation type - :type correlation_type: str - - :return: a list of correlation - :rtype: list - ''' - res = r_serv_metadata.smembers('set_domain_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name)) - if res: - return list(res) - else: - return [] - - def get_correlation_obj_domain(self, field_name, correlation_type=None): - ''' - Return all domain correlation of a given correlation_value. - - :param field_name: field_name - :param correlation_type: list of correlation types - :type correlation_type: list, optional - - :return: a dictionnary of all the requested correlations - :rtype: list - ''' - correlation_type = self.sanythise_correlation_types(correlation_type) - for correl in correlation_type: - res = self._get_correlation_obj_domain(field_name, correl) - if res: - return res - return [] - - - - def _get_item_correlation_obj(self, item_id, correlation_type): - ''' - Return correlation of a given item id. - - :param item_id: item id - :type item_id: str - :param correlation_type: correlation type - :type correlation_type: str - - :return: a list of correlation - :rtype: list - ''' - res = r_serv_metadata.smembers('item_{}_{}:{}'.format(self.correlation_name, correlation_type, item_id)) - if res: - return list(res) - else: - return [] - - def get_item_correlation_dict(self, item_id, correlation_type=None, get_nb=False): - ''' - Return all correlation of a given item id. 
- - :param item_id: item id - :param correlation_type: list of correlation types - :type correlation_type: list, optional - - :return: a dictionnary of all the requested correlations - :rtype: dict - ''' - correlation_type = self.sanythise_correlation_types(correlation_type) - dict_correlation = {} - for correl in correlation_type: - res = self._get_item_correlation_obj(item_id, correl) - if res: - dict_correlation[correl] = res - if get_nb: - dict_correlation['nb'] = dict_correlation.get('nb', 0) + len(dict_correlation[correl]) - return dict_correlation - - - def get_correlation_all_object(self, correlation_type, correlation_value, correlation_objects=[]): - if correlation_objects is None: - correlation_objects = get_all_correlation_objects() - correlation_obj = {} - for correlation_object in correlation_objects: - if correlation_object == 'paste': - res = self._get_items(correlation_type, correlation_value) - elif correlation_object == 'domain': - res = self.get_correlation_obj_domain(correlation_value, correlation_type=correlation_type) - else: - res = None - if res: - correlation_obj[correlation_object] = res - return correlation_obj - - def update_correlation_daterange(self, subtype, obj_id, date): - date = int(date) - # obj_id don't exit - if not r_serv_metadata.exists('{}_metadata_{}:{}'.format(self.correlation_name, subtype, obj_id)): - r_serv_metadata.hset('{}_metadata_{}:{}'.format(self.correlation_name, subtype, obj_id), 'first_seen', date) - r_serv_metadata.hset('{}_metadata_{}:{}'.format(self.correlation_name, subtype, obj_id), 'last_seen', date) - else: - first_seen = self.get_correlation_last_seen(subtype, obj_id, r_int=True) - last_seen = self.get_correlation_first_seen(subtype, obj_id, r_int=True) - if date < first_seen: - r_serv_metadata.hset('{}_metadata_{}:{}'.format(self.correlation_name, subtype, obj_id), 'first_seen', date) - if date > last_seen: - r_serv_metadata.hset('{}_metadata_{}:{}'.format(self.correlation_name, subtype, obj_id), 'last_seen', date) - - def save_item_correlation(self, subtype, date, obj_id, item_id, item_date): - update_correlation_daterange(subtype, obj_id, item_date) - - # global set - r_serv_metadata.sadd('set_{}_{}:{}'.format(self.correlation_name, subtype, obj_id), item_id) - - # daily - r_serv_metadata.hincrby('{}:{}:{}'.format(self.correlation_name, subtype, item_date), obj_id, 1) - - # all type - r_serv_metadata.zincrby('{}_all:{}'.format(self.correlation_name, subtype), obj_id, 1) - - ## object_metadata - # item - r_serv_metadata.sadd('item_{}_{}:{}'.format(self.correlation_name, subtype, item_id), obj_id) - - # domain - if Item.is_crawled(item_id): - domain = Item.get_item_domain(item_id) - self.save_domain_correlation(domain, subtype, obj_id) - - def save_domain_correlation(self, domain, subtype, obj_id): - r_serv_metadata.sadd('domain_{}_{}:{}'.format(self.correlation_name, subtype, domain), obj_id) - r_serv_metadata.sadd('set_domain_{}_{}:{}'.format(self.correlation_name, subtype, obj_id), domain) - - - def save_correlation(self, subtype, obj_id, date_range): - r_serv_metadata.zincrby('{}_all:{}'.format(self.correlation_name, subtype), obj_id, 0) - self.update_correlation_daterange(subtype, obj_id, date_range['date_from']) - if date_range['date_from'] != date_range['date_to']: - self.update_correlation_daterange(subtype, obj_id, date_range['date_to']) - return True - - def create_correlation(self, subtype, obj_id, obj_meta): - res = self.sanythise_correlation_types([subtype], r_boolean=True) - if not res: - print('invalid subtype') 
- return False - first_seen = obj_meta.get('first_seen', None) - last_seen = obj_meta.get('last_seen', None) - date_range = Date.sanitise_date_range(first_seen, last_seen, separator='', date_type='datetime') - print(date_range) - res = self.save_correlation(subtype, obj_id, date_range) - if res and 'tags' in obj_meta: - # # TODO: handle mixed tags: taxonomies and Galaxies - Tag.api_add_obj_tags(tags=obj_meta['tags'], object_id=obj_id, object_type=self.get_correlation_obj_type()) - return True - -######## API EXPOSED ######## - - -######## ########
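Note: the new MispImport.unpack_file() above decides whether a MISP `file` object maps to an AIL `screenshot` or `decoded` object purely from the object's own references (a `screenshot-of` reference wins, otherwise `included-in` means decoded), then keys the object on its `sha256`/`sha1` attribute and passes the attachment payload (attribute.data) to Screenshot/Decoded. Below is a minimal standalone sketch of that dispatch rule; FakeReference and FakeObject are illustrative stand-ins, not PyMISP or AIL classes.

#!/usr/bin/env python3
# -*-coding:UTF-8 -*
# Standalone sketch of the relationship-based dispatch used by MispImport.unpack_file().
# FakeReference/FakeObject are illustrative stand-ins, not PyMISP or AIL classes.
from collections import namedtuple

FakeReference = namedtuple('FakeReference', ['object_uuid', 'relationship_type'])
FakeObject = namedtuple('FakeObject', ['uuid', 'ObjectReference'])

def get_obj_type_from_relationship(misp_obj):
    # mirrors the rule in the patch: 'screenshot-of' returns immediately,
    # 'included-in' marks the object as a decoded file
    obj_type = None
    for relation in misp_obj.ObjectReference:
        if relation.object_uuid == misp_obj.uuid:
            if relation.relationship_type == 'screenshot-of':
                return 'screenshot'
            if relation.relationship_type == 'included-in':
                obj_type = 'decoded'
    return obj_type

if __name__ == '__main__':
    uuid = '00000000-0000-0000-0000-000000000000'
    obj = FakeObject(uuid=uuid,
                     ObjectReference=[FakeReference(uuid, 'included-in')])
    print(get_obj_type_from_relationship(obj))  # -> decoded

On a real import, the matching object id then comes from the 'sha1' attribute (decoded) or the 'sha256' attribute (screenshot), and the attachment bytes are handed to Decoded.create_decoded() or Screenshot.create_screenshot().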