From 720ffa1c9c4df4c94a3d3455035c66ee3ef77fa7 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Thu, 26 Mar 2020 17:03:57 +0100 Subject: [PATCH] chg: [MISP export] export domain as domain-crawled object --- bin/export/MispExport.py | 14 +++++++------- bin/export/MispImport.py | 2 +- bin/lib/Domain.py | 33 +++++++++++++++++++++++++++++---- 3 files changed, 37 insertions(+), 12 deletions(-) diff --git a/bin/export/MispExport.py b/bin/export/MispExport.py index b35099f6..fe1fc304 100755 --- a/bin/export/MispExport.py +++ b/bin/export/MispExport.py @@ -75,23 +75,23 @@ def export_ail_item(item_id): tag_misp_object_attributes(l_obj_attr, dict_metadata['tags']) return obj -# # TODO: create domain-port-history object def export_domain(domain): domain_obj = Domain.Domain(domain) dict_metadata = domain_obj.get_domain_metadata(tags=True) - dict_metadata['ports'] = ['80', '223', '443'] # create domain-ip obj - obj = MISPObject('domain-ip', standalone=True) + obj = MISPObject('domain-crawled', standalone=True) obj.first_seen = dict_metadata['first_seen'] obj.last_seen = dict_metadata['last_check'] l_obj_attr = [] - l_obj_attr.append( obj.add_attribute('first-seen', value=dict_metadata['first_seen']) ) - l_obj_attr.append( obj.add_attribute('last-seen', value=dict_metadata['last_check']) ) l_obj_attr.append( obj.add_attribute('domain', value=domain) ) - for port in dict_metadata['ports']: - l_obj_attr.append( obj.add_attribute('port', value=port) ) + dict_all_url = Domain.get_domain_all_url(domain, domain_obj.get_domain_type()) + for crawled_url in dict_all_url: + attribute = obj.add_attribute('url', value=crawled_url) + attribute.first_seen = str(dict_all_url[crawled_url]['first_seen']) + attribute.last_seen = str(dict_all_url[crawled_url]['last_seen']) + l_obj_attr.append( attribute ) # add tags if dict_metadata['tags']: diff --git a/bin/export/MispImport.py b/bin/export/MispImport.py index d371de85..d62a486f 100755 --- a/bin/export/MispImport.py +++ b/bin/export/MispImport.py @@ -190,7 +190,7 @@ def unpack_file(map_uuid_global_id, misp_obj): def get_misp_import_fct(map_uuid_global_id, misp_obj): if misp_obj.name == 'ail-leak': unpack_item_obj(map_uuid_global_id, misp_obj) - elif misp_obj.name == 'domain-ip': + elif misp_obj.name == 'domain-crawled': pass elif misp_obj.name == 'pgp-meta': unpack_obj_pgp(map_uuid_global_id, misp_obj) diff --git a/bin/lib/Domain.py b/bin/lib/Domain.py index 8f2500e6..b8ba49e3 100755 --- a/bin/lib/Domain.py +++ b/bin/lib/Domain.py @@ -327,6 +327,28 @@ def get_all_domain_up_by_type(domain_type): else: return ({"status": "error", "reason": "Invalid domain type"}, 400) +def get_domain_all_url(domain, domain_type, domain_ports=None): + if not domain_ports: + domain_ports = get_domain_all_ports(domain, domain_type) + all_url = {} + for port in domain_ports: + for dict_history in get_domain_history_with_status(domain, domain_type, port, add_root_item=True): + if dict_history['status']: # domain UP + crawled_items = get_domain_items(domain, dict_history['root_item']) + for item_id in crawled_items: + item_url = Item.get_item_link(item_id) + item_date = int(Item.get_item_date(item_id)) + if item_url: + if item_url not in all_url: + all_url[item_url] = {'first_seen': item_date,'last_seen': item_date} + else: # update first_seen / last_seen + if item_date < all_url[item_url]['first_seen']: + all_url[item_url]['first_seen'] = item_date + if item_date > all_url[item_url]['last_seen']: + all_url[item_url]['last_seen'] = item_date + return all_url + + def get_domain_items(domain, root_item_id): dom_item = get_domain_item_children(domain, root_item_id) dom_item.append(root_item_id) @@ -605,7 +627,7 @@ def get_domain_history(domain, domain_type, port): # TODO: add date_range: from ''' return r_serv_onion.zrange('crawler_history_{}:{}:{}'.format(domain_type, domain, port), 0, -1, withscores=True) -def get_domain_history_with_status(domain, domain_type, port): # TODO: add date_range: from to + nb_elem +def get_domain_history_with_status(domain, domain_type, port, add_root_item=False): # TODO: add date_range: from to + nb_elem ''' Retun . @@ -619,14 +641,17 @@ def get_domain_history_with_status(domain, domain_type, port): # TODO: add date_ history = get_domain_history(domain, domain_type, port) for root_item, epoch_val in history: epoch_val = int(epoch_val) # force int + dict_history = {"epoch": epoch_val, "date": time.strftime('%Y/%m/%d - %H:%M.%S', time.gmtime(epoch_val))} # domain down, root_item==epoch_val try: int(root_item) - status = False + dict_history['status'] = False # domain up, root_item=str except ValueError: - status = True - l_history.append({"epoch": epoch_val, "date": time.strftime('%Y/%m/%d - %H:%M.%S', time.gmtime(epoch_val)), "status": status}) + dict_history['status'] = True + if add_root_item: + dict_history['root_item'] = root_item + l_history.append(dict_history) return l_history def verify_if_domain_exist(domain):