diff --git a/OVERVIEW.md b/OVERVIEW.md index 40eefa41..f677da42 100644 --- a/OVERVIEW.md +++ b/OVERVIEW.md @@ -198,8 +198,6 @@ Redis and ARDB overview ##### Hset: | Key | Field | Value | | ------ | ------ | ------ | -| per_paste_**epoch** | **term** | **nb_seen** | -| | | | tag_metadata:**tag** | first_seen | **date** | | tag_metadata:**tag** | last_seen | **date** | @@ -207,13 +205,20 @@ Redis and ARDB overview | Key | Value | | ------ | ------ | | list_tags | **tag** | +| list_tags:**object_type** | **tag** | +| list_tags:domain | **tag** | +|| | active_taxonomies | **taxonomie** | | active_galaxies | **galaxie** | | active_tag_**taxonomie or galaxy** | **tag** | | synonym_tag_misp-galaxy:**galaxy** | **tag synonym** | | list_export_tags | **user_tag** | +|| | **tag**:**date** | **paste** | - +| **object_type**:**tag** | **object_id** | +|| +| DB7 | +| tag:**object_id** | **tag** | ##### old: | Key | Value | diff --git a/bin/Tags.py b/bin/Tags.py index a707d259..88e0ef0e 100755 --- a/bin/Tags.py +++ b/bin/Tags.py @@ -8,29 +8,11 @@ The Tags Module This module create tags. """ -import redis - import time -import datetime from pubsublogger import publisher from Helper import Process -from packages import Paste -from packages import Item - - -def get_item_date(item_filename): - l_directory = item_filename.split('/') - return '{}{}{}'.format(l_directory[-4], l_directory[-3], l_directory[-2]) - -def set_tag_metadata(tag, date): - # First time we see this tag ## TODO: filter paste from the paste ? 
- if not server.hexists('tag_metadata:{}'.format(tag), 'first_seen'): - server.hset('tag_metadata:{}'.format(tag), 'first_seen', date) - # Check and Set tag last_seen - last_seen = server.hget('tag_metadata:{}'.format(tag), 'last_seen') - if last_seen is None or date > last_seen: - server.hset('tag_metadata:{}'.format(tag), 'last_seen', date) +from packages import Tag if __name__ == '__main__': @@ -45,18 +27,6 @@ if __name__ == '__main__': # Setup the I/O queues p = Process(config_section) - server = redis.StrictRedis( - host=p.config.get("ARDB_Tags", "host"), - port=p.config.get("ARDB_Tags", "port"), - db=p.config.get("ARDB_Tags", "db"), - decode_responses=True) - - server_metadata = redis.StrictRedis( - host=p.config.get("ARDB_Metadata", "host"), - port=p.config.get("ARDB_Metadata", "port"), - db=p.config.get("ARDB_Metadata", "db"), - decode_responses=True) - # Sent to the logging a description of the module publisher.info("Tags module started") @@ -71,27 +41,8 @@ if __name__ == '__main__': continue else: - tag, path = message.split(';') - # add the tag to the tags word_list - res = server.sadd('list_tags', tag) - if res == 1: - print("new tags added : {}".format(tag)) - # add the path to the tag set - #curr_date = datetime.date.today().strftime("%Y%m%d") - item_date = get_item_date(path) - res = server.sadd('{}:{}'.format(tag, item_date), path) - if res == 1: - print("new paste: {}".format(path)) - print(" tagged: {}".format(tag)) - set_tag_metadata(tag, item_date) - server_metadata.sadd('tag:{}'.format(path), tag) + print(message) + tag, item_id = message.split(';') - # Domain Object - if Item.is_crawled(path) and tag!='infoleak:submission="crawler"': - domain = Item.get_item_domain(path) - server_metadata.sadd('tag:{}'.format(domain), tag) - server.sadd('domain:{}:{}'.format(tag, item_date), domain) - - curr_date = datetime.date.today().strftime("%Y%m%d") - server.hincrby('daily_tags:{}'.format(item_date), tag, 1) + Tag.add_tag("item", tag, item_id) 
p.populate_set_out(message, 'MISP_The_Hive_feeder') diff --git a/bin/lib/Correlate_object.py b/bin/lib/Correlate_object.py index df483bbc..32cb382a 100755 --- a/bin/lib/Correlate_object.py +++ b/bin/lib/Correlate_object.py @@ -23,6 +23,15 @@ config_loader = ConfigLoader.ConfigLoader() r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") config_loader = None +def is_valid_object_type(object_type): + if object_type in ['domain', 'item', 'image']: + return True + else: + return False + +def get_all_objects(): + return ['domain', 'paste', 'pgp', 'cryptocurrency', 'decoded', 'screenshot'] + def get_all_correlation_names(): ''' Return a list of all available correlations @@ -178,11 +187,21 @@ def get_item_url(correlation_name, value, correlation_type=None): elif correlation_name == 'domain': endpoint = 'crawler_splash.showDomain' url = url_for(endpoint, domain=value) - elif correlation_name == 'paste': + elif correlation_name == 'item': + endpoint = 'showsavedpastes.showsavedpaste' + url = url_for(endpoint, paste=value) + elif correlation_name == 'paste': ### # TODO: remove me endpoint = 'showsavedpastes.showsavedpaste' url = url_for(endpoint, paste=value) return url +def get_obj_tag_table_keys(object_type): + ''' + Warning: use only in flask (dynamic templates) + ''' + if object_type=="domain": + return ['id', 'first_seen', 'last_check', 'status'] # # TODO: add root screenshot + def create_graph_links(links_set): graph_links_list = [] @@ -310,6 +329,7 @@ def get_graph_node_object_correlation(object_type, root_value, mode, correlation ######## API EXPOSED ######## - - +def sanitize_object_type(object_type): + if not is_valid_object_type(object_type): + return ({'status': 'error', 'reason': 'Incorrect object_type'}, 400) ######## ######## diff --git a/bin/lib/Domain.py b/bin/lib/Domain.py index c2ab508e..e6cd5472 100755 --- a/bin/lib/Domain.py +++ b/bin/lib/Domain.py @@ -292,7 +292,7 @@ def get_domain_items_crawled(domain, domain_type, port, epoch=None, 
items_link=F if item_screenshot: dict_item['screenshot'] = Item.get_item_screenshot(item) if item_tag: - dict_item['tags'] = Tag.get_item_tags_minimal(item) + dict_item['tags'] = Tag.get_obj_tags_minimal(item) item_crawled['items'].append(dict_item) return item_crawled @@ -365,7 +365,7 @@ def get_domain_tags(domain): :param domain: crawled domain ''' - return Tag.get_item_tags(domain) + return Tag.get_obj_tag(domain) def get_domain_metadata(domain, domain_type, first_seen=True, last_ckeck=True, status=True, ports=True, tags=False): ''' diff --git a/bin/lib/Screenshot.py b/bin/lib/Screenshot.py index fa85c5f2..d646d839 100755 --- a/bin/lib/Screenshot.py +++ b/bin/lib/Screenshot.py @@ -43,13 +43,16 @@ def get_screenshot_items_list(sha256_string): else: return [] +def get_item_screenshot(item_id): + return r_serv_metadata.hget('paste_metadata:{}'.format(item_id), 'screenshot') + def get_item_screenshot_list(item_id): ''' Retun all decoded item of a given item id. :param item_id: item id ''' - screenshot = r_serv_metadata.hget('paste_metadata:{}'.format(item_id), 'screenshot') + screenshot = get_item_screenshot(item_id) if screenshot: return [screenshot] else: diff --git a/bin/packages/Date.py b/bin/packages/Date.py index 6b44c942..74563cb5 100644 --- a/bin/packages/Date.py +++ b/bin/packages/Date.py @@ -79,6 +79,9 @@ class Date(object): comp_day = str(computed_date.day).zfill(2) return comp_year + comp_month + comp_day +def get_today_date_str(): + return datetime.date.today().strftime("%Y%m%d") + def date_add_day(date, num_day=1): new_date = datetime.date(int(date[0:4]), int(date[4:6]), int(date[6:8])) + datetime.timedelta(num_day) new_date = str(new_date).replace('-', '') diff --git a/bin/packages/Item.py b/bin/packages/Item.py index 156115b0..b1722209 100755 --- a/bin/packages/Item.py +++ b/bin/packages/Item.py @@ -104,7 +104,7 @@ def get_item(request_dict): dict_item['date'] = get_item_date(item_id, add_separator=add_separator) tags = request_dict.get('tags', True) 
if tags: - dict_item['tags'] = Tag.get_item_tags(item_id) + dict_item['tags'] = Tag.get_obj_tag(item_id) size = request_dict.get('size', False) if size: @@ -242,7 +242,7 @@ def get_item_pgp_correlation(item_id): def get_item_list_desc(list_item_id): desc_list = [] for item_id in list_item_id: - desc_list.append( {'id': item_id, 'date': get_item_date(item_id), 'tags': Tag.get_item_tags(item_id)} ) + desc_list.append( {'id': item_id, 'date': get_item_date(item_id), 'tags': Tag.get_obj_tag(item_id)} ) return desc_list # # TODO: add an option to check the tag diff --git a/bin/packages/Tag.py b/bin/packages/Tag.py index d523bb9e..4e86c2aa 100755 --- a/bin/packages/Tag.py +++ b/bin/packages/Tag.py @@ -4,13 +4,15 @@ import os import sys import redis +import datetime +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) import Date import Item sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) import ConfigLoader -import Domain +import Correlate_object from pytaxonomies import Taxonomies from pymispgalaxies import Galaxies, Clusters @@ -35,6 +37,19 @@ def build_unsafe_tags(): # set of unsafe tags unsafe_tags = build_unsafe_tags() +def is_tags_safe(ltags): + ''' + Check if a list of tags contain an unsafe tag (CE, ...) 
+ + :param ltags: list of tags + :type ltags: list + :return: is a tag in the unsafe set + :rtype: boolean + ''' + return unsafe_tags.isdisjoint(ltags) + +#### Taxonomies - Galaxies #### + def get_taxonomie_from_tag(tag): return tag.split(':')[0] @@ -49,6 +64,12 @@ def get_active_taxonomies(): def get_active_galaxies(): return r_serv_tags.smembers('active_galaxies') +def get_all_taxonomies_tags(): # # TODO: add + REMOVE + Update + return r_serv_tags.smembers('active_taxonomies_tags') + +def get_all_galaxies_tags(): # # TODO: add + REMOVE + Update + return r_serv_tags.smembers('active_galaxies_tags') + def is_taxonomie_tag_enabled(taxonomie, tag): if tag in r_serv_tags.smembers('active_tag_' + taxonomie): return True @@ -79,6 +100,7 @@ def enable_taxonomy(taxonomie, enable_tags=True): # activate taxonomie tags for tag in taxonomie_info.machinetags(): r_serv_tags.sadd('active_tag_{}'.format(taxonomie), tag) + #r_serv_tags.sadd('active_taxonomies_tags', tag) else: print('Error: {}, please update pytaxonomies'.format(taxonomie)) @@ -105,21 +127,7 @@ def is_valid_tags_taxonomies_galaxy(list_tags, list_tags_galaxy): return False return True -def get_tag_metadata(tag): - first_seen = r_serv_tags.hget('tag_metadata:{}'.format(tag), 'first_seen') - last_seen = r_serv_tags.hget('tag_metadata:{}'.format(tag), 'last_seen') - return {'tag': tag, 'first_seen': first_seen, 'last_seen': last_seen} - -def is_tags_safe(ltags): - ''' - Check if a list of tags contain an unsafe tag (CE, ...) - - :param ltags: list of tags - :type ltags: list - :return: is a tag in the unsafe set - :rtype: boolean - ''' - return unsafe_tags.isdisjoint(ltags) +#### #### def is_tag_in_all_tag(tag): if r_serv_tags.sismember('list_tags', tag): @@ -127,20 +135,6 @@ def is_tag_in_all_tag(tag): else: return False -def get_all_tags(): - return list(r_serv_tags.smembers('list_tags')) - -def get_item_tags(item_id): - ''' - Retun all the tags of a given item. 
- :param item_id: (Paste or domain) - ''' - tags = r_serv_metadata.smembers('tag:{}'.format(item_id)) - if tags: - return list(tags) - else: - return [] - def get_min_tag(tag): tag = tag.split('=') if len(tag) > 1: @@ -154,8 +148,8 @@ def get_min_tag(tag): tag = tag[0] return tag -def get_item_tags_minimal(item_id): - return [ {"tag": tag, "min_tag": get_min_tag(tag)} for tag in get_item_tags(item_id) ] +def get_obj_tags_minimal(item_id): + return [ {"tag": tag, "min_tag": get_min_tag(tag)} for tag in get_obj_tag(item_id) ] def unpack_str_tags_list(str_tags_list): str_tags_list = str_tags_list.replace('"','\"') @@ -164,159 +158,92 @@ def unpack_str_tags_list(str_tags_list): else: return [] +# used by modal +def get_modal_add_tags(item_id, object_type='item'): + ''' + Modal: add tags to domain or Paste + ''' + return {"active_taxonomies": get_active_taxonomies(), "active_galaxies": get_active_galaxies(), + "object_id": item_id, "object_type": object_type} -# TEMPLATE + API QUERY -def add_items_tag(tags=[], galaxy_tags=[], item_id=None): ## TODO: remove me - res_dict = {} - if item_id == None: - return ({'status': 'error', 'reason': 'Item id not found'}, 404) - if not tags and not galaxy_tags: - return ({'status': 'error', 'reason': 'Tags or Galaxy not specified'}, 400) - - res_dict['tags'] = [] - for tag in tags: - taxonomie = get_taxonomie_from_tag(tag) - if is_taxonomie_tag_enabled(taxonomie, tag): - add_item_tag(tag, item_id) - res_dict['tags'].append(tag) +######## NEW VERSION ######## +def get_tag_first_seen(tag, r_int=False): + ''' + Get tag first seen (current: item only) + ''' + res = r_serv_tags.hget('tag_metadata:{}'.format(tag), 'first_seen') + if r_int: + if res is None: + return 99999999 else: - return ({'status': 'error', 'reason': 'Tags or Galaxy not enabled'}, 400) + return int(res) + return res - for tag in galaxy_tags: - galaxy = get_galaxy_from_tag(tag) - if is_galaxy_tag_enabled(galaxy, tag): - add_item_tag(tag, item_id) - 
res_dict['tags'].append(tag) +def get_tag_last_seen(tag, r_int=False): + ''' + Get tag last seen (current: item only) + ''' + res = r_serv_tags.hget('tag_metadata:{}'.format(tag), 'last_seen') + if r_int: + if res is None: + return 0 else: - return ({'status': 'error', 'reason': 'Tags or Galaxy not enabled'}, 400) + return int(res) + return res - res_dict['id'] = item_id - return (res_dict, 200) +def get_tag_metadata(tag, r_int=False): + ''' + Get tag metadata (current: item only) + ''' + tag_metadata = {"tag": tag} + tag_metadata['first_seen'] = get_tag_first_seen(tag) + tag_metadata['last_seen'] = get_tag_last_seen(tag) + return tag_metadata - -# TEMPLATE + API QUERY -def add_items_tags(tags=[], galaxy_tags=[], item_id=None, item_type="paste"): - res_dict = {} - if item_id == None: - return ({'status': 'error', 'reason': 'Item id not found'}, 404) - if not tags and not galaxy_tags: - return ({'status': 'error', 'reason': 'Tags or Galaxy not specified'}, 400) - if item_type not in ('paste', 'domain'): - return ({'status': 'error', 'reason': 'Incorrect item_type'}, 400) - - res_dict['tags'] = [] - for tag in tags: - if tag: - taxonomie = get_taxonomie_from_tag(tag) - if is_taxonomie_tag_enabled(taxonomie, tag): - add_item_tag(tag, item_id, item_type=item_type) - res_dict['tags'].append(tag) - else: - return ({'status': 'error', 'reason': 'Tags or Galaxy not enabled'}, 400) - - for tag in galaxy_tags: - if tag: - galaxy = get_galaxy_from_tag(tag) - if is_galaxy_tag_enabled(galaxy, tag): - add_item_tag(tag, item_id, item_type=item_type) - res_dict['tags'].append(tag) - else: - return ({'status': 'error', 'reason': 'Tags or Galaxy not enabled'}, 400) - - res_dict['id'] = item_id - res_dict['type'] = item_type - return (res_dict, 200) - -def add_domain_tag(tag, domain, item_date): - r_serv_tags.sadd('list_tags:domain', tag) - r_serv_metadata.sadd('tag:{}'.format(domain), tag) - r_serv_tags.sadd('domain:{}:{}'.format(tag, item_date), domain) - -def add_item_tag(tag, 
item_path, item_type="paste", tag_date=None): - - if item_type=="paste": - item_date = int(Item.get_item_date(item_path)) - - #add tag - r_serv_metadata.sadd('tag:{}'.format(item_path), tag) - r_serv_tags.sadd('{}:{}'.format(tag, item_date), item_path) - - if Item.is_crawled(item_path): - domain = Item.get_item_domain(item_path) - r_serv_metadata.sadd('tag:{}'.format(domain), tag) - r_serv_tags.sadd('domain:{}:{}'.format(tag, item_date), domain) - # domain item +def get_tags_min_last_seen(l_tags, r_int=False): + ''' + Get max last seen from a list of tags (current: item only) + ''' + min_last_seen = 99999999 + for tag in l_tags: + last_seen = get_tag_last_seen(tag, r_int=True) + if last_seen < min_last_seen: + min_last_seen = last_seen + if r_int: + return min_last_seen else: - item_date = int(Domain.get_domain_last_check(item_path, r_format="int")) - add_domain_tag(tag, item_path, item_date) + return str(min_last_seen) - r_serv_tags.hincrby('daily_tags:{}'.format(item_date), tag, 1) +def is_obj_tagged(object_id, tag): + ''' + Check if a object is tagged - tag_first_seen = r_serv_tags.hget('tag_metadata:{}'.format(tag), 'last_seen') - if tag_first_seen is None: - tag_first_seen = 99999999 + :param object_id: object id + :type domain: str + :param tag: object type + :type domain: str + + :return: is object tagged + :rtype: boolean + ''' + return r_serv_metadata.sismember('tag:{}'.format(object_id), tag) + +def get_all_tags(): + return list(r_serv_tags.smembers('list_tags')) + +def get_all_obj_tags(object_type): + return list(r_serv_tags.smembers('list_tags:{}'.format(object_type))) + +def get_obj_tag(object_id): + ''' + Retun all the tags of a given object. + :param object_id: (item_id, domain, ...) 
+ ''' + res = r_serv_metadata.smembers('tag:{}'.format(object_id)) + if res: + return list(res) else: - tag_first_seen = int(tag_first_seen) - tag_last_seen = r_serv_tags.hget('tag_metadata:{}'.format(tag), 'last_seen') - if tag_last_seen is None: - tag_last_seen = 0 - else: - tag_last_seen = int(tag_last_seen) - - #add new tag in list of all used tags - r_serv_tags.sadd('list_tags', tag) - - # update fisrt_seen/last_seen - if item_date < tag_first_seen: - r_serv_tags.hset('tag_metadata:{}'.format(tag), 'first_seen', item_date) - - # update metadata last_seen - if item_date > tag_last_seen: - r_serv_tags.hset('tag_metadata:{}'.format(tag), 'last_seen', item_date) - -# API QUERY -def remove_item_tags(tags=[], item_id=None): - if item_id == None: - return ({'status': 'error', 'reason': 'Item id not found'}, 404) - if not tags: - return ({'status': 'error', 'reason': 'No Tag(s) specified'}, 400) - - dict_res = {} - dict_res['tags'] = [] - for tag in tags: - res = remove_item_tag(tag, item_id) - if res[1] != 200: - return res - else: - dict_res['tags'].append(tag) - dict_res['id'] = item_id - return (dict_res, 200) - -# TEMPLATE + API QUERY -def remove_item_tag(tag, item_id): - item_date = int(Item.get_item_date(item_id)) - - #remove tag - r_serv_metadata.srem('tag:{}'.format(item_id), tag) - res = r_serv_tags.srem('{}:{}'.format(tag, item_date), item_id) - - if res ==1: - # no tag for this day - if int(r_serv_tags.hget('daily_tags:{}'.format(item_date), tag)) == 1: - r_serv_tags.hdel('daily_tags:{}'.format(item_date), tag) - else: - r_serv_tags.hincrby('daily_tags:{}'.format(item_date), tag, -1) - - tag_first_seen = int(r_serv_tags.hget('tag_metadata:{}'.format(tag), 'last_seen')) - tag_last_seen = int(r_serv_tags.hget('tag_metadata:{}'.format(tag), 'last_seen')) - # update fisrt_seen/last_seen - if item_date == tag_first_seen: - update_tag_first_seen(tag, tag_first_seen, tag_last_seen) - if item_date == tag_last_seen: - update_tag_last_seen(tag, tag_first_seen, 
tag_last_seen) - return ({'status': 'success'}, 200) - else: - return ({'status': 'error', 'reason': 'Item id or tag not found'}, 400) + return [] def update_tag_first_seen(tag, tag_first_seen, tag_last_seen): if tag_first_seen == tag_last_seen: @@ -324,7 +251,6 @@ def update_tag_first_seen(tag, tag_first_seen, tag_last_seen): r_serv_tags.hset('tag_metadata:{}'.format(tag), 'first_seen', tag_first_seen) # no tag in db else: - r_serv_tags.srem('list_tags', tag) r_serv_tags.hdel('tag_metadata:{}'.format(tag), 'first_seen') r_serv_tags.hdel('tag_metadata:{}'.format(tag), 'last_seen') else: @@ -340,7 +266,6 @@ def update_tag_last_seen(tag, tag_first_seen, tag_last_seen): r_serv_tags.hset('tag_metadata:{}'.format(tag), 'last_seen', tag_last_seen) # no tag in db else: - r_serv_tags.srem('list_tags', tag) r_serv_tags.hdel('tag_metadata:{}'.format(tag), 'first_seen') r_serv_tags.hdel('tag_metadata:{}'.format(tag), 'last_seen') else: @@ -350,11 +275,285 @@ def update_tag_last_seen(tag, tag_first_seen, tag_last_seen): tag_last_seen = Date.date_substract_day(tag_last_seen) update_tag_last_seen(tag, tag_first_seen, tag_last_seen) +def update_tag_metadata(tag, tag_date, object_type=None, add_tag=True): + ''' + Update tag metadata (current: item only) + ''' + if object_type=="item": + # get object metadata + tag_metadata = get_tag_metadata(tag, r_int=True) + ############# + ## ADD tag ## + if add_tag: + # update fisrt_seen + if tag_date < tag_metadata['first_seen']: + r_serv_tags.hset('tag_metadata:{}'.format(tag), 'first_seen', tag_date) + # update last_seen + if tag_date > tag_metadata['last_seen']: + r_serv_tags.hset('tag_metadata:{}'.format(tag), 'last_seen', tag_date) + ################ + ## REMOVE tag ## + else: + if tag_date == tag_metadata['first_seen']: + update_tag_first_seen(object_type, tag, tag_metadata['first_seen'], tag_metadata['last_seen']) + if tag_date == tag_metadata['last_seen']: + update_tag_last_seen(tag, tag_metadata['first_seen'], 
tag_metadata['last_seen']) -# used by modal -def get_modal_add_tags(item_id, tag_type='paste'): +def update_tag_global_by_obj_type(object_type, tag): + tag_deleted = False + if object_type=='item': + if not r_serv_tags.exists('tag_metadata:{}'.format(tag)): + tag_deleted = True + else: + if not r_serv_tags.exists('{}:{}'.format(object_type, tag)): + tag_deleted = True + if tag_deleted: + # update object global tags + r_serv_tags.srem('list_tags:{}'.format(object_type), tag) + # update global tags + for obj_type in Correlate_object.get_all_objects(): + if r_serv_tags.exists('{}:{}'.format(obj_type, tag)): + tag_deleted = False + if tag_deleted: + r_serv_tags.srem('list_tags', tag) + +def add_global_tag(tag, object_type=None): ''' - Modal: add tags to domain or Paste + Create a set of all tags used in AIL (all + by object) + + :param tag: tag + :type domain: str + :param object_type: object type + :type domain: str ''' - return {"active_taxonomies": get_active_taxonomies(), "active_galaxies": get_active_galaxies(), - "item_id": item_id, "type": tag_type} + r_serv_tags.sadd('list_tags', tag) + if object_type: + r_serv_tags.sadd('list_tags:{}'.format(object_type), tag) + +def add_obj_tags(object_id, object_type, tags=[], galaxy_tags=[]): + obj_date = get_obj_date(object_type, object_id) + for tag in tags: + if tag: + taxonomie = get_taxonomie_from_tag(tag) + if is_taxonomie_tag_enabled(taxonomie, tag): + add_tag(object_type, tag, object_id, obj_date=obj_date) + else: + return ({'status': 'error', 'reason': 'Tags or Galaxy not enabled', 'value': tag}, 400) + + for tag in galaxy_tags: + if tag: + galaxy = get_galaxy_from_tag(tag) + if is_galaxy_tag_enabled(galaxy, tag): + add_tag(object_type, tag, object_id, obj_date=obj_date) + else: + return ({'status': 'error', 'reason': 'Tags or Galaxy not enabled', 'value': tag}, 400) + +# TEMPLATE + API QUERY +def api_add_obj_tags(tags=[], galaxy_tags=[], object_id=None, object_type="item"): + res_dict = {} + if object_id == None: 
+ return ({'status': 'error', 'reason': 'object_id id not found'}, 404) + if not tags and not galaxy_tags: + return ({'status': 'error', 'reason': 'Tags or Galaxy not specified'}, 400) + if object_type not in ('item', 'domain'): # # TODO: put me in another file + return ({'status': 'error', 'reason': 'Incorrect object_type'}, 400) + + # remove empty tags + tags = list(filter(bool, tags)) + galaxy_tags = list(filter(bool, galaxy_tags)) + + res = add_obj_tags(object_id, object_type, tags=tags, galaxy_tags=galaxy_tags) + if res: + return res + + res_dict['tags'] = tags + galaxy_tags + res_dict['id'] = object_id + res_dict['type'] = object_type + return (res_dict, 200) + +def add_obj_tag(object_type, object_id, tag, obj_date=None): + if object_type=="item": # # TODO: # FIXME: # REVIEW: rename me !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + if obj_date is None: + raise ValueError("obj_date is None") + + # add tag + r_serv_metadata.sadd('tag:{}'.format(object_id), tag) + r_serv_tags.sadd('{}:{}'.format(tag, obj_date), object_id) + + # add domain tag + if Item.is_crawled(object_id) and tag!='infoleak:submission="crawler"' and tag != 'infoleak:submission="manual"': + domain = Item.get_item_domain(object_id) + add_tag("domain", tag, domain) + else: + r_serv_metadata.sadd('tag:{}'.format(object_id), tag) + r_serv_tags.sadd('{}:{}'.format(object_type, tag), object_id) + +def add_tag(object_type, tag, object_id, obj_date=None): + # new tag + if not is_obj_tagged(object_id, tag): + # # TODO: # FIXME: sanityze object_type + if not obj_date: + obj_date = get_obj_date(object_type, object_id) + add_global_tag(tag, object_type=object_type) + add_obj_tag(object_type, object_id, tag, obj_date=obj_date) + update_tag_metadata(tag, obj_date) + + # create tags stats # # TODO: put me in cache + r_serv_tags.hincrby('daily_tags:{}'.format(datetime.date.today().strftime("%Y%m%d")), tag, 1) + +def delete_obj_tag(object_type, object_id, tag, obj_date): + if object_type=="item": # # TODO: # 
FIXME: # REVIEW: rename me !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + obj_date = get_obj_date(object_type, object_id) + r_serv_metadata.srem('tag:{}'.format(object_id), tag) + r_serv_tags.srem('{}:{}'.format(tag, obj_date), object_id) + else: + r_serv_metadata.srem('tag:{}'.format(object_id), tag) + r_serv_tags.srem('{}:{}'.format(object_type, tag), object_id) + +def delete_tag(object_type, tag, object_id, obj_date=None): + # tag exist + if is_obj_tagged(object_id, tag): + if not obj_date: + obj_date = get_obj_date(object_type, object_id) + delete_obj_tag(object_type, object_id, tag, obj_date) + update_tag_metadata(tag, obj_date, object_type=object_type, add_tag=False) + update_tag_global_by_obj_type(object_type, tag) + + else: + return ({'status': 'error', 'reason': 'object id or tag not found', 'value': tag}, 400) + +# API QUERY +def api_delete_obj_tags(tags=[], object_id=None, object_type="item"): + if not object_id: + return ({'status': 'error', 'reason': 'object id not found'}, 404) + if not tags: + return ({'status': 'error', 'reason': 'No Tag(s) specified'}, 400) + + res = delete_obj_tags(object_id, object_type, tags=tags) + if res: + return res + + dict_res = {} + dict_res['tags'] = tags + dict_res['id'] = object_id + return (dict_res, 200) + +def delete_obj_tags(object_id, object_type, tags=[]): + obj_date = get_obj_date(object_type, object_id) + for tag in tags: + res = delete_tag(object_type, tag, object_id, obj_date=obj_date) + if res: + return res + +def sanitise_tags_date_range(l_tags, date_from=None, date_to=None): + if date_from and date_to is None: + date_from = get_tags_min_last_seen(l_tags, r_int=False) + date_to = date_from + return Date.sanitise_date_range(date_from, date_to) + + +# # TODO: verify tags + object_type +# get set_keys: intersection +def get_obj_keys_by_tags(object_type, l_tags, date_day=None): + l_set_keys = [] + if object_type=='item': + for tag in l_tags: + l_set_keys.append('{}:{}'.format(tag, date_day)) + else: + for 
tag in l_tags: + l_set_keys.append('{}:{}'.format(object_type, tag)) + return l_set_keys + +def get_obj_by_tag(key_tag): + return r_serv_tags.smembers(key_tag) + +def get_obj_by_tags(object_type, l_tags, date_from=None, date_to=None, nb_obj=50, page=1): # remove old object + # with daterange + l_tagged_obj = [] + if object_type=='item': + #sanityze date + date_range = sanitise_tags_date_range(l_tags, date_from=date_from, date_to=date_to) + l_dates = Date.substract_date(date_from, date_to) + + for date_day in l_dates: + l_set_keys = get_obj_keys_by_tags(object_type, l_tags, date_day) + # if len(l_set_keys) > nb_obj: + # return l_tagged_obj + if len(l_set_keys) < 2: + date_day_obj = get_obj_by_tag(l_set_keys[0]) + else: + date_day_obj = r_serv_tags.sinter(l_set_keys[0], *l_set_keys[1:]) + + # next_nb_start = len(l_tagged_obj) + len(date_day_obj) - nb_obj + # if next_nb_start > 0: + # get + filter nb_start + l_tagged_obj.extend( date_day_obj ) + + # handle pagination + nb_all_elem = len(l_tagged_obj) + nb_pages = nb_all_elem / nb_obj + if not nb_pages.is_integer(): + nb_pages = int(nb_pages)+1 + else: + nb_pages = int(nb_pages) + if page > nb_pages: + page = nb_pages + + # select index + start = nb_obj*(page -1) + stop = (nb_obj*page) -1 + l_tagged_obj = l_tagged_obj[start:stop] + + return {"tagged_obj":l_tagged_obj, "page":page, "nb_pages":nb_pages, "nb_first_elem":start+1, "nb_last_elem":stop+1, "nb_all_elem":nb_all_elem} + + # without daterange + else: + l_set_keys = get_obj_keys_by_tags(object_type, l_tags) + if len(l_set_keys) < 2: + l_tagged_obj = get_obj_by_tag(l_set_keys[0]) + else: + l_tagged_obj = r_serv_tags.sinter(l_set_keys[0], *l_set_keys[1:]) + + if not l_tagged_obj: + return {"tagged_obj":l_tagged_obj, "page":0, "nb_pages":0} + + # handle pagination + nb_all_elem = len(l_tagged_obj) + nb_pages = nb_all_elem / nb_obj + if not nb_pages.is_integer(): + nb_pages = int(nb_pages)+1 + else: + nb_pages = int(nb_pages) + if page > nb_pages: + page = nb_pages + 
+ # multiple pages + if nb_pages > 1: + start = nb_obj*(page -1) + stop = (nb_obj*page) -1 + current_index = 0 + l_obj = [] + for elem in l_tagged_obj: + if current_index > stop: + break + if start <= current_index and stop >= current_index: + l_obj.append(elem) + current_index += 1 + l_tagged_obj = l_obj + stop += 1 + if stop > nb_all_elem: + stop = nb_all_elem + # only one page + else: + start = 0 + stop = nb_all_elem + l_tagged_obj = list(l_tagged_obj) + + return {"tagged_obj":l_tagged_obj, "page":page, "nb_pages":nb_pages, "nb_first_elem":start+1, "nb_last_elem":stop, "nb_all_elem":nb_all_elem} + + +def get_obj_date(object_type, object_id): # # TODO: move me in another file + if object_type == "item": + return Item.get_item_date(object_id) + else: + return None diff --git a/bin/submit_paste.py b/bin/submit_paste.py index cae9c0ed..1aec936a 100755 --- a/bin/submit_paste.py +++ b/bin/submit_paste.py @@ -16,6 +16,9 @@ import sflock from Helper import Process from pubsublogger import publisher +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) +import Tag + sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) import ConfigLoader @@ -50,10 +53,10 @@ def create_paste(uuid, paste_content, ltags, ltagsgalaxies, name): # add tags for tag in ltags: - add_item_tag(tag, rel_item_path) + Tag.add_tag('item', tag, rel_item_path) for tag in ltagsgalaxies: - add_item_tag(tag, rel_item_path) + Tag.add_tag('item', tag, rel_item_path) r_serv_log_submit.incr(uuid + ':nb_end') r_serv_log_submit.incr(uuid + ':nb_sucess') @@ -108,37 +111,6 @@ def get_item_date(item_filename): l_directory = item_filename.split('/') return '{}{}{}'.format(l_directory[-4], l_directory[-3], l_directory[-2]) -def add_item_tag(tag, item_path): - item_date = int(get_item_date(item_path)) - - #add tag - r_serv_metadata.sadd('tag:{}'.format(item_path), tag) - r_serv_tags.sadd('{}:{}'.format(tag, item_date), item_path) - - r_serv_tags.hincrby('daily_tags:{}'.format(item_date), tag, 1) - 
- tag_first_seen = r_serv_tags.hget('tag_metadata:{}'.format(tag), 'last_seen') - if tag_first_seen is None: - tag_first_seen = 99999999 - else: - tag_first_seen = int(tag_first_seen) - tag_last_seen = r_serv_tags.hget('tag_metadata:{}'.format(tag), 'last_seen') - if tag_last_seen is None: - tag_last_seen = 0 - else: - tag_last_seen = int(tag_last_seen) - - #add new tag in list of all used tags - r_serv_tags.sadd('list_tags', tag) - - # update fisrt_seen/last_seen - if item_date < tag_first_seen: - r_serv_tags.hset('tag_metadata:{}'.format(tag), 'first_seen', item_date) - - # update metadata last_seen - if item_date > tag_last_seen: - r_serv_tags.hset('tag_metadata:{}'.format(tag), 'last_seen', item_date) - def verify_extention_filename(filename): if not '.' in filename: return True diff --git a/bin/update-background.py b/bin/update-background.py index a75eb60d..d1ec6eaf 100755 --- a/bin/update-background.py +++ b/bin/update-background.py @@ -86,11 +86,31 @@ if __name__ == "__main__": update_file = os.path.join(os.environ['AIL_HOME'], 'update', new_version, 'Update_screenshots.py') process = subprocess.run(['python' ,update_file]) - update_progress = r_serv_db.get('ail:current_background_script_stat') + update_progress = r_serv.get('ail:current_background_script_stat') if update_progress: if int(update_progress) == 100: r_serv.delete('ail:update_in_progress') r_serv.delete('ail:current_background_script') r_serv.delete('ail:current_background_script_stat') r_serv.delete('ail:current_background_update') - r_serv_db.srem('ail:to_update', new_version) + r_serv.srem('ail:to_update', new_version) + + elif r_serv.sismember('ail:to_update', 'v2.7'): + new_version = 'v2.7' + r_serv.delete('ail:update_error') + r_serv.delete('ail:current_background_script_stat') + r_serv.set('ail:update_in_progress', new_version) + r_serv.set('ail:current_background_update', new_version) + r_serv.set('ail:current_background_script', 'domain tags update') + + update_file = 
os.path.join(os.environ['AIL_HOME'], 'update', new_version, 'Update_domain_tags.py') + process = subprocess.run(['python' ,update_file]) + + update_progress = r_serv.get('ail:current_background_script_stat') + if update_progress: + if int(update_progress) == 100: + r_serv.delete('ail:update_in_progress') + r_serv.delete('ail:current_background_script') + r_serv.delete('ail:current_background_script_stat') + r_serv.delete('ail:current_background_update') + r_serv.srem('ail:to_update', new_version) diff --git a/tests/testApi.py b/tests/testApi.py index db65cdec..6d9fc191 100644 --- a/tests/testApi.py +++ b/tests/testApi.py @@ -128,7 +128,7 @@ class TestApiV1(unittest.TestCase): # POST api/v1/add/item/tag def test_0007_api_add_item_tag(self): tags_to_add = ["infoleak:analyst-detection=\"api-key\""] - current_item_tag = Tag.get_item_tags(self.__class__.item_id) + current_item_tag = Tag.get_obj_tag(self.__class__.item_id) current_item_tag.append(tags_to_add[0]) #galaxy_to_add = ["misp-galaxy:stealer=\"Vidar\""] @@ -138,7 +138,7 @@ class TestApiV1(unittest.TestCase): item_tags = req_json['tags'] self.assertEqual(item_tags, tags_to_add) - new_item_tag = Tag.get_item_tags(self.__class__.item_id) + new_item_tag = Tag.get_obj_tag(self.__class__.item_id) self.assertCountEqual(new_item_tag, current_item_tag) # DELETE api/v1/delete/item/tag @@ -149,7 +149,7 @@ class TestApiV1(unittest.TestCase): req_json = parse_response(self, req) item_tags = req_json['tags'] self.assertCountEqual(item_tags, tags_to_delete) - current_item_tag = Tag.get_item_tags(self.__class__.item_id) + current_item_tag = Tag.get_obj_tag(self.__class__.item_id) if tags_to_delete[0] in current_item_tag: self.fail('Tag no deleted') diff --git a/update/v1.5/Update-ARDB_Tags.py b/update/v1.5/Update-ARDB_Tags.py index f94fc03c..76baa414 100755 --- a/update/v1.5/Update-ARDB_Tags.py +++ b/update/v1.5/Update-ARDB_Tags.py @@ -65,6 +65,9 @@ if __name__ == '__main__': tag_metadata[tag]['last_seen'] = 
int(tag_metadata[tag]['last_seen']) nb_tags_to_update += r_serv_tag.scard(tag) + if nb_tags_to_update == 0: + nb_tags_to_update = 1 + for tag in tags_list: all_item = r_serv_tag.smembers(tag) diff --git a/update/v2.4/Update_domain.py b/update/v2.4/Update_domain.py index 94d3407a..addaedb6 100755 --- a/update/v2.4/Update_domain.py +++ b/update/v2.4/Update_domain.py @@ -28,9 +28,9 @@ def update_update_stats(): def update_domain_by_item(domain_obj, item_id): domain_name = domain_obj.get_domain_name() # update domain tags - for tag in Tag.get_item_tags(item_id): + for tag in Tag.get_obj_tag(item_id): if tag != 'infoleak:submission="crawler"' and tag != 'infoleak:submission="manual"': - Tag.add_domain_tag(tag, domain_name, Item.get_item_date(item_id)) + Tag.add_tag("domain", tag, domain_name, obj_date=Item.get_item_date(item_id)) # update domain correlation item_correlation = Item.get_item_all_correlation(item_id) diff --git a/update/v2.7/Update.py b/update/v2.7/Update.py new file mode 100755 index 00000000..1f4ead2c --- /dev/null +++ b/update/v2.7/Update.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import re +import sys +import time +import redis +import datetime + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader + +new_version = 'v2.7' + +if __name__ == '__main__': + + start_deb = time.time() + + config_loader = ConfigLoader.ConfigLoader() + r_serv = config_loader.get_redis_conn("ARDB_DB") + r_serv_tags = config_loader.get_redis_conn("ARDB_Tags") + r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") + config_loader = None + + #Set current update_in_progress + r_serv.set('ail:update_in_progress', new_version) + r_serv.set('ail:current_background_update', new_version) + + r_serv.sadd('ail:to_update', new_version) + + #### Update tags #### + r_serv_tags.sunionstore('list_tags:item', 'list_tags', []) + r_serv_onion.sunionstore('domain_update_v2.7', 'full_onion_up', []) + 
r_serv_onion.delete('incorrect_domain') + r_serv.set('ail:update_v2.7:deletetagrange', 1) + #### #### + + #Set current ail version + r_serv.set('ail:version', new_version) + + #Set current ail version + r_serv.hset('ail:update_date', new_version, datetime.datetime.now().strftime("%Y%m%d")) diff --git a/update/v2.7/Update.sh b/update/v2.7/Update.sh new file mode 100755 index 00000000..8f9a4efd --- /dev/null +++ b/update/v2.7/Update.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." 
&& exit 1; + +export PATH=$AIL_HOME:$PATH +export PATH=$AIL_REDIS:$PATH +export PATH=$AIL_ARDB:$PATH +export PATH=$AIL_BIN:$PATH +export PATH=$AIL_FLASK:$PATH + +GREEN="\\033[1;32m" +DEFAULT="\\033[0;39m" + +echo -e $GREEN"Shutting down AIL ..."$DEFAULT +bash ${AIL_BIN}/LAUNCH.sh -ks +wait + +bash ${AIL_BIN}/LAUNCH.sh -lav & +wait +echo "" + +echo "" +echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT +echo "" +python ${AIL_HOME}/update/v2.7/Update.py +wait +echo "" +echo "" + +echo "" +echo -e $GREEN"Shutting down ARDB ..."$DEFAULT +bash ${AIL_BIN}/LAUNCH.sh -ks +wait + +exit 0 diff --git a/update/v2.7/Update_domain_tags.py b/update/v2.7/Update_domain_tags.py new file mode 100755 index 00000000..937110af --- /dev/null +++ b/update/v2.7/Update_domain_tags.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import sys +import time +import redis +import datetime + +from hashlib import sha256 + +from pyfaup.faup import Faup + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) +import Date + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader +import Tag + +def sanitize_domain(domain): + faup.decode(domain) + domain_sanitized = faup.get() + domain_sanitized = domain_sanitized['domain'] + try: + domain_sanitized = domain_sanitized.decode() + except: + pass + return domain_sanitized.lower() + +def delete_domain_tag_daterange(): + all_domains_tags = Tag.get_all_obj_tags('domain') + nb_updated = 0 + nb_to_update = len(all_domains_tags) + if nb_to_update == 0: + nb_to_update = 1 + refresh_time = time.time() + l_dates = Date.substract_date('20191008', Date.get_today_date_str()) + for tag in all_domains_tags: + for date_day in l_dates: + r_serv_tags.delete('domain:{}:{}'.format(tag, date_day)) + nb_updated += 1 + refresh_time = update_progress(refresh_time, nb_updated, nb_to_update) + r_serv_db.delete('ail:update_v2.7:deletetagrange') + +def update_domain_tags(domain): + domain_sanitized = 
sanitize_domain(domain) + if domain != domain_sanitized: + r_serv_onion.sadd('incorrect_domain', domain) + domain = domain_sanitized + + domain_tags = Tag.get_obj_tag(domain) + for tag in domain_tags: + # delete incorrect tags + if tag == 'infoleak:submission="crawler"' or tag == 'infoleak:submission="manual"': + r_serv_metadata.srem('tag:{}'.format(domain), tag) + else: + Tag.add_global_tag(tag, object_type='domain') + r_serv_tags.sadd('{}:{}'.format('domain', tag), domain) + +def update_progress(refresh_time, nb_updated, nb_elem_to_update): + if time.time() - refresh_time > 10: + progress = int((nb_updated * 100) / nb_elem_to_update) + print('{}/{} updated {}%'.format(nb_updated, nb_elem_to_update, progress)) + r_serv_db.set('ail:current_background_script_stat', progress) + refresh_time = time.time() + + return refresh_time + +def update_db(): + nb_updated = 0 + nb_to_update = r_serv_onion.scard('domain_update_v2.7') + refresh_time = time.time() + r_serv_db.set('ail:current_background_script_stat', 0) + r_serv_db.set('ail:current_background_script', 'domain tags update') + domain = r_serv_onion.spop('domain_update_v2.7') + while domain is not None: + update_domain_tags(domain) + nb_updated += 1 + refresh_time = update_progress(refresh_time, nb_updated, nb_to_update) + domain = r_serv_onion.spop('domain_update_v2.7') + if r_serv_db.exists('ail:update_v2.7:deletetagrange'): + r_serv_db.set('ail:current_background_script_stat', 0) + r_serv_db.set('ail:current_background_script', 'tags: remove deprecated keys') + delete_domain_tag_daterange() + + # sort all crawled domain + r_serv_onion.sort('full_onion_up', alpha=True) + r_serv_onion.sort('full_regular_up', alpha=True) + +if __name__ == '__main__': + + start_deb = time.time() + faup = Faup() + + config_loader = ConfigLoader.ConfigLoader() + + r_serv_db = config_loader.get_redis_conn("ARDB_DB") + r_serv_tags = config_loader.get_redis_conn("ARDB_Tags") + r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") + 
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") + config_loader = None + + update_version = 'v2.7' + + r_serv_db.set('ail:update_in_progress', update_version) + r_serv_db.set('ail:current_background_update', update_version) + + r_serv_db.set('ail:current_background_script_stat', 0) + r_serv_db.set('ail:current_background_script', 'tags update') + + update_db() + + r_serv_db.set('ail:current_background_script_stat', 100) + + + end = time.time() + print('ALL domains tags updated in {} s'.format(end - start_deb)) + + r_serv_db.delete('ail:update_in_progress') + r_serv_db.delete('ail:current_background_script') + r_serv_db.delete('ail:current_background_script_stat') + r_serv_db.delete('ail:current_background_update') + r_serv_db.srem('ail:to_update', update_version) diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index 728222ca..201c080d 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -39,6 +39,8 @@ import Flask_config from blueprints.root import root from blueprints.crawler_splash import crawler_splash from blueprints.correlation import correlation +from blueprints.tags_ui import tags_ui + Flask_dir = os.environ['AIL_FLASK'] @@ -85,6 +87,7 @@ app.config['MAX_CONTENT_LENGTH'] = 900 * 1024 * 1024 app.register_blueprint(root, url_prefix=baseUrl) app.register_blueprint(crawler_splash, url_prefix=baseUrl) app.register_blueprint(correlation, url_prefix=baseUrl) +app.register_blueprint(tags_ui, url_prefix=baseUrl) # ========= =========# # ========= session ======== diff --git a/var/www/blueprints/crawler_splash.py b/var/www/blueprints/crawler_splash.py index bb728a65..155b3090 100644 --- a/var/www/blueprints/crawler_splash.py +++ b/var/www/blueprints/crawler_splash.py @@ -45,13 +45,18 @@ def api_validator(api_response): # ============= ROUTES ============== # add route : /crawlers/show_domain -@crawler_splash.route('/crawlers/showDomain') +@crawler_splash.route('/crawlers/showDomain', methods=['GET', 'POST']) 
@login_required @login_read_only def showDomain(): - domain_name = request.args.get('domain') - epoch = request.args.get('epoch') - port = request.args.get('port') + if request.method == 'POST': + domain_name = request.form.get('in_show_domain') + epoch = None + port = None + else: + domain_name = request.args.get('domain') + epoch = request.args.get('epoch') + port = request.args.get('port') res = api_validator(Domain.api_verify_if_domain_exist(domain_name)) if res: @@ -73,4 +78,4 @@ def showDomain(): dict_domain['crawler_history']['random_item'] = random.choice(dict_domain['crawler_history']['items']) return render_template("showDomain.html", dict_domain=dict_domain, bootstrap_label=bootstrap_label, - modal_add_tags=Tag.get_modal_add_tags(dict_domain['domain'], tag_type="domain")) + modal_add_tags=Tag.get_modal_add_tags(dict_domain['domain'], object_type="domain")) diff --git a/var/www/blueprints/tags_ui.py b/var/www/blueprints/tags_ui.py new file mode 100644 index 00000000..9f5e8e7b --- /dev/null +++ b/var/www/blueprints/tags_ui.py @@ -0,0 +1,165 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +''' + Blueprint Flask: crawler splash endpoints: dashboard, onion crawler ... 
+''' + +import os +import sys +import json +import random + +from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response +from flask_login import login_required, current_user, login_user, logout_user + +sys.path.append('modules') +import Flask_config + +# Import Role_Manager +from Role_Manager import create_user_db, check_password_strength, check_user_role_integrity +from Role_Manager import login_admin, login_analyst, login_read_only + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages')) +import Date +import Tag + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib')) +import Correlate_object + +r_cache = Flask_config.r_cache +r_serv_db = Flask_config.r_serv_db +r_serv_tags = Flask_config.r_serv_tags +bootstrap_label = Flask_config.bootstrap_label + +# ============ BLUEPRINT ============ +tags_ui = Blueprint('tags_ui', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/tags')) + +# ============ VARIABLES ============ + + + +# ============ FUNCTIONS ============ + + +# ============= ROUTES ============== +@tags_ui.route('/tag/add_tags') +@login_required +@login_analyst +def add_tags(): + + tags = request.args.get('tags') + tagsgalaxies = request.args.get('tagsgalaxies') + object_id = request.args.get('object_id') + object_type = request.args.get('object_type') + + list_tag = tags.split(',') + list_tag_galaxies = tagsgalaxies.split(',') + + res = Tag.api_add_obj_tags(tags=list_tag, galaxy_tags=list_tag_galaxies, object_id=object_id, object_type=object_type) + # error + if res[1] != 200: + return str(res[0]) + + return redirect(Correlate_object.get_item_url(object_type, object_id)) + +@tags_ui.route('/tag/delete_tag') +@login_required +@login_analyst +def delete_tag(): + + object_type = request.args.get('object_type') + object_id = request.args.get('object_id') + tag = request.args.get('tag') + + res = Tag.api_delete_obj_tags(tags=[tag], object_id=object_id, object_type=object_type) + 
if res[1] != 200:
+        return str(res[0])
+    return redirect(Correlate_object.get_item_url(object_type, object_id))
+
+
+@tags_ui.route('/tag/get_all_tags')
+@login_required
+@login_read_only
+def get_all_tags():
+    return jsonify(Tag.get_all_tags())
+
+@tags_ui.route('/tag/get_all_obj_tags')
+@login_required
+@login_read_only
+def get_all_obj_tags():
+    object_type = request.args.get('object_type')
+    res = Correlate_object.sanitize_object_type(object_type)
+    if res:
+        return jsonify(res)
+    return jsonify(Tag.get_all_obj_tags(object_type))
+
+@tags_ui.route('/tag/search/domain')
+@login_required
+@login_read_only
+def tags_search_domains():
+    object_type = 'domain'
+    dict_tagged = {"object_type":object_type, "object_name":object_type.title() + "s"}
+    return render_template("tags/search_obj_by_tags.html", bootstrap_label=bootstrap_label, dict_tagged=dict_tagged)
+
+@tags_ui.route('/tag/search/image')
+@login_required
+@login_read_only
+def tags_search_images():
+    object_type = 'image'
+    dict_tagged = {"object_type":object_type, "object_name":object_type.title() + "s"}
+    return render_template("tags/search_obj_by_tags.html", bootstrap_label=bootstrap_label, dict_tagged=dict_tagged)
+
+@tags_ui.route('/tag/search/get_obj_by_tags')
+@login_required
+@login_read_only
+def get_obj_by_tags():
+
+    # # TODO: sanitize all
+    object_type = request.args.get('object_type')
+    ltags = request.args.get('ltags')
+    page = request.args.get('page')
+    date_from = request.args.get('date_from')
+    date_to = request.args.get('date_to')
+
+    # unpack tags
+    list_tags = ltags.split(',')
+    list_tag = []
+    for tag in list_tags:
+        list_tag.append(tag.replace('"','\"'))
+
+    # object_type
+    res = Correlate_object.sanitize_object_type(object_type)
+    if res:
+        return jsonify(res)
+
+    # page
+    try:
+        page = int(page)
+    except:
+        page = 1
+
+    dict_obj = Tag.get_obj_by_tags(object_type, list_tag, page=page)
+
+    if dict_obj['tagged_obj']:
+        dict_tagged = {"object_type":object_type, "object_name":object_type.title()
+ "s", + "tagged_obj":[], "page":dict_obj['page'] ,"nb_pages":dict_obj['nb_pages'], + "nb_first_elem":dict_obj['nb_first_elem'], "nb_last_elem":dict_obj['nb_last_elem'], "nb_all_elem":dict_obj['nb_all_elem']} + for obj_id in dict_obj['tagged_obj']: + obj_metadata = Correlate_object.get_object_metadata(object_type, obj_id) + obj_metadata['id'] = obj_id + dict_tagged["tagged_obj"].append(obj_metadata) + + dict_tagged['tab_keys'] = Correlate_object.get_obj_tag_table_keys(object_type) + + if len(list_tag) == 1: + dict_tagged['current_tags'] = [ltags.replace('"', '\"')] + else: + dict_tagged['current_tags'] = list_tag + dict_tagged['current_tags_str'] = ltags + + #return jsonify(dict_tagged) + else: + dict_tagged = {"object_type":object_type, "object_name":object_type.title() + "s"} + + return render_template("tags/search_obj_by_tags.html", bootstrap_label=bootstrap_label, dict_tagged=dict_tagged) diff --git a/var/www/modules/Flask_config.py b/var/www/modules/Flask_config.py index fcedd7e7..6e4e17cc 100644 --- a/var/www/modules/Flask_config.py +++ b/var/www/modules/Flask_config.py @@ -90,7 +90,9 @@ dict_update_description = {'v1.5':{'nb_background_update': 5, 'update_warning_me 'v2.4':{'nb_background_update': 1, 'update_warning_message': 'An Update is running on the background. Some informations like Domain Tags/Correlation can be', 'update_warning_message_notice_me': 'missing from the UI.'}, 'v2.6':{'nb_background_update': 1, 'update_warning_message': 'An Update is running on the background. Some informations like Domain Tags/Correlation can be', - 'update_warning_message_notice_me': 'missing from the UI.'} + 'update_warning_message_notice_me': 'missing from the UI.'}, + 'v2.7':{'nb_background_update': 1, 'update_warning_message': 'An Update is running on the background. 
Some informations like Domain Tags can be', + 'update_warning_message_notice_me': 'missing from the UI.'} } UPLOAD_FOLDER = os.path.join(os.environ['AIL_FLASK'], 'submitted') diff --git a/var/www/modules/Tags/Flask_Tags.py b/var/www/modules/Tags/Flask_Tags.py index cd97e99d..d2252877 100644 --- a/var/www/modules/Tags/Flask_Tags.py +++ b/var/www/modules/Tags/Flask_Tags.py @@ -369,15 +369,14 @@ def get_tags_galaxy(): @Tags.route("/Tags/remove_tag") @login_required @login_analyst -def remove_tag(): +def remove_tag(): #TODO remove me , used by showpaste - #TODO verify input path = request.args.get('paste') tag = request.args.get('tag') - res = Tag.remove_item_tag(tag, path) + res = Tag.api_delete_obj_tags(tags=[tag], object_id=path, object_type="item") if res[1] != 200: - str(res[0]) + return str(res[0]) return redirect(url_for('showsavedpastes.showsavedpaste', paste=path)) @Tags.route("/Tags/confirm_tag") @@ -390,11 +389,11 @@ def confirm_tag(): tag = request.args.get('tag') if(tag[9:28] == 'automatic-detection'): - Tag.remove_item_tag(tag, path) + Tag.api_delete_obj_tags(tags=[tag], object_id=path, object_type="item") tag = tag.replace('automatic-detection','analyst-detection', 1) #add analyst tag - Tag.add_item_tag(tag, path) + Tag.add_tag('item', tag, path) return redirect(url_for('showsavedpastes.showsavedpaste', paste=path)) @@ -422,49 +421,6 @@ def tag_validation(): else: return 'input error' -@Tags.route("/Tags/addTags") -@login_required -@login_analyst -def addTags(): - - tags = request.args.get('tags') - tagsgalaxies = request.args.get('tagsgalaxies') - path = request.args.get('path') - - list_tag = tags.split(',') - list_tag_galaxies = tagsgalaxies.split(',') - - res = Tag.add_items_tags(list_tag, list_tag_galaxies, item_id=path) - print(res) - # error - if res[1] != 200: - return str(res[0]) - # success - return redirect(url_for('showsavedpastes.showsavedpaste', paste=path)) - -@Tags.route("/Tags/add_item_tags") -@login_required -@login_analyst -def 
add_item_tags(): - - tags = request.args.get('tags') - tagsgalaxies = request.args.get('tagsgalaxies') - item_id = request.args.get('item_id') - item_type = request.args.get('type') - - list_tag = tags.split(',') - list_tag_galaxies = tagsgalaxies.split(',') - - res = Tag.add_items_tags(tags=list_tag, galaxy_tags=list_tag_galaxies, item_id=item_id, item_type=item_type) - # error - if res[1] != 200: - return str(res[0]) - # success - if item_type=='domain': - return redirect(url_for('crawler_splash.showDomain', domain=item_id)) - else: - return redirect(url_for('showsavedpastes.showsavedpaste', paste=item_id)) - @Tags.route("/Tags/taxonomies") @login_required @login_read_only diff --git a/var/www/modules/Tags/templates/Tags.html b/var/www/modules/Tags/templates/Tags.html index d538269c..b4fb85c8 100644 --- a/var/www/modules/Tags/templates/Tags.html +++ b/var/www/modules/Tags/templates/Tags.html @@ -217,8 +217,8 @@ var last_clicked_paste; var can_change_modal_content = true; $(document).ready(function(){ - $("#nav_quick_search").removeClass("text-muted"); - $("#nav_tag_{{tag_nav}}").addClass("active"); + $("#nav_tags_search").removeClass("text-muted"); + $("#nav_tags_search_item").addClass("active"); search_table = $('#myTable_').DataTable({ "order": [[ 0, "asc" ]] }); // Use to bind the button with the new displayed data diff --git a/var/www/modules/hiddenServices/templates/Crawler_dashboard.html b/var/www/modules/hiddenServices/templates/Crawler_dashboard.html index 769beb7f..d72daea6 100644 --- a/var/www/modules/hiddenServices/templates/Crawler_dashboard.html +++ b/var/www/modules/hiddenServices/templates/Crawler_dashboard.html @@ -105,6 +105,29 @@ +
+
+
+
Show Domain:
+
+
+ +
+ +
+
+
+
+
+
+ + + {% with object_type='domain' %} + {% include 'tags/block_obj_tags_search.html' %} + {% endwith %} + diff --git a/var/www/modules/restApi/Flask_restApi.py b/var/www/modules/restApi/Flask_restApi.py index 8f777790..308e8146 100644 --- a/var/www/modules/restApi/Flask_restApi.py +++ b/var/www/modules/restApi/Flask_restApi.py @@ -246,11 +246,11 @@ def add_item_tags(): if not data: return Response(json.dumps({'status': 'error', 'reason': 'Malformed JSON'}, indent=2, sort_keys=True), mimetype='application/json'), 400 - item_id = data.get('id', None) + object_id = data.get('id', None) tags = data.get('tags', []) galaxy = data.get('galaxy', []) - res = Tag.add_items_tag(tags, galaxy, item_id) + res = Tag.api_add_obj_tags(tags=tags, galaxy_tags=galaxy, object_id=object_id, object_type="item") return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1] # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # @@ -275,10 +275,10 @@ def delete_item_tags(): if not data: return Response(json.dumps({'status': 'error', 'reason': 'Malformed JSON'}, indent=2, sort_keys=True), mimetype='application/json'), 400 - item_id = data.get('id', None) + object_id = data.get('id', None) tags = data.get('tags', []) - res = Tag.remove_item_tags(tags, item_id) + res = Tag.api_delete_obj_tags(tags=tags, object_id=object_id, object_type="item") return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1] # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # diff --git a/var/www/modules/showpaste/templates/show_saved_paste.html b/var/www/modules/showpaste/templates/show_saved_paste.html index 7b2b265b..b2c0a8bf 100644 --- a/var/www/modules/showpaste/templates/show_saved_paste.html +++ b/var/www/modules/showpaste/templates/show_saved_paste.html @@ -570,7 +570,7 @@ var tags = ltags.getValue() var tagsgalaxy = ltagsgalaxies.getValue() var path = '{{ request.args.get('paste') 
}}' - window.location.replace("{{ url_for('Tags.addTags') }}?tags=" + tags + "&tagsgalaxies=" + tagsgalaxy + "&path=" + path); + window.location.replace("{{ url_for('tags_ui.add_tags') }}?tags=" + tags + "&tagsgalaxies=" + tagsgalaxy + "&object_id=" + path + "&object_type=item"); } diff --git a/var/www/templates/crawler/crawler_splash/showDomain.html b/var/www/templates/crawler/crawler_splash/showDomain.html index 0026cb25..80912dcc 100644 --- a/var/www/templates/crawler/crawler_splash/showDomain.html +++ b/var/www/templates/crawler/crawler_splash/showDomain.html @@ -79,10 +79,13 @@
+ {% include 'modals/edit_tag.html' %} {% for tag in dict_domain['tags'] %} - - {{ tag }} - + + {% endfor %}
{% include 'modals/add_tags.html' %} diff --git a/var/www/templates/modals/add_tags.html b/var/www/templates/modals/add_tags.html index 98cb5479..2f06811b 100644 --- a/var/www/templates/modals/add_tags.html +++ b/var/www/templates/modals/add_tags.html @@ -126,6 +126,6 @@ jQuery("#all-tags-galaxies").click(function(e){ function addTags() { var tags = ltags.getValue() var tagsgalaxy = ltagsgalaxies.getValue() - window.location.replace("{{ url_for('Tags.add_item_tags') }}?tags=" + tags + "&tagsgalaxies=" + tagsgalaxy + "&item_id={{ modal_add_tags['item_id'] }}&type={{ modal_add_tags['type'] }}"); + window.location.replace("{{ url_for('tags_ui.add_tags') }}?tags=" + tags + "&tagsgalaxies=" + tagsgalaxy + "&object_id={{ modal_add_tags['object_id'] }}&object_type={{ modal_add_tags['object_type'] }}"); } diff --git a/var/www/templates/modals/edit_tag.html b/var/www/templates/modals/edit_tag.html new file mode 100644 index 00000000..c7455631 --- /dev/null +++ b/var/www/templates/modals/edit_tag.html @@ -0,0 +1,49 @@ + + + diff --git a/var/www/templates/pagination.html b/var/www/templates/pagination.html new file mode 100644 index 00000000..3da2e289 --- /dev/null +++ b/var/www/templates/pagination.html @@ -0,0 +1,50 @@ +
+
+ +
+ + {%if nb_all_elem%} +
+ + {{object_name}}:  + {{nb_first_elem}}-{{nb_last_elem}} + / + {{nb_all_elem}} + +
+
+
+
+ {%endif%} +
diff --git a/var/www/templates/tags/block_obj_tags_search.html b/var/www/templates/tags/block_obj_tags_search.html new file mode 100644 index 00000000..8669f4db --- /dev/null +++ b/var/www/templates/tags/block_obj_tags_search.html @@ -0,0 +1,103 @@ +
+
+
Search {{object_name}} by Tags:
+
+
+
+ + + +
+
+ +
+ +
+ + + +
+
+ + + + + diff --git a/var/www/templates/tags/menu_sidebar.html b/var/www/templates/tags/menu_sidebar.html index 2183e3a7..2bcff464 100644 --- a/var/www/templates/tags/menu_sidebar.html +++ b/var/www/templates/tags/menu_sidebar.html @@ -6,6 +6,29 @@