diff --git a/bin/packages/Tag.py b/bin/packages/Tag.py index 08265ad8..1e311a02 100755 --- a/bin/packages/Tag.py +++ b/bin/packages/Tag.py @@ -20,6 +20,21 @@ r_serv_tags = config_loader.get_redis_conn("ARDB_Tags") r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") config_loader = None +def build_unsafe_tags(): + unsafe_tags = set() + ## CE content + unsafe_tags.add('dark-web:topic="pornography-child-exploitation"') + # add copine-scale tags + taxonomies = Taxonomies() + copine_scale = taxonomies.get('copine-scale') + if copine_scale: + for tag in copine_scale.machinetags(): + unsafe_tags.add(tag) + return unsafe_tags + +# set of unsafe tags +unsafe_tags = build_unsafe_tags() + def get_taxonomie_from_tag(tag): return tag.split(':')[0] @@ -95,6 +110,17 @@ def get_tag_metadata(tag): last_seen = r_serv_tags.hget('tag_metadata:{}'.format(tag), 'last_seen') return {'tag': tag, 'first_seen': first_seen, 'last_seen': last_seen} +def is_tags_safe(ltags): + ''' + Check that a list of tags contains no unsafe tag (CE, ...)
+ + :param ltags: list of tags + :type ltags: list + :return: True if no tag is in the unsafe set, False otherwise + :rtype: boolean + ''' + return unsafe_tags.isdisjoint(ltags) + def is_tag_in_all_tag(tag): if r_serv_tags.sismember('list_tags', tag): return True diff --git a/var/www/blueprints/crawler_splash.py b/var/www/blueprints/crawler_splash.py index 2f142a9c..3b8c68b1 100644 --- a/var/www/blueprints/crawler_splash.py +++ b/var/www/blueprints/crawler_splash.py @@ -21,7 +21,7 @@ from Role_Manager import create_user_db, check_password_strength, check_user_rol from Role_Manager import login_admin, login_analyst sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages')) -from Tag import get_modal_add_tags +import Tag sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib')) import Domain @@ -65,9 +65,10 @@ def showDomain(): dict_domain = {**dict_domain, **domain.get_domain_correlation()} dict_domain['origin_item'] = domain.get_domain_last_origin() dict_domain['tags'] = domain.get_domain_tags() + dict_domain['tags_safe'] = Tag.is_tags_safe(dict_domain['tags']) dict_domain['history'] = domain.get_domain_history_with_status() dict_domain['crawler_history'] = domain.get_domain_items_crawled(items_link=True, epoch=epoch, item_screenshot=True, item_tag=True) # # TODO: handle multiple port dict_domain['crawler_history']['random_item'] = random.choice(dict_domain['crawler_history']['items']) return render_template("showDomain.html", dict_domain=dict_domain, bootstrap_label=bootstrap_label, - modal_add_tags=get_modal_add_tags(dict_domain['domain'], tag_type="domain")) + modal_add_tags=Tag.get_modal_add_tags(dict_domain['domain'], tag_type="domain")) diff --git a/var/www/templates/crawler/crawler_splash/showDomain.html b/var/www/templates/crawler/crawler_splash/showDomain.html index 75bc88b6..90e87fee 100644 --- a/var/www/templates/crawler/crawler_splash/showDomain.html +++ b/var/www/templates/crawler/crawler_splash/showDomain.html @@ -366,12 +366,18 @@