From f44c5509da842be5ec0756d042fad0d5d7d0a005 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Wed, 23 Aug 2023 11:16:22 +0200 Subject: [PATCH] chg: [titles] add yara tracker on title + tags domains if unsafe title tags --- bin/crawlers/Crawler.py | 9 +++++++++ bin/lib/Tracker.py | 4 ++-- bin/lib/ail_core.py | 2 +- bin/lib/objects/Titles.py | 3 ++- var/www/templates/hunter/tracker_add.html | 4 ++++ 5 files changed, 18 insertions(+), 4 deletions(-) diff --git a/bin/crawlers/Crawler.py b/bin/crawlers/Crawler.py index 7f2c3df9..c22f6ccf 100755 --- a/bin/crawlers/Crawler.py +++ b/bin/crawlers/Crawler.py @@ -22,6 +22,7 @@ from lib.objects.Domains import Domain from lib.objects.Items import Item from lib.objects import Screenshots from lib.objects import Titles +from trackers.Tracker_Yara import Tracker_Yara logging.config.dictConfig(ail_logger.get_config(name='crawlers')) @@ -35,6 +36,8 @@ class Crawler(AbstractModule): # Waiting time in seconds between to message processed self.pending_seconds = 1 + self.tracker_yara = Tracker_Yara(queue=False) + config_loader = ConfigLoader() self.default_har = config_loader.get_config_boolean('Crawler', 'default_har') @@ -283,6 +286,12 @@ class Crawler(AbstractModule): if title_content: title = Titles.create_title(title_content) title.add(item.get_date(), item_id) + # Tracker + self.tracker_yara.compute(title.get_id(), obj_type=title.get_type()) + if not title.is_tags_safe(): + unsafe_tag = 'dark-web:topic="pornography-child-exploitation"' + self.domain.add_tag(unsafe_tag) + item.add_tag(unsafe_tag) # SCREENSHOT if self.screenshot: diff --git a/bin/lib/Tracker.py b/bin/lib/Tracker.py index f1ea8905..c06e303d 100755 --- a/bin/lib/Tracker.py +++ b/bin/lib/Tracker.py @@ -923,7 +923,7 @@ def api_add_tracker(dict_input, user_id): # Filters # TODO MOVE ME filters = dict_input.get('filters', {}) if filters: - if filters.keys() == {'decoded', 'item', 'pgp'} and set(filters['pgp'].get('subtypes', [])) == {'mail', 'name'}: + if filters.keys() == {'decoded', 'item', 'pgp', 'title'} and set(filters['pgp'].get('subtypes', [])) == {'mail', 'name'}: filters = {} for obj_type in filters: if obj_type not in get_objects_tracked(): @@ -998,7 +998,7 @@ def api_edit_tracker(dict_input, user_id): # Filters # TODO MOVE ME filters = dict_input.get('filters', {}) if filters: - if filters.keys() == {'decoded', 'item', 'pgp'} and set(filters['pgp'].get('subtypes', [])) == {'mail', 'name'}: + if filters.keys() == {'decoded', 'item', 'pgp', 'title'} and set(filters['pgp'].get('subtypes', [])) == {'mail', 'name'}: if not filters['decoded'] and not filters['item']: filters = {} for obj_type in filters: diff --git a/bin/lib/ail_core.py b/bin/lib/ail_core.py index 75520a2b..9a7d9557 100755 --- a/bin/lib/ail_core.py +++ b/bin/lib/ail_core.py @@ -50,7 +50,7 @@ def get_object_all_subtypes(obj_type): return [] def get_objects_tracked(): - return ['decoded', 'item', 'pgp'] + return ['decoded', 'item', 'pgp', 'title'] def get_objects_retro_hunted(): return ['decoded', 'item'] diff --git a/bin/lib/objects/Titles.py b/bin/lib/objects/Titles.py index 9f88426c..1a29d58e 100755 --- a/bin/lib/objects/Titles.py +++ b/bin/lib/objects/Titles.py @@ -45,6 +45,8 @@ class Title(AbstractDaterangeObject): def get_content(self, r_type='str'): if r_type == 'str': return self._get_field('content') + elif r_type == 'bytes': + return self._get_field('content').encode() def get_link(self, flask_context=False): if flask_context: @@ -122,4 +124,3 @@ class Titles(AbstractDaterangeObjects): # # print(r) # r = titles.search_by_id('f7d57B', r_pos=True, case_sensitive=False) # print(r) - diff --git a/var/www/templates/hunter/tracker_add.html b/var/www/templates/hunter/tracker_add.html index 7cc690ba..05266fa4 100644 --- a/var/www/templates/hunter/tracker_add.html +++ b/var/www/templates/hunter/tracker_add.html @@ -132,6 +132,10 @@ +
+ + +
{#
#} {# #}