chg: [titles] add yara tracker on title + tags domains if unsafe title tags

This commit is contained in:
Terrtia 2023-08-23 11:16:22 +02:00
parent 045aab6f34
commit f44c5509da
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
5 changed files with 18 additions and 4 deletions

View file

@ -22,6 +22,7 @@ from lib.objects.Domains import Domain
from lib.objects.Items import Item
from lib.objects import Screenshots
from lib.objects import Titles
from trackers.Tracker_Yara import Tracker_Yara
logging.config.dictConfig(ail_logger.get_config(name='crawlers'))
@ -35,6 +36,8 @@ class Crawler(AbstractModule):
# Waiting time in seconds between two processed messages
self.pending_seconds = 1
self.tracker_yara = Tracker_Yara(queue=False)
config_loader = ConfigLoader()
self.default_har = config_loader.get_config_boolean('Crawler', 'default_har')
@ -283,6 +286,12 @@ class Crawler(AbstractModule):
if title_content:
title = Titles.create_title(title_content)
title.add(item.get_date(), item_id)
# Tracker
self.tracker_yara.compute(title.get_id(), obj_type=title.get_type())
if not title.is_tags_safe():
unsafe_tag = 'dark-web:topic="pornography-child-exploitation"'
self.domain.add_tag(unsafe_tag)
item.add_tag(unsafe_tag)
# SCREENSHOT
if self.screenshot:

View file

@ -923,7 +923,7 @@ def api_add_tracker(dict_input, user_id):
# Filters # TODO MOVE ME
filters = dict_input.get('filters', {})
if filters:
if filters.keys() == {'decoded', 'item', 'pgp'} and set(filters['pgp'].get('subtypes', [])) == {'mail', 'name'}:
if filters.keys() == {'decoded', 'item', 'pgp', 'title'} and set(filters['pgp'].get('subtypes', [])) == {'mail', 'name'}:
filters = {}
for obj_type in filters:
if obj_type not in get_objects_tracked():
@ -998,7 +998,7 @@ def api_edit_tracker(dict_input, user_id):
# Filters # TODO MOVE ME
filters = dict_input.get('filters', {})
if filters:
if filters.keys() == {'decoded', 'item', 'pgp'} and set(filters['pgp'].get('subtypes', [])) == {'mail', 'name'}:
if filters.keys() == {'decoded', 'item', 'pgp', 'title'} and set(filters['pgp'].get('subtypes', [])) == {'mail', 'name'}:
if not filters['decoded'] and not filters['item']:
filters = {}
for obj_type in filters:

View file

@ -50,7 +50,7 @@ def get_object_all_subtypes(obj_type):
return []
# Return the list of object type names that a tracker may be applied to.
# NOTE(review): this span is a diff hunk shown without +/- markers: the first
# return appears to be the removed line and the second its replacement, which
# adds 'title' to the tracked object types -- confirm against the real file.
def get_objects_tracked():
return ['decoded', 'item', 'pgp']
return ['decoded', 'item', 'pgp', 'title']
# Return the list of object type names used for retro hunting (per the
# function name): 'decoded' and 'item'.
# 'title' is not listed here, so titles are presumably not retro-hunted --
# TODO confirm whether that is intentional.
def get_objects_retro_hunted():
return ['decoded', 'item']

View file

@ -45,6 +45,8 @@ class Title(AbstractDaterangeObject):
# Return the stored 'content' field of this title.
#   r_type='str'   -> the value of self._get_field('content') as-is.
#   r_type='bytes' -> the same value passed through .encode() (str.encode()
#                     defaults to UTF-8); assumes the field is a str, not
#                     None -- TODO confirm.
# No other branch is visible here, so any other r_type would return None.
def get_content(self, r_type='str'):
if r_type == 'str':
return self._get_field('content')
elif r_type == 'bytes':
return self._get_field('content').encode()
def get_link(self, flask_context=False):
if flask_context:
@ -122,4 +124,3 @@ class Titles(AbstractDaterangeObjects):
# # print(r)
# r = titles.search_by_id('f7d57B', r_pos=True, case_sensitive=False)
# print(r)

View file

@ -132,6 +132,10 @@
</div>
</div>
</div>
<div class="custom-control custom-switch mt-1">
<input class="custom-control-input" type="checkbox" name="title_obj" id="title_obj" checked="">
<label class="custom-control-label" for="title_obj"><i class="fas fa-lock-open"></i>&nbsp;Title <i class="fas fa-heading text-info" data-toggle="tooltip" data-placement="right" title="Title that has been extracted from a HTML page"></i></label>
</div>
{# <div class="custom-control custom-switch mt-1">#}
{# <input class="custom-control-input" type="checkbox" name="level" id="screenshot_obj" checked="">#}