mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-10 08:38:28 +00:00
chg: [titles] add yara tracker on title + tags domains if unsafe title tags
This commit is contained in:
parent
045aab6f34
commit
f44c5509da
5 changed files with 18 additions and 4 deletions
|
@ -22,6 +22,7 @@ from lib.objects.Domains import Domain
|
|||
from lib.objects.Items import Item
|
||||
from lib.objects import Screenshots
|
||||
from lib.objects import Titles
|
||||
from trackers.Tracker_Yara import Tracker_Yara
|
||||
|
||||
logging.config.dictConfig(ail_logger.get_config(name='crawlers'))
|
||||
|
||||
|
@ -35,6 +36,8 @@ class Crawler(AbstractModule):
|
|||
# Waiting time in seconds between two processed messages
|
||||
self.pending_seconds = 1
|
||||
|
||||
self.tracker_yara = Tracker_Yara(queue=False)
|
||||
|
||||
config_loader = ConfigLoader()
|
||||
|
||||
self.default_har = config_loader.get_config_boolean('Crawler', 'default_har')
|
||||
|
@ -283,6 +286,12 @@ class Crawler(AbstractModule):
|
|||
if title_content:
|
||||
title = Titles.create_title(title_content)
|
||||
title.add(item.get_date(), item_id)
|
||||
# Tracker
|
||||
self.tracker_yara.compute(title.get_id(), obj_type=title.get_type())
|
||||
if not title.is_tags_safe():
|
||||
unsafe_tag = 'dark-web:topic="pornography-child-exploitation"'
|
||||
self.domain.add_tag(unsafe_tag)
|
||||
item.add_tag(unsafe_tag)
|
||||
|
||||
# SCREENSHOT
|
||||
if self.screenshot:
|
||||
|
|
|
@ -923,7 +923,7 @@ def api_add_tracker(dict_input, user_id):
|
|||
# Filters # TODO MOVE ME
|
||||
filters = dict_input.get('filters', {})
|
||||
if filters:
|
||||
if filters.keys() == {'decoded', 'item', 'pgp'} and set(filters['pgp'].get('subtypes', [])) == {'mail', 'name'}:
|
||||
if filters.keys() == {'decoded', 'item', 'pgp', 'title'} and set(filters['pgp'].get('subtypes', [])) == {'mail', 'name'}:
|
||||
filters = {}
|
||||
for obj_type in filters:
|
||||
if obj_type not in get_objects_tracked():
|
||||
|
@ -998,7 +998,7 @@ def api_edit_tracker(dict_input, user_id):
|
|||
# Filters # TODO MOVE ME
|
||||
filters = dict_input.get('filters', {})
|
||||
if filters:
|
||||
if filters.keys() == {'decoded', 'item', 'pgp'} and set(filters['pgp'].get('subtypes', [])) == {'mail', 'name'}:
|
||||
if filters.keys() == {'decoded', 'item', 'pgp', 'title'} and set(filters['pgp'].get('subtypes', [])) == {'mail', 'name'}:
|
||||
if not filters['decoded'] and not filters['item']:
|
||||
filters = {}
|
||||
for obj_type in filters:
|
||||
|
|
|
@ -50,7 +50,7 @@ def get_object_all_subtypes(obj_type):
|
|||
return []
|
||||
|
||||
def get_objects_tracked():
|
||||
return ['decoded', 'item', 'pgp']
|
||||
return ['decoded', 'item', 'pgp', 'title']
|
||||
|
||||
def get_objects_retro_hunted():
|
||||
return ['decoded', 'item']
|
||||
|
|
|
@ -45,6 +45,8 @@ class Title(AbstractDaterangeObject):
|
|||
def get_content(self, r_type='str'):
|
||||
if r_type == 'str':
|
||||
return self._get_field('content')
|
||||
elif r_type == 'bytes':
|
||||
return self._get_field('content').encode()
|
||||
|
||||
def get_link(self, flask_context=False):
|
||||
if flask_context:
|
||||
|
@ -122,4 +124,3 @@ class Titles(AbstractDaterangeObjects):
|
|||
# # print(r)
|
||||
# r = titles.search_by_id('f7d57B', r_pos=True, case_sensitive=False)
|
||||
# print(r)
|
||||
|
||||
|
|
|
@ -132,6 +132,10 @@
|
|||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="custom-control custom-switch mt-1">
|
||||
<input class="custom-control-input" type="checkbox" name="title_obj" id="title_obj" checked="">
|
||||
<label class="custom-control-label" for="title_obj"><i class="fas fa-lock-open"></i> Title <i class="fas fa-heading text-info" data-toggle="tooltip" data-placement="right" title="Title that has been extracted from an HTML page"></i></label>
|
||||
</div>
|
||||
|
||||
{# <div class="custom-control custom-switch mt-1">#}
|
||||
{# <input class="custom-control-input" type="checkbox" name="level" id="screenshot_obj" checked="">#}
|
||||
|
|
Loading…
Reference in a new issue