mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-26 15:57:16 +00:00
chg: [crawler] add unsafe tag if domain contains an unsafe screenshot
This commit is contained in:
parent
37c71b8438
commit
6b60041db2
1 changed file with 11 additions and 2 deletions
|
@ -15,6 +15,7 @@ from modules.abstract_module import AbstractModule
|
|||
from lib import crawlers
|
||||
from lib.ConfigLoader import ConfigLoader
|
||||
from lib.objects.Domains import Domain
|
||||
from lib.objects.Items import Item
|
||||
from lib.objects import Screenshots
|
||||
|
||||
|
||||
|
@ -53,6 +54,9 @@ class Crawler(AbstractModule):
|
|||
self.items_dir = None
|
||||
self.domain = None
|
||||
|
||||
# TODO Replace with warning list ???
|
||||
self.placeholder_screenshots = {'27e14ace10b0f96acd2bd919aaa98a964597532c35b6409dff6cc8eec8214748'}
|
||||
|
||||
# Send module state to logs
|
||||
self.redis_logger.info('Crawler initialized')
|
||||
|
||||
|
@ -248,8 +252,13 @@ class Crawler(AbstractModule):
|
|||
if 'png' in entries and entries['png']:
|
||||
screenshot = Screenshots.create_screenshot(entries['png'], b64=False)
|
||||
if screenshot:
|
||||
# Remove Errors pages # TODO Replace with warning list ???
|
||||
if screenshot.id not in ['27e14ace10b0f96acd2bd919aaa98a964597532c35b6409dff6cc8eec8214748']:
|
||||
if not screenshot.is_tags_safe():
|
||||
unsafe_tag = 'dark-web:topic="pornography-child-exploitation"'
|
||||
self.domain.add_tag(unsafe_tag)
|
||||
item = Item(item_id)
|
||||
item.add_tag(unsafe_tag)
|
||||
# Remove Placeholder pages # TODO Replace with warning list ???
|
||||
if screenshot.id not in self.placeholder_screenshots:
|
||||
# Create Correlations
|
||||
screenshot.add_correlation('item', '', item_id)
|
||||
screenshot.add_correlation('domain', '', self.domain.id)
|
||||
|
|
Loading…
Reference in a new issue