commit c19b1f34e3
Author: terrtia
Date:   2023-09-08 10:52:55 +02:00

    chg: [ail queues] merge
131 changed files with 4383 additions and 3039 deletions

View file

@@ -27,7 +27,7 @@ fi
 export PATH=$AIL_VENV/bin:$PATH
 export PATH=$AIL_HOME:$PATH
 export PATH=$AIL_REDIS:$PATH
-export PATH=$AIL_ARDB:$PATH
+export PATH=$AIL_KVROCKS:$PATH
 export PATH=$AIL_BIN:$PATH
 export PATH=$AIL_FLASK:$PATH
@@ -685,9 +685,6 @@ while [ "$1" != "" ]; do
     -lrv | --launchRedisVerify )    launch_redis;
                                     wait_until_redis_is_ready;
                                     ;;
-    -lav | --launchARDBVerify )     launch_ardb;
-                                    wait_until_ardb_is_ready;
-                                    ;;
    -lkv | --launchKVORCKSVerify )   launch_kvrocks;
                                     wait_until_kvrocks_is_ready;
                                     ;;

View file

@@ -17,6 +17,7 @@ from lib import ail_logger
 from lib import crawlers
 from lib.ConfigLoader import ConfigLoader
 from lib.objects import CookiesNames
+from lib.objects import Etags
 from lib.objects.Domains import Domain
 from lib.objects.Items import Item
 from lib.objects import Screenshots
@@ -59,6 +60,7 @@ class Crawler(AbstractModule):
         self.root_item = None
         self.date = None
         self.items_dir = None
+        self.original_domain = None
         self.domain = None

         # TODO Replace with warning list ???
@@ -121,11 +123,19 @@ class Crawler(AbstractModule):
         if capture:
             try:
                 status = self.lacus.get_capture_status(capture.uuid)
-                if status != crawlers.CaptureStatus.DONE:  # TODO ADD GLOBAL TIMEOUT -> Save start time ### print start time
+                if status == crawlers.CaptureStatus.DONE:
+                    return capture
+                elif status == crawlers.CaptureStatus.UNKNOWN:
+                    capture_start = capture.get_start_time(r_str=False)
+                    if int(time.time()) - capture_start > 600:  # TODO ADD in new crawler config
+                        task = capture.get_task()
+                        task.reset()
+                        capture.delete()
+                    else:
+                        capture.update(status)
+                else:
                     capture.update(status)
                     print(capture.uuid, crawlers.CaptureStatus(status).name, int(time.time()))
-                else:
-                    return capture
             except ConnectionError:
                 print(capture.uuid)
@@ -181,6 +191,7 @@ class Crawler(AbstractModule):
         print(domain)

         self.domain = Domain(domain)
+        self.original_domain = Domain(domain)

         epoch = int(time.time())
         parent_id = task.get_parent()
@@ -203,12 +214,20 @@ class Crawler(AbstractModule):
         # Origin + History + tags
         if self.root_item:
             self.domain.set_last_origin(parent_id)
-            self.domain.add_history(epoch, root_item=self.root_item)
             # Tags
             for tag in task.get_tags():
                 self.domain.add_tag(tag)
-        elif self.domain.was_up():
-            self.domain.add_history(epoch, root_item=epoch)
+        self.domain.add_history(epoch, root_item=self.root_item)
+
+        if self.domain != self.original_domain:
+            self.original_domain.update_daterange(self.date.replace('/', ''))
+            if self.root_item:
+                self.original_domain.set_last_origin(parent_id)
+                # Tags
+                for tag in task.get_tags():
+                    self.domain.add_tag(tag)
+            self.original_domain.add_history(epoch, root_item=self.root_item)
+            crawlers.update_last_crawled_domain(self.original_domain.get_domain_type(), self.original_domain.id, epoch)

         crawlers.update_last_crawled_domain(self.domain.get_domain_type(), self.domain.id, epoch)
         print('capture:', capture.uuid, 'completed')
@@ -263,7 +282,7 @@ class Crawler(AbstractModule):
             title_content = crawlers.extract_title_from_html(entries['html'])
             if title_content:
                 title = Titles.create_title(title_content)
-                title.add(item.get_date(), item_id)
+                title.add(item.get_date(), item)

         # SCREENSHOT
         if self.screenshot:
@@ -287,7 +306,12 @@ class Crawler(AbstractModule):
             for cookie_name in crawlers.extract_cookies_names_from_har(entries['har']):
                 print(cookie_name)
                 cookie = CookiesNames.create(cookie_name)
-                cookie.add(self.date.replace('/', ''), self.domain.id)
+                cookie.add(self.date.replace('/', ''), self.domain)
+            for etag_content in crawlers.extract_etag_from_har(entries['har']):
+                print(etag_content)
+                etag = Etags.create(etag_content)
+                etag.add(self.date.replace('/', ''), self.domain)
+            crawlers.extract_hhhash(entries['har'], self.domain.id, self.date.replace('/', ''))

         # Next Children
         entries_children = entries.get('children')
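
A note on the capture-status hunk above: a capture stuck in the UNKNOWN state for more than 600 seconds now has its task reset and requeued instead of looping forever. A minimal standalone sketch of that requeue-on-timeout pattern; the Capture and Task stubs below are illustrative stand-ins, not the AIL classes:

    import time

    STALE_AFTER = 600  # seconds; mirrors the hard-coded timeout in the diff above

    class Task:
        def reset(self):
            print('task requeued')

    class Capture:
        def __init__(self, start_time):
            self.start_time = start_time
            self.task = Task()
        def update(self, status):
            print('status ->', status)
        def delete(self):
            print('capture deleted')

    def monitor(capture, status):
        """Return the capture when DONE, requeue it when stuck in UNKNOWN."""
        if status == 'DONE':
            return capture
        if status == 'UNKNOWN' and int(time.time()) - capture.start_time > STALE_AFTER:
            capture.task.reset()   # push the task back onto the crawler queue
            capture.delete()       # drop the stale capture
        else:
            capture.update(status)  # still running: refresh the cached status
        return None

    monitor(Capture(start_time=int(time.time()) - 700), 'UNKNOWN')  # requeues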

View file

@@ -8,9 +8,12 @@ Import Content
 """
 import os
+import logging
+import logging.config
 import sys

 from abc import ABC
+from ssl import create_default_context

 import smtplib
 from email.mime.multipart import MIMEMultipart
@@ -22,17 +25,22 @@ sys.path.append(os.environ['AIL_BIN'])
 ##################################
 # Import Project packages
 ##################################
+from lib import ail_logger
 from exporter.abstract_exporter import AbstractExporter
 from lib.ConfigLoader import ConfigLoader
 # from lib.objects.abstract_object import AbstractObject
 # from lib.Tracker import Tracker

+logging.config.dictConfig(ail_logger.get_config(name='modules'))

 class MailExporter(AbstractExporter, ABC):
-    def __init__(self, host=None, port=None, password=None, user='', sender=''):
+    def __init__(self, host=None, port=None, password=None, user='', sender='', cert_required=None, ca_file=None):
         super().__init__()
         config_loader = ConfigLoader()
+        self.logger = logging.getLogger(f'{self.__class__.__name__}')

         if host:
             self.host = host
             self.port = port
@@ -45,6 +53,15 @@ class MailExporter(AbstractExporter, ABC):
             self.pw = config_loader.get_config_str("Notifications", "sender_pw")
             if self.pw == 'None':
                 self.pw = None
+        if cert_required is not None:
+            self.cert_required = bool(cert_required)
+            self.ca_file = ca_file
+        else:
+            self.cert_required = config_loader.get_config_boolean("Notifications", "cert_required")
+            if self.cert_required:
+                self.ca_file = config_loader.get_config_str("Notifications", "ca_file")
+            else:
+                self.ca_file = None
         if user:
             self.user = user
         else:
@@ -67,8 +84,12 @@ class MailExporter(AbstractExporter, ABC):
             smtp_server = smtplib.SMTP(self.host, self.port)
             smtp_server.starttls()
         except smtplib.SMTPNotSupportedError:
-            print("The server does not support the STARTTLS extension.")
-            smtp_server = smtplib.SMTP_SSL(self.host, self.port)
+            self.logger.info(f"The server {self.host}:{self.port} does not support the STARTTLS extension.")
+            if self.cert_required:
+                context = create_default_context(cafile=self.ca_file)
+            else:
+                context = None
+            smtp_server = smtplib.SMTP_SSL(self.host, self.port, context=context)

         smtp_server.ehlo()
         if self.user is not None:
@@ -80,7 +101,7 @@ class MailExporter(AbstractExporter, ABC):
         return smtp_server
         # except Exception as err:
         #     traceback.print_tb(err.__traceback__)
-        #     logger.warning(err)
+        #     self.logger.warning(err)

     def _export(self, recipient, subject, body):
         mime_msg = MIMEMultipart()
@@ -95,8 +116,8 @@ class MailExporter(AbstractExporter, ABC):
             smtp_client.quit()
         # except Exception as err:
         #     traceback.print_tb(err.__traceback__)
-        #     logger.warning(err)
-        print(f'Send notification: {subject} to {recipient}')
+        #     self.logger.warning(err)
+        self.logger.info(f'Send notification: {subject} to {recipient}')


 class MailExporterTracker(MailExporter):
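
The connection logic above first attempts STARTTLS and, when the server only speaks implicit TLS, falls back to SMTP_SSL, now threading an ssl context through so a private CA bundle can be validated. A self-contained sketch of the same pattern using only the standard library; host, port and CA path are placeholder values:

    import smtplib
    from ssl import create_default_context

    def smtp_connect(host, port, cert_required=False, ca_file=None):
        """Try STARTTLS first; fall back to implicit TLS (SMTP_SSL)."""
        try:
            server = smtplib.SMTP(host, port)
            server.starttls()
        except smtplib.SMTPNotSupportedError:
            # Server speaks TLS from the first byte: use SMTP_SSL instead.
            # A context built with cafile=... validates against a private CA.
            context = create_default_context(cafile=ca_file) if cert_required else None
            server = smtplib.SMTP_SSL(host, port, context=context)
        server.ehlo()
        return server

    # server = smtp_connect('smtp.example.org', 465, cert_required=True,
    #                       ca_file='/etc/ssl/private-ca.pem')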

View file

@@ -87,12 +87,15 @@ class FeederImporter(AbstractImporter):
         feeder_name = feeder.get_name()
         print(f'importing: {feeder_name} feeder')

-        item_id = feeder.get_item_id()
+        item_id = feeder.get_item_id()  # TODO replace me with object global id

         # process meta
         if feeder.get_json_meta():
             feeder.process_meta()

-        gzip64_content = feeder.get_gzip64_content()
-        return f'{feeder_name} {item_id} {gzip64_content}'
+        if feeder_name == 'telegram':
+            return item_id  # TODO support UI dashboard
+        else:
+            gzip64_content = feeder.get_gzip64_content()
+            return f'{feeder_name} {item_id} {gzip64_content}'

View file

@@ -47,7 +47,12 @@ class PystemonImporter(AbstractImporter):
                 if not content:
                     return None

-                return self.create_message(item_id, content, source='pystemon')
+                if full_item_path[-3:] == '.gz':
+                    gzipped = True
+                else:
+                    gzipped = False
+                return self.create_message(item_id, content, gzipped=gzipped, source='pystemon')

         except IOError as e:
             print(f'Error: {full_item_path}, IOError')

View file

@@ -89,7 +89,7 @@ class AbstractImporter(ABC):  # TODO ail queues
         if not gzipped:
             content = self.b64_gzip(content)
         elif not b64:
-            content = self.b64(gzipped)
+            content = self.b64(content)
         if not content:
             return None
         if isinstance(content, bytes):
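
The one-character fix above matters: the elif branch handles content that is already gzipped but not yet base64-encoded, so it must encode content, not the boolean gzipped. A sketch of the gzip+base64 round trip this importer relies on, in isolation:

    import base64
    import gzip

    def b64_gzip(content: bytes) -> bytes:
        return base64.standard_b64encode(gzip.compress(content))

    def ungzip_b64(payload: bytes) -> bytes:
        return gzip.decompress(base64.standard_b64decode(payload))

    raw = b'imported item content'
    assert ungzip_b64(b64_gzip(raw)) == raw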

View file

@@ -17,7 +17,7 @@ sys.path.append(os.environ['AIL_BIN'])
 ##################################
 from importer.feeders.Default import DefaultFeeder
 from lib.objects.Usernames import Username
-from lib import item_basic
+from lib.objects.Items import Item


 class JabberFeeder(DefaultFeeder):
@@ -36,7 +36,7 @@ class JabberFeeder(DefaultFeeder):
         self.item_id = f'{item_id}.gz'
         return self.item_id

-    def process_meta(self):
+    def process_meta(self):  # TODO replace me by message
         """
         Process JSON meta field.
         """
@@ -44,10 +44,12 @@ class JabberFeeder(DefaultFeeder):
         # item_basic.add_map_obj_id_item_id(jabber_id, item_id, 'jabber_id') ##############################################
         to = str(self.json_data['meta']['jabber:to'])
         fr = str(self.json_data['meta']['jabber:from'])
-        date = item_basic.get_item_date(item_id)
+        item = Item(self.item_id)
+        date = item.get_date()

         user_to = Username(to, 'jabber')
         user_fr = Username(fr, 'jabber')
-        user_to.add(date, self.item_id)
-        user_fr.add(date, self.item_id)
+        user_to.add(date, item)
+        user_fr.add(date, item)
         return None

View file

@@ -16,8 +16,28 @@ sys.path.append(os.environ['AIL_BIN'])
 # Import Project packages
 ##################################
 from importer.feeders.Default import DefaultFeeder
+from lib.ConfigLoader import ConfigLoader
+from lib.objects.Chats import Chat
+from lib.objects import Messages
+from lib.objects import UsersAccount
 from lib.objects.Usernames import Username
-from lib import item_basic
+
+import base64
+import io
+import gzip
+
+
+def gunzip_bytes_obj(bytes_obj):
+    gunzipped_bytes_obj = None
+    try:
+        in_ = io.BytesIO()
+        in_.write(bytes_obj)
+        in_.seek(0)
+        with gzip.GzipFile(fileobj=in_, mode='rb') as fo:
+            gunzipped_bytes_obj = fo.read()
+    except Exception as e:
+        print(f'Global; Invalid Gzip file: {e}')
+    return gunzipped_bytes_obj


 class TelegramFeeder(DefaultFeeder):
@@ -26,31 +46,90 @@ class TelegramFeeder(DefaultFeeder):
         self.name = 'telegram'

     # define item id
-    def get_item_id(self):
-        # TODO use telegram message date
-        date = datetime.date.today().strftime("%Y/%m/%d")
-        channel_id = str(self.json_data['meta']['channel_id'])
-        message_id = str(self.json_data['meta']['message_id'])
-        item_id = f'{channel_id}_{message_id}'
-        item_id = os.path.join('telegram', date, item_id)
-        self.item_id = f'{item_id}.gz'
+    def get_item_id(self):  # TODO rename self.item_id
+        # Get message date
+        timestamp = self.json_data['meta']['date']['timestamp']  # TODO CREATE DEFAULT TIMESTAMP
+        # if self.json_data['meta'].get('date'):
+        #     date = datetime.datetime.fromtimestamp(self.json_data['meta']['date']['timestamp'])
+        #     date = date.strftime('%Y/%m/%d')
+        # else:
+        #     date = datetime.date.today().strftime("%Y/%m/%d")
+        chat_id = str(self.json_data['meta']['chat']['id'])
+        message_id = str(self.json_data['meta']['id'])
+        self.item_id = Messages.create_obj_id('telegram', chat_id, message_id, timestamp)  # TODO rename self.item_id
         return self.item_id

     def process_meta(self):
         """
         Process JSON meta field.
         """
-        # channel_id = str(self.json_data['meta']['channel_id'])
-        # message_id = str(self.json_data['meta']['message_id'])
-        # telegram_id = f'{channel_id}_{message_id}'
-        # item_basic.add_map_obj_id_item_id(telegram_id, item_id, 'telegram_id') #########################################
-        user = None
-        if self.json_data['meta'].get('user'):
-            user = str(self.json_data['meta']['user'])
-        elif self.json_data['meta'].get('channel'):
-            user = str(self.json_data['meta']['channel'].get('username'))
-        if user:
-            date = item_basic.get_item_date(self.item_id)
-            username = Username(user, 'telegram')
-            username.add(date, self.item_id)
+        # message chat
+        meta = self.json_data['meta']
+        mess_id = self.json_data['meta']['id']
+        if meta.get('reply_to'):
+            reply_to_id = meta['reply_to']['id']
+        else:
+            reply_to_id = None
+
+        timestamp = meta['date']['timestamp']
+        date = datetime.datetime.fromtimestamp(timestamp)
+        date = date.strftime('%Y%m%d')
+
+        if self.json_data.get('translation'):
+            translation = self.json_data['translation']
+        else:
+            translation = None
+
+        decoded = base64.standard_b64decode(self.json_data['data'])
+        content = gunzip_bytes_obj(decoded)
+        message = Messages.create(self.item_id, content, translation=translation)
+
+        if meta.get('chat'):
+            chat = Chat(meta['chat']['id'], 'telegram')
+
+            if meta['chat'].get('username'):
+                chat_username = Username(meta['chat']['username'], 'telegram')
+                chat.update_username_timeline(chat_username.get_global_id(), timestamp)
+
+            # Chat---Message
+            chat.add(date)
+            chat.add_message(message.get_global_id(), timestamp, mess_id, reply_id=reply_to_id)
+        else:
+            chat = None
+
+        # message sender
+        if meta.get('sender'):  # TODO handle message channel forward - check if is user
+            user_id = meta['sender']['id']
+            user_account = UsersAccount.UserAccount(user_id, 'telegram')
+            # UserAccount---Message
+            user_account.add(date, obj=message)
+            # UserAccount---Chat
+            user_account.add_correlation(chat.type, chat.get_subtype(r_str=True), chat.id)
+
+            if meta['sender'].get('firstname'):
+                user_account.set_first_name(meta['sender']['firstname'])
+            if meta['sender'].get('lastname'):
+                user_account.set_last_name(meta['sender']['lastname'])
+            if meta['sender'].get('phone'):
+                user_account.set_phone(meta['sender']['phone'])
+
+            if meta['sender'].get('username'):
+                username = Username(meta['sender']['username'], 'telegram')
+                # TODO timeline or/and correlation ????
+                user_account.add_correlation(username.type, username.get_subtype(r_str=True), username.id)
+                user_account.update_username_timeline(username.get_global_id(), timestamp)
+                # Username---Message
+                username.add(date)  # TODO # correlation message ???
+
+                # if chat:  # TODO Chat---Username correlation ???
+                #     # Chat---Username
+                #     chat.add_correlation(username.type, username.get_subtype(r_str=True), username.id)
+
+        # if meta.get('fwd_from'):
+        #     if meta['fwd_from'].get('post_author')  # user first name
+
+        # TODO reply threads ????
+        # message edit ????
+
         return None
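
The feeder now derives the day bucket from the message's own timestamp rather than today's date. A quick illustration with sample values:

    import datetime

    meta = {'date': {'timestamp': 1694160775}, 'chat': {'id': 123}, 'id': 456}
    timestamp = meta['date']['timestamp']
    date = datetime.datetime.fromtimestamp(timestamp).strftime('%Y%m%d')
    print(date)  # e.g. '20230908': the day bucket passed to chat.add(date)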

View file

@@ -17,7 +17,7 @@ sys.path.append(os.environ['AIL_BIN'])
 ##################################
 from importer.feeders.Default import DefaultFeeder
 from lib.objects.Usernames import Username
-from lib import item_basic
+from lib.objects.Items import Item


 class TwitterFeeder(DefaultFeeder):
@@ -40,9 +40,9 @@ class TwitterFeeder(DefaultFeeder):
         '''
         # tweet_id = str(self.json_data['meta']['twitter:tweet_id'])
         # item_basic.add_map_obj_id_item_id(tweet_id, item_id, 'twitter_id') ############################################

-        date = item_basic.get_item_date(self.item_id)
+        item = Item(self.item_id)
+        date = item.get_date()
         user = str(self.json_data['meta']['twitter:id'])
         username = Username(user, 'twitter')
-        username.add(date, item_id)
+        username.add(date, item)
         return None

View file

@@ -235,18 +235,27 @@ class Investigation(object):
                 objs.append(dict_obj)
         return objs

+    def get_objects_comment(self, obj_global_id):
+        return r_tracking.hget(f'investigations:objs:comment:{self.uuid}', obj_global_id)
+
+    def set_objects_comment(self, obj_global_id, comment):
+        if comment:
+            r_tracking.hset(f'investigations:objs:comment:{self.uuid}', obj_global_id, comment)
+
     # # TODO: def register_object(self, Object): in OBJECT CLASS
-    def register_object(self, obj_id, obj_type, subtype):
+    def register_object(self, obj_id, obj_type, subtype, comment=''):
         r_tracking.sadd(f'investigations:objs:{self.uuid}', f'{obj_type}:{subtype}:{obj_id}')
         r_tracking.sadd(f'obj:investigations:{obj_type}:{subtype}:{obj_id}', self.uuid)
+        if comment:
+            self.set_objects_comment(f'{obj_type}:{subtype}:{obj_id}', comment)
         timestamp = int(time.time())
         self.set_last_change(timestamp)

     def unregister_object(self, obj_id, obj_type, subtype):
         r_tracking.srem(f'investigations:objs:{self.uuid}', f'{obj_type}:{subtype}:{obj_id}')
         r_tracking.srem(f'obj:investigations:{obj_type}:{subtype}:{obj_id}', self.uuid)
+        r_tracking.hdel(f'investigations:objs:comment:{self.uuid}', f'{obj_type}:{subtype}:{obj_id}')
         timestamp = int(time.time())
         self.set_last_change(timestamp)
@@ -351,7 +360,7 @@ def get_investigations_selector():
     for investigation_uuid in get_all_investigations():
         investigation = Investigation(investigation_uuid)
         name = investigation.get_info()
-        l_investigations.append({"id":investigation_uuid, "name": name})
+        l_investigations.append({"id": investigation_uuid, "name": name})
     return l_investigations

 #{id:'8dc4b81aeff94a9799bd70ba556fa345',name:"Paris"}
@@ -453,7 +462,11 @@ def api_register_object(json_dict):
     if subtype == 'None':
         subtype = ''
     obj_id = json_dict.get('id', '').replace(' ', '')
-    res = investigation.register_object(obj_id, obj_type, subtype)
+    comment = json_dict.get('comment', '')
+    # if comment:
+    #     comment = escape(comment)
+    res = investigation.register_object(obj_id, obj_type, subtype, comment=comment)
     return res, 200

 def api_unregister_object(json_dict):
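
Per-object comments are stored in a Redis hash keyed by investigation UUID, one field per object global id, so registering, reading and unregistering stay O(1). A minimal sketch of that layout with redis-py; connection parameters are placeholders:

    import redis

    r = redis.Redis(host='localhost', port=6379, decode_responses=True)

    def set_obj_comment(investigation_uuid, obj_global_id, comment):
        if comment:
            r.hset(f'investigations:objs:comment:{investigation_uuid}', obj_global_id, comment)

    def get_obj_comment(investigation_uuid, obj_global_id):
        return r.hget(f'investigations:objs:comment:{investigation_uuid}', obj_global_id)

    def del_obj_comment(investigation_uuid, obj_global_id):
        # called on unregister so no orphan comment survives the object
        r.hdel(f'investigations:objs:comment:{investigation_uuid}', obj_global_id)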

View file

@@ -338,7 +338,7 @@ def get_galaxy_meta(galaxy_name, nb_active_tags=False):
     else:
         meta['icon'] = f'fas fa-{icon}'
     if nb_active_tags:
-        meta['nb_active_tags'] = get_galaxy_nb_tags_enabled(galaxy)
+        meta['nb_active_tags'] = get_galaxy_nb_tags_enabled(galaxy.type)
         meta['nb_tags'] = len(get_galaxy_tags(galaxy.type))
     return meta

View file

@@ -207,6 +207,13 @@ class Tracker:
         if filters:
             self._set_field('filters', json.dumps(filters))

+    def del_filters(self, tracker_type, to_track):
+        filters = self.get_filters()
+        for obj_type in filters:
+            r_tracker.srem(f'trackers:objs:{tracker_type}:{obj_type}', to_track)
+            r_tracker.srem(f'trackers:uuid:{tracker_type}:{to_track}', f'{self.uuid}:{obj_type}')
+        r_tracker.hdel(f'tracker:{self.uuid}', 'filters')
+
     def get_tracked(self):
         return self._get_field('tracked')
@@ -513,6 +520,7 @@ class Tracker:
             self._set_mails(mails)

         # Filters
+        self.del_filters(old_type, old_to_track)
         if not filters:
             filters = {}
             for obj_type in get_objects_tracked():
@@ -522,9 +530,6 @@ class Tracker:
         for obj_type in filters:
             r_tracker.sadd(f'trackers:objs:{tracker_type}:{obj_type}', to_track)
             r_tracker.sadd(f'trackers:uuid:{tracker_type}:{to_track}', f'{self.uuid}:{obj_type}')
-            if tracker_type != old_type:
-                r_tracker.srem(f'trackers:objs:{old_type}:{obj_type}', old_to_track)
-                r_tracker.srem(f'trackers:uuid:{old_type}:{old_to_track}', f'{self.uuid}:{obj_type}')

         # Refresh Trackers
         trigger_trackers_refresh(tracker_type)
@@ -650,14 +655,14 @@ def get_user_trackers_meta(user_id, tracker_type=None):
     metas = []
     for tracker_uuid in get_user_trackers(user_id, tracker_type=tracker_type):
         tracker = Tracker(tracker_uuid)
-        metas.append(tracker.get_meta(options={'mails', 'sparkline', 'tags'}))
+        metas.append(tracker.get_meta(options={'description', 'mails', 'sparkline', 'tags'}))
     return metas

 def get_global_trackers_meta(tracker_type=None):
     metas = []
     for tracker_uuid in get_global_trackers(tracker_type=tracker_type):
         tracker = Tracker(tracker_uuid)
-        metas.append(tracker.get_meta(options={'mails', 'sparkline', 'tags'}))
+        metas.append(tracker.get_meta(options={'description', 'mails', 'sparkline', 'tags'}))
     return metas

 def get_users_trackers_meta():

View file

@@ -247,7 +247,10 @@ class User(UserMixin):
             self.id = "__anonymous__"

     def exists(self):
-        return self.id != "__anonymous__"
+        if self.id == "__anonymous__":
+            return False
+        else:
+            return r_serv_db.exists(f'ail:user:metadata:{self.id}')

     # return True or False
     # def is_authenticated():
@@ -287,3 +290,6 @@ class User(UserMixin):
             return True
         else:
             return False
+
+    def get_role(self):
+        return r_serv_db.hget(f'ail:user:metadata:{self.id}', 'role')

View file

@@ -15,8 +15,8 @@ config_loader = ConfigLoader()
 r_serv_db = config_loader.get_db_conn("Kvrocks_DB")
 config_loader = None

-AIL_OBJECTS = sorted({'cookie-name', 'cve', 'cryptocurrency', 'decoded', 'domain', 'favicon', 'item', 'pgp',
-                      'screenshot', 'title', 'username'})
+AIL_OBJECTS = sorted({'chat', 'cookie-name', 'cve', 'cryptocurrency', 'decoded', 'domain', 'etag', 'favicon', 'hhhash', 'item',
+                      'pgp', 'screenshot', 'title', 'user-account', 'username'})

 def get_ail_uuid():
     ail_uuid = r_serv_db.get('ail:uuid')
@@ -38,9 +38,11 @@ def get_all_objects():
     return AIL_OBJECTS

 def get_objects_with_subtypes():
-    return ['cryptocurrency', 'pgp', 'username']
+    return ['chat', 'cryptocurrency', 'pgp', 'username']

 def get_object_all_subtypes(obj_type):
+    if obj_type == 'chat':
+        return ['discord', 'jabber', 'telegram']
     if obj_type == 'cryptocurrency':
         return ['bitcoin', 'bitcoin-cash', 'dash', 'ethereum', 'litecoin', 'monero', 'zcash']
     if obj_type == 'pgp':
@@ -66,6 +68,14 @@ def get_all_objects_with_subtypes_tuple():
             str_objs.append((obj_type, ''))
     return str_objs

+def unpack_obj_global_id(global_id, r_type='tuple'):
+    if r_type == 'dict':
+        obj = global_id.split(':', 2)
+        return {'type': obj[0], 'subtype': obj[1], 'id': obj[2]}
+    else:  # tuple(type, subtype, id)
+        return global_id.split(':', 2)
+
 ##-- AIL OBJECTS --##

 #### Redis ####
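
unpack_obj_global_id splits a global id of the form type:subtype:id; maxsplit=2 keeps any colons inside the object id itself intact. A worked example of its behaviour:

    def unpack_obj_global_id(global_id, r_type='tuple'):
        if r_type == 'dict':
            obj = global_id.split(':', 2)
            return {'type': obj[0], 'subtype': obj[1], 'id': obj[2]}
        else:  # tuple(type, subtype, id)
            return global_id.split(':', 2)

    print(unpack_obj_global_id('chat:telegram:123'))
    # ['chat', 'telegram', '123']
    print(unpack_obj_global_id('item::crawled/2023/09/08/foo.gz', r_type='dict'))
    # {'type': 'item', 'subtype': '', 'id': 'crawled/2023/09/08/foo.gz'}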

View file

@@ -15,38 +15,15 @@ config_loader = ConfigLoader()
 r_db = config_loader.get_db_conn("Kvrocks_DB")
 config_loader = None

-BACKGROUND_UPDATES = {
-    'v1.5': {
-        'nb_updates': 5,
-        'message': 'Tags and Screenshots'
-    },
-    'v2.4': {
-        'nb_updates': 1,
-        'message': ' Domains Tags and Correlations'
-    },
-    'v2.6': {
-        'nb_updates': 1,
-        'message': 'Domains Tags and Correlations'
-    },
-    'v2.7': {
-        'nb_updates': 1,
-        'message': 'Domains Tags'
-    },
-    'v3.4': {
-        'nb_updates': 1,
-        'message': 'Domains Languages'
-    },
-    'v3.7': {
-        'nb_updates': 1,
-        'message': 'Trackers first_seen/last_seen'
-    }
-}
+# # # # # # # #
+#             #
+#   UPDATE    #
+#             #
+# # # # # # # #

 def get_ail_version():
     return r_db.get('ail:version')

 def get_ail_float_version():
     version = get_ail_version()
     if version:
@@ -55,6 +32,179 @@ def get_ail_float_version():
         version = 0
     return version

+# # # - - # # #
+
+# # # # # # # # # # # #
+#                     #
+#  UPDATE BACKGROUND  #
+#                     #
+# # # # # # # # # # # #
+
+BACKGROUND_UPDATES = {
+    'v5.2': {
+        'message': 'Compress HAR',
+        'scripts': ['compress_har.py']
+    },
+}
+
+class AILBackgroundUpdate:
+    """
+    AIL Background Update.
+    """
+
+    def __init__(self, version):
+        self.version = version
+
+    def _get_field(self, field):
+        return r_db.hget('ail:update:background', field)
+
+    def _set_field(self, field, value):
+        r_db.hset('ail:update:background', field, value)
+
+    def get_version(self):
+        return self.version
+
+    def get_message(self):
+        return BACKGROUND_UPDATES.get(self.version, {}).get('message', '')
+
+    def get_error(self):
+        return self._get_field('error')
+
+    def set_error(self, error):  # TODO ADD LOGS
+        self._set_field('error', error)
+
+    def get_nb_scripts(self):
+        return int(len(BACKGROUND_UPDATES.get(self.version, {}).get('scripts', [''])))
+
+    def get_scripts(self):
+        return BACKGROUND_UPDATES.get(self.version, {}).get('scripts', [])
+
+    def get_nb_scripts_done(self):
+        done = self._get_field('done')
+        try:
+            done = int(done)
+        except (TypeError, ValueError):
+            done = 0
+        return done
+
+    def inc_nb_scripts_done(self):
+        self._set_field('done', self.get_nb_scripts_done() + 1)
+
+    def get_script(self):
+        return self._get_field('script')
+
+    def get_script_path(self):
+        path = os.path.basename(self.get_script())
+        if path:
+            return os.path.join(os.environ['AIL_HOME'], 'update', self.version, path)
+
+    def get_nb_to_update(self):  # TODO use cache ?????
+        nb_to_update = self._get_field('nb_to_update')
+        if not nb_to_update:
+            nb_to_update = 1
+        return int(nb_to_update)
+
+    def set_nb_to_update(self, nb):
+        self._set_field('nb_to_update', int(nb))
+
+    def get_nb_updated(self):  # TODO use cache ?????
+        nb_updated = self._get_field('nb_updated')
+        if not nb_updated:
+            nb_updated = 0
+        return int(nb_updated)
+
+    def inc_nb_updated(self):  # TODO use cache ?????
+        r_db.hincrby('ail:update:background', 'nb_updated', 1)
+
+    def get_progress(self):  # TODO use cache ?????
+        return self._get_field('progress')
+
+    def set_progress(self, progress):
+        self._set_field('progress', progress)
+
+    def update_progress(self):
+        nb_updated = self.get_nb_updated()
+        nb_to_update = self.get_nb_to_update()
+        if nb_updated == nb_to_update:
+            progress = 100
+        elif nb_updated > nb_to_update:
+            progress = 99
+        else:
+            progress = int((nb_updated * 100) / nb_to_update)
+        self.set_progress(progress)
+        print(f'{nb_updated}/{nb_to_update}    updated    {progress}%')
+        return progress
+
+    def is_running(self):
+        return r_db.hget('ail:update:background', 'version') == self.version
+
+    def get_meta(self, options=set()):
+        meta = {'version': self.get_version(),
+                'error': self.get_error(),
+                'script': self.get_script(),
+                'script_progress': self.get_progress(),
+                'nb_update': self.get_nb_scripts(),
+                'nb_completed': self.get_nb_scripts_done()}
+        meta['progress'] = int(meta['nb_completed'] * 100 / meta['nb_update'])
+        if 'message' in options:
+            meta['message'] = self.get_message()
+        return meta
+
+    def start(self):
+        self._set_field('version', self.version)
+        r_db.hdel('ail:update:background', 'error')
+
+    def start_script(self, script):
+        self.clear()
+        self._set_field('script', script)
+        self.set_progress(0)
+
+    def end_script(self):
+        self.set_progress(100)
+        self.inc_nb_scripts_done()
+
+    def clear(self):
+        r_db.hdel('ail:update:background', 'error')
+        r_db.hdel('ail:update:background', 'progress')
+        r_db.hdel('ail:update:background', 'nb_updated')
+        r_db.hdel('ail:update:background', 'nb_to_update')
+
+    def end(self):
+        r_db.delete('ail:update:background')
+        r_db.srem('ail:updates:background', self.version)
+
+# To Add in update script
+def add_background_update(version):
+    r_db.sadd('ail:updates:background', version)
+
+def is_update_background_running():
+    return r_db.exists('ail:update:background')
+
+def get_update_background_version():
+    return r_db.hget('ail:update:background', 'version')
+
+def get_update_background_meta(options=set()):
+    version = get_update_background_version()
+    if version:
+        return AILBackgroundUpdate(version).get_meta(options=options)
+    else:
+        return {}
+
+def get_update_background_to_launch():
+    to_launch = []
+    updates = r_db.smembers('ail:updates:background')
+    for version in BACKGROUND_UPDATES:
+        if version in updates:
+            to_launch.append(version)
+    return to_launch
+
+# # # - - # # #
+
+##########################################################################################
+##########################################################################################
+##########################################################################################
+
 def get_ail_all_updates(date_separator='-'):
     dict_update = r_db.hgetall('ail:update_date')
@@ -87,111 +237,6 @@ def check_version(version):
     return True

-#### UPDATE BACKGROUND ####
-
-def exits_background_update_to_launch():
-    return r_db.scard('ail:update:to_update') != 0
-
-def is_version_in_background_update(version):
-    return r_db.sismember('ail:update:to_update', version)
-
-def get_all_background_updates_to_launch():
-    return r_db.smembers('ail:update:to_update')
-
-def get_current_background_update():
-    return r_db.get('ail:update:update_in_progress')
-
-def get_current_background_update_script():
-    return r_db.get('ail:update:current_background_script')
-
-def get_current_background_update_script_path(version, script_name):
-    return os.path.join(os.environ['AIL_HOME'], 'update', version, script_name)
-
-def get_current_background_nb_update_completed():
-    return r_db.scard('ail:update:update_in_progress:completed')
-
-def get_current_background_update_progress():
-    progress = r_db.get('ail:update:current_background_script_stat')
-    if not progress:
-        progress = 0
-    return int(progress)
-
-def get_background_update_error():
-    return r_db.get('ail:update:error')
-
-def add_background_updates_to_launch(version):
-    return r_db.sadd('ail:update:to_update', version)
-
-def start_background_update(version):
-    r_db.delete('ail:update:error')
-    r_db.set('ail:update:update_in_progress', version)
-
-def set_current_background_update_script(script_name):
-    r_db.set('ail:update:current_background_script', script_name)
-    r_db.set('ail:update:current_background_script_stat', 0)
-
-def set_current_background_update_progress(progress):
-    r_db.set('ail:update:current_background_script_stat', progress)
-
-def set_background_update_error(error):
-    r_db.set('ail:update:error', error)
-
-def end_background_update_script():
-    r_db.sadd('ail:update:update_in_progress:completed')
-
-def end_background_update(version):
-    r_db.delete('ail:update:update_in_progress')
-    r_db.delete('ail:update:current_background_script')
-    r_db.delete('ail:update:current_background_script_stat')
-    r_db.delete('ail:update:update_in_progress:completed')
-    r_db.srem('ail:update:to_update', version)
-
-def clear_background_update():
-    r_db.delete('ail:update:error')
-    r_db.delete('ail:update:update_in_progress')
-    r_db.delete('ail:update:current_background_script')
-    r_db.delete('ail:update:current_background_script_stat')
-    r_db.delete('ail:update:update_in_progress:completed')
-
-def get_update_background_message(version):
-    return BACKGROUND_UPDATES[version]['message']
-
-# TODO: Detect error in subprocess
-def get_update_background_metadata():
-    dict_update = {}
-    version = get_current_background_update()
-    if version:
-        dict_update['version'] = version
-        dict_update['script'] = get_current_background_update_script()
-        dict_update['script_progress'] = get_current_background_update_progress()
-        dict_update['nb_update'] = BACKGROUND_UPDATES[dict_update['version']]['nb_updates']
-        dict_update['nb_completed'] = get_current_background_nb_update_completed()
-        dict_update['progress'] = int(dict_update['nb_completed'] * 100 / dict_update['nb_update'])
-        dict_update['error'] = get_background_update_error()
-    return dict_update
-
-##-- UPDATE BACKGROUND --##
-
 if __name__ == '__main__':
     res = check_version('v3.1..1')
     print(res)
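
The new AILBackgroundUpdate keeps all progress counters in a single Redis hash, and update_progress derives a percentage from them. The arithmetic in isolation, with the overshoot case clamped to 99 until the script finishes:

    def progress_pct(nb_updated, nb_to_update):
        """Mirror of AILBackgroundUpdate.update_progress()'s percentage logic."""
        if nb_updated == nb_to_update:
            return 100
        if nb_updated > nb_to_update:
            return 99   # overshoot never reports done early
        return int((nb_updated * 100) / nb_to_update)

    assert progress_pct(50, 200) == 25
    assert progress_pct(7, 7) == 100
    assert progress_pct(9, 7) == 99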

View file

@@ -41,17 +41,22 @@ config_loader = None
 ##################################

 CORRELATION_TYPES_BY_OBJ = {
+    "chat": ["user-account"],  # message or direct correlation like cve, bitcoin, ... ???
     "cookie-name": ["domain"],
-    "cryptocurrency": ["domain", "item"],
-    "cve": ["domain", "item"],
-    "decoded": ["domain", "item"],
-    "domain": ["cve", "cookie-name", "cryptocurrency", "decoded", "favicon", "item", "pgp", "title", "screenshot", "username"],
+    "cryptocurrency": ["domain", "item", "message"],
+    "cve": ["domain", "item", "message"],
+    "decoded": ["domain", "item", "message"],
+    "domain": ["cve", "cookie-name", "cryptocurrency", "decoded", "etag", "favicon", "hhhash", "item", "pgp", "title", "screenshot", "username"],
+    "etag": ["domain"],
     "favicon": ["domain", "item"],  # TODO Decoded
-    "item": ["cve", "cryptocurrency", "decoded", "domain", "favicon", "pgp", "screenshot", "title", "username"],
-    "pgp": ["domain", "item"],
+    "hhhash": ["domain"],
+    "item": ["cve", "cryptocurrency", "decoded", "domain", "favicon", "pgp", "screenshot", "title", "username"],  # chat ???
+    "message": ["cve", "cryptocurrency", "decoded", "pgp", "user-account"],  # chat ??
+    "pgp": ["domain", "item", "message"],
     "screenshot": ["domain", "item"],
     "title": ["domain", "item"],
-    "username": ["domain", "item"],
+    "user-account": ["chat", "message"],
+    "username": ["domain", "item", "message"],  # TODO chat-user/account
 }

 def get_obj_correl_types(obj_type):
@@ -63,6 +68,8 @@ def sanityze_obj_correl_types(obj_type, correl_types):
     correl_types = set(correl_types).intersection(obj_correl_types)
     if not correl_types:
         correl_types = obj_correl_types
+        if not correl_types:
+            return []
     return correl_types

 def get_nb_correlation_by_correl_type(obj_type, subtype, obj_id, correl_type):
@@ -169,18 +176,18 @@ def get_obj_str_id(obj_type, subtype, obj_id):
         subtype = ''
     return f'{obj_type}:{subtype}:{obj_id}'

-def get_correlations_graph_nodes_links(obj_type, subtype, obj_id, filter_types=[], max_nodes=300, level=1, flask_context=False):
+def get_correlations_graph_nodes_links(obj_type, subtype, obj_id, filter_types=[], max_nodes=300, level=1, objs_hidden=set(), flask_context=False):
     links = set()
     nodes = set()
     meta = {'complete': True, 'objs': set()}

     obj_str_id = get_obj_str_id(obj_type, subtype, obj_id)

-    _get_correlations_graph_node(links, nodes, meta, obj_type, subtype, obj_id, level, max_nodes, filter_types=filter_types, previous_str_obj='')
+    _get_correlations_graph_node(links, nodes, meta, obj_type, subtype, obj_id, level, max_nodes, filter_types=filter_types, objs_hidden=objs_hidden, previous_str_obj='')
     return obj_str_id, nodes, links, meta

-def _get_correlations_graph_node(links, nodes, meta, obj_type, subtype, obj_id, level, max_nodes, filter_types=[], previous_str_obj=''):
+def _get_correlations_graph_node(links, nodes, meta, obj_type, subtype, obj_id, level, max_nodes, filter_types=[], objs_hidden=set(), previous_str_obj=''):
     obj_str_id = get_obj_str_id(obj_type, subtype, obj_id)
     meta['objs'].add(obj_str_id)
     nodes.add(obj_str_id)
@@ -191,6 +198,10 @@ def _get_correlations_graph_node(links, nodes, meta, obj_type, subtype, obj_id,
         for str_obj in obj_correlations[correl_type]:
             subtype2, obj2_id = str_obj.split(':', 1)
             obj2_str_id = get_obj_str_id(correl_type, subtype2, obj2_id)
+            # filter objects to hide
+            if obj2_str_id in objs_hidden:
+                continue
+
             meta['objs'].add(obj2_str_id)

             if obj2_str_id == previous_str_obj:
@@ -204,5 +215,5 @@ def _get_correlations_graph_node(links, nodes, meta, obj_type, subtype, obj_id,
             if level > 0:
                 next_level = level - 1

-                _get_correlations_graph_node(links, nodes, meta, correl_type, subtype2, obj2_id, next_level, max_nodes, filter_types=filter_types, previous_str_obj=obj_str_id)
+                _get_correlations_graph_node(links, nodes, meta, correl_type, subtype2, obj2_id, next_level, max_nodes, filter_types=filter_types, objs_hidden=objs_hidden, previous_str_obj=obj_str_id)
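
The graph builder now threads an objs_hidden set through the recursion and skips hidden nodes before they are linked. The filtering pattern in reduced form, over a toy adjacency dict rather than the AIL correlation store:

    def build_graph(adjacency, start, objs_hidden=frozenset(), max_nodes=300):
        nodes, links = set(), set()

        def walk(node, level):
            nodes.add(node)
            for neighbor in adjacency.get(node, ()):
                if neighbor in objs_hidden:   # filter objects to hide
                    continue
                if len(nodes) >= max_nodes:
                    return
                nodes.add(neighbor)
                links.add(tuple(sorted((node, neighbor))))
                if level > 0:
                    walk(neighbor, level - 1)

        walk(start, level=1)
        return nodes, links

    adj = {'domain:x.onion': ['etag:abc', 'hhhash:123'], 'etag:abc': ['domain:y.onion']}
    print(build_graph(adj, 'domain:x.onion', objs_hidden={'hhhash:123'}))
    # hhhash:123 never appears in the returned nodes or links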

View file

@@ -39,6 +39,7 @@ from packages import git_status
 from packages import Date
 from lib.ConfigLoader import ConfigLoader
 from lib.objects.Domains import Domain
+from lib.objects import HHHashs
 from lib.objects.Items import Item

 config_loader = ConfigLoader()
@@ -134,7 +135,7 @@ def unpack_url(url):

 # # # # # # # #  TODO CREATE NEW OBJECT
 def get_favicon_from_html(html, domain, url):
-    favicon_urls = extract_favicon_from_html(html, url)
+    favicon_urls, favicons = extract_favicon_from_html(html, url)
     # add root favicon
     if not favicon_urls:
         favicon_urls.add(f'{urlparse(url).scheme}://{domain}/favicon.ico')
@@ -162,7 +163,6 @@ def extract_favicon_from_html(html, url):
     #   - <meta name="msapplication-TileColor" content="#aaaaaa"> <meta name="theme-color" content="#ffffff">
     #   - <meta name="msapplication-config" content="/icons/browserconfig.xml">
     # Root Favicon
-
     f = get_faup()
     f.decode(url)
@@ -244,13 +244,6 @@ def extract_description_from_html(html):
         return description['content']
     return ''

-def extract_description_from_html(html):
-    soup = BeautifulSoup(html, 'html.parser')
-    description = soup.find('meta', attrs={'name': 'description'})
-    if description:
-        return description['content']
-    return ''
-
 def extract_keywords_from_html(html):
     soup = BeautifulSoup(html, 'html.parser')
     keywords = soup.find('meta', attrs={'name': 'keywords'})
@@ -264,6 +257,7 @@ def extract_author_from_html(html):
     if keywords:
         return keywords['content']
     return ''
+
 # # # - - # # #
@@ -275,7 +269,7 @@ def extract_author_from_html(html):

 def create_har_id(date, item_id):
     item_id = item_id.split('/')[-1]
-    return os.path.join(date, f'{item_id}.json')
+    return os.path.join(date, f'{item_id}.json.gz')

 def save_har(har_id, har_content):
     # create dir
@@ -284,8 +278,8 @@ def save_har(har_id, har_content):
         os.makedirs(har_dir)
     # save HAR
     filename = os.path.join(get_har_dir(), har_id)
-    with open(filename, 'w') as f:
-        f.write(json.dumps(har_content))
+    with gzip.open(filename, 'wb') as f:
+        f.write(json.dumps(har_content).encode())

 def get_all_har_ids():
     har_ids = []
@@ -299,6 +293,7 @@ def get_all_har_ids():
     except (TypeError, ValueError):
         pass

+    if os.path.exists(today_root_dir):
         for file in [f for f in os.listdir(today_root_dir) if os.path.isfile(os.path.join(today_root_dir, f))]:
             har_id = os.path.relpath(os.path.join(today_root_dir, file), HAR_DIR)
             har_ids.append(har_id)
@@ -312,14 +307,17 @@ def get_all_har_ids():
                 har_ids.append(har_id)
     return har_ids

-def extract_cookies_names_from_har_by_har_id(har_id):
+def get_har_content(har_id):
     har_path = os.path.join(HAR_DIR, har_id)
-    with open(har_path) as f:
-        try:
-            har_content = json.loads(f.read())
-        except json.decoder.JSONDecodeError:
-            har_content = {}
-    return extract_cookies_names_from_har(har_content)
+    try:
+        with gzip.open(har_path) as f:
+            try:
+                return json.loads(f.read())
+            except json.decoder.JSONDecodeError:
+                return {}
+    except Exception as e:
+        print(e)  # TODO LOGS
+        return {}

 def extract_cookies_names_from_har(har):
     cookies = set()
@@ -334,17 +332,110 @@ def extract_cookies_names_from_har(har):
                 cookies.add(name)
     return cookies

-def _reprocess_all_hars():
+def _reprocess_all_hars_cookie_name():
     from lib.objects import CookiesNames
     for har_id in get_all_har_ids():
         domain = har_id.split('/')[-1]
-        domain = domain[:-41]
+        domain = domain[:-44]
         date = har_id.split('/')
         date = f'{date[-4]}{date[-3]}{date[-2]}'
-        for cookie_name in extract_cookies_names_from_har_by_har_id(har_id):
+        for cookie_name in extract_cookies_names_from_har(get_har_content(har_id)):
             print(domain, date, cookie_name)
             cookie = CookiesNames.create(cookie_name)
-            cookie.add(date, domain)
+            cookie.add(date, Domain(domain))
+
+def extract_etag_from_har(har):  # TODO check response url
+    etags = set()
+    for entrie in har.get('log', {}).get('entries', []):
+        for header in entrie.get('response', {}).get('headers', []):
+            if header.get('name') == 'etag':
+                # print(header)
+                etag = header.get('value')
+                if etag:
+                    etags.add(etag)
+    return etags
+
+def _reprocess_all_hars_etag():
+    from lib.objects import Etags
+    for har_id in get_all_har_ids():
+        domain = har_id.split('/')[-1]
+        domain = domain[:-44]
+        date = har_id.split('/')
+        date = f'{date[-4]}{date[-3]}{date[-2]}'
+        for etag_content in extract_etag_from_har(get_har_content(har_id)):
+            print(domain, date, etag_content)
+            etag = Etags.create(etag_content)
+            etag.add(date, Domain(domain))
+
+def extract_hhhash_by_id(har_id, domain, date):
+    return extract_hhhash(get_har_content(har_id), domain, date)
+
+def extract_hhhash(har, domain, date):
+    hhhashs = set()
+    urls = set()
+    for entrie in har.get('log', {}).get('entries', []):
+        url = entrie.get('request').get('url')
+        if url not in urls:
+            # filter redirect
+            if entrie.get('response').get('status') == 200:  # != 301:
+                # print(url, entrie.get('response').get('status'))
+                f = get_faup()
+                f.decode(url)
+                domain_url = f.get().get('domain')
+                if domain_url == domain:
+                    headers = entrie.get('response').get('headers')
+                    hhhash_header = HHHashs.build_hhhash_headers(headers)
+                    hhhash = HHHashs.hhhash_headers(hhhash_header)
+                    if hhhash not in hhhashs:
+                        print('', url, hhhash)
+                        # -----
+                        obj = HHHashs.create(hhhash_header, hhhash)
+                        obj.add(date, Domain(domain))
+                    hhhashs.add(hhhash)
+            urls.add(url)
+    print()
+    print()
+    print('HHHASH:')
+    for hhhash in hhhashs:
+        print(hhhash)
+    return hhhashs
+
+def _reprocess_all_hars_hhhashs():
+    for har_id in get_all_har_ids():
+        print()
+        print(har_id)
+        domain = har_id.split('/')[-1]
+        domain = domain[:-44]
+        date = har_id.split('/')
+        date = f'{date[-4]}{date[-3]}{date[-2]}'
+        extract_hhhash_by_id(har_id, domain, date)
+
+def _gzip_har(har_id):
+    har_path = os.path.join(HAR_DIR, har_id)
+    new_id = f'{har_path}.gz'
+    if not har_id.endswith('.gz'):
+        if not os.path.exists(new_id):
+            with open(har_path, 'rb') as f:
+                content = f.read()
+            if content:
+                with gzip.open(new_id, 'wb') as f:
+                    r = f.write(content)
+                    print(r)
+    if os.path.exists(new_id) and os.path.exists(har_path):
+        os.remove(har_path)
+        print('delete:', har_path)
+
+def _gzip_all_hars():
+    for har_id in get_all_har_ids():
+        _gzip_har(har_id)

 # # # - - # # #
@@ -662,8 +753,7 @@ class Cookie:
                 meta[field] = value
         if r_json:
             data = json.dumps(meta, indent=4, sort_keys=True)
-            meta = {'data': data}
-            meta['uuid'] = self.uuid
+            meta = {'data': data, 'uuid': self.uuid}
         return meta

     def edit(self, cookie_dict):
@@ -1225,8 +1315,13 @@ class CrawlerCapture:
         if task_uuid:
             return CrawlerTask(task_uuid)

-    def get_start_time(self):
-        return self.get_task().get_start_time()
+    def get_start_time(self, r_str=True):
+        start_time = self.get_task().get_start_time()
+        if r_str:
+            return start_time
+        else:
+            start_time = datetime.strptime(start_time, "%Y/%m/%d - %H:%M.%S").timestamp()
+            return int(start_time)

     def get_status(self):
         status = r_cache.hget(f'crawler:capture:{self.uuid}', 'status')
@@ -1239,7 +1334,8 @@ class CrawlerCapture:

     def create(self, task_uuid):
         if self.exists():
-            raise Exception(f'Error: Capture {self.uuid} already exists')
+            print(f'Capture {self.uuid} already exists')  # TODO LOGS
+            return None
         launch_time = int(time.time())
         r_crawler.hset(f'crawler:task:{task_uuid}', 'capture', self.uuid)
         r_crawler.hset('crawler:captures:tasks', self.uuid, task_uuid)
@@ -1492,6 +1588,11 @@ class CrawlerTask:
     def start(self):
         self._set_field('start_time', datetime.now().strftime("%Y/%m/%d - %H:%M.%S"))

+    def reset(self):
+        priority = 49
+        r_crawler.hdel(f'crawler:task:{self.uuid}', 'start_time')
+        self.add_to_db_crawler_queue(priority)
+
     # Crawler
     def remove(self):  # zrem cache + DB
         capture_uuid = self.get_capture()
@@ -1622,14 +1723,16 @@ def api_add_crawler_task(data, user_id=None):
     if frequency:
         # TODO verify user
-        return create_schedule(frequency, user_id, url, depth=depth_limit, har=har, screenshot=screenshot, header=None,
-                               cookiejar=cookiejar_uuid, proxy=proxy, user_agent=None, tags=tags), 200
+        task_uuid = create_schedule(frequency, user_id, url, depth=depth_limit, har=har, screenshot=screenshot, header=None,
+                                    cookiejar=cookiejar_uuid, proxy=proxy, user_agent=None, tags=tags)
     else:
         # TODO HEADERS
         # TODO USER AGENT
-        return create_task(url, depth=depth_limit, har=har, screenshot=screenshot, header=None,
-                           cookiejar=cookiejar_uuid, proxy=proxy, user_agent=None, tags=tags,
-                           parent='manual', priority=90), 200
+        task_uuid = create_task(url, depth=depth_limit, har=har, screenshot=screenshot, header=None,
+                                cookiejar=cookiejar_uuid, proxy=proxy, user_agent=None, tags=tags,
+                                parent='manual', priority=90)
+
+    return {'uuid': task_uuid}, 200

 #### ####
@@ -1702,13 +1805,13 @@ class CrawlerProxy:
         self.uuid = proxy_uuid

     def get_description(self):
-        return r_crawler.hgrt(f'crawler:proxy:{self.uuif}', 'description')
+        return r_crawler.hget(f'crawler:proxy:{self.uuid}', 'description')

     # Host
     # Port
     # Type -> need test
     def get_url(self):
-        return r_crawler.hgrt(f'crawler:proxy:{self.uuif}', 'url')
+        return r_crawler.hget(f'crawler:proxy:{self.uuid}', 'url')

 #### CRAWLER LACUS ####
@@ -1770,7 +1873,11 @@ def ping_lacus():
         ping = False
         req_error = {'error': 'Lacus URL undefined', 'status_code': 400}
     else:
-        ping = lacus.is_up
+        try:
+            ping = lacus.is_up
+        except:
+            req_error = {'error': 'Failed to connect Lacus URL', 'status_code': 400}
+            ping = False
     update_lacus_connection_status(ping, req_error=req_error)
     return ping
@@ -1830,7 +1937,7 @@ def api_set_crawler_max_captures(data):
     save_nb_max_captures(nb_captures)
     return nb_captures, 200

 ## TEST ##

 def is_test_ail_crawlers_successful():
     return r_db.hget('crawler:tor:test', 'success') == 'True'
@@ -1903,14 +2010,16 @@ def test_ail_crawlers():
 # TODO MOVE ME IN CRAWLER OR FLASK
 load_blacklist()

-# if __name__ == '__main__':
-#     delete_captures()
+if __name__ == '__main__':
+    # delete_captures()

-#     item_id = 'crawled/2023/02/20/data.gz'
-#     item = Item(item_id)
-#     content = item.get_content()
-#     temp_url = ''
-#     r = extract_favicon_from_html(content, temp_url)
-#     print(r)
-#     _reprocess_all_hars()
+    # item_id = 'crawled/2023/02/20/data.gz'
+    # item = Item(item_id)
+    # content = item.get_content()
+    # temp_url = ''
+    # r = extract_favicon_from_html(content, temp_url)
+    # print(r)
+    # _reprocess_all_hars_cookie_name()
+    # _reprocess_all_hars_etag()
+    # _gzip_all_hars()
+    _reprocess_all_hars_hhhashs()
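
HARs are now written gzip-compressed under a .json.gz suffix and read back transparently, which is what the v5.2 compress_har.py background update migrates old captures to. The round trip in isolation; the file path is a placeholder:

    import gzip
    import json

    def save_har(path, har_content):
        # serialize, then compress: the file on disk is gzip-wrapped JSON
        with gzip.open(path, 'wb') as f:
            f.write(json.dumps(har_content).encode())

    def load_har(path):
        try:
            with gzip.open(path) as f:
                try:
                    return json.loads(f.read())
                except json.decoder.JSONDecodeError:
                    return {}
        except OSError:
            return {}

    save_har('/tmp/example.json.gz', {'log': {'entries': []}})
    print(load_har('/tmp/example.json.gz'))  # {'log': {'entries': []}}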

View file

@@ -129,7 +129,7 @@ def get_item_url(item_id):

 def get_item_har(item_id):
     har = '/'.join(item_id.rsplit('/')[-4:])
-    har = f'{har}.json'
+    har = f'{har}.json.gz'
     path = os.path.join(ConfigLoader.get_hars_dir(), har)
     if os.path.isfile(path):
         return har

View file

@@ -104,9 +104,13 @@ def _get_word_regex(word):

 def convert_byte_offset_to_string(b_content, offset):
     byte_chunk = b_content[:offset + 1]
-    string_chunk = byte_chunk.decode()
-    offset = len(string_chunk) - 1
-    return offset
+    try:
+        string_chunk = byte_chunk.decode()
+        offset = len(string_chunk) - 1
+        return offset
+    except UnicodeDecodeError as e:
+        logger.error(f'Yara offset converter error, {str(e)}\n{offset}/{len(b_content)}')
+        return convert_byte_offset_to_string(b_content, offset - 1)

 # TODO RETRO HUNTS
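
Context for the convert_byte_offset_to_string change: YARA reports match offsets in bytes, while the tracker needs Python string (codepoint) offsets. Slicing a UTF-8 buffer can end in the middle of a multi-byte sequence and raise UnicodeDecodeError, so the function now backs off one byte and retries. The conversion in isolation:

    def convert_byte_offset_to_string(b_content, offset):
        byte_chunk = b_content[:offset + 1]
        try:
            return len(byte_chunk.decode()) - 1
        except UnicodeDecodeError:
            # offset landed inside a multi-byte sequence: step back one byte
            return convert_byte_offset_to_string(b_content, offset - 1)

    b = 'héllo'.encode()                         # 'h' is 1 byte, 'é' is 2 bytes
    print(convert_byte_offset_to_string(b, 2))   # byte offset 2 -> string offset 1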

bin/lib/objects/Chats.py (new executable file, 309 lines added)
View file

@@ -0,0 +1,309 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*

import os
import sys

from datetime import datetime
from flask import url_for
# from pymisp import MISPObject

sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib import ail_core
from lib.ConfigLoader import ConfigLoader
from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id
from lib.data_retention_engine import update_obj_date
from lib.objects import ail_objects
from lib.timeline_engine import Timeline

from lib.correlations_engine import get_correlation_by_correl_type

config_loader = ConfigLoader()
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
r_object = config_loader.get_db_conn("Kvrocks_Objects")
r_cache = config_loader.get_redis_conn("Redis_Cache")
config_loader = None


################################################################################
################################################################################
################################################################################

class Chat(AbstractSubtypeObject):  # TODO # ID == username ?????
    """
    AIL Chat Object. (strings)
    """

    def __init__(self, id, subtype):
        super(Chat, self).__init__('chat', id, subtype)

    # def get_ail_2_ail_payload(self):
    #     payload = {'raw': self.get_gzip_content(b64=True),
    #                 'compress': 'gzip'}
    #     return payload

    # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
    def delete(self):
        # # TODO:
        pass

    def get_link(self, flask_context=False):
        if flask_context:
            url = url_for('correlation.show_correlation', type=self.type, subtype=self.subtype, id=self.id)
        else:
            url = f'{baseurl}/correlation/show?type={self.type}&subtype={self.subtype}&id={self.id}'
        return url

    def get_svg_icon(self):  # TODO
        # if self.subtype == 'telegram':
        #     style = 'fab'
        #     icon = '\uf2c6'
        # elif self.subtype == 'discord':
        #     style = 'fab'
        #     icon = '\uf099'
        # else:
        #     style = 'fas'
        #     icon = '\uf007'
        style = 'fas'
        icon = '\uf086'
        return {'style': style, 'icon': icon, 'color': '#4dffff', 'radius': 5}

    def get_meta(self, options=set()):
        meta = self._get_meta(options=options)
        meta['id'] = self.id
        meta['subtype'] = self.subtype
        meta['tags'] = self.get_tags(r_list=True)
        return meta

    def get_misp_object(self):
        # obj_attrs = []
        # if self.subtype == 'telegram':
        #     obj = MISPObject('telegram-account', standalone=True)
        #     obj_attrs.append(obj.add_attribute('username', value=self.id))
        #
        # elif self.subtype == 'twitter':
        #     obj = MISPObject('twitter-account', standalone=True)
        #     obj_attrs.append(obj.add_attribute('name', value=self.id))
        #
        # else:
        #     obj = MISPObject('user-account', standalone=True)
        #     obj_attrs.append(obj.add_attribute('username', value=self.id))
        #
        # first_seen = self.get_first_seen()
        # last_seen = self.get_last_seen()
        # if first_seen:
        #     obj.first_seen = first_seen
        # if last_seen:
        #     obj.last_seen = last_seen
        # if not first_seen or not last_seen:
        #     self.logger.warning(
        #         f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={first_seen}, last={last_seen}')
        #
        # for obj_attr in obj_attrs:
        #     for tag in self.get_tags():
        #         obj_attr.add_tag(tag)
        # return obj
        return

    ############################################################################
    ############################################################################

    # others optional metas, ... -> # TODO ALL meta in hset

    def get_name(self):  # get username ????
        pass

    # users that send at least a message, else participants/spectators
    # correlation created by messages
    def get_users(self):
        users = set()
        accounts = self.get_correlation('user-account').get('user-account', [])
        for account in accounts:
            users.add(account[1:])
        return users

    def _get_timeline_username(self):
        return Timeline(self.get_global_id(), 'username')

    def get_username(self):
        return self._get_timeline_username().get_last_obj_id()

    def get_usernames(self):
        return self._get_timeline_username().get_objs_ids()

    def update_username_timeline(self, username_global_id, timestamp):
        self._get_timeline_username().add_timestamp(timestamp, username_global_id)

    # def get_last_message_id(self):
    #
    #     return r_object.hget(f'meta:{self.type}:{self.subtype}:{self.id}', 'last:message:id')

    def get_obj_message_id(self, obj_id):
        if obj_id.endswith('.gz'):
            obj_id = obj_id[:-3]
        return int(obj_id.split('_')[-1])

    def _get_message_timestamp(self, obj_global_id):
        return r_object.zscore(f'messages:{self.type}:{self.subtype}:{self.id}', obj_global_id)

    def _get_messages(self):
        return r_object.zrange(f'messages:{self.type}:{self.subtype}:{self.id}', 0, -1, withscores=True)

    def get_message_meta(self, obj_global_id, parent=True, mess_datetime=None):
        obj = ail_objects.get_obj_from_global_id(obj_global_id)
        mess_dict = obj.get_meta(options={'content', 'link', 'parent', 'user-account'})
        if mess_dict.get('parent') and parent:
            mess_dict['reply_to'] = self.get_message_meta(mess_dict['parent'], parent=False)
        if mess_dict.get('user-account'):
            user_account = ail_objects.get_obj_from_global_id(mess_dict['user-account'])
            mess_dict['user-account'] = {}
            mess_dict['user-account']['type'] = user_account.get_type()
            mess_dict['user-account']['subtype'] = user_account.get_subtype(r_str=True)
            mess_dict['user-account']['id'] = user_account.get_id()
            username = user_account.get_username()
            if username:
                username = ail_objects.get_obj_from_global_id(username).get_default_meta(link=False)
            mess_dict['user-account']['username'] = username  # TODO get username at the given timestamp ???
        else:
            mess_dict['user-account'] = {'id': 'UNKNOWN'}
        if not mess_datetime:
            obj_mess_id = self._get_message_timestamp(obj_global_id)
            mess_datetime = datetime.fromtimestamp(obj_mess_id)
        mess_dict['date'] = mess_datetime.isoformat(' ')
        mess_dict['hour'] = mess_datetime.strftime('%H:%M:%S')
        return mess_dict

    def get_messages(self, start=0, page=1, nb=500):  # TODO limit nb returned, # TODO add replies
        start = 0
        stop = -1
        # r_object.delete(f'messages:{self.type}:{self.subtype}:{self.id}')

        # TODO chat without username ???? -> chat ID ????
        messages = {}
        curr_date = None
        for message in self._get_messages():
            date = datetime.fromtimestamp(message[1])
            date_day = date.strftime('%Y/%m/%d')
            if date_day != curr_date:
                messages[date_day] = []
                curr_date = date_day
            mess_dict = self.get_message_meta(message[0], parent=True, mess_datetime=date)
            messages[date_day].append(mess_dict)
        return messages

    # Zset with ID ??? id -> item id ??? multiple id == media + text
    #                  id -> media id
    # How do we handle reply/thread ??? -> separate with new chats name/id ZSET ???
    # Handle media ???
    # list of message id -> obj_id
    # list of obj_id ->
    # abuse parent children ???

    # def add(self, timestamp, obj_id, mess_id=0, username=None, user_id=None):
    #     date = # TODO get date from object
    #     self.update_daterange(date)
    #     update_obj_date(date, self.type, self.subtype)
    #
    #     # daily
    #     r_object.hincrby(f'{self.type}:{self.subtype}:{date}', self.id, 1)
    #     # all subtypes
    #     r_object.zincrby(f'{self.type}_all:{self.subtype}', 1, self.id)
    #
    #     #######################################################################
    #     #######################################################################
    #
    #     # Correlations
    #     self.add_correlation('item', '', item_id)
    #     # domain
    #     if is_crawled(item_id):
    #         domain = get_item_domain(item_id)
    #         self.add_correlation('domain', '', domain)

    # TODO kvrocks exception if key don't exists
    def get_obj_by_message_id(self, mess_id):
        return r_object.hget(f'messages:ids:{self.type}:{self.subtype}:{self.id}', mess_id)

    # importer -> use cache for previous reply SET to_add_id: previously_imported : expire SET key -> 30 mn
    def add_message(self, obj_global_id, timestamp, mess_id, reply_id=None):
        r_object.hset(f'messages:ids:{self.type}:{self.subtype}:{self.id}', mess_id, obj_global_id)
        r_object.zadd(f'messages:{self.type}:{self.subtype}:{self.id}', {obj_global_id: timestamp})
        if reply_id:
            reply_obj = self.get_obj_by_message_id(reply_id)
            if reply_obj:
                self.add_obj_children(reply_obj, obj_global_id)
            else:
                self.add_message_cached_reply(reply_id, mess_id)
        # ADD cached replies
        for reply_obj in self.get_cached_message_reply(mess_id):
            self.add_obj_children(obj_global_id, reply_obj)

    def _get_message_cached_reply(self, message_id):
        return r_cache.smembers(f'messages:ids:{self.type}:{self.subtype}:{self.id}:{message_id}')

    def get_cached_message_reply(self, message_id):
        objs_global_id = []
        for mess_id in self._get_message_cached_reply(message_id):
            obj_global_id = self.get_obj_by_message_id(mess_id)
            if obj_global_id:
                objs_global_id.append(obj_global_id)
        return objs_global_id

    def add_message_cached_reply(self, reply_to_id, message_id):
        r_cache.sadd(f'messages:ids:{self.type}:{self.subtype}:{self.id}:{reply_to_id}', message_id)
        r_cache.expire(f'messages:ids:{self.type}:{self.subtype}:{self.id}:{reply_to_id}', 600)

    # TODO nb replies = nb son ???? what if it create a onion item ??? -> need source filtering


# TODO factorize
def get_all_subtypes():
    return ail_core.get_object_all_subtypes('chat')

def get_all():
    objs = {}
    for subtype in get_all_subtypes():
        objs[subtype] = get_all_by_subtype(subtype)
    return objs

def get_all_by_subtype(subtype):
    return get_all_id('chat', subtype)

# # TODO FILTER NAME + Key + mail
# def sanitize_username_name_to_search(name_to_search, subtype):  # TODO FILTER NAME
#
#     return name_to_search
#
# def search_usernames_by_name(name_to_search, subtype, r_pos=False):
#     usernames = {}
#     # for subtype in subtypes:
#     r_name = sanitize_username_name_to_search(name_to_search, subtype)
#     if not name_to_search or isinstance(r_name, dict):
#         # break
#         return usernames
#     r_name = re.compile(r_name)
#     for user_name in get_all_usernames_by_subtype(subtype):
#         res = re.search(r_name, user_name)
#         if res:
#             usernames[user_name] = {}
#             if r_pos:
#                 usernames[user_name]['hl-start'] = res.start()
#                 usernames[user_name]['hl-end'] = res.end()
#     return usernames


if __name__ == '__main__':
    chat = Chat('test', 'telegram')
    r = chat.get_messages()
    print(r)
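To make the reply handling above concrete, a usage sketch (all ids hypothetical; assumes a configured AIL backend). add_message() takes two paths: if the replied-to message is already known, the new message is attached as its child immediately; otherwise the reply is parked in a 600-second Redis cache set and linked once the parent is imported.

    chat = Chat('mychannel', 'telegram')

    # Reply seen before its parent: message 43 replies to unknown message 42,
    # so '43' is cached under the reply set of '42'.
    chat.add_message('message::telegram/1692189934/mychannel_43', 1692189934, '43', reply_id='42')

    # Parent arrives later: add_message() finds '43' in get_cached_message_reply('42')
    # and calls add_obj_children() to record the parent/child link.
    chat.add_message('message::telegram/1692189930/mychannel_42', 1692189930, '42')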


@@ -138,7 +138,7 @@ class Decoded(AbstractDaterangeObject):
             with open(filepath, 'rb') as f:
                 content = f.read()
             return content
-        elif r_str == 'bytesio':
+        elif r_type == 'bytesio':
             with open(filepath, 'rb') as f:
                 content = BytesIO(f.read())
             return content

@@ -149,7 +149,7 @@ class Decoded(AbstractDaterangeObject):
         with zipfile.ZipFile(zip_content, "w") as zf:
             # TODO: Fix password
             # zf.setpassword(b"infected")
-            zf.writestr(self.id, self.get_content().getvalue())
+            zf.writestr(self.id, self.get_content(r_type='bytesio').getvalue())
         zip_content.seek(0)
         return zip_content


@@ -389,10 +389,10 @@ class Domain(AbstractObject):
                 har = get_item_har(item_id)
                 if har:
                     print(har)
-                    _write_in_zip_buffer(zf, os.path.join(hars_dir, har), f'{basename}.json')
+                    _write_in_zip_buffer(zf, os.path.join(hars_dir, har), f'{basename}.json.gz')
                 # Screenshot
                 screenshot = self._get_external_correlation('item', '', item_id, 'screenshot')
-                if screenshot:
+                if screenshot and screenshot['screenshot']:
                     screenshot = screenshot['screenshot'].pop()[1:]
                     screenshot = os.path.join(screenshot[0:2], screenshot[2:4], screenshot[4:6], screenshot[6:8],
                                               screenshot[8:10], screenshot[10:12], screenshot[12:])

@@ -595,21 +595,22 @@ def get_domains_up_by_filers(domain_types, date_from=None, date_to=None, tags=[]
     return None

 def sanitize_domain_name_to_search(name_to_search, domain_type):
+    if not name_to_search:
+        return ""
     if domain_type == 'onion':
         r_name = r'[a-z0-9\.]+'
     else:
         r_name = r'[a-zA-Z0-9-_\.]+'
     # invalid domain name
     if not re.fullmatch(r_name, name_to_search):
-        res = re.match(r_name, name_to_search)
-        return {'search': name_to_search, 'error': res.string.replace(res[0], '')}
+        return ""
     return name_to_search.replace('.', '\.')

 def search_domain_by_name(name_to_search, domain_types, r_pos=False):
     domains = {}
     for domain_type in domain_types:
         r_name = sanitize_domain_name_to_search(name_to_search, domain_type)
-        if not name_to_search or isinstance(r_name, dict):
+        if not r_name:
             break
         r_name = re.compile(r_name)
         for domain in get_domains_up_by_type(domain_type):
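A quick sketch of what the hardened sanitizer above returns (inputs hypothetical): a regex-escaped name on success, and an empty string, which the caller now treats as "stop searching", for empty or invalid input.

    print(sanitize_domain_name_to_search('example.onion', 'onion'))  # example\.onion (regex-safe)
    print(sanitize_domain_name_to_search('not a domain!', 'onion'))  # '' (invalid characters)
    print(sanitize_domain_name_to_search('', 'onion'))               # '' (new early return)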

bin/lib/objects/Etags.py (new executable file, +121 lines)

@@ -0,0 +1,121 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*

import os
import sys

from hashlib import sha256
from flask import url_for
from pymisp import MISPObject

sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.ConfigLoader import ConfigLoader
from lib.objects.abstract_daterange_object import AbstractDaterangeObject, AbstractDaterangeObjects

config_loader = ConfigLoader()
r_objects = config_loader.get_db_conn("Kvrocks_Objects")
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
config_loader = None


# TODO NEW ABSTRACT OBJECT -> daterange for all objects ????
class Etag(AbstractDaterangeObject):
    """
    AIL Etag Object.
    """

    def __init__(self, obj_id):
        super(Etag, self).__init__('etag', obj_id)

    # def get_ail_2_ail_payload(self):
    #     payload = {'raw': self.get_gzip_content(b64=True),
    #                 'compress': 'gzip'}
    #     return payload

    # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
    def delete(self):
        # # TODO:
        pass

    def get_content(self, r_type='str'):
        if r_type == 'str':
            return self._get_field('content')

    def get_link(self, flask_context=False):
        if flask_context:
            url = url_for('correlation.show_correlation', type=self.type, id=self.id)
        else:
            url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}'
        return url

    # TODO # CHANGE COLOR
    def get_svg_icon(self):
        return {'style': 'fas', 'icon': '\uf02b', 'color': '#556F65', 'radius': 5}

    def get_misp_object(self):
        obj_attrs = []
        obj = MISPObject('etag')
        first_seen = self.get_first_seen()
        last_seen = self.get_last_seen()
        if first_seen:
            obj.first_seen = first_seen
        if last_seen:
            obj.last_seen = last_seen
        if not first_seen or not last_seen:
            self.logger.warning(
                f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={first_seen}, last={last_seen}')

        obj_attrs.append(obj.add_attribute('etag', value=self.get_content()))
        for obj_attr in obj_attrs:
            for tag in self.get_tags():
                obj_attr.add_tag(tag)
        return obj

    def get_nb_seen(self):
        return self.get_nb_correlation('domain')

    def get_meta(self, options=set()):
        meta = self._get_meta(options=options)
        meta['id'] = self.id
        meta['tags'] = self.get_tags(r_list=True)
        meta['content'] = self.get_content()
        return meta

    def add(self, date, obj_id):  # date = HAR Date
        self._add(date, 'domain', '', obj_id)

    def create(self, content, _first_seen=None, _last_seen=None):
        if not isinstance(content, str):
            content = content.decode()
        self._set_field('content', content)
        self._create()


def create(content):
    if isinstance(content, str):
        content = content.encode()
    obj_id = sha256(content).hexdigest()
    etag = Etag(obj_id)
    if not etag.exists():
        etag.create(content)
    return etag


class Etags(AbstractDaterangeObjects):
    """
    Etags Objects
    """
    def __init__(self):
        super().__init__('etag', Etag)

    def sanitize_id_to_search(self, name_to_search):
        return name_to_search  # TODO

# if __name__ == '__main__':
#     name_to_search = '98'
#     print(search_cves_by_name(name_to_search))
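For reference, a minimal sketch of how the module-level create() above derives the object id (standard hashlib only; the ETag value is illustrative): the id is simply the SHA256 hex digest of the raw header value.

    from hashlib import sha256

    etag_value = 'W/"0815-abcdef"'  # illustrative ETag header value
    obj_id = sha256(etag_value.encode()).hexdigest()
    # create(etag_value) builds Etag(obj_id) and stores the decoded value
    # in the 'content' field the first time it is seen.
    print(obj_id)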

bin/lib/objects/HHHashs.py (new executable file, +138 lines)

@@ -0,0 +1,138 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*

import hashlib
import os
import sys

from flask import url_for
from pymisp import MISPObject

sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.ConfigLoader import ConfigLoader
from lib.objects.abstract_daterange_object import AbstractDaterangeObject, AbstractDaterangeObjects

config_loader = ConfigLoader()
r_objects = config_loader.get_db_conn("Kvrocks_Objects")
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
config_loader = None


class HHHash(AbstractDaterangeObject):
    """
    AIL HHHash Object.
    """

    def __init__(self, obj_id):
        super(HHHash, self).__init__('hhhash', obj_id)

    # def get_ail_2_ail_payload(self):
    #     payload = {'raw': self.get_gzip_content(b64=True),
    #                 'compress': 'gzip'}
    #     return payload

    # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
    def delete(self):
        # # TODO:
        pass

    def get_content(self, r_type='str'):
        if r_type == 'str':
            return self._get_field('content')

    def get_link(self, flask_context=False):
        if flask_context:
            url = url_for('correlation.show_correlation', type=self.type, id=self.id)
        else:
            url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}'
        return url

    # TODO # CHANGE COLOR
    def get_svg_icon(self):
        return {'style': 'fas', 'icon': '\uf036', 'color': '#71D090', 'radius': 5}

    def get_misp_object(self):
        obj_attrs = []
        obj = MISPObject('hhhash')
        first_seen = self.get_first_seen()
        last_seen = self.get_last_seen()
        if first_seen:
            obj.first_seen = first_seen
        if last_seen:
            obj.last_seen = last_seen
        if not first_seen or not last_seen:
            self.logger.warning(
                f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={first_seen}, last={last_seen}')

        obj_attrs.append(obj.add_attribute('hhhash', value=self.get_id()))
        obj_attrs.append(obj.add_attribute('hhhash-headers', value=self.get_content()))
        obj_attrs.append(obj.add_attribute('hhhash-tool', value='lacus'))
        for obj_attr in obj_attrs:
            for tag in self.get_tags():
                obj_attr.add_tag(tag)
        return obj

    def get_nb_seen(self):
        return self.get_nb_correlation('domain')

    def get_meta(self, options=set()):
        meta = self._get_meta(options=options)
        meta['id'] = self.id
        meta['tags'] = self.get_tags(r_list=True)
        meta['content'] = self.get_content()
        return meta

    def add(self, date, obj_id):  # date = HAR Date
        self._add(date, 'domain', '', obj_id)

    def create(self, hhhash_header, _first_seen=None, _last_seen=None):  # TODO CREATE ADD FUNCTION -> urls set
        self._set_field('content', hhhash_header)
        self._create()


def create(hhhash_header, hhhash=None):
    if not hhhash:
        hhhash = hhhash_headers(hhhash_header)
    hhhash = HHHash(hhhash)
    if not hhhash.exists():
        hhhash.create(hhhash_header)
    return hhhash


def build_hhhash_headers(dict_headers):  # filter_dup=True
    hhhash = ''
    previous_header = ''
    for header in dict_headers:
        header_name = header.get('name')
        if header_name:
            if header_name != previous_header:  # remove dup headers, filter playwright invalid splitting
                hhhash = f'{hhhash}:{header_name}'
                previous_header = header_name
    hhhash = hhhash[1:]
    # print(hhhash)
    return hhhash


def hhhash_headers(header_hhhash):
    m = hashlib.sha256()
    m.update(header_hhhash.encode())
    digest = m.hexdigest()
    return f"hhh:1:{digest}"


class HHHashs(AbstractDaterangeObjects):
    """
    HHHashs Objects
    """
    def __init__(self):
        super().__init__('hhhash', HHHash)

    def sanitize_id_to_search(self, name_to_search):
        return name_to_search  # TODO

# if __name__ == '__main__':
#     name_to_search = '98'
#     print(search_cves_by_name(name_to_search))
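To illustrate the HHHash construction implemented above, a self-contained sketch (header list illustrative): header names are concatenated with ':' in response order, consecutive duplicates dropped, then hashed into the hhh:1:<sha256> form.

    import hashlib

    headers = [{'name': 'Date'}, {'name': 'Server'}, {'name': 'Server'}, {'name': 'Content-Type'}]

    names = []
    previous = ''
    for header in headers:
        name = header.get('name')
        if name and name != previous:  # same duplicate filtering as build_hhhash_headers()
            names.append(name)
            previous = name
    hhhash_header = ':'.join(names)    # 'Date:Server:Content-Type'

    digest = hashlib.sha256(hhhash_header.encode()).hexdigest()
    print(f'hhh:1:{digest}')           # same shape as hhhash_headers() returns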


@@ -264,10 +264,9 @@ class Item(AbstractObject):
         """
         if options is None:
             options = set()
-        meta = {'id': self.id,
-                'date': self.get_date(separator=True),
-                'source': self.get_source(),
-                'tags': self.get_tags(r_list=True)}
+        meta = self.get_default_meta(tags=True)
+        meta['date'] = self.get_date(separator=True)
+        meta['source'] = self.get_source()
         # optional meta fields
         if 'content' in options:
             meta['content'] = self.get_content()

@@ -289,6 +288,8 @@ class Item(AbstractObject):
             meta['mimetype'] = self.get_mimetype(content=content)
         if 'investigations' in options:
             meta['investigations'] = self.get_investigations()
+        if 'link' in options:
+            meta['link'] = self.get_link(flask_context=True)
         # meta['encoding'] = None
         return meta

bin/lib/objects/Messages.py (new executable file, +275 lines)

@@ -0,0 +1,275 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*

import os
import re
import sys

import cld3
import html2text

from datetime import datetime
from pymisp import MISPObject

sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.ail_core import get_ail_uuid
from lib.objects.abstract_object import AbstractObject
from lib.ConfigLoader import ConfigLoader
from lib.data_retention_engine import update_obj_date, get_obj_date_first
# TODO Set all messages ???

from flask import url_for

config_loader = ConfigLoader()
r_cache = config_loader.get_redis_conn("Redis_Cache")
r_object = config_loader.get_db_conn("Kvrocks_Objects")
# r_content = config_loader.get_db_conn("Kvrocks_Content")
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
config_loader = None


# TODO SAVE OR EXTRACT MESSAGE SOURCE FOR ICON ?????????
# TODO iterate on all objects
# TODO also add support for small objects ????

# CAN Message exists without CHAT -> no convert it to object

# ID: source:chat_id:message_id ????
#
# /!\ handle null chat and message id -> chat = uuid and message = timestamp ???


class Message(AbstractObject):
    """
    AIL Message Object. (strings)
    """

    def __init__(self, id):  # TODO subtype or use source ????
        super(Message, self).__init__('message', id)  # message::< telegram/1692189934.380827/ChatID_MessageID >

    def exists(self):
        if self.subtype is None:
            return r_object.exists(f'meta:{self.type}:{self.id}')
        else:
            return r_object.exists(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}')

    def get_source(self):
        """
        Returns source/feeder name
        """
        l_source = self.id.split('/')[:-2]
        return os.path.join(*l_source)

    def get_basename(self):
        return os.path.basename(self.id)

    def get_content(self, r_type='str'):  # TODO ADD cache # TODO Compress content ???????
        """
        Returns content
        """
        content = self._get_field('content')
        if r_type == 'str':
            return content
        elif r_type == 'bytes':
            return content.encode()

    def get_date(self):
        timestamp = self.get_timestamp()
        return datetime.fromtimestamp(float(timestamp)).strftime('%Y%m%d')

    def get_timestamp(self):
        dirs = self.id.split('/')
        return dirs[-2]

    def get_message_id(self):  # TODO optimize
        message_id = self.get_basename().rsplit('_', 1)[1]
        # if message_id.endswith('.gz'):
        #     message_id = message_id[:-3]
        return message_id

    def get_chat_id(self):  # TODO optimize -> use me to tag Chat
        chat_id = self.get_basename().rsplit('_', 1)[0]
        # if chat_id.endswith('.gz'):
        #     chat_id = chat_id[:-3]
        return chat_id

    def get_user_account(self):
        user_account = self.get_correlation('user-account')
        if user_account.get('user-account'):
            return f'user-account:{user_account["user-account"].pop()}'

    # Update value on import
    # reply to -> parent ?
    # reply/comment -> children ?
    # nb views
    # reactions
    # nb forwards
    # room ???
    # message from channel ???
    # message media

    def get_translation(self):  # TODO support multiple translated languages ?????
        """
        Returns translated content
        """
        return self._get_field('translated')  # TODO multiples translation ... -> use set

    def _set_translation(self, translation):
        """
        Set translated content
        """
        return self._set_field('translated', translation)  # translation by hash ??? -> avoid translating multiple time

    def get_html2text_content(self, content=None, ignore_links=False):
        if not content:
            content = self.get_content()
        h = html2text.HTML2Text()
        h.ignore_links = ignore_links
        h.ignore_images = ignore_links
        return h.handle(content)

    # def get_ail_2_ail_payload(self):
    #     payload = {'raw': self.get_gzip_content(b64=True)}
    #     return payload

    def get_link(self, flask_context=False):
        if flask_context:
            url = url_for('correlation.show_correlation', type=self.type, id=self.id)
        else:
            url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}'
        return url

    def get_svg_icon(self):
        return {'style': 'fas', 'icon': '\uf4ad', 'color': '#4dffff', 'radius': 5}

    def get_misp_object(self):  # TODO
        obj = MISPObject('instant-message', standalone=True)
        obj_date = self.get_date()
        if obj_date:
            obj.first_seen = obj_date
        else:
            self.logger.warning(
                f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={obj_date}')

        # obj_attrs = [obj.add_attribute('first-seen', value=obj_date),
        #              obj.add_attribute('raw-data', value=self.id, data=self.get_raw_content()),
        #              obj.add_attribute('sensor', value=get_ail_uuid())]
        obj_attrs = []
        for obj_attr in obj_attrs:
            for tag in self.get_tags():
                obj_attr.add_tag(tag)
        return obj

    # def get_url(self):
    #     return r_object.hget(f'meta:item::{self.id}', 'url')

    # options: set of optional meta fields
    def get_meta(self, options=None):
        """
        :type options: set
        """
        if options is None:
            options = set()
        meta = self.get_default_meta(tags=True)
        meta['date'] = self.get_date()  # TODO replace me by timestamp ??????
        meta['source'] = self.get_source()
        # optional meta fields
        if 'content' in options:
            meta['content'] = self.get_content()
        if 'parent' in options:
            meta['parent'] = self.get_parent()
        if 'investigations' in options:
            meta['investigations'] = self.get_investigations()
        if 'link' in options:
            meta['link'] = self.get_link(flask_context=True)
        if 'user-account' in options:
            meta['user-account'] = self.get_user_account()
        # meta['encoding'] = None
        return meta

    def _languages_cleaner(self, content=None):
        if not content:
            content = self.get_content()
        # REMOVE URLS
        regex = r'\b(?:http://|https://)?(?:[a-zA-Z\d-]{,63}(?:\.[a-zA-Z\d-]{,63})+)(?:\:[0-9]+)*(?:/(?:$|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*\b'
        url_regex = re.compile(regex)
        urls = url_regex.findall(content)
        urls = sorted(urls, key=len, reverse=True)
        for url in urls:
            content = content.replace(url, '')
        # REMOVE PGP Blocks
        regex_pgp_public_blocs = r'-----BEGIN PGP PUBLIC KEY BLOCK-----[\s\S]+?-----END PGP PUBLIC KEY BLOCK-----'
        regex_pgp_signature = r'-----BEGIN PGP SIGNATURE-----[\s\S]+?-----END PGP SIGNATURE-----'
        regex_pgp_message = r'-----BEGIN PGP MESSAGE-----[\s\S]+?-----END PGP MESSAGE-----'
        re.compile(regex_pgp_public_blocs)
        re.compile(regex_pgp_signature)
        re.compile(regex_pgp_message)
        res = re.findall(regex_pgp_public_blocs, content)
        for it in res:
            content = content.replace(it, '')
        res = re.findall(regex_pgp_signature, content)
        for it in res:
            content = content.replace(it, '')
        res = re.findall(regex_pgp_message, content)
        for it in res:
            content = content.replace(it, '')
        return content

    def detect_languages(self, min_len=600, num_langs=3, min_proportion=0.2, min_probability=0.7):
        languages = []
        ## CLEAN CONTENT ##
        content = self.get_html2text_content(ignore_links=True)
        content = self._languages_cleaner(content=content)
        # REMOVE USELESS SPACE
        content = ' '.join(content.split())
        # - CLEAN CONTENT - #
        if len(content) >= min_len:
            for lang in cld3.get_frequent_languages(content, num_langs=num_langs):
                if lang.proportion >= min_proportion and lang.probability >= min_probability and lang.is_reliable:
                    languages.append(lang)
        return languages

    # def translate(self, content=None):  # TODO translation plugin
    #     # TODO get text language
    #     if not content:
    #         content = self.get_content()
    #     translated = argostranslate.translate.translate(content, 'ru', 'en')
    #     # Save translation
    #     self._set_translation(translated)
    #     return translated

    def create(self, content, translation, tags):
        self._set_field('content', content)
        # r_content.get(f'content:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', content)
        if translation:
            self._set_translation(translation)
        for tag in tags:
            self.add_tag(tag)

    # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
    def delete(self):
        pass


def create_obj_id(source, chat_id, message_id, timestamp):
    return f'{source}/{timestamp}/{chat_id}_{message_id}'

# TODO Check if already exists
# def create(source, chat_id, message_id, timestamp, content, tags=[]):
def create(obj_id, content, translation=None, tags=[]):
    message = Message(obj_id)
    if not message.exists():
        message.create(content, translation, tags)
    return message


# TODO Encode translation
if __name__ == '__main__':
    r = 'test'
    print(r)
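A short sketch of the message id layout encoded by create_obj_id() above (values hypothetical): the id is a path of the form <source>/<timestamp>/<chat_id>_<message_id>, and the accessors split it back apart.

    obj_id = create_obj_id('telegram', 'mychannel', '42', '1692189934.380827')
    print(obj_id)                    # telegram/1692189934.380827/mychannel_42

    message = Message(obj_id)
    print(message.get_timestamp())   # '1692189934.380827' (second-to-last path segment)
    print(message.get_chat_id())     # 'mychannel'
    print(message.get_message_id())  # '42'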


@@ -88,7 +88,7 @@ class Screenshot(AbstractObject):
         return obj

     def get_meta(self, options=set()):
-        meta = {'id': self.id}
+        meta = self.get_default_meta()
         meta['img'] = get_screenshot_rel_path(self.id)  ######### # TODO: Rename ME ??????
         meta['tags'] = self.get_tags(r_list=True)
         if 'tags_safe' in options:
bin/lib/objects/UsersAccount.py (new executable file, +155 lines)

@@ -0,0 +1,155 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*

import os
import sys
# import re

from flask import url_for
from pymisp import MISPObject

sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib import ail_core
from lib.ConfigLoader import ConfigLoader
from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id
from lib.timeline_engine import Timeline

config_loader = ConfigLoader()
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
config_loader = None


################################################################################
################################################################################
################################################################################

class UserAccount(AbstractSubtypeObject):
    """
    AIL User Object. (strings)
    """

    def __init__(self, id, subtype):
        super(UserAccount, self).__init__('user-account', id, subtype)

    # def get_ail_2_ail_payload(self):
    #     payload = {'raw': self.get_gzip_content(b64=True),
    #                 'compress': 'gzip'}
    #     return payload

    # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
    def delete(self):
        # # TODO:
        pass

    def get_link(self, flask_context=False):
        if flask_context:
            url = url_for('correlation.show_correlation', type=self.type, subtype=self.subtype, id=self.id)
        else:
            url = f'{baseurl}/correlation/show?type={self.type}&subtype={self.subtype}&id={self.id}'
        return url

    def get_svg_icon(self):  # TODO change icon/color
        if self.subtype == 'telegram':
            style = 'fab'
            icon = '\uf2c6'
        elif self.subtype == 'twitter':
            style = 'fab'
            icon = '\uf099'
        else:
            style = 'fas'
            icon = '\uf007'
        return {'style': style, 'icon': icon, 'color': '#4dffff', 'radius': 5}

    def get_first_name(self):
        return self._get_field('firstname')

    def get_last_name(self):
        return self._get_field('lastname')

    def get_phone(self):
        return self._get_field('phone')

    def set_first_name(self, firstname):
        return self._set_field('firstname', firstname)

    def set_last_name(self, lastname):
        return self._set_field('lastname', lastname)

    def set_phone(self, phone):
        return self._set_field('phone', phone)

    def _get_timeline_username(self):
        return Timeline(self.get_global_id(), 'username')

    def get_username(self):
        return self._get_timeline_username().get_last_obj_id()

    def get_usernames(self):
        return self._get_timeline_username().get_objs_ids()

    def update_username_timeline(self, username_global_id, timestamp):
        self._get_timeline_username().add_timestamp(timestamp, username_global_id)

    def get_meta(self, options=set()):
        meta = self._get_meta(options=options)
        meta['id'] = self.id
        meta['subtype'] = self.subtype
        meta['tags'] = self.get_tags(r_list=True)
        if 'username' in options:
            meta['username'] = self.get_username()
        if 'usernames' in options:
            meta['usernames'] = self.get_usernames()
        return meta

    def get_misp_object(self):
        obj_attrs = []
        if self.subtype == 'telegram':
            obj = MISPObject('telegram-account', standalone=True)
            obj_attrs.append(obj.add_attribute('username', value=self.id))
        elif self.subtype == 'twitter':
            obj = MISPObject('twitter-account', standalone=True)
            obj_attrs.append(obj.add_attribute('name', value=self.id))
        else:
            obj = MISPObject('user-account', standalone=True)
            obj_attrs.append(obj.add_attribute('username', value=self.id))

        first_seen = self.get_first_seen()
        last_seen = self.get_last_seen()
        if first_seen:
            obj.first_seen = first_seen
        if last_seen:
            obj.last_seen = last_seen
        if not first_seen or not last_seen:
            self.logger.warning(
                f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={first_seen}, last={last_seen}')

        for obj_attr in obj_attrs:
            for tag in self.get_tags():
                obj_attr.add_tag(tag)
        return obj


def get_user_by_username():
    pass

def get_all_subtypes():
    return ail_core.get_object_all_subtypes('user-account')

def get_all():
    users = {}
    for subtype in get_all_subtypes():
        users[subtype] = get_all_by_subtype(subtype)
    return users

def get_all_by_subtype(subtype):
    return get_all_id('user-account', subtype)

# if __name__ == '__main__':
#     name_to_search = 'co'
#     subtype = 'telegram'
#     print(search_usernames_by_name(name_to_search, subtype))


@@ -45,10 +45,10 @@ class AbstractDaterangeObject(AbstractObject, ABC):
     def exists(self):
         return r_object.exists(f'meta:{self.type}:{self.id}')

-    def _get_field(self, field):
+    def _get_field(self, field):  # TODO remove me (NEW in abstract)
         return r_object.hget(f'meta:{self.type}:{self.id}', field)

-    def _set_field(self, field, value):
+    def _set_field(self, field, value):  # TODO remove me (NEW in abstract)
         return r_object.hset(f'meta:{self.type}:{self.id}', field, value)

     def get_first_seen(self, r_int=False):

@@ -82,9 +82,10 @@ class AbstractDaterangeObject(AbstractObject, ABC):
         return int(nb)

     def _get_meta(self, options=[]):
-        meta_dict = {'first_seen': self.get_first_seen(),
-                     'last_seen': self.get_last_seen(),
-                     'nb_seen': self.get_nb_seen()}
+        meta_dict = self.get_default_meta()
+        meta_dict['first_seen'] = self.get_first_seen()
+        meta_dict['last_seen'] = self.get_last_seen()
+        meta_dict['nb_seen'] = self.get_nb_seen()
         if 'sparkline' in options:
             meta_dict['sparkline'] = self.get_sparkline()
         return meta_dict


@@ -20,6 +20,7 @@ sys.path.append(os.environ['AIL_BIN'])
 ##################################
 from lib import ail_logger
 from lib import Tag
+from lib.ConfigLoader import ConfigLoader
 from lib import Duplicate
 from lib.correlations_engine import get_nb_correlations, get_correlations, add_obj_correlation, delete_obj_correlation, delete_obj_correlations, exists_obj_correlation, is_obj_correlated, get_nb_correlation_by_correl_type
 from lib.Investigations import is_object_investigated, get_obj_investigations, delete_obj_investigations

@@ -27,6 +28,11 @@ from lib.Tracker import is_obj_tracked, get_obj_trackers, delete_obj_trackers

 logging.config.dictConfig(ail_logger.get_config(name='ail'))

+config_loader = ConfigLoader()
+# r_cache = config_loader.get_redis_conn("Redis_Cache")
+r_object = config_loader.get_db_conn("Kvrocks_Objects")
+config_loader = None
+
 class AbstractObject(ABC):
     """
     Abstract Object

@@ -59,14 +65,28 @@ class AbstractObject(ABC):
     def get_global_id(self):
         return f'{self.get_type()}:{self.get_subtype(r_str=True)}:{self.get_id()}'

-    def get_default_meta(self, tags=False):
+    def get_default_meta(self, tags=False, link=False):
         dict_meta = {'id': self.get_id(),
                      'type': self.get_type(),
-                     'subtype': self.get_subtype()}
+                     'subtype': self.get_subtype(r_str=True)}
         if tags:
             dict_meta['tags'] = self.get_tags()
+        if link:
+            dict_meta['link'] = self.get_link()
         return dict_meta

+    def _get_field(self, field):
+        if self.subtype is None:
+            return r_object.hget(f'meta:{self.type}:{self.id}', field)
+        else:
+            return r_object.hget(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', field)
+
+    def _set_field(self, field, value):
+        if self.subtype is None:
+            return r_object.hset(f'meta:{self.type}:{self.id}', field, value)
+        else:
+            return r_object.hset(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', field, value)
+
     ## Tags ##
     def get_tags(self, r_list=False):
         tags = Tag.get_object_tags(self.type, self.id, self.get_subtype(r_str=True))

@@ -198,6 +218,8 @@ class AbstractObject(ABC):
         else:
             return []

+    ## Correlation ##
+
     def _get_external_correlation(self, req_type, req_subtype, req_id, obj_type):
         """
         Get object correlation

@@ -253,3 +275,39 @@ class AbstractObject(ABC):
         Get object correlations
         """
         delete_obj_correlation(self.type, self.subtype, self.id, type2, subtype2, id2)
+
+    ## -Correlation- ##
+
+    ## Parent ##
+
+    def is_parent(self):
+        return r_object.exists(f'child:{self.type}:{self.get_subtype(r_str=True)}:{self.id}')
+
+    def is_children(self):
+        return r_object.hexists(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', 'parent')
+
+    def get_parent(self):
+        return r_object.hget(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', 'parent')
+
+    def get_children(self):
+        return r_object.smembers(f'child:{self.type}:{self.get_subtype(r_str=True)}:{self.id}')
+
+    def set_parent(self, obj_type=None, obj_subtype=None, obj_id=None, obj_global_id=None):  # TODO ######################
+        if not obj_global_id:
+            if obj_subtype is None:
+                obj_subtype = ''
+            obj_global_id = f'{obj_type}:{obj_subtype}:{obj_id}'
+        r_object.hset(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', 'parent', obj_global_id)
+
+    def add_children(self, obj_type=None, obj_subtype=None, obj_id=None, obj_global_id=None):  # TODO ######################
+        if not obj_global_id:
+            if obj_subtype is None:
+                obj_subtype = ''
+            obj_global_id = f'{obj_type}:{obj_subtype}:{obj_id}'
+        r_object.sadd(f'child:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', obj_global_id)
+
+    def add_obj_children(self, parent_global_id, son_global_id):
+        r_object.sadd(f'child:{parent_global_id}', son_global_id)
+        r_object.hset(f'meta:{son_global_id}', 'parent', parent_global_id)
+
+    ## Parent ##
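The parent/children helpers added above boil down to two keys per link; a hedged sketch with a plain redis client (ids hypothetical; AIL itself goes through ConfigLoader and Kvrocks):

    import redis

    r_object = redis.Redis(decode_responses=True)  # stand-in for the Kvrocks_Objects connection

    parent_gid = 'message::telegram/1692189930/mychannel_42'
    child_gid = 'message::telegram/1692189934/mychannel_43'

    # Same key shapes as add_obj_children() above:
    r_object.sadd(f'child:{parent_gid}', child_gid)           # parent -> set of children
    r_object.hset(f'meta:{child_gid}', 'parent', parent_gid)  # child -> single parent

    print(r_object.smembers(f'child:{parent_gid}'))
    print(r_object.hget(f'meta:{child_gid}', 'parent'))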


@@ -151,7 +151,7 @@ class AbstractSubtypeObject(AbstractObject, ABC):
     #
     #

-    def add(self, date, item_id):
+    def add(self, date, obj=None):
         self.update_daterange(date)
         update_obj_date(date, self.type, self.subtype)
         # daily

@@ -162,20 +162,22 @@ class AbstractSubtypeObject(AbstractObject, ABC):
         #######################################################################
         #######################################################################

-        # Correlations
-        self.add_correlation('item', '', item_id)
-        # domain
-        if is_crawled(item_id):
-            domain = get_item_domain(item_id)
-            self.add_correlation('domain', '', domain)
+        if obj:
+            # Correlations
+            self.add_correlation(obj.type, obj.get_subtype(r_str=True), obj.get_id())
+            if obj.type == 'item':  # TODO same for message->chat ???
+                item_id = obj.get_id()
+                # domain
+                if is_crawled(item_id):
+                    domain = get_item_domain(item_id)
+                    self.add_correlation('domain', '', domain)

     # TODO:ADD objects + Stats
     def create(self, first_seen, last_seen):
         self.set_first_seen(first_seen)
         self.set_last_seen(last_seen)

     def _delete(self):
         pass


@@ -13,16 +13,21 @@ from lib import correlations_engine
 from lib import btc_ail
 from lib import Tag

+from lib.objects import Chats
 from lib.objects import CryptoCurrencies
 from lib.objects import CookiesNames
 from lib.objects.Cves import Cve
 from lib.objects.Decodeds import Decoded, get_all_decodeds_objects, get_nb_decodeds_objects
 from lib.objects.Domains import Domain
+from lib.objects import Etags
 from lib.objects.Favicons import Favicon
+from lib.objects import HHHashs
 from lib.objects.Items import Item, get_all_items_objects, get_nb_items_objects
+from lib.objects.Messages import Message
 from lib.objects import Pgps
 from lib.objects.Screenshots import Screenshot
 from lib.objects import Titles
+from lib.objects.UsersAccount import UserAccount
 from lib.objects import Usernames

 config_loader = ConfigLoader()

@@ -53,12 +58,20 @@ def get_object(obj_type, subtype, obj_id):
         return Domain(obj_id)
     elif obj_type == 'decoded':
         return Decoded(obj_id)
+    elif obj_type == 'chat':
+        return Chats.Chat(obj_id, subtype)
     elif obj_type == 'cookie-name':
         return CookiesNames.CookieName(obj_id)
     elif obj_type == 'cve':
         return Cve(obj_id)
+    elif obj_type == 'etag':
+        return Etags.Etag(obj_id)
     elif obj_type == 'favicon':
         return Favicon(obj_id)
+    elif obj_type == 'hhhash':
+        return HHHashs.HHHash(obj_id)
+    elif obj_type == 'message':
+        return Message(obj_id)
     elif obj_type == 'screenshot':
         return Screenshot(obj_id)
     elif obj_type == 'cryptocurrency':

@@ -67,6 +80,8 @@ def get_object(obj_type, subtype, obj_id):
         return Pgps.Pgp(obj_id, subtype)
     elif obj_type == 'title':
         return Titles.Title(obj_id)
+    elif obj_type == 'user-account':
+        return UserAccount(obj_id, subtype)
     elif obj_type == 'username':
         return Usernames.Username(obj_id, subtype)

@@ -101,9 +116,12 @@ def get_obj_global_id(obj_type, subtype, obj_id):
     obj = get_object(obj_type, subtype, obj_id)
     return obj.get_global_id()

+def get_obj_type_subtype_id_from_global_id(global_id):
+    obj_type, subtype, obj_id = global_id.split(':', 2)
+    return obj_type, subtype, obj_id

 def get_obj_from_global_id(global_id):
-    obj = global_id.split(':', 3)
+    obj = get_obj_type_subtype_id_from_global_id(global_id)
     return get_object(obj[0], obj[1], obj[2])

@@ -159,7 +177,7 @@ def get_objects_meta(objs, options=set(), flask_context=False):
             subtype = obj[1]
             obj_id = obj[2]
         else:
-            obj_type, subtype, obj_id = obj.split(':', 2)
+            obj_type, subtype, obj_id = get_obj_type_subtype_id_from_global_id(obj)
         metas.append(get_object_meta(obj_type, subtype, obj_id, options=options, flask_context=flask_context))
     return metas

@@ -168,7 +186,7 @@ def get_object_card_meta(obj_type, subtype, id, related_btc=False):
     obj = get_object(obj_type, subtype, id)
     meta = obj.get_meta()
     meta['icon'] = obj.get_svg_icon()
-    if subtype or obj_type == 'cookie-name' or obj_type == 'cve' or obj_type == 'title' or obj_type == 'favicon':
+    if subtype or obj_type == 'cookie-name' or obj_type == 'cve' or obj_type == 'etag' or obj_type == 'title' or obj_type == 'favicon' or obj_type == 'hhhash':
         meta['sparkline'] = obj.get_sparkline()
     if obj_type == 'cve':
         meta['cve_search'] = obj.get_cve_search()

@@ -177,6 +195,8 @@ def get_object_card_meta(obj_type, subtype, id, related_btc=False):
     if subtype == 'bitcoin' and related_btc:
         meta["related_btc"] = btc_ail.get_bitcoin_info(obj.id)
     if obj.get_type() == 'decoded':
+        meta['mimetype'] = obj.get_mimetype()
+        meta['size'] = obj.get_size()
         meta["vt"] = obj.get_meta_vt()
         meta["vt"]["status"] = obj.is_vt_enabled()
     # TAGS MODAL

@@ -333,8 +353,8 @@ def get_obj_correlations(obj_type, subtype, obj_id):
     obj = get_object(obj_type, subtype, obj_id)
     return obj.get_correlations()

-def _get_obj_correlations_objs(objs, obj_type, subtype, obj_id, filter_types, lvl, nb_max):
-    if len(objs) < nb_max or nb_max == -1:
+def _get_obj_correlations_objs(objs, obj_type, subtype, obj_id, filter_types, lvl, nb_max, objs_hidden):
+    if len(objs) < nb_max or nb_max == 0:
         if lvl == 0:
             objs.add((obj_type, subtype, obj_id))

@@ -346,15 +366,17 @@ def _get_obj_correlations_objs(objs, obj_type, subtype, obj_id, filter_types, lvl
             for obj2_type in correlations:
                 for str_obj in correlations[obj2_type]:
                     obj2_subtype, obj2_id = str_obj.split(':', 1)
-                    _get_obj_correlations_objs(objs, obj2_type, obj2_subtype, obj2_id, filter_types, lvl, nb_max)
+                    if get_obj_global_id(obj2_type, obj2_subtype, obj2_id) in objs_hidden:
+                        continue  # filter object to hide
+                    _get_obj_correlations_objs(objs, obj2_type, obj2_subtype, obj2_id, filter_types, lvl, nb_max, objs_hidden)

-def get_obj_correlations_objs(obj_type, subtype, obj_id, filter_types=[], lvl=0, nb_max=300):
+def get_obj_correlations_objs(obj_type, subtype, obj_id, filter_types=[], lvl=0, nb_max=300, objs_hidden=set()):
     objs = set()
-    _get_obj_correlations_objs(objs, obj_type, subtype, obj_id, filter_types, lvl, nb_max)
+    _get_obj_correlations_objs(objs, obj_type, subtype, obj_id, filter_types, lvl, nb_max, objs_hidden)
     return objs

-def obj_correlations_objs_add_tags(obj_type, subtype, obj_id, tags, filter_types=[], lvl=0, nb_max=300):
-    objs = get_obj_correlations_objs(obj_type, subtype, obj_id, filter_types=filter_types, lvl=lvl, nb_max=nb_max)
+def obj_correlations_objs_add_tags(obj_type, subtype, obj_id, tags, filter_types=[], lvl=0, nb_max=300, objs_hidden=set()):
+    objs = get_obj_correlations_objs(obj_type, subtype, obj_id, filter_types=filter_types, lvl=lvl, nb_max=nb_max, objs_hidden=objs_hidden)
     # print(objs)
     for obj_tuple in objs:
         obj1_type, subtype1, id1 = obj_tuple

@@ -395,7 +417,7 @@ def create_correlation_graph_links(links_set):
 def create_correlation_graph_nodes(nodes_set, obj_str_id, flask_context=True):
     graph_nodes_list = []
     for node_id in nodes_set:
-        obj_type, subtype, obj_id = node_id.split(':', 2)
+        obj_type, subtype, obj_id = get_obj_type_subtype_id_from_global_id(node_id)
         dict_node = {'id': node_id}
         dict_node['style'] = get_object_svg(obj_type, subtype, obj_id)

@@ -416,10 +438,12 @@ def create_correlation_graph_nodes(nodes_set, obj_str_id, flask_context=True):

 def get_correlations_graph_node(obj_type, subtype, obj_id, filter_types=[], max_nodes=300, level=1,
+                                objs_hidden=set(),
                                 flask_context=False):
     obj_str_id, nodes, links, meta = correlations_engine.get_correlations_graph_nodes_links(obj_type, subtype, obj_id,
                                                                                             filter_types=filter_types,
                                                                                             max_nodes=max_nodes, level=level,
+                                                                                            objs_hidden=objs_hidden,
                                                                                             flask_context=flask_context)
     # print(meta)
     meta['objs'] = list(meta['objs'])
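To see why the new get_obj_type_subtype_id_from_global_id() helper uses split(':', 2), a two-line sketch (ids hypothetical): limiting the split keeps everything after the second ':' inside the object id, and subtype-less objects carry an empty middle field.

    obj_type, subtype, obj_id = 'username:telegram:john_doe'.split(':', 2)
    print(obj_type, subtype, obj_id)  # username telegram john_doe

    obj_type, subtype, obj_id = 'message::telegram/1692189934/mychannel_42'.split(':', 2)
    print(repr(subtype), obj_id)      # '' telegram/1692189934/mychannel_42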

bin/lib/timeline_engine.py (new executable file, +212 lines)

@@ -0,0 +1,212 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*

import os
import sys

from uuid import uuid4

sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.ConfigLoader import ConfigLoader

config_loader = ConfigLoader()
r_meta = config_loader.get_db_conn("Kvrocks_Timeline")
config_loader = None

# CORRELATION_TYPES_BY_OBJ = {
#     "chat": ["item", "username"],  # item ???
#     "cookie-name": ["domain"],
#     "cryptocurrency": ["domain", "item"],
#     "cve": ["domain", "item"],
#     "decoded": ["domain", "item"],
#     "domain": ["cve", "cookie-name", "cryptocurrency", "decoded", "etag", "favicon", "hhhash", "item", "pgp", "title", "screenshot", "username"],
#     "etag": ["domain"],
#     "favicon": ["domain", "item"],
#     "hhhash": ["domain"],
#     "item": ["chat", "cve", "cryptocurrency", "decoded", "domain", "favicon", "pgp", "screenshot", "title", "username"],
#     "pgp": ["domain", "item"],
#     "screenshot": ["domain", "item"],
#     "title": ["domain", "item"],
#     "username": ["chat", "domain", "item"],
# }
#
# def get_obj_correl_types(obj_type):
#     return CORRELATION_TYPES_BY_OBJ.get(obj_type)

# def sanityze_obj_correl_types(obj_type, correl_types):
#     obj_correl_types = get_obj_correl_types(obj_type)
#     if correl_types:
#         correl_types = set(correl_types).intersection(obj_correl_types)
#     if not correl_types:
#         correl_types = obj_correl_types
#         if not correl_types:
#             return []
#     return correl_types


class Timeline:

    def __init__(self, global_id, name):
        self.id = global_id
        self.name = name

    def _get_block_obj_global_id(self, block):
        return r_meta.hget(f'block:{self.id}:{self.name}', block)

    def _set_block_obj_global_id(self, block, global_id):
        return r_meta.hset(f'block:{self.id}:{self.name}', block, global_id)

    def _get_block_timestamp(self, block, position):
        return r_meta.zscore(f'line:{self.id}:{self.name}', f'{position}:{block}')

    def _get_nearest_bloc_inf(self, timestamp):
        inf = r_meta.zrevrangebyscore(f'line:{self.id}:{self.name}', float(timestamp), 0, start=0, num=1, withscores=True)
        if inf:
            inf, score = inf[0]
            if inf.startswith('end'):
                inf_key = f'start:{inf[4:]}'
                inf_score = r_meta.zscore(f'line:{self.id}:{self.name}', inf_key)
                if inf_score == score:
                    inf = inf_key
            return inf
        else:
            return None

    def _get_nearest_bloc_sup(self, timestamp):
        sup = r_meta.zrangebyscore(f'line:{self.id}:{self.name}', float(timestamp), '+inf', start=0, num=1, withscores=True)
        if sup:
            sup, score = sup[0]
            if sup.startswith('start'):
                sup_key = f'end:{sup[6:]}'
                sup_score = r_meta.zscore(f'line:{self.id}:{self.name}', sup_key)
                if score == sup_score:
                    sup = sup_key
            return sup
        else:
            return None

    def get_first_obj_id(self):
        first = r_meta.zrange(f'line:{self.id}:{self.name}', 0, 0)
        if first:  # start:block
            first = first[0]
            if first.startswith('start:'):
                first = first[6:]
            else:
                first = first[4:]
            return self._get_block_obj_global_id(first)

    def get_last_obj_id(self):
        last = r_meta.zrevrange(f'line:{self.id}:{self.name}', 0, 0)
        if last:  # end:block
            last = last[0]
            if last.startswith('end:'):
                last = last[4:]
            else:
                last = last[6:]
            return self._get_block_obj_global_id(last)

    def get_objs_ids(self):
        objs = set()
        for block in r_meta.zrange(f'line:{self.id}:{self.name}', 0, -1):
            if block:
                if block.startswith('start:'):
                    objs.add(self._get_block_obj_global_id(block[6:]))
        return objs

    # def get_objs_ids(self):
    #     objs = {}
    #     last_obj_id = None
    #     for block, timestamp in r_meta.zrange(f'line:{self.id}:{self.name}', 0, -1, withscores=True):
    #         if block:
    #             if block.startswith('start:'):
    #                 last_obj_id = self._get_block_obj_global_id(block[6:])
    #                 objs[last_obj_id] = {'first_seen': timestamp}
    #             else:
    #                 objs[last_obj_id]['last_seen'] = timestamp
    #     return objs

    def _update_bloc(self, block, position, timestamp):
        r_meta.zadd(f'line:{self.id}:{self.name}', {f'{position}:{block}': timestamp})

    def _add_bloc(self, obj_global_id, timestamp, end=None):
        if end:
            timestamp_end = end
        else:
            timestamp_end = timestamp
        new_bloc = str(uuid4())
        r_meta.zadd(f'line:{self.id}:{self.name}', {f'start:{new_bloc}': timestamp, f'end:{new_bloc}': timestamp_end})
        self._set_block_obj_global_id(new_bloc, obj_global_id)
        return new_bloc

    def add_timestamp(self, timestamp, obj_global_id):
        inf = self._get_nearest_bloc_inf(timestamp)
        sup = self._get_nearest_bloc_sup(timestamp)
        if not inf and not sup:
            # create new bloc
            new_bloc = self._add_bloc(obj_global_id, timestamp)
            return new_bloc
        # timestamp < first_seen
        elif not inf:
            sup_pos, sup_id = sup.split(':')
            sup_obj = self._get_block_obj_global_id(sup_id)
            if sup_obj == obj_global_id:
                self._update_bloc(sup_id, 'start', timestamp)
            # create new bloc
            else:
                new_bloc = self._add_bloc(obj_global_id, timestamp)
                return new_bloc
        # timestamp > first_seen
        elif not sup:
            inf_pos, inf_id = inf.split(':')
            inf_obj = self._get_block_obj_global_id(inf_id)
            if inf_obj == obj_global_id:
                self._update_bloc(inf_id, 'end', timestamp)
            # create new bloc
            else:
                new_bloc = self._add_bloc(obj_global_id, timestamp)
                return new_bloc
        else:
            inf_pos, inf_id = inf.split(':')
            sup_pos, sup_id = sup.split(':')
            inf_obj = self._get_block_obj_global_id(inf_id)
            if inf_id == sup_id:
                # reduce bloc + create two new blocs
                if obj_global_id != inf_obj:
                    # get end timestamp
                    sup_timestamp = self._get_block_timestamp(sup_id, 'end')
                    # reduce original bloc
                    self._update_bloc(inf_id, 'end', timestamp - 1)
                    # Insert new bloc
                    new_bloc = self._add_bloc(obj_global_id, timestamp)
                    # Recreate end of the first bloc as a new bloc
                    self._add_bloc(inf_obj, timestamp + 1, end=sup_timestamp)
                    return new_bloc
                # timestamp in existing bloc
                else:
                    return inf_id
            # different blocs: extend sup/inf bloc or create a new bloc
            elif inf_pos == 'end' and sup_pos == 'start':
                # Extend inf bloc
                if obj_global_id == inf_obj:
                    self._update_bloc(inf_id, 'end', timestamp)
                    return inf_id
                sup_obj = self._get_block_obj_global_id(sup_id)
                # Extend sup bloc
                if obj_global_id == sup_obj:
                    self._update_bloc(sup_id, 'start', timestamp)
                    return sup_id
                # create new bloc
                new_bloc = self._add_bloc(obj_global_id, timestamp)
                return new_bloc
            # inf_pos == 'start' and sup_pos == 'end'
            # else raise error ???
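A usage sketch of the Timeline engine above (assumes the Kvrocks_Timeline connection is configured; ids hypothetical): repeated timestamps for the same object extend one block, a different object opens a new block, so the last block always names the current holder.

    timeline = Timeline('user-account:telegram:12345', 'username')

    timeline.add_timestamp(1692189000, 'username:telegram:old_nick')  # creates a block
    timeline.add_timestamp(1692189500, 'username:telegram:old_nick')  # extends the same block
    timeline.add_timestamp(1692190000, 'username:telegram:new_nick')  # opens a second block

    print(timeline.get_first_obj_id())  # username:telegram:old_nick
    print(timeline.get_last_obj_id())   # username:telegram:new_nick
    print(timeline.get_objs_ids())      # both usernames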


@@ -130,7 +130,7 @@ class Cryptocurrencies(AbstractModule, ABC):
                 if crypto.is_valid_address():
                     # print(address)
                     is_valid_address = True
-                    crypto.add(date, item_id)
+                    crypto.add(date, item)

         # Check private key
         if is_valid_address:


@@ -42,7 +42,8 @@ class Onion(AbstractModule):
         self.faup = crawlers.get_faup()

         # activate_crawler = p.config.get("Crawler", "activate_crawler")
+        self.har = config_loader.get_config_boolean('Crawler', 'default_har')
+        self.screenshot = config_loader.get_config_boolean('Crawler', 'default_screenshot')

         self.onion_regex = r"((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.onion)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
         # self.i2p_regex = r"((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.i2p)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"

@@ -90,8 +91,9 @@ class Onion(AbstractModule):
         if onion_urls:
             if crawlers.is_crawler_activated():
-                for domain in domains:  # TODO LOAD DEFAULT SCREENSHOT + HAR
-                    task_uuid = crawlers.create_task(domain, parent=item.get_id(), priority=0)
+                for domain in domains:
+                    task_uuid = crawlers.create_task(domain, parent=item.get_id(), priority=0,
+                                                     har=self.har, screenshot=self.screenshot)
                     if task_uuid:
                         print(f'{domain} added to crawler queue: {task_uuid}')
                     else:

View file

@@ -210,18 +210,18 @@ class PgpDump(AbstractModule):
date = item.get_date() date = item.get_date()
for key in self.keys: for key in self.keys:
pgp = Pgps.Pgp(key, 'key') pgp = Pgps.Pgp(key, 'key')
pgp.add(date, self.item_id) pgp.add(date, item)
print(f' key: {key}') print(f' key: {key}')
for name in self.names: for name in self.names:
pgp = Pgps.Pgp(name, 'name') pgp = Pgps.Pgp(name, 'name')
pgp.add(date, self.item_id) pgp.add(date, item)
print(f' name: {name}') print(f' name: {name}')
self.tracker_term.compute(name, obj_type='pgp', subtype='name') self.tracker_term.compute(name, obj_type='pgp', subtype='name')
self.tracker_regex.compute(name, obj_type='pgp', subtype='name') self.tracker_regex.compute(name, obj_type='pgp', subtype='name')
self.tracker_yara.compute(name, obj_type='pgp', subtype='name') self.tracker_yara.compute(name, obj_type='pgp', subtype='name')
for mail in self.mails: for mail in self.mails:
pgp = Pgps.Pgp(mail, 'mail') pgp = Pgps.Pgp(mail, 'mail')
pgp.add(date, self.item_id) pgp.add(date, item)
print(f' mail: {mail}') print(f' mail: {mail}')
self.tracker_term.compute(mail, obj_type='pgp', subtype='mail') self.tracker_term.compute(mail, obj_type='pgp', subtype='mail')
self.tracker_regex.compute(mail, obj_type='pgp', subtype='mail') self.tracker_regex.compute(mail, obj_type='pgp', subtype='mail')

View file

@@ -58,7 +58,7 @@ class Telegram(AbstractModule):
user_id = dict_url.get('username') user_id = dict_url.get('username')
if user_id: if user_id:
username = Username(user_id, 'telegram') username = Username(user_id, 'telegram')
username.add(item_date, item.id) username.add(item_date, item)
print(f'username: {user_id}') print(f'username: {user_id}')
invite_hash = dict_url.get('invite_hash') invite_hash = dict_url.get('invite_hash')
if invite_hash: if invite_hash:
@@ -73,7 +73,7 @@ class Telegram(AbstractModule):
user_id = dict_url.get('username') user_id = dict_url.get('username')
if user_id: if user_id:
username = Username(user_id, 'telegram') username = Username(user_id, 'telegram')
username.add(item_date, item.id) username.add(item_date, item)
print(f'username: {user_id}') print(f'username: {user_id}')
invite_hash = dict_url.get('invite_hash') invite_hash = dict_url.get('invite_hash')
if invite_hash: if invite_hash:
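
The Cryptocurrencies, PgpDump, and Telegram hunks above all make the same API change: `add()` now receives the Item object itself instead of its id. A minimal sketch of the new call pattern (the item path and key value are hypothetical):

```
from lib.objects.Items import Item
from lib.objects import Pgps

item = Item('crawled/2023/09/08/example.gz')  # hypothetical item id
pgp = Pgps.Pgp('0x1234ABCD', 'key')           # hypothetical pgp key
pgp.add(item.get_date(), item)                # pass the object, not item.id
```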

View file

@@ -10,6 +10,8 @@ Update AIL in the background
""" """
import os import os
import logging
import logging.config
import sys import sys
import subprocess import subprocess
@@ -17,37 +19,55 @@ sys.path.append(os.environ['AIL_BIN'])
################################## ##################################
# Import Project packages # Import Project packages
################################## ##################################
from lib import ail_logger
from lib import ail_updates from lib import ail_updates
def launch_background_upgrade(version, l_script_name): logging.config.dictConfig(ail_logger.get_config(name='updates'))
if ail_updates.is_version_in_background_update(version): def launch_background_upgrade(version):
ail_updates.start_background_update(version) logger = logging.getLogger()
logger.warning(f'launching background update {version}')
update = ail_updates.AILBackgroundUpdate(version)
nb_done = update.get_nb_scripts_done()
update.start()
scripts = update.get_scripts()
scripts = scripts[nb_done:]
for script in scripts:
print('launching background script update', script)
# launch script
update.start_script(script)
script_path = update.get_script_path()
if script_path:
try:
process = subprocess.run(['python', script_path])
if process.returncode != 0:
stderr = process.stderr
if stderr:
error = stderr.decode()
logger.error(error)
update.set_error(error)
else:
update.set_error('Error Updater Script')
logger.error('Error Updater Script')
sys.exit(0)
except Exception as e:
update.set_error(str(e))
logger.error(str(e))
sys.exit(0)
for script_name in l_script_name: if not update.get_error():
ail_updates.set_current_background_update_script(script_name) update.end_script()
update_file = ail_updates.get_current_background_update_script_path(version, script_name) else:
logger.warning('Updater exited on error')
sys.exit(0)
# # TODO: Get error output update.end()
process = subprocess.run(['python', update_file]) logger.warning(f'ending background update {version}')
update_progress = ail_updates.get_current_background_update_progress()
if update_progress == 100:
ail_updates.end_background_update_script()
# # TODO: Create Custom error
# 'Please relaunch the bin/update-background.py script'
# # TODO: Create Class background update
ail_updates.end_background_update(version)
if __name__ == "__main__": if __name__ == "__main__":
if ail_updates.is_update_background_running():
if not ail_updates.exits_background_update_to_launch(): v = ail_updates.get_update_background_version()
ail_updates.clear_background_update() launch_background_upgrade(v)
else: else:
launch_background_upgrade('v1.5', ['Update-ARDB_Onions.py', 'Update-ARDB_Metadata.py', 'Update-ARDB_Tags.py', for ver in ail_updates.get_update_background_to_launch():
'Update-ARDB_Tags_background.py', 'Update-ARDB_Onions_screenshots.py']) launch_background_upgrade(ver)
launch_background_upgrade('v2.6', ['Update_screenshots.py'])
launch_background_upgrade('v2.7', ['Update_domain_tags.py'])
launch_background_upgrade('v3.4', ['Update_domain.py'])
launch_background_upgrade('v3.7', ['Update_trackers.py'])
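
Since the page export interleaves the two diff columns above, here is the control flow of the rewritten updater, reconstructed from the new column (condensed; class and method names as they appear in the diff):

```
import subprocess
import sys

from lib import ail_updates

def launch_background_upgrade(version):
    update = ail_updates.AILBackgroundUpdate(version)
    nb_done = update.get_nb_scripts_done()  # resume after the last completed script
    update.start()
    for script in update.get_scripts()[nb_done:]:
        update.start_script(script)
        script_path = update.get_script_path()
        if script_path:
            process = subprocess.run(['python', script_path])
            if process.returncode != 0:  # record the failure and stop
                update.set_error('Error Updater Script')
                sys.exit(0)
        if not update.get_error():
            update.end_script()
        else:
            sys.exit(0)
    update.end()

if __name__ == "__main__":
    if ail_updates.is_update_background_running():
        launch_background_upgrade(ail_updates.get_update_background_version())
    else:
        for ver in ail_updates.get_update_background_to_launch():
            launch_background_upgrade(ver)
```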

View file

@@ -663,6 +663,7 @@ namespace.crawl ail_crawlers
namespace.db ail_datas namespace.db ail_datas
namespace.dup ail_dups namespace.dup ail_dups
namespace.obj ail_objs namespace.obj ail_objs
namespace.tl ail_tls
namespace.stat ail_stats namespace.stat ail_stats
namespace.tag ail_tags namespace.tag ail_tags
namespace.track ail_trackers namespace.track ail_trackers

View file

@@ -45,6 +45,10 @@ sender = sender@example.com
sender_host = smtp.example.com sender_host = smtp.example.com
sender_port = 1337 sender_port = 1337
sender_pw = None sender_pw = None
# Only needed for SMTP over SSL if the mail server doesn't support TLS (used by default). Use this option to validate the server certificate.
cert_required = False
# Only needed for SMTP over SSL if you want to validate your self-signed certificate
ca_file =
# Only needed when the credentials for email server needs a username instead of an email address # Only needed when the credentials for email server needs a username instead of an email address
#sender_user = sender #sender_user = sender
sender_user = sender_user =
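
One plausible mapping of the two new options onto the standard library, assuming the notification sender builds an `ssl` context for SMTP over SSL (a sketch, not the sender's actual code; host and port taken from the sample values above):

```
import smtplib
import ssl

cert_required = False  # value of the new cert_required option
ca_file = ''           # value of the new ca_file option

if cert_required:
    # Validate the server certificate, optionally against a self-signed CA
    context = ssl.create_default_context(cafile=ca_file if ca_file else None)
else:
    context = ssl.create_default_context()
    context.check_hostname = False
    context.verify_mode = ssl.CERT_NONE

with smtplib.SMTP_SSL('smtp.example.com', 1337, context=context) as smtp:
    smtp.login('sender_user', 'sender_pw')
```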
@@ -191,6 +195,11 @@ host = localhost
port = 6383 port = 6383
password = ail_objs password = ail_objs
[Kvrocks_Timeline]
host = localhost
port = 6383
password = ail_tls
[Kvrocks_Stats] [Kvrocks_Stats]
host = localhost host = localhost
port = 6383 port = 6383
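
The new `[Kvrocks_Timeline]` section pairs with the `namespace.tl ail_tls` mapping added above. A minimal sketch of opening it through the existing loader, mirroring the `get_db_conn("Kvrocks_DB")` call that appears later in this diff:

```
from lib.ConfigLoader import ConfigLoader

config_loader = ConfigLoader()
# Connection backed by the ail_tls namespace declared in [Kvrocks_Timeline]
r_timeline = config_loader.get_db_conn("Kvrocks_Timeline")
```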

View file

@@ -89,12 +89,12 @@ Available Importers:
5. Launch ail-framework, pystemon and PystemonImporter.py (all within the virtual environment): 5. Launch ail-framework, pystemon and PystemonImporter.py (all within the virtual environment):
- Option 1 (recommended): - Option 1 (recommended):
``` ```
./ail-framework/bin/LAUNCH.py -l #starts ail-framework ./ail-framework/bin/LAUNCH.sh -l #starts ail-framework
./ail-framework/bin/LAUNCH.py -f #starts pystemon and the PystemonImporter.py ./ail-framework/bin/LAUNCH.sh -f #starts pystemon and the PystemonImporter.py
``` ```
- Option 2 (may require two terminal windows): - Option 2 (may require two terminal windows):
``` ```
./ail-framework/bin/LAUNCH.py -l #starts ail-framework ./ail-framework/bin/LAUNCH.sh -l #starts ail-framework
./pystemon/pystemon.py ./pystemon/pystemon.py
./ail-framework/bin/importer/PystemonImporter.py ./ail-framework/bin/importer/PystemonImporter.py
``` ```

120
tools/crawler_add_task.py Executable file
View file

@@ -0,0 +1,120 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Send a URL to the crawler - Create a crawler task
================
Import a URL to be crawled by AIL and then analysed
"""
import argparse
import os
from pyail import PyAIL
import sys
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.ConfigLoader import ConfigLoader
def check_frequency(value):
value = int(value)
if value <= 0:
raise argparse.ArgumentTypeError(f'Error: Invalid frequency {value}')
if __name__ == "__main__":
# TODO add c argument for config file
parser = argparse.ArgumentParser(description='Send a URL to the crawler - Create a crawler task')
parser.add_argument('-u', '--url', type=str, help='URL to crawl', required=True)
parser.add_argument('-k', '--key', type=str, help='AIL API Key', required=True)
parser.add_argument('-a', '--ail', type=str, help='AIL URL')
parser.add_argument('-d', '--depth', type=int, default=1, help='Depth limit') # TODO improve me
parser.add_argument('--cookiejar', type=str, help='Cookiejar uuid')
parser.add_argument('-p', '--proxy', type=str, help='Proxy address to use, "web" and "tor" can be used as shortcuts (web is used by default if the domain isn\'t an onion)')
group = parser.add_mutually_exclusive_group()
group.add_argument('--har', dest='har', action='store_true', help='Save HAR')
group.add_argument('--no-har', dest='har', action='store_false', help='Don\'t save HAR')
parser.set_defaults(har=None)
group = parser.add_mutually_exclusive_group()
group.add_argument('--screenshot', dest='screenshot', action='store_true', help='Save screenshot')
group.add_argument('--no-screenshot', dest='screenshot', action='store_false', help='Don\'t save screenshot')
parser.set_defaults(screenshot=None)
group = parser.add_argument_group('Frequency, create a regular crawler/scheduler. One-shot if not specified')
group.add_argument('-f', '--frequency', type=str, choices=['monthly', 'weekly', 'daily', 'hourly'],
help='monthly, weekly, daily or hourly frequency, or specify a custom one with the other arguments')
group.add_argument('--minutes', type=int, help='frequency in minutes')
group.add_argument('--hours', type=int, help='frequency in hours')
group.add_argument('--days', type=int, help='frequency in days')
group.add_argument('--weeks', type=int, help='frequency in weeks')
group.add_argument('--months', type=int, help='frequency in months')
args = parser.parse_args()
if not args.url or not args.key:
parser.print_help()
sys.exit(0)
# Load crawler default config
config_loader = ConfigLoader()
har = args.har
if har is None:
har = config_loader.get_config_boolean('Crawler', 'default_har')
screenshot = args.screenshot
if screenshot is None:
screenshot = config_loader.get_config_boolean('Crawler', 'default_screenshot')
if args.depth:
depth = args.depth
if depth < 0:
raise argparse.ArgumentTypeError(f'Error: Invalid depth {depth}')
else:
depth = 1
# frequency
frequency = {}
if args.frequency:
if args.frequency in ['monthly', 'weekly', 'daily', 'hourly']:
frequency = args.frequency
else:
raise argparse.ArgumentTypeError('Invalid frequency')
elif args.minutes or args.hours or args.days or args.weeks or args.months:
if args.minutes:
check_frequency(args.minutes)
frequency['minutes'] = args.minutes
if args.hours:
check_frequency(args.hours)
frequency['hours'] = args.hours
if args.days:
check_frequency(args.days)
frequency['days'] = args.days
if args.weeks:
check_frequency(args.weeks)
frequency['weeks'] = args.weeks
if args.months:
check_frequency(args.months)
frequency['months'] = args.months
if not frequency:
frequency = None
proxy = args.proxy
if args.cookiejar:
cookiejar = args.cookiejar
else:
cookiejar = None
ail = args.ail
if not ail:
ail = 'https://localhost:7000/'
client = PyAIL(ail, args.key, ssl=False)
r = client.crawl_url(args.url, har=har, screenshot=screenshot, depth_limit=depth, frequency=frequency,
cookiejar=cookiejar, proxy=proxy)
print(r)
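
The tool is a thin wrapper around PyAIL, so the same task can be created directly (URL and key are placeholders):

```
from pyail import PyAIL

client = PyAIL('https://localhost:7000/', 'YOUR_API_KEY', ssl=False)
# Crawl repeated every 7 days, saving both the HAR and a screenshot
r = client.crawl_url('http://example.onion', har=True, screenshot=True,
                     depth_limit=1, frequency={'days': 7},
                     cookiejar=None, proxy='tor')
print(r)
```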

View file

@@ -1,18 +0,0 @@
#!/bin/bash
echo "Killing all screens ..."
bash -c "bash ../../bin/LAUNCH.sh -k"
echo ""
echo "Updating ARDB ..."
pushd ../../
rm -r ardb
pushd ardb/
git clone https://github.com/yinqiwen/ardb.git
git checkout 0.10 || exit 1
make || exit 1
popd
popd
echo "ARDB Updated"
echo ""
exit 0

View file

@@ -2,13 +2,11 @@
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; [ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1; [ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; [ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1; [ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
export PATH=$AIL_HOME:$PATH export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH export PATH=$AIL_FLASK:$PATH

View file

@@ -20,7 +20,7 @@ class AIL_Updater(object):
self.start_time = time.time() self.start_time = time.time()
self.config = ConfigLoader() self.config = ConfigLoader()
self.r_serv = self.config.get_redis_conn("Kvrocks_DB") self.r_serv = self.config.get_db_conn("Kvrocks_DB")
self.f_version = float(self.version[1:]) self.f_version = float(self.version[1:])
self.current_f_version = ail_updates.get_ail_float_version() self.current_f_version = ail_updates.get_ail_float_version()
@@ -35,7 +35,7 @@
""" """
Update DB version Update DB version
""" """
ail_updates.add_ail_update(version) ail_updates.add_ail_update(self.version)
def run_update(self): def run_update(self):
self.update() self.update()
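
For context, updater scripts subclass this helper and override `update()`; a minimal sketch (the import path and version string are assumptions):

```
from ail_updater import AIL_Updater  # import path is an assumption

class Updater(AIL_Updater):
    """Hypothetical v9.9 updater."""

    def update(self):
        pass  # DB migration steps would go here

if __name__ == '__main__':
    Updater('v9.9').run_update()
```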

View file

@@ -1,50 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
import datetime
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib import ConfigLoader
class AIL_Updater(object):
"""docstring for AIL_Updater."""
def __init__(self, new_version):
self.version = new_version
self.start_time = time.time()
self.config = ConfigLoader.ConfigLoader()
self.r_serv = self.config.get_redis_conn("ARDB_DB")
self.f_version = float(self.version[1:])
self.current_f_version = self.r_serv.get('ail:version')
if self.current_f_version:
self.current_f_version = float(self.current_f_version[1:])
else:
self.current_f_version = 0
def update(self):
"""
AIL DB update
"""
pass
def end_update(self):
"""
Update DB version
"""
# Set current ail version
self.r_serv.hset('ail:update_date', self.version, datetime.datetime.now().strftime("%Y%m%d"))
# Set current ail version
if self.f_version > self.current_f_version:
self.r_serv.set('ail:version', self.version)
def run_update(self):
self.update()
self.end_update()

View file

@@ -7,13 +7,13 @@ fi
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; [ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1; [ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; [ -z "$AIL_KVROCKS" ] && echo "Needs the env var AIL_KVROCKS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; [ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1; [ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
export PATH=$AIL_HOME:$PATH export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH export PATH=$AIL_KVROCKS:$PATH
export PATH=$AIL_BIN:$PATH export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH export PATH=$AIL_FLASK:$PATH
@@ -25,7 +25,7 @@ bash ${AIL_BIN}/LAUNCH.sh -ks
wait wait
echo "" echo ""
bash ${AIL_BIN}/LAUNCH.sh -lav bash ${AIL_BIN}/LAUNCH.sh -lkv
wait wait
echo "" echo ""

View file

@@ -1,15 +0,0 @@
#!/bin/bash
YELLOW="\\033[1;33m"
DEFAULT="\\033[0;39m"
echo -e $YELLOW"\t"
echo -e "* ------------------------------------------------------------------"
echo -e "\t"
echo -e " - - - - - - - - PLEASE RELAUNCH AIL - - - - - - - - "
echo -e "\t"
echo -e "* ------------------------------------------------------------------"
echo -e "\t"
echo -e "\t"$DEFAULT
# fix invalid Updater version (kill parent):
kill -SIGUSR1 `ps --pid $$ -oppid=`; exit

View file

@@ -1,165 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
from lib import ConfigLoader
def update_tracked_terms(main_key, tracked_container_key):
for tracked_item in r_serv_term.smembers(main_key):
all_items = r_serv_term.smembers(tracked_container_key.format(tracked_item))
for item_path in all_items:
if PASTES_FOLDER in item_path:
new_item_path = item_path.replace(PASTES_FOLDER, '', 1)
r_serv_term.sadd(tracked_container_key.format(tracked_item), new_item_path)
r_serv_term.srem(tracked_container_key.format(tracked_item), item_path)
def update_hash_item(has_type):
#get all hash items:
all_hash_items = r_serv_tag.smembers('infoleak:automatic-detection=\"{}\"'.format(has_type))
for item_path in all_hash_items:
if PASTES_FOLDER in item_path:
base64_key = '{}_paste:{}'.format(has_type, item_path)
hash_key = 'hash_paste:{}'.format(item_path)
if r_serv_metadata.exists(base64_key):
new_base64_key = base64_key.replace(PASTES_FOLDER, '', 1)
res = r_serv_metadata.renamenx(base64_key, new_base64_key)
if res == 0:
print('same key, double name: {}'.format(item_path))
# fusion
all_key = r_serv_metadata.smembers(base64_key)
for elem in all_key:
r_serv_metadata.sadd(new_base64_key, elem)
r_serv_metadata.srem(base64_key, elem)
if r_serv_metadata.exists(hash_key):
new_hash_key = hash_key.replace(PASTES_FOLDER, '', 1)
res = r_serv_metadata.renamenx(hash_key, new_hash_key)
if res == 0:
print('same key, double name: {}'.format(item_path))
# fusion
all_key = r_serv_metadata.smembers(hash_key)
for elem in all_key:
r_serv_metadata.sadd(new_hash_key, elem)
r_serv_metadata.srem(hash_key, elem)
if __name__ == '__main__':
start_deb = time.time()
config_loader = ConfigLoader.ConfigLoader()
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
r_serv = config_loader.get_redis_conn("ARDB_DB")
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
r_serv_tag = config_loader.get_redis_conn("ARDB_Tags")
r_serv_term = config_loader.get_redis_conn("ARDB_TermFreq")
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
config_loader = None
r_serv.set('ail:current_background_script', 'metadata')
## Update metadata ##
print('Updating ARDB_Metadata ...')
index = 0
start = time.time()
#update stats
r_serv.set('ail:current_background_script_stat', 0)
# Update base64
update_hash_item('base64')
#update stats
r_serv.set('ail:current_background_script_stat', 20)
# Update binary
update_hash_item('binary')
#update stats
r_serv.set('ail:current_background_script_stat', 40)
# Update binary
update_hash_item('hexadecimal')
#update stats
r_serv.set('ail:current_background_script_stat', 60)
total_onion = r_serv_tag.scard('infoleak:submission=\"crawler\"')
nb_updated = 0
last_progress = 0
# Update onion metadata
all_crawled_items = r_serv_tag.smembers('infoleak:submission=\"crawler\"')
for item_path in all_crawled_items:
domain = None
if PASTES_FOLDER in item_path:
old_item_metadata = 'paste_metadata:{}'.format(item_path)
item_path = item_path.replace(PASTES_FOLDER, '', 1)
new_item_metadata = 'paste_metadata:{}'.format(item_path)
res = r_serv_metadata.renamenx(old_item_metadata, new_item_metadata)
#key already exist
if res == 0:
r_serv_metadata.delete(old_item_metadata)
# update domain port
domain = r_serv_metadata.hget(new_item_metadata, 'domain')
if domain:
if domain[-3:] != ':80':
r_serv_metadata.hset(new_item_metadata, 'domain', '{}:80'.format(domain))
super_father = r_serv_metadata.hget(new_item_metadata, 'super_father')
if super_father:
if PASTES_FOLDER in super_father:
r_serv_metadata.hset(new_item_metadata, 'super_father', super_father.replace(PASTES_FOLDER, '', 1))
father = r_serv_metadata.hget(new_item_metadata, 'father')
if father:
if PASTES_FOLDER in father:
r_serv_metadata.hset(new_item_metadata, 'father', father.replace(PASTES_FOLDER, '', 1))
nb_updated += 1
progress = int((nb_updated * 30) /total_onion)
print('{}/{} updated {}%'.format(nb_updated, total_onion, progress + 60))
# update progress stats
if progress != last_progress:
r_serv.set('ail:current_background_script_stat', progress + 60)
last_progress = progress
#update stats
r_serv.set('ail:current_background_script_stat', 90)
## update tracked term/set/regex
# update tracked term
update_tracked_terms('TrackedSetTermSet', 'tracked_{}')
#update stats
r_serv.set('ail:current_background_script_stat', 93)
# update tracked set
update_tracked_terms('TrackedSetSet', 'set_{}')
#update stats
r_serv.set('ail:current_background_script_stat', 96)
# update tracked regex
update_tracked_terms('TrackedRegexSet', 'regex_{}')
#update stats
r_serv.set('ail:current_background_script_stat', 100)
##
end = time.time()
print('Updating ARDB_Metadata Done => {} paths: {} s'.format(index, end - start))
print()
r_serv.sadd('ail:update_v1.5', 'metadata')
##
#Key, Dynamic Update
##
#paste_children
#nb_seen_hash, base64_hash, binary_hash
#paste_onion_external_links
#misp_events, hive_cases
##

View file

@@ -1,129 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
import datetime
sys.path.append(os.environ['AIL_BIN'])
from lib import ConfigLoader
def substract_date(date_from, date_to):
date_from = datetime.date(int(date_from[0:4]), int(date_from[4:6]), int(date_from[6:8]))
date_to = datetime.date(int(date_to[0:4]), int(date_to[4:6]), int(date_to[6:8]))
delta = date_to - date_from # timedelta
l_date = []
for i in range(delta.days + 1):
date = date_from + datetime.timedelta(i)
l_date.append( date.strftime('%Y%m%d') )
return l_date
def get_date_epoch(date):
return int(datetime.datetime(int(date[0:4]), int(date[4:6]), int(date[6:8])).timestamp())
def get_domain_root_from_paste_childrens(item_father, domain):
item_children = r_serv_metadata.smembers('paste_children:{}'.format(item_father))
domain_root = ''
for item_path in item_children:
# remove absolute_path
if PASTES_FOLDER in item_path:
r_serv_metadata.srem('paste_children:{}'.format(item_father), item_path)
item_path = item_path.replace(PASTES_FOLDER, '', 1)
r_serv_metadata.sadd('paste_children:{}'.format(item_father), item_path)
if domain in item_path:
domain_root = item_path
return domain_root
if __name__ == '__main__':
start_deb = time.time()
config_loader = ConfigLoader.ConfigLoader()
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
r_serv = config_loader.get_redis_conn("ARDB_DB")
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
r_serv_tag = config_loader.get_redis_conn("ARDB_Tags")
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
config_loader = None
r_serv.set('ail:current_background_script', 'onions')
r_serv.set('ail:current_background_script_stat', 0)
## Update Onion ##
print('Updating ARDB_Onion ...')
index = 0
start = time.time()
# clean down domain from db
date_from = '20180929'
date_today = datetime.date.today().strftime("%Y%m%d")
for date in substract_date(date_from, date_today):
onion_down = r_serv_onion.smembers('onion_down:{}'.format(date))
#print(onion_down)
for onion_domain in onion_down:
if not r_serv_onion.sismember('full_onion_up', onion_domain):
# delete history
all_onion_history = r_serv_onion.lrange('onion_history:{}'.format(onion_domain), 0 ,-1)
if all_onion_history:
for date_history in all_onion_history:
#print('onion_history:{}:{}'.format(onion_domain, date_history))
r_serv_onion.delete('onion_history:{}:{}'.format(onion_domain, date_history))
r_serv_onion.delete('onion_history:{}'.format(onion_domain))
#stats
total_domain = r_serv_onion.scard('full_onion_up')
nb_updated = 0
last_progress = 0
# clean up domain
all_domain_up = r_serv_onion.smembers('full_onion_up')
for onion_domain in all_domain_up:
# delete history
all_onion_history = r_serv_onion.lrange('onion_history:{}'.format(onion_domain), 0 ,-1)
if all_onion_history:
for date_history in all_onion_history:
print('--------')
print('onion_history:{}:{}'.format(onion_domain, date_history))
item_father = r_serv_onion.lrange('onion_history:{}:{}'.format(onion_domain, date_history), 0, 0)
print('item_father: {}'.format(item_father))
try:
item_father = item_father[0]
except IndexError:
r_serv_onion.delete('onion_history:{}:{}'.format(onion_domain, date_history))
continue
#print(item_father)
# delete old history
r_serv_onion.delete('onion_history:{}:{}'.format(onion_domain, date_history))
# create new history
root_key = get_domain_root_from_paste_childrens(item_father, onion_domain)
if root_key:
r_serv_onion.zadd(f'crawler_history_onion:{onion_domain}:80', {root_key: get_date_epoch(date_history)})
print('crawler_history_onion:{}:80 {} {}'.format(onion_domain, get_date_epoch(date_history), root_key))
#update service metadata: paste_parent
r_serv_onion.hset('onion_metadata:{}'.format(onion_domain), 'paste_parent', root_key)
r_serv_onion.delete('onion_history:{}'.format(onion_domain))
r_serv_onion.hset('onion_metadata:{}'.format(onion_domain), 'ports', '80')
r_serv_onion.hdel('onion_metadata:{}'.format(onion_domain), 'last_seen')
nb_updated += 1
progress = int((nb_updated * 100) /total_domain)
print('{}/{} updated {}%'.format(nb_updated, total_domain, progress))
# update progress stats
if progress != last_progress:
r_serv.set('ail:current_background_script_stat', progress)
last_progress = progress
end = time.time()
print('Updating ARDB_Onion Done => {} paths: {} s'.format(index, end - start))
print()
print('Done in {} s'.format(end - start_deb))
r_serv.sadd('ail:update_v1.5', 'onions')

View file

@@ -1,117 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
import datetime
from hashlib import sha256
sys.path.append(os.environ['AIL_BIN'])
from lib import ConfigLoader
def rreplace(s, old, new, occurrence):
li = s.rsplit(old, occurrence)
return new.join(li)
def substract_date(date_from, date_to):
date_from = datetime.date(int(date_from[0:4]), int(date_from[4:6]), int(date_from[6:8]))
date_to = datetime.date(int(date_to[0:4]), int(date_to[4:6]), int(date_to[6:8]))
delta = date_to - date_from # timedelta
l_date = []
for i in range(delta.days + 1):
date = date_from + datetime.timedelta(i)
l_date.append( date.strftime('%Y%m%d') )
return l_date
if __name__ == '__main__':
start_deb = time.time()
config_loader = ConfigLoader.ConfigLoader()
SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot"))
NEW_SCREENSHOT_FOLDER = config_loader.get_files_directory('screenshot')
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
r_serv = config_loader.get_redis_conn("ARDB_DB")
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
r_serv_tag = config_loader.get_redis_conn("ARDB_Tags")
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
config_loader = None
r_serv.set('ail:current_background_script', 'crawled_screenshot')
r_serv.set('ail:current_background_script_stat', 0)
## Update Onion ##
print('Updating ARDB_Onion ...')
index = 0
start = time.time()
# clean down domain from db
date_from = '20180801'
date_today = datetime.date.today().strftime("%Y%m%d")
list_date = substract_date(date_from, date_today)
nb_done = 0
last_progress = 0
total_to_update = len(list_date)
for date in list_date:
screenshot_dir = os.path.join(SCREENSHOT_FOLDER, date[0:4], date[4:6], date[6:8])
if os.path.isdir(screenshot_dir):
print(screenshot_dir)
for file in os.listdir(screenshot_dir):
if file.endswith(".png"):
index += 1
#print(file)
img_path = os.path.join(screenshot_dir, file)
with open(img_path, 'br') as f:
image_content = f.read()
hash = sha256(image_content).hexdigest()
img_dir_path = os.path.join(hash[0:2], hash[2:4], hash[4:6], hash[6:8], hash[8:10], hash[10:12])
filename_img = os.path.join(NEW_SCREENSHOT_FOLDER, img_dir_path, hash[12:] +'.png')
dirname = os.path.dirname(filename_img)
if not os.path.exists(dirname):
os.makedirs(dirname)
if not os.path.exists(filename_img):
os.rename(img_path, filename_img)
else:
os.remove(img_path)
item = os.path.join('crawled', date[0:4], date[4:6], date[6:8], file[:-4])
# add item metadata
r_serv_metadata.hset('paste_metadata:{}'.format(item), 'screenshot', hash)
# add sha256 metadata
r_serv_onion.sadd('screenshot:{}'.format(hash), item)
if file.endswith('.pnghar.txt'):
har_path = os.path.join(screenshot_dir, file)
new_file = rreplace(file, '.pnghar.txt', '.json', 1)
new_har_path = os.path.join(screenshot_dir, new_file)
os.rename(har_path, new_har_path)
progress = int((nb_done * 100) /total_to_update)
# update progress stats
if progress != last_progress:
r_serv.set('ail:current_background_script_stat', progress)
print('{}/{} screenshot updated {}%'.format(nb_done, total_to_update, progress))
last_progress = progress
nb_done += 1
r_serv.set('ail:current_background_script_stat', 100)
end = time.time()
print('Updating ARDB_Onion Done => {} paths: {} s'.format(index, end - start))
print()
print('Done in {} s'.format(end - start_deb))
r_serv.set('ail:current_background_script_stat', 100)
r_serv.sadd('ail:update_v1.5', 'crawled_screenshot')
if r_serv.scard('ail:update_v1.5') != 5:
r_serv.set('ail:update_error', 'Update v1.5 Failed, please relaunch the bin/update-background.py script')

View file

@@ -1,135 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
import redis
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
from lib import ConfigLoader
if __name__ == '__main__':
start_deb = time.time()
config_loader = ConfigLoader.ConfigLoader()
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
r_serv = config_loader.get_redis_conn("ARDB_DB")
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
r_serv_tag = config_loader.get_redis_conn("ARDB_Tags")
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
r_important_paste_2018 = redis.StrictRedis(
host=config_loader.get_config_str("ARDB_Metadata", "host"),
port=config_loader.get_config_int("ARDB_Metadata", "port"),
db=2018,
decode_responses=True)
r_important_paste_2019 = redis.StrictRedis(
host=config_loader.get_config_str("ARDB_Metadata", "host"),
port=config_loader.get_config_int("ARDB_Metadata", "port"),
db=2019,
decode_responses=True)
config_loader = None
r_serv.set('ail:current_background_script', 'tags')
r_serv.set('ail:current_background_script_stat', 0)
if r_serv.sismember('ail:update_v1.5', 'onions') and r_serv.sismember('ail:update_v1.5', 'metadata'):
print('Updating ARDB_Tags ...')
index = 0
nb_tags_to_update = 0
nb_updated = 0
last_progress = 0
start = time.time()
tags_list = r_serv_tag.smembers('list_tags')
# create temp tags metadata
tag_metadata = {}
for tag in tags_list:
tag_metadata[tag] = {}
tag_metadata[tag]['first_seen'] = r_serv_tag.hget('tag_metadata:{}'.format(tag), 'first_seen')
if tag_metadata[tag]['first_seen'] is None:
tag_metadata[tag]['first_seen'] = 99999999
else:
tag_metadata[tag]['first_seen'] = int(tag_metadata[tag]['first_seen'])
tag_metadata[tag]['last_seen'] = r_serv_tag.hget('tag_metadata:{}'.format(tag), 'last_seen')
if tag_metadata[tag]['last_seen'] is None:
tag_metadata[tag]['last_seen'] = 0
else:
tag_metadata[tag]['last_seen'] = int(tag_metadata[tag]['last_seen'])
nb_tags_to_update += r_serv_tag.scard(tag)
if nb_tags_to_update == 0:
nb_tags_to_update = 1
for tag in tags_list:
all_item = r_serv_tag.smembers(tag)
for item_path in all_item:
splitted_item_path = item_path.split('/')
#print(tag)
#print(item_path)
try:
item_date = int( ''.join([splitted_item_path[-4], splitted_item_path[-3], splitted_item_path[-2]]) )
except IndexError:
r_serv_tag.srem(tag, item_path)
continue
# remove absolute path
new_path = item_path.replace(PASTES_FOLDER, '', 1)
if new_path != item_path:
# save in queue absolute path to remove
r_serv_tag.sadd('maj:v1.5:absolute_path_to_rename', item_path)
# update metadata first_seen
if item_date < tag_metadata[tag]['first_seen']:
tag_metadata[tag]['first_seen'] = item_date
r_serv_tag.hset('tag_metadata:{}'.format(tag), 'first_seen', item_date)
# update metadata last_seen
if item_date > tag_metadata[tag]['last_seen']:
tag_metadata[tag]['last_seen'] = item_date
last_seen_db = r_serv_tag.hget('tag_metadata:{}'.format(tag), 'last_seen')
if last_seen_db:
if item_date > int(last_seen_db):
r_serv_tag.hset('tag_metadata:{}'.format(tag), 'last_seen', item_date)
else:
tag_metadata[tag]['last_seen'] = last_seen_db
r_serv_tag.sadd('{}:{}'.format(tag, item_date), new_path)
r_serv_tag.hincrby('daily_tags:{}'.format(item_date), tag, 1)
# clean db
r_serv_tag.srem(tag, item_path)
index = index + 1
nb_updated += 1
progress = int((nb_updated * 100) /nb_tags_to_update)
print('{}/{} updated {}%'.format(nb_updated, nb_tags_to_update, progress))
# update progress stats
if progress != last_progress:
r_serv.set('ail:current_background_script_stat', progress)
last_progress = progress
#flush browse importante pastes db
try:
r_important_paste_2018.flushdb()
except Exception:
pass
try:
r_important_paste_2019.flushdb()
except Exception:
pass
end = time.time()
print('Updating ARDB_Tags Done => {} paths: {} s'.format(index, end - start))
r_serv.sadd('ail:update_v1.5', 'tags')

View file

@@ -1,70 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
sys.path.append(os.environ['AIL_BIN'])
from lib import ConfigLoader
def tags_key_fusion(old_item_path_key, new_item_path_key):
print('fusion:')
print(old_item_path_key)
print(new_item_path_key)
for tag in r_serv_metadata.smembers(old_item_path_key):
r_serv_metadata.sadd(new_item_path_key, tag)
r_serv_metadata.srem(old_item_path_key, tag)
if __name__ == '__main__':
start_deb = time.time()
config_loader = ConfigLoader.ConfigLoader()
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
r_serv = config_loader.get_redis_conn("ARDB_DB")
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
r_serv_tag = config_loader.get_redis_conn("ARDB_Tags")
config_loader = None
if r_serv.sismember('ail:update_v1.5', 'tags'):
r_serv.set('ail:current_background_script', 'tags_background')
r_serv.set('ail:current_background_script_stat', 0)
print('Updating ARDB_Tags ...')
start = time.time()
# update item metadata tags
tag_not_updated = True
total_to_update = r_serv_tag.scard('maj:v1.5:absolute_path_to_rename')
nb_updated = 0
last_progress = 0
if total_to_update > 0:
while tag_not_updated:
item_path = r_serv_tag.srandmember('maj:v1.5:absolute_path_to_rename')
old_tag_item_key = 'tag:{}'.format(item_path)
new_item_path = item_path.replace(PASTES_FOLDER, '', 1)
new_tag_item_key = 'tag:{}'.format(new_item_path)
res = r_serv_metadata.renamenx(old_tag_item_key, new_tag_item_key)
if res == 0:
tags_key_fusion(old_tag_item_key, new_tag_item_key)
nb_updated += 1
r_serv_tag.srem('maj:v1.5:absolute_path_to_rename', item_path)
if r_serv_tag.scard('maj:v1.5:absolute_path_to_rename') == 0:
tag_not_updated = False
else:
progress = int((nb_updated * 100) / total_to_update)
print('{}/{} Tags updated {}%'.format(nb_updated, total_to_update, progress))
# update progress stats
if progress != last_progress:
r_serv.set('ail:current_background_script_stat', progress)
last_progress = progress
end = time.time()
print('Updating ARDB_Tags Done: {} s'.format(end - start))
r_serv.sadd('ail:update_v1.5', 'tags_background')

View file

@@ -1,54 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
import datetime
sys.path.append(os.environ['AIL_BIN'])
from lib import ConfigLoader
if __name__ == '__main__':
start_deb = time.time()
config_loader = ConfigLoader.ConfigLoader()
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
r_serv = config_loader.get_redis_conn("ARDB_DB")
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
config_loader = None
print()
print('Updating ARDB_Onion ...')
index = 0
start = time.time()
# update crawler queue
for elem in r_serv_onion.smembers('onion_crawler_queue'):
if PASTES_FOLDER in elem:
r_serv_onion.srem('onion_crawler_queue', elem)
r_serv_onion.sadd('onion_crawler_queue', elem.replace(PASTES_FOLDER, '', 1))
index = index +1
for elem in r_serv_onion.smembers('onion_crawler_priority_queue'):
if PASTES_FOLDER in elem:
r_serv_onion.srem('onion_crawler_queue', elem)
r_serv_onion.sadd('onion_crawler_queue', elem.replace(PASTES_FOLDER, '', 1))
index = index +1
end = time.time()
print('Updating ARDB_Onion Done => {} paths: {} s'.format(index, end - start))
print()
# Add background update
r_serv.sadd('ail:to_update', 'v1.5')
#Set current ail version
r_serv.set('ail:version', 'v1.5')
#Set current ail version
r_serv.set('ail:update_date_v1.5', datetime.datetime.now().strftime("%Y%m%d"))
print('Done in {} s'.format(end - start_deb))

View file

@@ -1,60 +0,0 @@
#!/bin/bash
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"
echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -k &
wait
echo ""
bash -c "bash ${AIL_HOME}/update/bin/Update_Redis.sh"
#bash -c "bash ${AIL_HOME}/update/bin/Update_ARDB.sh"
echo ""
echo -e $GREEN"Update DomainClassifier"$DEFAULT
echo ""
pip3 install --upgrade --force-reinstall git+https://github.com/D4-project/BGP-Ranking.git/@28013297efb039d2ebbce96ee2d89493f6ae56b0#subdirectory=client&egg=pybgpranking
pip3 install --upgrade --force-reinstall git+https://github.com/adulau/DomainClassifier.git
wait
echo ""
echo ""
echo -e $GREEN"Update Web thirdparty"$DEFAULT
echo ""
bash -c "(cd ${AIL_FLASK}; ./update_thirdparty.sh &)"
wait
echo ""
bash ${AIL_BIN}LAUNCH.sh -lav &
wait
echo ""
echo ""
echo -e $GREEN"Fixing ARDB ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v1.5/Update.py &
wait
echo ""
echo ""
echo ""
echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks &
wait
echo ""
exit 0

View file

@@ -1,25 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
import datetime
sys.path.append(os.environ['AIL_BIN'])
from lib import ConfigLoader
if __name__ == '__main__':
start_deb = time.time()
config_loader = ConfigLoader.ConfigLoader()
r_serv = config_loader.get_redis_conn("ARDB_DB")
config_loader = None
# Set current ail version
r_serv.set('ail:version', 'v1.7')
# Set current ail version
r_serv.set('ail:update_date_v1.7', datetime.datetime.now().strftime("%Y%m%d"))

View file

@@ -1,65 +0,0 @@
#!/bin/bash
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"
echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks &
wait
echo ""
echo -e $GREEN"Update DomainClassifier"$DEFAULT
echo ""
cd $AIL_HOME
git clone https://github.com/kazu-yamamoto/pgpdump.git
cd pgpdump
./configure
make
sudo make install
wait
echo ""
echo ""
echo -e $GREEN"Update requirement"$DEFAULT
echo ""
pip3 install beautifulsoup4
bash ${AIL_BIN}LAUNCH.sh -lav &
wait
echo ""
echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v1.7/Update.py &
wait
echo ""
echo ""
echo ""
echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks &
wait
echo ""
echo -e $GREEN"Update thirdparty ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -t &
wait
echo ""
exit 0

View file

@@ -1,33 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
import datetime
sys.path.append(os.environ['AIL_BIN'])
from lib import ConfigLoader
if __name__ == '__main__':
start_deb = time.time()
config_loader = ConfigLoader.ConfigLoader()
r_serv = config_loader.get_redis_conn("ARDB_DB")
config_loader = None
# Set current ail version
r_serv.set('ail:version', 'v2.0')
# use new update_date format
date_tag_to_replace = ['v1.5', 'v1.7']
for tag in date_tag_to_replace:
if r_serv.exists('ail:update_date_{}'.format(tag)):
date_tag = r_serv.get('ail:update_date_{}'.format(tag))
r_serv.hset('ail:update_date', tag, date_tag)
r_serv.delete('ail:update_date_{}'.format(tag))
# Set current ail version
r_serv.hset('ail:update_date', 'v2.0', datetime.datetime.now().strftime("%Y%m%d"))

View file

@@ -1,75 +0,0 @@
#!/bin/bash
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"
echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait
echo ""
echo -e $GREEN"Create Self-Signed Certificate"$DEFAULT
echo ""
pushd ${AIL_BIN}/helper/gen_cert
bash gen_root.sh
wait
bash gen_cert.sh
wait
popd
cp ${AIL_BIN}/helper/gen_cert/server.crt ${AIL_FLASK}/server.crt
cp ${AIL_BIN}/helper/gen_cert/server.key ${AIL_FLASK}/server.key
echo ""
echo -e $GREEN"Update requirement"$DEFAULT
echo ""
pip3 install flask-login
wait
echo ""
pip3 install bcrypt
wait
echo ""
echo ""
bash ${AIL_BIN}/LAUNCH.sh -lav &
wait
echo ""
echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v2.0/Update.py
wait
echo ""
echo ""
echo ""
echo -e $GREEN"Update thirdparty ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -t
wait
echo ""
echo ""
echo -e $GREEN"Create Default User"$DEFAULT
echo ""
python3 ${AIL_FLASK}create_default_user.py
echo ""
echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait
exit 0

View file

@@ -1,118 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
import datetime
sys.path.append(os.environ['AIL_BIN'])
from packages import Term
from lib import ConfigLoader
def rreplace(s, old, new, occurrence):
li = s.rsplit(old, occurrence)
return new.join(li)
def get_item_id(full_path):
return full_path.replace(PASTES_FOLDER, '', 1)
def get_item_date(id_item):
l_dir = id_item.split('/')
return f'{l_dir[-4]}{l_dir[-3]}{l_dir[-2]}'
if __name__ == '__main__':
start_deb = time.time()
config_loader = ConfigLoader.ConfigLoader()
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
PASTES_FOLDER = os.path.join(os.path.realpath(PASTES_FOLDER), '')
r_serv = config_loader.get_redis_conn("ARDB_DB")
r_serv_term_stats = config_loader.get_redis_conn("ARDB_Trending")
r_serv_termfreq = config_loader.get_redis_conn("ARDB_TermFreq")
config_loader = None
r_serv_term_stats.flushdb()
# Disabled. Checkout the v2.2 branch if you need it
# # convert all regex:
# all_regex = r_serv_termfreq.smembers('TrackedRegexSet')
# for regex in all_regex:
# tags = list(r_serv_termfreq.smembers('TrackedNotificationTags_{}'.format(regex)))
# mails = list(r_serv_termfreq.smembers('TrackedNotificationEmails_{}'.format(regex)))
#
# new_term = regex[1:-1]
# res = Term.parse_json_term_to_add({"term": new_term, "type": 'regex', "tags": tags, "mails": mails, "level": 1},
# 'admin@admin.test')
# if res[1] == 200:
# term_uuid = res[0]['uuid']
# list_items = r_serv_termfreq.smembers('regex_{}'.format(regex))
# for paste_item in list_items:
# item_id = get_item_id(paste_item)
# item_date = get_item_date(item_id)
# Term.add_tracked_item(term_uuid, item_id, item_date)
#
# # Invalid Tracker => remove it
# else:
# print('Invalid Regex Removed: {}'.format(regex))
# print(res[0])
# # allow reprocess
# r_serv_termfreq.srem('TrackedRegexSet', regex)
#
# all_tokens = r_serv_termfreq.smembers('TrackedSetTermSet')
# for token in all_tokens:
# tags = list(r_serv_termfreq.smembers('TrackedNotificationTags_{}'.format(token)))
# mails = list(r_serv_termfreq.smembers('TrackedNotificationEmails_{}'.format(token)))
#
# res = Term.parse_json_term_to_add({"term": token, "type": 'word', "tags": tags, "mails": mails, "level": 1}, 'admin@admin.test')
# if res[1] == 200:
# term_uuid = res[0]['uuid']
# list_items = r_serv_termfreq.smembers('tracked_{}'.format(token))
# for paste_item in list_items:
# item_id = get_item_id(paste_item)
# item_date = get_item_date(item_id)
# Term.add_tracked_item(term_uuid, item_id, item_date)
# # Invalid Tracker => remove it
# else:
# print('Invalid Token Removed: {}'.format(token))
# print(res[0])
# # allow reprocess
# r_serv_termfreq.srem('TrackedSetTermSet', token)
#
# all_set = r_serv_termfreq.smembers('TrackedSetSet')
# for curr_set in all_set:
# tags = list(r_serv_termfreq.smembers('TrackedNotificationTags_{}'.format(curr_set)))
# mails = list(r_serv_termfreq.smembers('TrackedNotificationEmails_{}'.format(curr_set)))
#
# to_remove = ',{}'.format(curr_set.split(',')[-1])
# new_set = rreplace(curr_set, to_remove, '', 1)
# new_set = new_set[2:]
# new_set = new_set.replace(',', '')
#
# res = Term.parse_json_term_to_add({"term": new_set, "type": 'set', "nb_words": 1, "tags": tags, "mails": mails, "level": 1}, 'admin@admin.test')
# if res[1] == 200:
# term_uuid = res[0]['uuid']
# list_items = r_serv_termfreq.smembers('tracked_{}'.format(curr_set))
# for paste_item in list_items:
# item_id = get_item_id(paste_item)
# item_date = get_item_date(item_id)
# Term.add_tracked_item(term_uuid, item_id, item_date)
# # Invalid Tracker => remove it
# else:
# print('Invalid Set Removed: {}'.format(curr_set))
# print(res[0])
# # allow reprocess
# r_serv_termfreq.srem('TrackedSetSet', curr_set)
r_serv_termfreq.flushdb()
# Set current ail version
r_serv.set('ail:version', 'v2.2')
# Set current ail version
r_serv.hset('ail:update_date', 'v2.2', datetime.datetime.now().strftime("%Y%m%d"))

View file

@@ -1,39 +0,0 @@
#!/bin/bash
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"
echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait
bash ${AIL_BIN}/LAUNCH.sh -lav &
wait
echo ""
echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v2.2/Update.py
wait
echo ""
echo ""
echo ""
echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait
exit 0

View file

@@ -1,35 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
import datetime
sys.path.append(os.environ['AIL_BIN'])
from lib import ConfigLoader
new_version = 'v2.5'
if __name__ == '__main__':
start_deb = time.time()
config_loader = ConfigLoader.ConfigLoader()
r_serv = config_loader.get_redis_conn("ARDB_DB")
config_loader = None
r_serv.zadd('ail:all_role', {'user': 3})
r_serv.zadd('ail:all_role', {'user_no_api': 4})
r_serv.zadd('ail:all_role', {'read_only': 5})
for user in r_serv.hkeys('user:all'):
r_serv.sadd('user_role:user', user)
r_serv.sadd('user_role:user_no_api', user)
r_serv.sadd('user_role:read_only', user)
# Set current ail version
r_serv.set('ail:version', new_version)
# Set current ail version
r_serv.hset('ail:update_date', new_version, datetime.datetime.now().strftime("%Y%m%d"))

View file

@@ -1,39 +0,0 @@
#!/bin/bash
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"
echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait
bash ${AIL_BIN}/LAUNCH.sh -lav &
wait
echo ""
echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v2.5/Update.py
wait
echo ""
echo ""
echo ""
echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait
exit 0

View file

@@ -1,27 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
import datetime
sys.path.append(os.environ['AIL_BIN'])
from lib import ConfigLoader
new_version = 'v2.6'
if __name__ == '__main__':
start_deb = time.time()
config_loader = ConfigLoader.ConfigLoader()
r_serv = config_loader.get_redis_conn("ARDB_DB")
config_loader = None
r_serv.sadd('ail:to_update', new_version)
# Set current ail version
r_serv.set('ail:version', new_version)
# Set current ail version
r_serv.hset('ail:update_date', new_version, datetime.datetime.now().strftime("%Y%m%d"))

View file

@@ -1,39 +0,0 @@
#!/bin/bash
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"
echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait
bash ${AIL_BIN}/LAUNCH.sh -lav &
wait
echo ""
echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v2.6/Update.py
wait
echo ""
echo ""
echo ""
echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait
exit 0

View file

@@ -1,90 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
from pyfaup.faup import Faup
sys.path.append(os.environ['AIL_BIN'])
from lib import ConfigLoader
def get_domain(item_id):
item_id = item_id.split('/')
item_id = item_id[-1]
return item_id[:-36]
def get_all_item(s_sha256):
return r_serv_onion.smembers(f'screenshot:{s_sha256}')
def sanitize_domain(domain):
faup.decode(domain)
domain_sanitized = faup.get()
domain_sanitized = domain_sanitized['domain']
try:
domain_sanitized = domain_sanitized.decode()
except:
pass
return domain_sanitized.lower()
def update_db(s_sha256):
screenshot_items = get_all_item(s_sha256)
if screenshot_items:
for item_id in screenshot_items:
item_id = item_id.replace(PASTES_FOLDER+'/', '', 1) # remove root path
domain = get_domain(item_id)
domain_sanitized = sanitize_domain(domain)
if domain != domain_sanitized:
r_serv_onion.sadd('incorrect_domain', domain)
domain = domain_sanitized
r_serv_onion.sadd('domain_screenshot:{}'.format(domain), s_sha256)
r_serv_onion.sadd('screenshot_domain:{}'.format(s_sha256), domain)
else:
pass
# broken screenshot
r_serv_onion.sadd('broken_screenshot', s_sha256)
if __name__ == '__main__':
start_deb = time.time()
faup = Faup()
config_loader = ConfigLoader.ConfigLoader()
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes"))
SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot"), 'screenshot')
r_serv_db = config_loader.get_redis_conn("ARDB_DB")
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
config_loader = None
r_serv_db.set('ail:update_in_progress', 'v2.6')
r_serv_db.set('ail:current_background_update', 'v2.6')
r_serv_db.set('ail:current_background_script_stat', 20)
r_serv_db.set('ail:current_background_script', 'screenshot update')
nb = 0
if os.path.isdir(SCREENSHOT_FOLDER):
for root, dirs, files in os.walk(SCREENSHOT_FOLDER, topdown=False):
# print(dirs)
for name in files:
nb = nb + 1
screenshot_sha256 = os.path.join(root, name)
screenshot_sha256 = screenshot_sha256[:-4] # remove .png
screenshot_sha256 = screenshot_sha256.replace(SCREENSHOT_FOLDER, '', 1)
screenshot_sha256 = screenshot_sha256.replace('/', '')
update_db(screenshot_sha256)
# print('Screenshot updated: {}'.format(nb))
if nb % 1000 == 0:
r_serv_db.set('ail:current_background_script', 'screenshot updated: {}'.format(nb))
r_serv_db.set('ail:current_background_script_stat', 100)
end = time.time()
print('ALL screenshot updated: {} in {} s'.format(nb, end - start_deb))

View file

@@ -1,37 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
import datetime
sys.path.append(os.environ['AIL_BIN'])
from lib import ConfigLoader
new_version = 'v2.7'
if __name__ == '__main__':
start_deb = time.time()
config_loader = ConfigLoader.ConfigLoader()
r_serv = config_loader.get_redis_conn("ARDB_DB")
r_serv_tags = config_loader.get_redis_conn("ARDB_Tags")
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
config_loader = None
r_serv.sadd('ail:to_update', new_version)
#### Update tags ####
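# SUNIONSTORE with no extra source set simply copies 'list_tags' into 'list_tags:item' (same trick below for 'domain_update_v2.7')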
r_serv_tags.sunionstore('list_tags:item', 'list_tags', [])
r_serv_onion.sunionstore('domain_update_v2.7', 'full_onion_up', [])
r_serv_onion.delete('incorrect_domain')
r_serv.set('ail:update_v2.7:deletetagrange', 1)
#### ####
# Set current ail version
r_serv.set('ail:version', new_version)
# Set current ail version
r_serv.hset('ail:update_date', new_version, datetime.datetime.now().strftime("%Y%m%d"))

View file

@ -1,39 +0,0 @@
#!/bin/bash
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"
echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait
bash ${AIL_BIN}/LAUNCH.sh -lav &
wait
echo ""
echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v2.7/Update.py
wait
echo ""
echo ""
echo ""
echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait
exit 0

View file

@ -1,127 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
from pyfaup.faup import Faup
sys.path.append(os.environ['AIL_BIN'])
from packages import Date
from lib import ConfigLoader
def sanitize_domain(domain):
faup.decode(domain)
domain_sanitized = faup.get()
domain_sanitized = domain_sanitized['domain']
try:
domain_sanitized = domain_sanitized.decode()
except:
pass
return domain_sanitized.lower()
def get_all_obj_tags(obj_type):
return list(r_serv_tags.smembers(f'list_tags:{obj_type}'))
def add_global_tag(tag, object_type=None):
r_serv_tags.sadd('list_tags', tag)
if object_type:
r_serv_tags.sadd('list_tags:{}'.format(object_type), tag)
def get_obj_tag(object_id):
res = r_serv_metadata.smembers('tag:{}'.format(object_id))
if res:
return list(res)
else:
return []
def delete_domain_tag_daterange():
all_domains_tags = get_all_obj_tags('domain')
nb_updated = 0
nb_to_update = len(all_domains_tags)
if nb_to_update == 0:
nb_to_update = 1
refresh_time = time.time()
l_dates = Date.substract_date('20191008', Date.get_today_date_str())
for tag in all_domains_tags:
for date_day in l_dates:
r_serv_tags.delete('domain:{}:{}'.format(tag, date_day))
nb_updated += 1
refresh_time = update_progress(refresh_time, nb_updated, nb_to_update)
r_serv_db.delete('ail:update_v2.7:deletetagrange')
def update_domain_tags(domain):
domain_sanitized = sanitize_domain(domain)
if domain != domain_sanitized:
r_serv_onion.sadd('incorrect_domain', domain)
domain = domain_sanitized
domain_tags = get_obj_tag(domain)
for tag in domain_tags:
# delete incorrect tags
if tag == 'infoleak:submission="crawler"' or tag == 'infoleak:submission="manual"':
r_serv_metadata.srem('tag:{}'.format(domain), tag)
else:
add_global_tag(tag, object_type='domain')
r_serv_tags.sadd('{}:{}'.format('domain', tag), domain)
def update_progress(refresh_time, nb_updated, nb_elem_to_update):
if time.time() - refresh_time > 10:
progress = int((nb_updated * 100) / nb_elem_to_update)
print('{}/{} updated {}%'.format(nb_updated, nb_elem_to_update, progress))
r_serv_db.set('ail:current_background_script_stat', progress)
refresh_time = time.time()
return refresh_time
def update_db():
nb_updated = 0
nb_to_update = r_serv_onion.scard('domain_update_v2.7')
refresh_time = time.time()
r_serv_db.set('ail:current_background_script_stat', 0)
r_serv_db.set('ail:current_background_script', 'domain tags update')
domain = r_serv_onion.spop('domain_update_v2.7')
while domain is not None:
update_domain_tags(domain)
nb_updated += 1
refresh_time = update_progress(refresh_time, nb_updated, nb_to_update)
domain = r_serv_onion.spop('domain_update_v2.7')
if r_serv_db.exists('ail:update_v2.7:deletetagrange'):
r_serv_db.set('ail:current_background_script_stat', 0)
r_serv_db.set('ail:current_background_script', 'tags: remove deprecated keys')
delete_domain_tag_daterange()
# sort all crawled domain
r_serv_onion.sort('full_onion_up', alpha=True)
r_serv_onion.sort('full_regular_up', alpha=True)
if __name__ == '__main__':
start_deb = time.time()
faup = Faup()
config_loader = ConfigLoader.ConfigLoader()
r_serv_db = config_loader.get_redis_conn("ARDB_DB")
r_serv_tags = config_loader.get_redis_conn("ARDB_Tags")
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
config_loader = None
update_version = 'v2.7'
r_serv_db.set('ail:update_in_progress', update_version)
r_serv_db.set('ail:current_background_update', update_version)
r_serv_db.set('ail:current_background_script_stat', 0)
r_serv_db.set('ail:current_background_script', 'tags update')
update_db()
r_serv_db.set('ail:current_background_script_stat', 100)
end = time.time()
print('ALL domains tags updated in {} s'.format(end - start_deb))

View file

@ -1,43 +0,0 @@
#!/bin/bash
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"
echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait
bash ${AIL_BIN}/LAUNCH.sh -lav &
wait
echo ""
echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v3.0/Update.py
wait
echo ""
echo ""
echo ""
echo -e $GREEN"Update thirdparty ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -t
echo ""
echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait
exit 0

View file

@ -1,22 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
sys.path.append(os.environ['AIL_HOME'])
##################################
# Import Project packages
##################################
from update.bin.old_ail_updater import AIL_Updater
class Updater(AIL_Updater):
"""default Updater."""
def __init__(self, version):
super(Updater, self).__init__(version)
if __name__ == '__main__':
updater = Updater('v3.1.1')
updater.run_update()

View file

@ -1,52 +0,0 @@
#!/bin/bash
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"
echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait
bash ${AIL_BIN}/LAUNCH.sh -ldbv &
wait
echo ""
# SUBMODULES #
git submodule init
git submodule update
echo -e $GREEN"Installing YARA ..."$DEFAULT
pip3 install yara-python
bash ${AIL_BIN}/LAUNCH.sh -t
# SUBMODULES #
git submodule init
git submodule update
echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v3.1.1/Update.py
wait
echo ""
echo ""
echo ""
echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait
exit 0

View file

@ -1,23 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
sys.path.append(os.environ['AIL_HOME'])
##################################
# Import Project packages
##################################
from update.bin.old_ail_updater import AIL_Updater
class Updater(AIL_Updater):
"""default Updater."""
def __init__(self, version):
super(Updater, self).__init__(version)
if __name__ == '__main__':
updater = Updater('v3.1')
updater.run_update()

View file

@ -1,46 +0,0 @@
#!/bin/bash
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"
echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait
bash ${AIL_BIN}/LAUNCH.sh -lav &
wait
echo ""
pip3 install scrapy
pip3 install scrapy-splash
echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v3.1/Update.py
wait
echo ""
echo ""
echo ""
echo -e $GREEN"Update thirdparty ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -t
echo ""
echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait
exit 0

View file

@ -1,22 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
sys.path.append(os.environ['AIL_HOME'])
##################################
# Import Project packages
##################################
from update.bin.old_ail_updater import AIL_Updater
class Updater(AIL_Updater):
"""default Updater."""
def __init__(self, version):
super(Updater, self).__init__(version)
if __name__ == '__main__':
updater = Updater('v3.2')
updater.run_update()

View file

@ -1,52 +0,0 @@
#!/bin/bash
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"
echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait
bash ${AIL_BIN}/LAUNCH.sh -ldbv &
wait
echo ""
# SUBMODULES #
git submodule init
git submodule update
echo -e $GREEN"Installing YARA ..."$DEFAULT
pip3 install yara-python
bash ${AIL_BIN}/LAUNCH.sh -t
# SUBMODULES #
git submodule init
git submodule update
echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v3.2/Update.py
wait
echo ""
echo ""
echo ""
echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait
exit 0

View file

@ -1,22 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
sys.path.append(os.environ['AIL_HOME'])
##################################
# Import Project packages
##################################
from update.bin.old_ail_updater import AIL_Updater
class Updater(AIL_Updater):
"""default Updater."""
def __init__(self, version):
super(Updater, self).__init__(version)
if __name__ == '__main__':
updater = Updater('v3.3')
updater.run_update()

View file

@ -1,54 +0,0 @@
#!/bin/bash
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"
echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait
bash ${AIL_BIN}/LAUNCH.sh -ldbv &
wait
echo ""
# SUBMODULES #
git submodule update
# echo ""
# echo -e $GREEN"installing KVORCKS ..."$DEFAULT
# cd ${AIL_HOME}
# test ! -d kvrocks/ && git clone https://github.com/bitleak/kvrocks.git
# pushd kvrocks/
# make -j4
# popd
echo -e $GREEN"Installing html2text ..."$DEFAULT
pip3 install html2text
echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v3.3/Update.py
wait
echo ""
echo ""
echo ""
echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait
exit 0

View file

@ -1,34 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
sys.path.append(os.environ['AIL_HOME'])
##################################
# Import Project packages
##################################
from update.bin.old_ail_updater import AIL_Updater
class Updater(AIL_Updater):
"""default Updater."""
def __init__(self, version):
super(Updater, self).__init__(version)
self.r_serv_onion = self.config.get_redis_conn("ARDB_Onion")
def update(self):
"""
Update Domain Languages
"""
self.r_serv_onion.sunionstore('domain_update_v3.4', 'full_onion_up', 'full_regular_up')
self.r_serv.set('update:nb_elem_to_convert', self.r_serv_onion.scard('domain_update_v3.4'))
self.r_serv.set('update:nb_elem_converted', 0)
# Add background update
self.r_serv.sadd('ail:to_update', self.version)
if __name__ == '__main__':
updater = Updater('v3.4')
updater.run_update()

View file

@ -1,54 +0,0 @@
#!/bin/bash
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"
echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait
# bash ${AIL_BIN}/LAUNCH.sh -ldbv &
# wait
# echo ""
# SUBMODULES #
git submodule update
# echo ""
# echo -e $GREEN"installing KVORCKS ..."$DEFAULT
# cd ${AIL_HOME}
# test ! -d kvrocks/ && git clone https://github.com/bitleak/kvrocks.git
# pushd kvrocks/
# make -j4
# popd
echo -e $GREEN"Installing html2text ..."$DEFAULT
pip3 install pycld3
echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v3.4/Update.py
wait
echo ""
echo ""
echo ""
echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait
exit 0

View file

@ -1,121 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib import ConfigLoader
from lib.objects.Items import Item
def get_domain_type(domain_name):
if str(domain_name).endswith('.onion'):
return 'onion'
else:
return 'regular'
def add_domain_language(domain_name, language):
language = language.split('-')[0]
domain_type = get_domain_type(domain_name)
r_serv_onion.sadd('all_domains_languages', language)
r_serv_onion.sadd(f'all_domains_languages:{domain_type}', language)
r_serv_onion.sadd(f'language:domains:{domain_type}:{language}', domain_name)
r_serv_onion.sadd(f'domain:language:{domain_name}', language)
def add_domain_languages_by_item_id(domain_name, item_id):
item = Item(item_id)
for lang in item.get_languages():
add_domain_language(domain_name, lang.language)
def update_update_stats():
nb_updated = int(r_serv_db.get('update:nb_elem_converted'))
progress = int((nb_updated * 100) / nb_elem_to_update)
print(f'{nb_updated}/{nb_elem_to_update} updated {progress}%')
r_serv_db.set('ail:current_background_script_stat', progress)
def update_domain_language(domain_obj, item_id):
domain_name = domain_obj.get_domain_name()
add_domain_languages_by_item_id(domain_name, item_id)
def get_domain_history(domain_type, domain_name):
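# history keys are suffixed with the crawled port (hard-coded to 80 here)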
return r_serv_onion.zrange(f'crawler_history_{domain_type}:{domain_name}:80', 0, -1, withscores=True)
def get_item_children(item_id):
return r_serv_metadata.smembers(f'paste_children:{item_id}')
def get_domain_items(domain_name, root_item_id):
dom_item = get_domain_item_children(domain_name, root_item_id)
dom_item.append(root_item_id)
return dom_item
def is_item_in_domain(domain_name, item_id):
is_in_domain = False
domain_length = len(domain_name)
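# crawled item ids embed the domain name directly before a trailing 36-char UUID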
if len(item_id) > (domain_length+48):
if item_id[-36-domain_length:-36] == domain_name:
is_in_domain = True
return is_in_domain
def get_domain_item_children(domain_name, root_item_id):
all_items = []
for item_id in get_item_children(root_item_id):
if is_item_in_domain(domain_name, item_id):
all_items.append(item_id)
all_items.extend(get_domain_item_children(domain_name, item_id))
return all_items
def get_domain_crawled_item_root(domain_name, domain_type, epoch):
res = r_serv_onion.zrevrangebyscore(f'crawler_history_{domain_type}:{domain_name}:80', int(epoch), int(epoch))
return {"root_item": res[0], "epoch": int(epoch)}
def get_domain_items_crawled(domain_name, domain_type, epoch):
item_crawled = []
item_root = get_domain_crawled_item_root(domain_name, domain_type, epoch)
if item_root:
if item_root['root_item'] != str(item_root['epoch']) and item_root['root_item']:
for item_id in get_domain_items(domain_name, item_root['root_item']):
item_crawled.append(item_id)
return item_crawled
if __name__ == '__main__':
start_deb = time.time()
config_loader = ConfigLoader.ConfigLoader()
r_serv_db = config_loader.get_redis_conn("ARDB_DB")
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
config_loader = None
r_serv_db.set('ail:current_background_script', 'domain languages update')
nb_elem_to_update = r_serv_db.get('update:nb_elem_to_convert')
if not nb_elem_to_update:
nb_elem_to_update = 1
else:
nb_elem_to_update = int(nb_elem_to_update)
# _delete_all_domains_languages()
while True:
domain = r_serv_onion.spop('domain_update_v3.4')
if domain is not None:
print(domain)
domain = str(domain)
domain_t = get_domain_type(domain)
for domain_history in get_domain_history(domain_t, domain):
domain_items = get_domain_items_crawled(domain, domain_t, domain_history[1])
for id_item in domain_items:
update_domain_language(domain, id_item)
r_serv_db.incr('update:nb_elem_converted')
update_update_stats()
else:
r_serv_db.set('ail:current_background_script_stat', 100)
sys.exit(0)

View file

@ -1,22 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
sys.path.append(os.environ['AIL_HOME'])
##################################
# Import Project packages
##################################
from update.bin.old_ail_updater import AIL_Updater
class Updater(AIL_Updater):
"""default Updater."""
def __init__(self, version):
super(Updater, self).__init__(version)
if __name__ == '__main__':
updater = Updater('v3.5')
updater.run_update()

View file

@ -1,35 +0,0 @@
#!/bin/bash
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"
echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait
# SUBMODULES #
git submodule update
echo -e $GREEN"Installing PyAIL ..."$DEFAULT
pip3 install -U pyail
echo -e $GREEN"Installing D4 CLIENT ..."$DEFAULT
pip3 install -U d4-pyclient
echo ""
echo -e $GREEN"Updating DomainClassifier ..."$DEFAULT
pip3 install -U DomainClassifier
exit 0

View file

@ -1,22 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
sys.path.append(os.environ['AIL_HOME'])
##################################
# Import Project packages
##################################
from update.bin.old_ail_updater import AIL_Updater
class Updater(AIL_Updater):
"""default Updater."""
def __init__(self, version):
super(Updater, self).__init__(version)
if __name__ == '__main__':
updater = Updater('v3.6')
updater.run_update()

View file

@ -1,39 +0,0 @@
#!/bin/bash
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"
echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait
# SUBMODULES #
git submodule update
echo -e $GREEN"Updating D4 CLIENT ..."$DEFAULT
pip3 install -U d4-pyclient
echo ""
echo -e $GREEN"Installing nose ..."$DEFAULT
pip3 install -U nose
echo -e $GREEN"Installing coverage ..."$DEFAULT
pip3 install -U coverage
echo ""
echo -e $GREEN"Updating DomainClassifier ..."$DEFAULT
pip3 install -U DomainClassifier
exit 0

View file

@ -1,40 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
sys.path.append(os.environ['AIL_HOME'])
##################################
# Import Project packages
##################################
from update.bin.old_ail_updater import AIL_Updater
from lib import Tracker
class Updater(AIL_Updater):
"""default Updater."""
def __init__(self, version):
super(Updater, self).__init__(version)
def update(self):
"""
Update Domain Languages
"""
print('Fixing Tracker_uuid list ...')
Tracker.fix_all_tracker_uuid_list()
nb = 0
for tracker_uuid in Tracker.get_trackers():
self.r_serv.sadd('trackers_update_v3.7', tracker_uuid)
nb += 1
self.r_serv.set('update:nb_elem_to_convert', nb)
self.r_serv.set('update:nb_elem_converted', 0)
# Add background update
self.r_serv.sadd('ail:to_update', self.version)
if __name__ == '__main__':
updater = Updater('v3.7')
updater.run_update()

View file

@ -1,44 +0,0 @@
#!/bin/bash
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"
echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait
# SUBMODULES #
git submodule update
echo -e $GREEN"Updating thirdparty ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ut
wait
echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v3.7/Update.py
wait
echo ""
echo ""
echo ""
echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait
exit 0

View file

@ -1,53 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib import ConfigLoader
from lib import Tracker
def update_update_stats():
nb_updated = int(r_serv_db.get('update:nb_elem_converted'))
progress = int((nb_updated * 100) / nb_elem_to_update)
print(f'{nb_updated}/{nb_elem_to_update} updated {progress}%')
r_serv_db.set('ail:current_background_script_stat', progress)
if __name__ == '__main__':
start_deb = time.time()
config_loader = ConfigLoader.ConfigLoader()
r_serv_db = config_loader.get_redis_conn("ARDB_DB")
r_serv_tracker = config_loader.get_db_conn("Kvrocks_DB")
config_loader = None
Tracker.r_serv_tracker = r_serv_tracker
r_serv_db.set('ail:current_background_script', 'trackers update')
nb_elem_to_update = r_serv_db.get('update:nb_elem_to_convert')
if not nb_elem_to_update:
nb_elem_to_update = 1
else:
nb_elem_to_update = int(nb_elem_to_update)
while True:
tracker_uuid = r_serv_db.spop('trackers_update_v3.7')
if tracker_uuid is not None:
print(tracker_uuid)
# FIX STATS
Tracker.fix_tracker_stats_per_day(tracker_uuid)
# MAP TRACKER - ITEM_ID
Tracker.fix_tracker_item_link(tracker_uuid)
r_serv_db.incr('update:nb_elem_converted')
update_update_stats()
else:
r_serv_db.set('ail:current_background_script_stat', 100)
sys.exit(0)

View file

@ -1,22 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
sys.path.append(os.environ['AIL_HOME'])
##################################
# Import Project packages
##################################
from update.bin.old_ail_updater import AIL_Updater
class Updater(AIL_Updater):
"""default Updater."""
def __init__(self, version):
super(Updater, self).__init__(version)
if __name__ == '__main__':
updater = Updater('v4.0')
updater.run_update()

View file

@ -1,29 +0,0 @@
#!/bin/bash
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"
echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait
# SUBMODULES #
git submodule update
echo ""
echo -e $GREEN"Installing nose ..."$DEFAULT
pip3 install -U websockets
exit 0

View file

@ -1,31 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import redis
sys.path.append(os.environ['AIL_HOME'])
##################################
# Import Project packages
##################################
from update.bin.old_ail_updater import AIL_Updater
class Updater(AIL_Updater):
"""default Updater."""
def __init__(self, version):
super(Updater, self).__init__(version)
def update(self):
r_tracking = redis.StrictRedis(host='localhost',
port=6382,
db=2,
decode_responses=True)
# FLUSH OLD DB
r_tracking.flushdb()
if __name__ == '__main__':
updater = Updater('v4.1')
updater.run_update()

View file

@ -1,22 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
sys.path.append(os.environ['AIL_HOME'])
##################################
# Import Project packages
##################################
from update.bin.old_ail_updater import AIL_Updater
class Updater(AIL_Updater):
"""default Updater."""
def __init__(self, version):
super(Updater, self).__init__(version)
if __name__ == '__main__':
updater = Updater('v4.2.1')
updater.run_update()

View file

@ -1,29 +0,0 @@
#!/bin/bash
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"
echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait
# SUBMODULES #
git submodule update
echo ""
echo -e $GREEN"Updating pusblogger ..."$DEFAULT
pip3 install -U pubsublogger
exit 0

View file

@ -1,22 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
sys.path.append(os.environ['AIL_HOME'])
##################################
# Import Project packages
##################################
from update.bin.old_ail_updater import AIL_Updater
class Updater(AIL_Updater):
"""default Updater."""
def __init__(self, version):
super(Updater, self).__init__(version)
if __name__ == '__main__':
updater = Updater('v4.2')
updater.run_update()

View file

@ -1,33 +0,0 @@
#!/bin/bash
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"
echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait
# SUBMODULES #
git submodule update
echo ""
echo -e $GREEN"Installing typo-squatting ..."$DEFAULT
pip3 install -U ail_typo_squatting
echo ""
echo -e $GREEN"Updating d4-client ..."$DEFAULT
pip3 install -U d4-pyclient
exit 0

View file

@ -8,7 +8,8 @@ sys.path.append(os.environ['AIL_HOME'])
 ##################################
 # Import Project packages
 ##################################
-from update.bin.old_ail_updater import AIL_Updater
+from update.bin.ail_updater import AIL_Updater
+from lib import ail_updates

 class Updater(AIL_Updater):
     """default Updater."""
@ -18,5 +19,6 @@ class Updater(AIL_Updater):
 if __name__ == '__main__':
-    updater = Updater('v3.0')
+    updater = Updater('v5.2')
     updater.run_update()
+    ail_updates.add_background_update('v5.2')

View file

@ -2,13 +2,11 @@
 [ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
 [ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
 [ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
 [ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

 export PATH=$AIL_HOME:$PATH
 export PATH=$AIL_REDIS:$PATH
-export PATH=$AIL_ARDB:$PATH
 export PATH=$AIL_BIN:$PATH
 export PATH=$AIL_FLASK:$PATH
@ -22,4 +20,12 @@ wait
 # SUBMODULES #
 git submodule update

+echo ""
+echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
+echo ""
+python ${AIL_HOME}/update/v5.2/Update.py
+wait
+echo ""
+echo ""
 exit 0

update/v5.2/compress_har.py (new executable file, 27 lines)
View file

@ -0,0 +1,27 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import gzip
import os
import sys
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib import ail_updates
from lib import crawlers
if __name__ == '__main__':
update = ail_updates.AILBackgroundUpdate('v5.2')
HAR_DIR = crawlers.HAR_DIR
hars_ids = crawlers.get_all_har_ids()
update.set_nb_to_update(len(hars_ids))
n = 0
for har_id in hars_ids:
crawlers._gzip_har(har_id)
update.inc_nb_updated()
n += 1  # n was never incremented, so progress was reported on every iteration instead of every 100 HARs
if n % 100 == 0:
update.update_progress()
crawlers._gzip_all_hars()

View file

@ -17,9 +17,6 @@ from flask_login import LoginManager, current_user, login_user, logout_user, log
 import importlib
 from os.path import join

-# # TODO: put me in lib/Tag
-from pytaxonomies import Taxonomies

 sys.path.append('./modules/')
 sys.path.append(os.environ['AIL_BIN'])
@ -51,6 +48,9 @@ from blueprints.objects_decoded import objects_decoded
 from blueprints.objects_subtypes import objects_subtypes
 from blueprints.objects_title import objects_title
 from blueprints.objects_cookie_name import objects_cookie_name
+from blueprints.objects_etag import objects_etag
+from blueprints.objects_hhhash import objects_hhhash
+from blueprints.objects_chat import objects_chat

 Flask_dir = os.environ['AIL_FLASK']
@ -106,6 +106,9 @@ app.register_blueprint(objects_decoded, url_prefix=baseUrl)
 app.register_blueprint(objects_subtypes, url_prefix=baseUrl)
 app.register_blueprint(objects_title, url_prefix=baseUrl)
 app.register_blueprint(objects_cookie_name, url_prefix=baseUrl)
+app.register_blueprint(objects_etag, url_prefix=baseUrl)
+app.register_blueprint(objects_hhhash, url_prefix=baseUrl)
+app.register_blueprint(objects_chat, url_prefix=baseUrl)

 # ========= =========#
@ -250,16 +253,6 @@ default_taxonomies = ["infoleak", "gdpr", "fpf", "dark-web"]
 for taxonomy in default_taxonomies:
     Tag.enable_taxonomy_tags(taxonomy)

-# ========== INITIAL tags auto export ============
-# taxonomies = Taxonomies()
-#
-# infoleak_tags = taxonomies.get('infoleak').machinetags()
-# infoleak_automatic_tags = []
-# for tag in taxonomies.get('infoleak').machinetags():
-#     if tag.split('=')[0][:] == 'infoleak:automatic-detection':
-#         r_serv_db.sadd('list_export_tags', tag)
-#
-# r_serv_db.sadd('list_export_tags', 'infoleak:submission="manual"')

 # ============ MAIN ============
 if __name__ == "__main__":

View file

@ -61,6 +61,13 @@ def sanitise_level(level):
         level = 2
     return level

+def sanitise_objs_hidden(objs_hidden):
+    if objs_hidden:
+        objs_hidden = set(objs_hidden.split(','))  # TODO sanitize objects
+    else:
+        objs_hidden = set()
+    return objs_hidden

 # ============= ROUTES ==============
 @correlation.route('/correlation/show', methods=['GET', 'POST'])
 @login_required
@ -83,12 +90,18 @@ def show_correlation():
         correl_option = request.form.get('CookieNameCheck')
         if correl_option:
             filter_types.append('cookie-name')
+        correl_option = request.form.get('EtagCheck')
+        if correl_option:
+            filter_types.append('etag')
         correl_option = request.form.get('CveCheck')
         if correl_option:
             filter_types.append('cve')
         correl_option = request.form.get('CryptocurrencyCheck')
         if correl_option:
             filter_types.append('cryptocurrency')
+        correl_option = request.form.get('HHHashCheck')
+        if correl_option:
+            filter_types.append('hhhash')
         correl_option = request.form.get('PgpCheck')
         if correl_option:
             filter_types.append('pgp')
@ -127,6 +140,10 @@ def show_correlation():
         max_nodes = sanitise_nb_max_nodes(request.args.get('max_nodes'))
         mode = sanitise_graph_mode(request.args.get('mode'))
         level = sanitise_level(request.args.get('level'))
+        objs_hidden = sanitise_objs_hidden(request.args.get('hidden'))
+        obj_to_hide = request.args.get('hide')
+        if obj_to_hide:
+            objs_hidden.add(obj_to_hide)

         related_btc = bool(request.args.get('related_btc', False))
@ -136,17 +153,24 @@
     if not ail_objects.exists_obj(obj_type, subtype, obj_id):
         return abort(404)
     # object exist
-    else:
-        dict_object = {"object_type": obj_type,
-                       "correlation_id": obj_id,
+    else:  # TODO remove old dict key
+        dict_object = {"type": obj_type,
+                       "id": obj_id,
+                       "object_type": obj_type,
                        "max_nodes": max_nodes, "mode": mode, "level": level,
                        "filter": filter_types, "filter_str": ",".join(filter_types),
+                       "hidden": objs_hidden, "hidden_str": ",".join(objs_hidden),
+                       "correlation_id": obj_id,
                        "metadata": ail_objects.get_object_meta(obj_type, subtype, obj_id,
                                                                options={'tags'}, flask_context=True),
                        "nb_correl": ail_objects.get_obj_nb_correlations(obj_type, subtype, obj_id)
                        }
         if subtype:
+            dict_object["subtype"] = subtype
             dict_object["metadata"]['type_id'] = subtype
+        else:
+            dict_object["subtype"] = ''
         dict_object["metadata_card"] = ail_objects.get_object_card_meta(obj_type, subtype, obj_id, related_btc=related_btc)
         return render_template("show_correlation.html", dict_object=dict_object, bootstrap_label=bootstrap_label,
                                tags_selector_data=Tag.get_tags_selector_data())
@ -156,26 +180,15 @@ def show_correlation():
 @login_read_only
 def get_description():
     object_id = request.args.get('object_id')
-    object_id = object_id.split(':')
-    # unpack object_id
-    if len(object_id) == 3:
-        object_type = object_id[0]
-        type_id = object_id[1]
-        correlation_id = object_id[2]
-    elif len(object_id) == 2:
-        object_type = object_id[0]
-        type_id = None
-        correlation_id = object_id[1]
-    else:
-        return jsonify({})
+    obj_type, subtype, obj_id = ail_objects.get_obj_type_subtype_id_from_global_id(object_id)
+    # # TODO: put me in lib

-    # check if correlation_id exist
+    # check if obj exist
     # # TODO: return error json
-    if not ail_objects.exists_obj(object_type, type_id, correlation_id):
+    if not ail_objects.exists_obj(obj_type, subtype, obj_id):
         return Response(json.dumps({"status": "error", "reason": "404 Not Found"}, indent=2, sort_keys=True), mimetype='application/json'), 404
     # object exist
     else:
-        res = ail_objects.get_object_meta(object_type, type_id, correlation_id, options={'tags', 'tags_safe'},
+        res = ail_objects.get_object_meta(obj_type, subtype, obj_id, options={'tags', 'tags_safe'},
                                           flask_context=True)
         if 'tags' in res:
             res['tags'] = list(res['tags'])
@ -191,9 +204,15 @@ def graph_node_json():
     max_nodes = sanitise_nb_max_nodes(request.args.get('max_nodes'))
     level = sanitise_level(request.args.get('level'))
+    hidden = request.args.get('hidden')
+    if hidden:
+        hidden = set(hidden.split(','))
+    else:
+        hidden = set()

     filter_types = ail_objects.sanitize_objs_types(request.args.get('filter', '').split(','))

-    json_graph = ail_objects.get_correlations_graph_node(obj_type, subtype, obj_id, filter_types=filter_types, max_nodes=max_nodes, level=level, flask_context=True)
+    json_graph = ail_objects.get_correlations_graph_node(obj_type, subtype, obj_id, filter_types=filter_types, max_nodes=max_nodes, level=level, objs_hidden=hidden, flask_context=True)
     #json_graph = Correlate_object.get_graph_node_object_correlation(obj_type, obj_id, 'union', correlation_names, correlation_objects, requested_correl_type=subtype, max_nodes=max_nodes)
     return jsonify(json_graph)
@ -221,6 +240,7 @@ def correlation_tags_add():
     nb_max = sanitise_nb_max_nodes(request.form.get('tag_nb_max'))
     level = sanitise_level(request.form.get('tag_level'))
     filter_types = ail_objects.sanitize_objs_types(request.form.get('tag_filter', '').split(','))
+    hidden = sanitise_objs_hidden(request.form.get('tag_hidden'))

     if not ail_objects.exists_obj(obj_type, subtype, obj_id):
         return abort(404)
@ -249,9 +269,11 @@ def correlation_tags_add():
     if tags:
         ail_objects.obj_correlations_objs_add_tags(obj_type, subtype, obj_id, tags, filter_types=filter_types,
+                                                   objs_hidden=hidden,
                                                    lvl=level + 1, nb_max=nb_max)

     return redirect(url_for('correlation.show_correlation',
                             type=obj_type, subtype=subtype, id=obj_id,
                             level=level,
                             max_nodes=nb_max,
+                            hidden=hidden, hidden_str=",".join(hidden),
                             filter=",".join(filter_types)))

Some files were not shown because too many files have changed in this diff.