chg: [ail queues] merge

This commit is contained in:
terrtia 2023-09-08 10:52:55 +02:00
commit c19b1f34e3
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
131 changed files with 4383 additions and 3039 deletions

View file

@ -27,7 +27,7 @@ fi
export PATH=$AIL_VENV/bin:$PATH
export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_KVROCKS:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH
@ -685,9 +685,6 @@ while [ "$1" != "" ]; do
-lrv | --launchRedisVerify ) launch_redis;
wait_until_redis_is_ready;
;;
-lav | --launchARDBVerify ) launch_ardb;
wait_until_ardb_is_ready;
;;
-lkv | --launchKVORCKSVerify ) launch_kvrocks;
wait_until_kvrocks_is_ready;
;;

View file

@ -17,6 +17,7 @@ from lib import ail_logger
from lib import crawlers
from lib.ConfigLoader import ConfigLoader
from lib.objects import CookiesNames
from lib.objects import Etags
from lib.objects.Domains import Domain
from lib.objects.Items import Item
from lib.objects import Screenshots
@ -59,6 +60,7 @@ class Crawler(AbstractModule):
self.root_item = None
self.date = None
self.items_dir = None
self.original_domain = None
self.domain = None
# TODO Replace with warning list ???
@ -98,7 +100,7 @@ class Crawler(AbstractModule):
self.crawler_scheduler.update_queue()
self.crawler_scheduler.process_queue()
self.refresh_lacus_status() # TODO LOG ERROR
self.refresh_lacus_status() # TODO LOG ERROR
if not self.is_lacus_up:
return None
@ -121,11 +123,19 @@ class Crawler(AbstractModule):
if capture:
try:
status = self.lacus.get_capture_status(capture.uuid)
if status != crawlers.CaptureStatus.DONE: # TODO ADD GLOBAL TIMEOUT-> Save start time ### print start time
if status == crawlers.CaptureStatus.DONE:
return capture
elif status == crawlers.CaptureStatus.UNKNOWN:
capture_start = capture.get_start_time(r_str=False)
if int(time.time()) - capture_start > 600: # TODO ADD in new crawler config
task = capture.get_task()
task.reset()
capture.delete()
else:
capture.update(status)
else:
capture.update(status)
print(capture.uuid, crawlers.CaptureStatus(status).name, int(time.time()))
else:
return capture
except ConnectionError:
print(capture.uuid)
@ -181,6 +191,7 @@ class Crawler(AbstractModule):
print(domain)
self.domain = Domain(domain)
self.original_domain = Domain(domain)
epoch = int(time.time())
parent_id = task.get_parent()
@ -203,12 +214,20 @@ class Crawler(AbstractModule):
# Origin + History + tags
if self.root_item:
self.domain.set_last_origin(parent_id)
self.domain.add_history(epoch, root_item=self.root_item)
# Tags
for tag in task.get_tags():
self.domain.add_tag(tag)
elif self.domain.was_up():
self.domain.add_history(epoch, root_item=epoch)
self.domain.add_history(epoch, root_item=self.root_item)
if self.domain != self.original_domain:
self.original_domain.update_daterange(self.date.replace('/', ''))
if self.root_item:
self.original_domain.set_last_origin(parent_id)
# Tags
for tag in task.get_tags():
self.domain.add_tag(tag)
self.original_domain.add_history(epoch, root_item=self.root_item)
crawlers.update_last_crawled_domain(self.original_domain.get_domain_type(), self.original_domain.id, epoch)
crawlers.update_last_crawled_domain(self.domain.get_domain_type(), self.domain.id, epoch)
print('capture:', capture.uuid, 'completed')
@ -263,7 +282,7 @@ class Crawler(AbstractModule):
title_content = crawlers.extract_title_from_html(entries['html'])
if title_content:
title = Titles.create_title(title_content)
title.add(item.get_date(), item_id)
title.add(item.get_date(), item)
# SCREENSHOT
if self.screenshot:
@ -287,7 +306,12 @@ class Crawler(AbstractModule):
for cookie_name in crawlers.extract_cookies_names_from_har(entries['har']):
print(cookie_name)
cookie = CookiesNames.create(cookie_name)
cookie.add(self.date.replace('/', ''), self.domain.id)
cookie.add(self.date.replace('/', ''), self.domain)
for etag_content in crawlers.extract_etag_from_har(entries['har']):
print(etag_content)
etag = Etags.create(etag_content)
etag.add(self.date.replace('/', ''), self.domain)
crawlers.extract_hhhash(entries['har'], self.domain.id, self.date.replace('/', ''))
# Next Children
entries_children = entries.get('children')

View file

@ -8,9 +8,12 @@ Import Content
"""
import os
import logging
import logging.config
import sys
from abc import ABC
from ssl import create_default_context
import smtplib
from email.mime.multipart import MIMEMultipart
@ -22,17 +25,22 @@ sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib import ail_logger
from exporter.abstract_exporter import AbstractExporter
from lib.ConfigLoader import ConfigLoader
# from lib.objects.abstract_object import AbstractObject
# from lib.Tracker import Tracker
logging.config.dictConfig(ail_logger.get_config(name='modules'))
class MailExporter(AbstractExporter, ABC):
def __init__(self, host=None, port=None, password=None, user='', sender=''):
def __init__(self, host=None, port=None, password=None, user='', sender='', cert_required=None, ca_file=None):
super().__init__()
config_loader = ConfigLoader()
self.logger = logging.getLogger(f'{self.__class__.__name__}')
if host:
self.host = host
self.port = port
@ -45,6 +53,15 @@ class MailExporter(AbstractExporter, ABC):
self.pw = config_loader.get_config_str("Notifications", "sender_pw")
if self.pw == 'None':
self.pw = None
if cert_required is not None:
self.cert_required = bool(cert_required)
self.ca_file = ca_file
else:
self.cert_required = config_loader.get_config_boolean("Notifications", "cert_required")
if self.cert_required:
self.ca_file = config_loader.get_config_str("Notifications", "ca_file")
else:
self.ca_file = None
if user:
self.user = user
else:
@ -67,8 +84,12 @@ class MailExporter(AbstractExporter, ABC):
smtp_server = smtplib.SMTP(self.host, self.port)
smtp_server.starttls()
except smtplib.SMTPNotSupportedError:
print("The server does not support the STARTTLS extension.")
smtp_server = smtplib.SMTP_SSL(self.host, self.port)
self.logger.info(f"The server {self.host}:{self.port} does not support the STARTTLS extension.")
if self.cert_required:
context = create_default_context(cafile=self.ca_file)
else:
context = None
smtp_server = smtplib.SMTP_SSL(self.host, self.port, context=context)
smtp_server.ehlo()
if self.user is not None:
@ -80,7 +101,7 @@ class MailExporter(AbstractExporter, ABC):
return smtp_server
# except Exception as err:
# traceback.print_tb(err.__traceback__)
# logger.warning(err)
# self.logger.warning(err)
def _export(self, recipient, subject, body):
mime_msg = MIMEMultipart()
@ -95,8 +116,8 @@ class MailExporter(AbstractExporter, ABC):
smtp_client.quit()
# except Exception as err:
# traceback.print_tb(err.__traceback__)
# logger.warning(err)
print(f'Send notification: {subject} to {recipient}')
# self.logger.warning(err)
self.logger.info(f'Send notification: {subject} to {recipient}')
class MailExporterTracker(MailExporter):

View file

@ -87,13 +87,16 @@ class FeederImporter(AbstractImporter):
feeder_name = feeder.get_name()
print(f'importing: {feeder_name} feeder')
item_id = feeder.get_item_id()
item_id = feeder.get_item_id() # TODO replace me with object global id
# process meta
if feeder.get_json_meta():
feeder.process_meta()
gzip64_content = feeder.get_gzip64_content()
return f'{feeder_name} {item_id} {gzip64_content}'
if feeder_name == 'telegram':
return item_id # TODO support UI dashboard
else:
gzip64_content = feeder.get_gzip64_content()
return f'{feeder_name} {item_id} {gzip64_content}'
class FeederModuleImporter(AbstractModule):

View file

@ -35,7 +35,7 @@ class PystemonImporter(AbstractImporter):
print(item_id)
if item_id:
print(item_id)
full_item_path = os.path.join(self.dir_pystemon, item_id) # TODO SANITIZE PATH
full_item_path = os.path.join(self.dir_pystemon, item_id) # TODO SANITIZE PATH
# Check if pystemon file exists
if not os.path.isfile(full_item_path):
print(f'Error: {full_item_path}, file not found')
@ -47,7 +47,12 @@ class PystemonImporter(AbstractImporter):
if not content:
return None
return self.create_message(item_id, content, source='pystemon')
if full_item_path[-3:] == '.gz':
gzipped = True
else:
gzipped = False
return self.create_message(item_id, content, gzipped=gzipped, source='pystemon')
except IOError as e:
print(f'Error: {full_item_path}, IOError')

View file

@ -89,7 +89,7 @@ class AbstractImporter(ABC): # TODO ail queues
if not gzipped:
content = self.b64_gzip(content)
elif not b64:
content = self.b64(gzipped)
content = self.b64(content)
if not content:
return None
if isinstance(content, bytes):

View file

@ -17,7 +17,7 @@ sys.path.append(os.environ['AIL_BIN'])
##################################
from importer.feeders.Default import DefaultFeeder
from lib.objects.Usernames import Username
from lib import item_basic
from lib.objects.Items import Item
class JabberFeeder(DefaultFeeder):
@ -36,7 +36,7 @@ class JabberFeeder(DefaultFeeder):
self.item_id = f'{item_id}.gz'
return self.item_id
def process_meta(self):
def process_meta(self): # TODO replace me by message
"""
Process JSON meta field.
"""
@ -44,10 +44,12 @@ class JabberFeeder(DefaultFeeder):
# item_basic.add_map_obj_id_item_id(jabber_id, item_id, 'jabber_id') ##############################################
to = str(self.json_data['meta']['jabber:to'])
fr = str(self.json_data['meta']['jabber:from'])
date = item_basic.get_item_date(item_id)
item = Item(self.item_id)
date = item.get_date()
user_to = Username(to, 'jabber')
user_fr = Username(fr, 'jabber')
user_to.add(date, self.item_id)
user_fr.add(date, self.item_id)
user_to.add(date, item)
user_fr.add(date, item)
return None

View file

@ -16,8 +16,28 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages
##################################
from importer.feeders.Default import DefaultFeeder
from lib.ConfigLoader import ConfigLoader
from lib.objects.Chats import Chat
from lib.objects import Messages
from lib.objects import UsersAccount
from lib.objects.Usernames import Username
from lib import item_basic
import base64
import io
import gzip
def gunzip_bytes_obj(bytes_obj):
    """Decompress a gzip-compressed bytes object.

    Best-effort helper: any decompression failure (truncated stream, not
    gzip data, ...) is printed and swallowed.

    :param bytes_obj: gzip-compressed payload (``bytes``)
    :return: the decompressed ``bytes``, or ``None`` if *bytes_obj* is not
             valid gzip data
    """
    try:
        # gzip.decompress does the BytesIO + GzipFile dance for us in one call
        return gzip.decompress(bytes_obj)
    except Exception as e:
        print(f'Global; Invalid Gzip file: {e}')
        return None
class TelegramFeeder(DefaultFeeder):
@ -26,31 +46,90 @@ class TelegramFeeder(DefaultFeeder):
self.name = 'telegram'
# define item id
def get_item_id(self):
# TODO use telegram message date
date = datetime.date.today().strftime("%Y/%m/%d")
channel_id = str(self.json_data['meta']['channel_id'])
message_id = str(self.json_data['meta']['message_id'])
item_id = f'{channel_id}_{message_id}'
item_id = os.path.join('telegram', date, item_id)
self.item_id = f'{item_id}.gz'
def get_item_id(self): # TODO rename self.item_id
# Get message date
timestamp = self.json_data['meta']['date']['timestamp'] # TODO CREATE DEFAULT TIMESTAMP
# if self.json_data['meta'].get('date'):
# date = datetime.datetime.fromtimestamp( self.json_data['meta']['date']['timestamp'])
# date = date.strftime('%Y/%m/%d')
# else:
# date = datetime.date.today().strftime("%Y/%m/%d")
chat_id = str(self.json_data['meta']['chat']['id'])
message_id = str(self.json_data['meta']['id'])
self.item_id = Messages.create_obj_id('telegram', chat_id, message_id, timestamp) # TODO rename self.item_id
return self.item_id
def process_meta(self):
"""
Process JSON meta field.
"""
# channel_id = str(self.json_data['meta']['channel_id'])
# message_id = str(self.json_data['meta']['message_id'])
# telegram_id = f'{channel_id}_{message_id}'
# item_basic.add_map_obj_id_item_id(telegram_id, item_id, 'telegram_id') #########################################
user = None
if self.json_data['meta'].get('user'):
user = str(self.json_data['meta']['user'])
elif self.json_data['meta'].get('channel'):
user = str(self.json_data['meta']['channel'].get('username'))
if user:
date = item_basic.get_item_date(self.item_id)
username = Username(user, 'telegram')
username.add(date, self.item_id)
# message chat
meta = self.json_data['meta']
mess_id = self.json_data['meta']['id']
if meta.get('reply_to'):
reply_to_id = meta['reply_to']['id']
else:
reply_to_id = None
timestamp = meta['date']['timestamp']
date = datetime.datetime.fromtimestamp(timestamp)
date = date.strftime('%Y%m%d')
if self.json_data.get('translation'):
translation = self.json_data['translation']
else:
translation = None
decoded = base64.standard_b64decode(self.json_data['data'])
content = gunzip_bytes_obj(decoded)
message = Messages.create(self.item_id, content, translation=translation)
if meta.get('chat'):
chat = Chat(meta['chat']['id'], 'telegram')
if meta['chat'].get('username'):
chat_username = Username(meta['chat']['username'], 'telegram')
chat.update_username_timeline(chat_username.get_global_id(), timestamp)
# Chat---Message
chat.add(date)
chat.add_message(message.get_global_id(), timestamp, mess_id, reply_id=reply_to_id)
else:
chat = None
# message sender
if meta.get('sender'): # TODO handle message channel forward - check if is user
user_id = meta['sender']['id']
user_account = UsersAccount.UserAccount(user_id, 'telegram')
# UserAccount---Message
user_account.add(date, obj=message)
# UserAccount---Chat
user_account.add_correlation(chat.type, chat.get_subtype(r_str=True), chat.id)
if meta['sender'].get('firstname'):
user_account.set_first_name(meta['sender']['firstname'])
if meta['sender'].get('lastname'):
user_account.set_last_name(meta['sender']['lastname'])
if meta['sender'].get('phone'):
user_account.set_phone(meta['sender']['phone'])
if meta['sender'].get('username'):
username = Username(meta['sender']['username'], 'telegram')
# TODO timeline or/and correlation ????
user_account.add_correlation(username.type, username.get_subtype(r_str=True), username.id)
user_account.update_username_timeline(username.get_global_id(), timestamp)
# Username---Message
username.add(date) # TODO # correlation message ???
# if chat: # TODO Chat---Username correlation ???
# # Chat---Username
# chat.add_correlation(username.type, username.get_subtype(r_str=True), username.id)
# if meta.get('fwd_from'):
# if meta['fwd_from'].get('post_author') # user first name
# TODO reply threads ????
# message edit ????
return None

View file

@ -17,7 +17,7 @@ sys.path.append(os.environ['AIL_BIN'])
##################################
from importer.feeders.Default import DefaultFeeder
from lib.objects.Usernames import Username
from lib import item_basic
from lib.objects.Items import Item
class TwitterFeeder(DefaultFeeder):
@ -40,9 +40,9 @@ class TwitterFeeder(DefaultFeeder):
'''
# tweet_id = str(self.json_data['meta']['twitter:tweet_id'])
# item_basic.add_map_obj_id_item_id(tweet_id, item_id, 'twitter_id') ############################################
date = item_basic.get_item_date(self.item_id)
item = Item(self.item_id)
date = item.get_date()
user = str(self.json_data['meta']['twitter:id'])
username = Username(user, 'twitter')
username.add(date, item_id)
username.add(date, item)
return None

View file

@ -235,18 +235,27 @@ class Investigation(object):
objs.append(dict_obj)
return objs
def get_objects_comment(self, obj_global_id):
    """Return the comment stored for *obj_global_id* in this investigation (None if no comment was set)."""
    return r_tracking.hget(f'investigations:objs:comment:{self.uuid}', obj_global_id)
def set_objects_comment(self, obj_global_id, comment):
    """Store *comment* for *obj_global_id* in this investigation.

    Empty/falsy comments are ignored (no hash field is created).
    """
    if comment:
        r_tracking.hset(f'investigations:objs:comment:{self.uuid}', obj_global_id, comment)
# # TODO: def register_object(self, Object): in OBJECT CLASS
def register_object(self, obj_id, obj_type, subtype):
def register_object(self, obj_id, obj_type, subtype, comment=''):
    """Attach an object to this investigation.

    Adds the object's global id to the investigation's object set and to
    the reverse index (object -> investigations), optionally stores a
    per-object comment, then refreshes the last-change timestamp.

    :param obj_id: object identifier
    :param obj_type: object type (e.g. 'domain', 'item')
    :param subtype: object subtype ('' when the type has none)
    :param comment: optional free-text comment attached to the object
    """
    r_tracking.sadd(f'investigations:objs:{self.uuid}', f'{obj_type}:{subtype}:{obj_id}')
    r_tracking.sadd(f'obj:investigations:{obj_type}:{subtype}:{obj_id}', self.uuid)
    if comment:
        self.set_objects_comment(f'{obj_type}:{subtype}:{obj_id}', comment)
    timestamp = int(time.time())
    self.set_last_change(timestamp)
def unregister_object(self, obj_id, obj_type, subtype):
    """Detach an object from this investigation.

    Removes the object from both the investigation's object set and the
    reverse index, deletes any stored per-object comment, then refreshes
    the last-change timestamp.
    """
    r_tracking.srem(f'investigations:objs:{self.uuid}', f'{obj_type}:{subtype}:{obj_id}')
    r_tracking.srem(f'obj:investigations:{obj_type}:{subtype}:{obj_id}', self.uuid)
    r_tracking.hdel(f'investigations:objs:comment:{self.uuid}', f'{obj_type}:{subtype}:{obj_id}')
    timestamp = int(time.time())
    self.set_last_change(timestamp)
@ -351,7 +360,7 @@ def get_investigations_selector():
for investigation_uuid in get_all_investigations():
investigation = Investigation(investigation_uuid)
name = investigation.get_info()
l_investigations.append({"id":investigation_uuid, "name": name})
l_investigations.append({"id": investigation_uuid, "name": name})
return l_investigations
#{id:'8dc4b81aeff94a9799bd70ba556fa345',name:"Paris"}
@ -453,7 +462,11 @@ def api_register_object(json_dict):
if subtype == 'None':
subtype = ''
obj_id = json_dict.get('id', '').replace(' ', '')
res = investigation.register_object(obj_id, obj_type, subtype)
comment = json_dict.get('comment', '')
# if comment:
# comment = escape(comment)
res = investigation.register_object(obj_id, obj_type, subtype, comment=comment)
return res, 200
def api_unregister_object(json_dict):

View file

@ -338,7 +338,7 @@ def get_galaxy_meta(galaxy_name, nb_active_tags=False):
else:
meta['icon'] = f'fas fa-{icon}'
if nb_active_tags:
meta['nb_active_tags'] = get_galaxy_nb_tags_enabled(galaxy)
meta['nb_active_tags'] = get_galaxy_nb_tags_enabled(galaxy.type)
meta['nb_tags'] = len(get_galaxy_tags(galaxy.type))
return meta

View file

@ -207,6 +207,13 @@ class Tracker:
if filters:
self._set_field('filters', json.dumps(filters))
def del_filters(self, tracker_type, to_track):
    """Remove this tracker's filter index entries and its stored filters.

    For every object type present in the current filters, removes the
    tracked value from the 'trackers:objs' index and this tracker's
    '{uuid}:{obj_type}' entry from the 'trackers:uuid' index, then drops
    the 'filters' field from the tracker hash itself.

    :param tracker_type: tracker type the indexes were registered under
    :param to_track: tracked value to remove from the indexes
    """
    filters = self.get_filters()
    for obj_type in filters:
        r_tracker.srem(f'trackers:objs:{tracker_type}:{obj_type}', to_track)
        r_tracker.srem(f'trackers:uuid:{tracker_type}:{to_track}', f'{self.uuid}:{obj_type}')
    r_tracker.hdel(f'tracker:{self.uuid}', 'filters')
def get_tracked(self):
return self._get_field('tracked')
@ -513,6 +520,7 @@ class Tracker:
self._set_mails(mails)
# Filters
self.del_filters(old_type, old_to_track)
if not filters:
filters = {}
for obj_type in get_objects_tracked():
@ -522,9 +530,6 @@ class Tracker:
for obj_type in filters:
r_tracker.sadd(f'trackers:objs:{tracker_type}:{obj_type}', to_track)
r_tracker.sadd(f'trackers:uuid:{tracker_type}:{to_track}', f'{self.uuid}:{obj_type}')
if tracker_type != old_type:
r_tracker.srem(f'trackers:objs:{old_type}:{obj_type}', old_to_track)
r_tracker.srem(f'trackers:uuid:{old_type}:{old_to_track}', f'{self.uuid}:{obj_type}')
# Refresh Trackers
trigger_trackers_refresh(tracker_type)
@ -650,14 +655,14 @@ def get_user_trackers_meta(user_id, tracker_type=None):
metas = []
for tracker_uuid in get_user_trackers(user_id, tracker_type=tracker_type):
tracker = Tracker(tracker_uuid)
metas.append(tracker.get_meta(options={'mails', 'sparkline', 'tags'}))
metas.append(tracker.get_meta(options={'description', 'mails', 'sparkline', 'tags'}))
return metas
def get_global_trackers_meta(tracker_type=None):
    """Return metadata dicts (incl. description, mails, sparkline, tags) for all global trackers.

    :param tracker_type: optional tracker type used to filter the listing
    """
    metas = []
    for tracker_uuid in get_global_trackers(tracker_type=tracker_type):
        tracker = Tracker(tracker_uuid)
        metas.append(tracker.get_meta(options={'description', 'mails', 'sparkline', 'tags'}))
    return metas
def get_users_trackers_meta():

View file

@ -247,7 +247,10 @@ class User(UserMixin):
self.id = "__anonymous__"
def exists(self):
return self.id != "__anonymous__"
if self.id == "__anonymous__":
return False
else:
return r_serv_db.exists(f'ail:user:metadata:{self.id}')
# return True or False
# def is_authenticated():
@ -287,3 +290,6 @@ class User(UserMixin):
return True
else:
return False
def get_role(self):
    # Role string from the user's metadata hash; field is written by user
    # management code not visible here — presumably values like 'admin'/'user'.
    return r_serv_db.hget(f'ail:user:metadata:{self.id}', 'role')

View file

@ -15,8 +15,8 @@ config_loader = ConfigLoader()
r_serv_db = config_loader.get_db_conn("Kvrocks_DB")
config_loader = None
AIL_OBJECTS = sorted({'cookie-name', 'cve', 'cryptocurrency', 'decoded', 'domain', 'favicon', 'item', 'pgp',
'screenshot', 'title', 'username'})
AIL_OBJECTS = sorted({'chat', 'cookie-name', 'cve', 'cryptocurrency', 'decoded', 'domain', 'etag', 'favicon', 'hhhash', 'item',
'pgp', 'screenshot', 'title', 'user-account', 'username'})
def get_ail_uuid():
ail_uuid = r_serv_db.get('ail:uuid')
@ -38,9 +38,11 @@ def get_all_objects():
return AIL_OBJECTS
def get_objects_with_subtypes():
return ['cryptocurrency', 'pgp', 'username']
return ['chat', 'cryptocurrency', 'pgp', 'username']
def get_object_all_subtypes(obj_type):
if obj_type == 'chat':
return ['discord', 'jabber', 'telegram']
if obj_type == 'cryptocurrency':
return ['bitcoin', 'bitcoin-cash', 'dash', 'ethereum', 'litecoin', 'monero', 'zcash']
if obj_type == 'pgp':
@ -66,6 +68,14 @@ def get_all_objects_with_subtypes_tuple():
str_objs.append((obj_type, ''))
return str_objs
def unpack_obj_global_id(global_id, r_type='tuple'):
    """Split an object global id ``'type:subtype:id'`` into its components.

    The object id itself may contain ``':'`` characters, so only the first
    two separators are split on.

    :param global_id: global id string, e.g. ``'username:telegram:bob'``
    :param r_type: ``'dict'`` for a mapping, anything else for the raw
                   3-element list ``[type, subtype, id]``
    """
    if r_type == 'dict':
        obj = global_id.split(':', 2)
        # BUGFIX: was obj['2'] — indexing a list with a str raises TypeError
        return {'type': obj[0], 'subtype': obj[1], 'id': obj[2]}
    else:  # tuple(type, subtype, id)
        return global_id.split(':', 2)
##-- AIL OBJECTS --##
#### Redis ####

View file

@ -15,38 +15,15 @@ config_loader = ConfigLoader()
r_db = config_loader.get_db_conn("Kvrocks_DB")
config_loader = None
BACKGROUND_UPDATES = {
'v1.5': {
'nb_updates': 5,
'message': 'Tags and Screenshots'
},
'v2.4': {
'nb_updates': 1,
'message': ' Domains Tags and Correlations'
},
'v2.6': {
'nb_updates': 1,
'message': 'Domains Tags and Correlations'
},
'v2.7': {
'nb_updates': 1,
'message': 'Domains Tags'
},
'v3.4': {
'nb_updates': 1,
'message': 'Domains Languages'
},
'v3.7': {
'nb_updates': 1,
'message': 'Trackers first_seen/last_seen'
}
}
# # # # # # # #
# #
# UPDATE #
# #
# # # # # # # #
def get_ail_version():
    """Return the stored AIL version string (e.g. 'v5.2'), or None if unset."""
    return r_db.get('ail:version')
def get_ail_float_version():
version = get_ail_version()
if version:
@ -55,6 +32,179 @@ def get_ail_float_version():
version = 0
return version
# # # - - # # #
# # # # # # # # # # # #
# #
# UPDATE BACKGROUND #
# #
# # # # # # # # # # # #
BACKGROUND_UPDATES = {
'v5.2': {
'message': 'Compress HAR',
'scripts': ['compress_har.py']
},
}
class AILBackgroundUpdate:
    """
    AIL Background Update.

    Wraps the state of one background update (identified by its version
    string, e.g. 'v5.2') stored in the Redis/Kvrocks hash
    'ail:update:background'. Static per-version data (message, script
    list) comes from the module-level BACKGROUND_UPDATES dict; dynamic
    progress counters live in the hash.
    """

    def __init__(self, version):
        # version: key into BACKGROUND_UPDATES, e.g. 'v5.2'
        self.version = version

    def _get_field(self, field):
        # Read one field of the shared background-update state hash.
        return r_db.hget('ail:update:background', field)

    def _set_field(self, field, value):
        # Write one field of the shared background-update state hash.
        r_db.hset('ail:update:background', field, value)

    def get_version(self):
        return self.version

    def get_message(self):
        """Human-readable description of this update ('' if unknown version)."""
        return BACKGROUND_UPDATES.get(self.version, {}).get('message', '')

    def get_error(self):
        return self._get_field('error')

    def set_error(self, error):  # TODO ADD LOGS
        self._set_field('error', error)

    def get_nb_scripts(self):
        # Default [''] keeps the count >= 1, avoiding division by zero in get_meta().
        return int(len(BACKGROUND_UPDATES.get(self.version, {}).get('scripts', [''])))

    def get_scripts(self):
        """Script filenames to run for this update version."""
        return BACKGROUND_UPDATES.get(self.version, {}).get('scripts', [])

    def get_nb_scripts_done(self):
        # Stored as a string in Redis; tolerate missing/garbage values.
        done = self._get_field('done')
        try:
            done = int(done)
        except (TypeError, ValueError):
            done = 0
        return done

    def inc_nb_scripts_done(self):
        self._set_field('done', self.get_nb_scripts_done() + 1)

    def get_script(self):
        """Name of the script currently being executed."""
        return self._get_field('script')

    def get_script_path(self):
        """Absolute path of the current script under $AIL_HOME/update/<version>/ (None if no script)."""
        # basename() guards against path components smuggled into the stored name
        path = os.path.basename(self.get_script())
        if path:
            return os.path.join(os.environ['AIL_HOME'], 'update', self.version, path)

    def get_nb_to_update(self):  # TODO use cache ?????
        # Total number of items the current script has to process (>= 1).
        nb_to_update = self._get_field('nb_to_update')
        if not nb_to_update:
            nb_to_update = 1
        return int(nb_to_update)

    def set_nb_to_update(self, nb):
        self._set_field('nb_to_update', int(nb))

    def get_nb_updated(self):  # TODO use cache ?????
        # Number of items processed so far by the current script.
        nb_updated = self._get_field('nb_updated')
        if not nb_updated:
            nb_updated = 0
        return int(nb_updated)

    def inc_nb_updated(self):  # TODO use cache ?????
        r_db.hincrby('ail:update:background', 'nb_updated', 1)

    def get_progress(self):  # TODO use cache ?????
        """Progress (percent) of the current script."""
        return self._get_field('progress')

    def set_progress(self, progress):
        self._set_field('progress', progress)

    def update_progress(self):
        """Recompute, store and return the current script's progress percentage.

        Capped at 99 when the updated count overshoots the expected total;
        100 is only reported on exact completion.
        """
        nb_updated = self.get_nb_updated()
        nb_to_update = self.get_nb_to_update()
        if nb_updated == nb_to_update:
            progress = 100
        elif nb_updated > nb_to_update:
            progress = 99
        else:
            progress = int((nb_updated * 100) / nb_to_update)
        self.set_progress(progress)
        print(f'{nb_updated}/{nb_to_update} updated {progress}%')
        return progress

    def is_running(self):
        """True if this version is the one currently registered as running."""
        return r_db.hget('ail:update:background', 'version') == self.version

    def get_meta(self, options=set()):
        # NOTE(review): mutable default argument — only read here, but
        # options=None with a fallback would be safer.
        """Return a metadata dict (version, error, script, progress counters).

        'message' is included only when requested via *options*.
        """
        meta = {'version': self.get_version(),
                'error': self.get_error(),
                'script': self.get_script(),
                'script_progress': self.get_progress(),
                'nb_update': self.get_nb_scripts(),
                'nb_completed': self.get_nb_scripts_done()}
        meta['progress'] = int(meta['nb_completed'] * 100 / meta['nb_update'])
        if 'message' in options:
            meta['message'] = self.get_message()
        return meta

    def start(self):
        """Mark this version as the running background update and clear any stale error."""
        self._set_field('version', self.version)
        r_db.hdel('ail:update:background', 'error')

    def start_script(self, script):
        """Reset per-script counters and register *script* as the current one."""
        self.clear()
        self._set_field('script', script)
        self.set_progress(0)

    def end_script(self):
        """Mark the current script finished and bump the completed-scripts counter."""
        self.set_progress(100)
        self.inc_nb_scripts_done()

    def clear(self):
        """Drop the per-script state fields (error and progress counters)."""
        r_db.hdel('ail:update:background', 'error')
        r_db.hdel('ail:update:background', 'progress')
        r_db.hdel('ail:update:background', 'nb_updated')
        r_db.hdel('ail:update:background', 'nb_to_update')

    def end(self):
        """Tear down all state for this update and dequeue its version."""
        r_db.delete('ail:update:background')
        r_db.srem('ail:updates:background', self.version)
# To Add in update script
def add_background_update(version):
    """Queue *version* for background updating (called from update scripts)."""
    r_db.sadd('ail:updates:background', version)
def is_update_background_running():
    # The state hash exists for the whole lifetime of a background update.
    return r_db.exists('ail:update:background')
def get_update_background_version():
    """Return the version currently being processed, or None when idle."""
    return r_db.hget('ail:update:background', 'version')
def get_update_background_meta(options=None):
    """Return metadata for the currently running background update.

    :param options: optional set of extra fields to include (e.g.
                    {'message'}); defaults to no extras. (Replaced the
                    mutable default argument ``set()`` with a None sentinel.)
    :return: metadata dict, or {} when no background update is running
    """
    if options is None:
        options = set()
    version = get_update_background_version()
    if version:
        return AILBackgroundUpdate(version).get_meta(options=options)
    else:
        return {}
def get_update_background_to_launch():
    """Return queued background-update versions, ordered as declared in BACKGROUND_UPDATES."""
    pending = r_db.smembers('ail:updates:background')
    return [version for version in BACKGROUND_UPDATES if version in pending]
# # # - - # # #
##########################################################################################
##########################################################################################
##########################################################################################
def get_ail_all_updates(date_separator='-'):
dict_update = r_db.hgetall('ail:update_date')
@ -87,111 +237,6 @@ def check_version(version):
return True
#### UPDATE BACKGROUND ####
def exits_background_update_to_launch():
return r_db.scard('ail:update:to_update') != 0
def is_version_in_background_update(version):
return r_db.sismember('ail:update:to_update', version)
def get_all_background_updates_to_launch():
return r_db.smembers('ail:update:to_update')
def get_current_background_update():
return r_db.get('ail:update:update_in_progress')
def get_current_background_update_script():
return r_db.get('ail:update:current_background_script')
def get_current_background_update_script_path(version, script_name):
return os.path.join(os.environ['AIL_HOME'], 'update', version, script_name)
def get_current_background_nb_update_completed():
return r_db.scard('ail:update:update_in_progress:completed')
def get_current_background_update_progress():
progress = r_db.get('ail:update:current_background_script_stat')
if not progress:
progress = 0
return int(progress)
def get_background_update_error():
return r_db.get('ail:update:error')
def add_background_updates_to_launch(version):
return r_db.sadd('ail:update:to_update', version)
def start_background_update(version):
r_db.delete('ail:update:error')
r_db.set('ail:update:update_in_progress', version)
def set_current_background_update_script(script_name):
r_db.set('ail:update:current_background_script', script_name)
r_db.set('ail:update:current_background_script_stat', 0)
def set_current_background_update_progress(progress):
r_db.set('ail:update:current_background_script_stat', progress)
def set_background_update_error(error):
r_db.set('ail:update:error', error)
def end_background_update_script():
r_db.sadd('ail:update:update_in_progress:completed')
def end_background_update(version):
r_db.delete('ail:update:update_in_progress')
r_db.delete('ail:update:current_background_script')
r_db.delete('ail:update:current_background_script_stat')
r_db.delete('ail:update:update_in_progress:completed')
r_db.srem('ail:update:to_update', version)
def clear_background_update():
r_db.delete('ail:update:error')
r_db.delete('ail:update:update_in_progress')
r_db.delete('ail:update:current_background_script')
r_db.delete('ail:update:current_background_script_stat')
r_db.delete('ail:update:update_in_progress:completed')
def get_update_background_message(version):
return BACKGROUND_UPDATES[version]['message']
# TODO: Detect error in subprocess
def get_update_background_metadata():
dict_update = {}
version = get_current_background_update()
if version:
dict_update['version'] = version
dict_update['script'] = get_current_background_update_script()
dict_update['script_progress'] = get_current_background_update_progress()
dict_update['nb_update'] = BACKGROUND_UPDATES[dict_update['version']]['nb_updates']
dict_update['nb_completed'] = get_current_background_nb_update_completed()
dict_update['progress'] = int(dict_update['nb_completed'] * 100 / dict_update['nb_update'])
dict_update['error'] = get_background_update_error()
return dict_update
##-- UPDATE BACKGROUND --##
if __name__ == '__main__':
res = check_version('v3.1..1')
print(res)

View file

@ -41,17 +41,22 @@ config_loader = None
##################################
CORRELATION_TYPES_BY_OBJ = {
"chat": ["user-account"], # message or direct correlation like cve, bitcoin, ... ???
"cookie-name": ["domain"],
"cryptocurrency": ["domain", "item"],
"cve": ["domain", "item"],
"decoded": ["domain", "item"],
"domain": ["cve", "cookie-name", "cryptocurrency", "decoded", "favicon", "item", "pgp", "title", "screenshot", "username"],
"cryptocurrency": ["domain", "item", "message"],
"cve": ["domain", "item", "message"],
"decoded": ["domain", "item", "message"],
"domain": ["cve", "cookie-name", "cryptocurrency", "decoded", "etag", "favicon", "hhhash", "item", "pgp", "title", "screenshot", "username"],
"etag": ["domain"],
"favicon": ["domain", "item"], # TODO Decoded
"item": ["cve", "cryptocurrency", "decoded", "domain", "favicon", "pgp", "screenshot", "title", "username"],
"pgp": ["domain", "item"],
"hhhash": ["domain"],
"item": ["cve", "cryptocurrency", "decoded", "domain", "favicon", "pgp", "screenshot", "title", "username"], # chat ???
"message": ["cve", "cryptocurrency", "decoded", "pgp", "user-account"], # chat ??
"pgp": ["domain", "item", "message"],
"screenshot": ["domain", "item"],
"title": ["domain", "item"],
"username": ["domain", "item"],
"user-account": ["chat", "message"],
"username": ["domain", "item", "message"], # TODO chat-user/account
}
def get_obj_correl_types(obj_type):
@ -63,6 +68,8 @@ def sanityze_obj_correl_types(obj_type, correl_types):
correl_types = set(correl_types).intersection(obj_correl_types)
if not correl_types:
correl_types = obj_correl_types
if not correl_types:
return []
return correl_types
def get_nb_correlation_by_correl_type(obj_type, subtype, obj_id, correl_type):
@ -169,18 +176,18 @@ def get_obj_str_id(obj_type, subtype, obj_id):
subtype = ''
return f'{obj_type}:{subtype}:{obj_id}'
def get_correlations_graph_nodes_links(obj_type, subtype, obj_id, filter_types=[], max_nodes=300, level=1, flask_context=False):
def get_correlations_graph_nodes_links(obj_type, subtype, obj_id, filter_types=[], max_nodes=300, level=1, objs_hidden=set(), flask_context=False):
links = set()
nodes = set()
meta = {'complete': True, 'objs': set()}
obj_str_id = get_obj_str_id(obj_type, subtype, obj_id)
_get_correlations_graph_node(links, nodes, meta, obj_type, subtype, obj_id, level, max_nodes, filter_types=filter_types, previous_str_obj='')
_get_correlations_graph_node(links, nodes, meta, obj_type, subtype, obj_id, level, max_nodes, filter_types=filter_types, objs_hidden=objs_hidden, previous_str_obj='')
return obj_str_id, nodes, links, meta
def _get_correlations_graph_node(links, nodes, meta, obj_type, subtype, obj_id, level, max_nodes, filter_types=[], previous_str_obj=''):
def _get_correlations_graph_node(links, nodes, meta, obj_type, subtype, obj_id, level, max_nodes, filter_types=[], objs_hidden=set(), previous_str_obj=''):
obj_str_id = get_obj_str_id(obj_type, subtype, obj_id)
meta['objs'].add(obj_str_id)
nodes.add(obj_str_id)
@ -191,6 +198,10 @@ def _get_correlations_graph_node(links, nodes, meta, obj_type, subtype, obj_id,
for str_obj in obj_correlations[correl_type]:
subtype2, obj2_id = str_obj.split(':', 1)
obj2_str_id = get_obj_str_id(correl_type, subtype2, obj2_id)
# filter objects to hide
if obj2_str_id in objs_hidden:
continue
meta['objs'].add(obj2_str_id)
if obj2_str_id == previous_str_obj:
@ -204,5 +215,5 @@ def _get_correlations_graph_node(links, nodes, meta, obj_type, subtype, obj_id,
if level > 0:
next_level = level - 1
_get_correlations_graph_node(links, nodes, meta, correl_type, subtype2, obj2_id, next_level, max_nodes, filter_types=filter_types, previous_str_obj=obj_str_id)
_get_correlations_graph_node(links, nodes, meta, correl_type, subtype2, obj2_id, next_level, max_nodes, filter_types=filter_types, objs_hidden=objs_hidden, previous_str_obj=obj_str_id)

View file

@ -39,6 +39,7 @@ from packages import git_status
from packages import Date
from lib.ConfigLoader import ConfigLoader
from lib.objects.Domains import Domain
from lib.objects import HHHashs
from lib.objects.Items import Item
config_loader = ConfigLoader()
@ -134,7 +135,7 @@ def unpack_url(url):
# # # # # # # # TODO CREATE NEW OBJECT
def get_favicon_from_html(html, domain, url):
favicon_urls = extract_favicon_from_html(html, url)
favicon_urls, favicons = extract_favicon_from_html(html, url)
# add root favicon
if not favicon_urls:
favicon_urls.add(f'{urlparse(url).scheme}://{domain}/favicon.ico')
@ -162,7 +163,6 @@ def extract_favicon_from_html(html, url):
# - <meta name="msapplication-TileColor" content="#aaaaaa"> <meta name="theme-color" content="#ffffff">
# - <meta name="msapplication-config" content="/icons/browserconfig.xml">
# Root Favicon
f = get_faup()
f.decode(url)
@ -244,13 +244,6 @@ def extract_description_from_html(html):
return description['content']
return ''
def extract_description_from_html(html):
soup = BeautifulSoup(html, 'html.parser')
description = soup.find('meta', attrs={'name': 'description'})
if description:
return description['content']
return ''
def extract_keywords_from_html(html):
soup = BeautifulSoup(html, 'html.parser')
keywords = soup.find('meta', attrs={'name': 'keywords'})
@ -264,6 +257,7 @@ def extract_author_from_html(html):
if keywords:
return keywords['content']
return ''
# # # - - # # #
@ -275,7 +269,7 @@ def extract_author_from_html(html):
def create_har_id(date, item_id):
    """Build the relative storage id of a HAR file: '<date>/<item>.json.gz'.

    Merge residue removed: the span contained both the old '.json' and the
    new '.json.gz' return lines; HARs are stored gzipped (see save_har).

    :param date: date path component (e.g. '2023/09/08')
    :param item_id: item id; only its basename is used
    :return: relative HAR id under the HAR directory
    """
    item_id = item_id.split('/')[-1]
    return os.path.join(date, f'{item_id}.json.gz')
def save_har(har_id, har_content):
# create dir
@ -284,8 +278,8 @@ def save_har(har_id, har_content):
os.makedirs(har_dir)
# save HAR
filename = os.path.join(get_har_dir(), har_id)
with open(filename, 'w') as f:
f.write(json.dumps(har_content))
with gzip.open(filename, 'wb') as f:
f.write(json.dumps(har_content).encode())
def get_all_har_ids():
har_ids = []
@ -299,9 +293,10 @@ def get_all_har_ids():
except (TypeError, ValueError):
pass
for file in [f for f in os.listdir(today_root_dir) if os.path.isfile(os.path.join(today_root_dir, f))]:
har_id = os.path.relpath(os.path.join(today_root_dir, file), HAR_DIR)
har_ids.append(har_id)
if os.path.exists(today_root_dir):
for file in [f for f in os.listdir(today_root_dir) if os.path.isfile(os.path.join(today_root_dir, f))]:
har_id = os.path.relpath(os.path.join(today_root_dir, file), HAR_DIR)
har_ids.append(har_id)
for ydir in sorted(dirs_year, reverse=False):
search_dear = os.path.join(HAR_DIR, ydir)
@ -312,14 +307,17 @@ def get_all_har_ids():
har_ids.append(har_id)
return har_ids
def get_har_content(har_id):
    """Load and parse a gzipped HAR file.

    Merge residue removed: the span interleaved the deleted
    extract_cookies_names_from_har_by_har_id() wrapper (plain-JSON reader)
    with this gzip-based replacement; only the coherent new version is kept.

    :param har_id: HAR path relative to HAR_DIR
    :return: parsed HAR as a dict, or {} when the file is missing,
             unreadable or not valid JSON
    """
    har_path = os.path.join(HAR_DIR, har_id)
    try:
        with gzip.open(har_path) as f:
            try:
                return json.loads(f.read())
            except json.decoder.JSONDecodeError:
                return {}
    except Exception as e:
        print(e)  # TODO LOGS
        return {}
def extract_cookies_names_from_har(har):
cookies = set()
@ -334,17 +332,110 @@ def extract_cookies_names_from_har(har):
cookies.add(name)
return cookies
def _reprocess_all_hars_cookie_name():
    """Rebuild CookiesNames objects from every archived HAR file.

    Merge residue removed: the span mixed the old function name/-41 slice/
    old extractor call with the new ones; the coherent new version is kept.
    """
    from lib.objects import CookiesNames
    for har_id in get_all_har_ids():
        domain = har_id.split('/')[-1]
        # filename is '<domain>_<capture-uuid>.json.gz' -> drop the trailing
        # 44 chars (uuid + extension) — TODO confirm against create_har_id()
        domain = domain[:-44]
        date = har_id.split('/')
        date = f'{date[-4]}{date[-3]}{date[-2]}'
        for cookie_name in extract_cookies_names_from_har(get_har_content(har_id)):
            print(domain, date, cookie_name)
            cookie = CookiesNames.create(cookie_name)
            cookie.add(date, Domain(domain))
def extract_etag_from_har(har):  # TODO check response url
    """Extract all ETag response-header values from a parsed HAR dict.

    :param har: HAR content as a dict (as returned by get_har_content)
    :return: set of etag values (str), empty values excluded
    """
    etags = set()
    for entry in har.get('log', {}).get('entries', []):
        for header in entry.get('response', {}).get('headers', []):
            # HTTP field names are case-insensitive (RFC 9110); HARs may
            # record 'ETag' as sent by the server as well as lowercased 'etag'
            if (header.get('name') or '').lower() == 'etag':
                etag = header.get('value')
                if etag:
                    etags.add(etag)
    return etags
def _reprocess_all_hars_etag():
    """Rebuild Etag objects from every archived HAR file."""
    from lib.objects import Etags
    for har_id in get_all_har_ids():
        parts = har_id.split('/')
        # filename '<domain>_<capture-uuid>.json.gz' -> drop trailing 44 chars
        har_domain = parts[-1][:-44]
        har_date = parts[-4] + parts[-3] + parts[-2]
        for etag_content in extract_etag_from_har(get_har_content(har_id)):
            print(har_domain, har_date, etag_content)
            etag_obj = Etags.create(etag_content)
            etag_obj.add(har_date, Domain(har_domain))
def extract_hhhash_by_id(har_id, domain, date):
    """Load the HAR identified by har_id and extract/record its HHHashs."""
    har_content = get_har_content(har_id)
    return extract_hhhash(har_content, domain, date)
def extract_hhhash(har, domain, date):
    """Compute HHHashs (HTTP Headers Hashes) from a parsed HAR.

    NOTE(review): despite the 'extract' name this has side effects — each new
    hash creates a HHHash object and links it to the Domain at `date`.

    :param har: parsed HAR dict
    :param domain: domain the capture belongs to (only matching responses count)
    :param date: date used for the domain correlation
    :return: set of hhhash ids found
    """
    hhhashs = set()
    urls = set()
    for entrie in har.get('log', {}).get('entries', []):
        url = entrie.get('request').get('url')
        if url not in urls:
            # filter redirect
            if entrie.get('response').get('status') == 200:  # != 301:
                # print(url, entrie.get('response').get('status'))
                f = get_faup()
                f.decode(url)
                domain_url = f.get().get('domain')
                # only hash responses served by the crawled domain itself
                if domain_url == domain:
                    headers = entrie.get('response').get('headers')
                    hhhash_header = HHHashs.build_hhhash_headers(headers)
                    hhhash = HHHashs.hhhash_headers(hhhash_header)
                    if hhhash not in hhhashs:
                        print('', url, hhhash)
                        # -----
                        obj = HHHashs.create(hhhash_header, hhhash)
                        obj.add(date, Domain(domain))
                        hhhashs.add(hhhash)
            # each URL is examined at most once
            urls.add(url)
    print()
    print()
    print('HHHASH:')
    for hhhash in hhhashs:
        print(hhhash)
    return hhhashs
def _reprocess_all_hars_hhhashs():
    """Recompute HHHash objects from every archived HAR file."""
    for har_id in get_all_har_ids():
        print()
        print(har_id)
        parts = har_id.split('/')
        # '<domain>_<capture-uuid>.json.gz' -> strip the trailing 44 chars
        har_domain = parts[-1][:-44]
        har_date = parts[-4] + parts[-3] + parts[-2]
        extract_hhhash_by_id(har_id, har_domain, har_date)
def _gzip_har(har_id):
    """Compress one legacy plain-text HAR to gzip, then delete the original.

    No-op for ids already ending in '.gz'; empty source files are left alone.
    """
    src_path = os.path.join(HAR_DIR, har_id)
    gz_path = f'{src_path}.gz'
    if har_id.endswith('.gz'):
        return
    if not os.path.exists(gz_path):
        with open(src_path, 'rb') as fi:
            raw = fi.read()
        if raw:
            with gzip.open(gz_path, 'wb') as fo:
                print(fo.write(raw))
    # only remove the original once the gzipped copy exists
    if os.path.exists(gz_path) and os.path.exists(src_path):
        os.remove(src_path)
        print('delete:', src_path)
def _gzip_all_hars():
    """Gzip every HAR file that is still stored uncompressed."""
    for h_id in get_all_har_ids():
        _gzip_har(h_id)
# # # - - # # #
@ -662,8 +753,7 @@ class Cookie:
meta[field] = value
if r_json:
data = json.dumps(meta, indent=4, sort_keys=True)
meta = {'data': data}
meta['uuid'] = self.uuid
meta = {'data': data, 'uuid': self.uuid}
return meta
def edit(self, cookie_dict):
@ -775,7 +865,7 @@ def unpack_imported_json_cookie(json_cookie):
## - - ##
#### COOKIEJAR API ####
def api_import_cookies_from_json(user_id, cookiejar_uuid, json_cookies_str): # # TODO: add catch
def api_import_cookies_from_json(user_id, cookiejar_uuid, json_cookies_str): # # TODO: add catch
resp = api_verify_cookiejar_acl(cookiejar_uuid, user_id)
if resp:
return resp
@ -944,8 +1034,8 @@ class CrawlerScheduler:
minutes = 0
current_time = datetime.now().timestamp()
time_next_run = (datetime.now() + relativedelta(months=int(months), weeks=int(weeks),
days=int(days), hours=int(hours),
minutes=int(minutes))).timestamp()
days=int(days), hours=int(hours),
minutes=int(minutes))).timestamp()
# Make sure the next capture is not scheduled for in a too short interval
interval_next_capture = time_next_run - current_time
if interval_next_capture < self.min_frequency:
@ -1225,8 +1315,13 @@ class CrawlerCapture:
if task_uuid:
return CrawlerTask(task_uuid)
def get_start_time(self):
return self.get_task().get_start_time()
def get_start_time(self, r_str=True):
start_time = self.get_task().get_start_time()
if r_str:
return start_time
else:
start_time = datetime.strptime(start_time, "%Y/%m/%d - %H:%M.%S").timestamp()
return int(start_time)
def get_status(self):
status = r_cache.hget(f'crawler:capture:{self.uuid}', 'status')
@ -1239,7 +1334,8 @@ class CrawlerCapture:
def create(self, task_uuid):
if self.exists():
raise Exception(f'Error: Capture {self.uuid} already exists')
print(f'Capture {self.uuid} already exists') # TODO LOGS
return None
launch_time = int(time.time())
r_crawler.hset(f'crawler:task:{task_uuid}', 'capture', self.uuid)
r_crawler.hset('crawler:captures:tasks', self.uuid, task_uuid)
@ -1492,6 +1588,11 @@ class CrawlerTask:
def start(self):
self._set_field('start_time', datetime.now().strftime("%Y/%m/%d - %H:%M.%S"))
    def reset(self):
        """Re-queue this task: clear its recorded start time and push it
        back on the crawler DB queue with priority 49."""
        priority = 49
        r_crawler.hdel(f'crawler:task:{self.uuid}', 'start_time')
        self.add_to_db_crawler_queue(priority)
# Crawler
def remove(self): # zrem cache + DB
capture_uuid = self.get_capture()
@ -1622,14 +1723,16 @@ def api_add_crawler_task(data, user_id=None):
if frequency:
# TODO verify user
return create_schedule(frequency, user_id, url, depth=depth_limit, har=har, screenshot=screenshot, header=None,
cookiejar=cookiejar_uuid, proxy=proxy, user_agent=None, tags=tags), 200
task_uuid = create_schedule(frequency, user_id, url, depth=depth_limit, har=har, screenshot=screenshot, header=None,
cookiejar=cookiejar_uuid, proxy=proxy, user_agent=None, tags=tags)
else:
# TODO HEADERS
# TODO USER AGENT
return create_task(url, depth=depth_limit, har=har, screenshot=screenshot, header=None,
task_uuid = create_task(url, depth=depth_limit, har=har, screenshot=screenshot, header=None,
cookiejar=cookiejar_uuid, proxy=proxy, user_agent=None, tags=tags,
parent='manual', priority=90), 200
parent='manual', priority=90)
return {'uuid': task_uuid}, 200
#### ####
@ -1702,13 +1805,13 @@ class CrawlerProxy:
self.uuid = proxy_uuid
def get_description(self):
return r_crawler.hgrt(f'crawler:proxy:{self.uuif}', 'description')
return r_crawler.hget(f'crawler:proxy:{self.uuid}', 'description')
# Host
# Port
# Type -> need test
def get_url(self):
return r_crawler.hgrt(f'crawler:proxy:{self.uuif}', 'url')
return r_crawler.hget(f'crawler:proxy:{self.uuid}', 'url')
#### CRAWLER LACUS ####
@ -1770,7 +1873,11 @@ def ping_lacus():
ping = False
req_error = {'error': 'Lacus URL undefined', 'status_code': 400}
else:
ping = lacus.is_up
try:
ping = lacus.is_up
except:
req_error = {'error': 'Failed to connect Lacus URL', 'status_code': 400}
ping = False
update_lacus_connection_status(ping, req_error=req_error)
return ping
@ -1787,7 +1894,7 @@ def api_save_lacus_url_key(data):
# unpack json
manager_url = data.get('url', None)
api_key = data.get('api_key', None)
if not manager_url: # or not api_key:
if not manager_url: # or not api_key:
return {'status': 'error', 'reason': 'No url or API key supplied'}, 400
# check if is valid url
try:
@ -1830,7 +1937,7 @@ def api_set_crawler_max_captures(data):
save_nb_max_captures(nb_captures)
return nb_captures, 200
## TEST ##
## TEST ##
def is_test_ail_crawlers_successful():
return r_db.hget('crawler:tor:test', 'success') == 'True'
@ -1903,14 +2010,16 @@ def test_ail_crawlers():
# TODO MOVE ME IN CRAWLER OR FLASK
load_blacklist()
# if __name__ == '__main__':
# delete_captures()
# item_id = 'crawled/2023/02/20/data.gz'
# item = Item(item_id)
# content = item.get_content()
# temp_url = ''
# r = extract_favicon_from_html(content, temp_url)
# print(r)
# _reprocess_all_hars()
if __name__ == '__main__':
# delete_captures()
# item_id = 'crawled/2023/02/20/data.gz'
# item = Item(item_id)
# content = item.get_content()
# temp_url = ''
# r = extract_favicon_from_html(content, temp_url)
# print(r)
# _reprocess_all_hars_cookie_name()
# _reprocess_all_hars_etag()
# _gzip_all_hars()
_reprocess_all_hars_hhhashs()

View file

@ -129,7 +129,7 @@ def get_item_url(item_id):
def get_item_har(item_id):
har = '/'.join(item_id.rsplit('/')[-4:])
har = f'{har}.json'
har = f'{har}.json.gz'
path = os.path.join(ConfigLoader.get_hars_dir(), har)
if os.path.isfile(path):
return har

View file

@ -104,9 +104,13 @@ def _get_word_regex(word):
def convert_byte_offset_to_string(b_content, offset):
    """Convert a byte offset into ``b_content`` to a character offset.

    If the offset falls inside a multi-byte UTF-8 sequence, log and retry
    one byte earlier until the prefix decodes cleanly.

    Merge residue removed: the span kept the superseded unconditional
    decode lines above the try/except version, which would raise before
    the guarded code ran.

    :param b_content: bytes buffer the offset points into
    :param offset: byte offset (0-indexed) of the match
    :return: character offset (0-indexed) in the decoded string
    """
    byte_chunk = b_content[:offset + 1]
    try:
        string_chunk = byte_chunk.decode()
        return len(string_chunk) - 1
    except UnicodeDecodeError as e:
        logger.error(f'Yara offset converter error, {str(e)}\n{offset}/{len(b_content)}')
        return convert_byte_offset_to_string(b_content, offset - 1)
# TODO RETRO HUNTS

309
bin/lib/objects/Chats.py Executable file
View file

@ -0,0 +1,309 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
from datetime import datetime
from flask import url_for
# from pymisp import MISPObject
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib import ail_core
from lib.ConfigLoader import ConfigLoader
from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id
from lib.data_retention_engine import update_obj_date
from lib.objects import ail_objects
from lib.timeline_engine import Timeline
from lib.correlations_engine import get_correlation_by_correl_type
config_loader = ConfigLoader()
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
r_object = config_loader.get_db_conn("Kvrocks_Objects")
r_cache = config_loader.get_redis_conn("Redis_Cache")
config_loader = None
################################################################################
################################################################################
################################################################################
class Chat(AbstractSubtypeObject): # TODO # ID == username ?????
"""
AIL Chat Object. (strings)
"""
def __init__(self, id, subtype):
super(Chat, self).__init__('chat', id, subtype)
# def get_ail_2_ail_payload(self):
# payload = {'raw': self.get_gzip_content(b64=True),
# 'compress': 'gzip'}
# return payload
    # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
    def delete(self):
        """Delete this chat object. Not implemented yet (no-op stub)."""
        # # TODO:
        pass
def get_link(self, flask_context=False):
if flask_context:
url = url_for('correlation.show_correlation', type=self.type, subtype=self.subtype, id=self.id)
else:
url = f'{baseurl}/correlation/show?type={self.type}&subtype={self.subtype}&id={self.id}'
return url
def get_svg_icon(self): # TODO
# if self.subtype == 'telegram':
# style = 'fab'
# icon = '\uf2c6'
# elif self.subtype == 'discord':
# style = 'fab'
# icon = '\uf099'
# else:
# style = 'fas'
# icon = '\uf007'
style = 'fas'
icon = '\uf086'
return {'style': style, 'icon': icon, 'color': '#4dffff', 'radius': 5}
def get_meta(self, options=set()):
meta = self._get_meta(options=options)
meta['id'] = self.id
meta['subtype'] = self.subtype
meta['tags'] = self.get_tags(r_list=True)
return meta
def get_misp_object(self):
# obj_attrs = []
# if self.subtype == 'telegram':
# obj = MISPObject('telegram-account', standalone=True)
# obj_attrs.append(obj.add_attribute('username', value=self.id))
#
# elif self.subtype == 'twitter':
# obj = MISPObject('twitter-account', standalone=True)
# obj_attrs.append(obj.add_attribute('name', value=self.id))
#
# else:
# obj = MISPObject('user-account', standalone=True)
# obj_attrs.append(obj.add_attribute('username', value=self.id))
#
# first_seen = self.get_first_seen()
# last_seen = self.get_last_seen()
# if first_seen:
# obj.first_seen = first_seen
# if last_seen:
# obj.last_seen = last_seen
# if not first_seen or not last_seen:
# self.logger.warning(
# f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={first_seen}, last={last_seen}')
#
# for obj_attr in obj_attrs:
# for tag in self.get_tags():
# obj_attr.add_tag(tag)
# return obj
return
############################################################################
############################################################################
# others optional metas, ... -> # TODO ALL meta in hset
    def get_name(self):  # get username ????
        """Chat display name — not implemented yet (implicitly returns None)."""
        pass
# users that send at least a message else participants/spectator
# correlation created by messages
def get_users(self):
users = set()
accounts = self.get_correlation('user-account').get('user-account', [])
for account in accounts:
users.add(account[1:])
return users
    def _get_timeline_username(self):
        # Timeline of username objects attached to this chat over time
        return Timeline(self.get_global_id(), 'username')

    def get_username(self):
        # Most recent username associated with this chat
        return self._get_timeline_username().get_last_obj_id()

    def get_usernames(self):
        # All usernames this chat has ever used
        return self._get_timeline_username().get_objs_ids()

    def update_username_timeline(self, username_global_id, timestamp):
        # Record that this chat used `username_global_id` at `timestamp`
        self._get_timeline_username().add_timestamp(timestamp, username_global_id)
# def get_last_message_id(self):
#
# return r_object.hget(f'meta:{self.type}:{self.subtype}:{self.id}', 'last:message:id')
def get_obj_message_id(self, obj_id):
if obj_id.endswith('.gz'):
obj_id = obj_id[:-3]
return int(obj_id.split('_')[-1])
    def _get_message_timestamp(self, obj_global_id):
        # zset score == timestamp at which this message was posted
        return r_object.zscore(f'messages:{self.type}:{self.subtype}:{self.id}', obj_global_id)

    def _get_messages(self):
        # All (obj_global_id, timestamp) message pairs, oldest first
        return r_object.zrange(f'messages:{self.type}:{self.subtype}:{self.id}', 0, -1, withscores=True)
def get_message_meta(self, obj_global_id, parent=True, mess_datetime=None):
obj = ail_objects.get_obj_from_global_id(obj_global_id)
mess_dict = obj.get_meta(options={'content', 'link', 'parent', 'user-account'})
if mess_dict.get('parent') and parent:
mess_dict['reply_to'] = self.get_message_meta(mess_dict['parent'], parent=False)
if mess_dict.get('user-account'):
user_account = ail_objects.get_obj_from_global_id(mess_dict['user-account'])
mess_dict['user-account'] = {}
mess_dict['user-account']['type'] = user_account.get_type()
mess_dict['user-account']['subtype'] = user_account.get_subtype(r_str=True)
mess_dict['user-account']['id'] = user_account.get_id()
username = user_account.get_username()
if username:
username = ail_objects.get_obj_from_global_id(username).get_default_meta(link=False)
mess_dict['user-account']['username'] = username # TODO get username at the given timestamp ???
else:
mess_dict['user-account']['id'] = 'UNKNOWN'
if not mess_datetime:
obj_mess_id = self._get_message_timestamp(obj_global_id)
mess_datetime = datetime.fromtimestamp(obj_mess_id)
mess_dict['date'] = mess_datetime.isoformat(' ')
mess_dict['hour'] = mess_datetime.strftime('%H:%M:%S')
return mess_dict
    def get_messages(self, start=0, page=1, nb=500):  # TODO limit nb returned, # TODO add replies
        """Return all chat messages grouped by day: {'YYYY/MM/DD': [meta, ...]}.

        NOTE(review): start/page/nb are currently ignored — the whole history
        is returned; pagination is still TODO (start is overwritten below).
        """
        start = 0
        stop = -1
        # r_object.delete(f'messages:{self.type}:{self.subtype}:{self.id}')
        # TODO chat without username ???? -> chat ID ????
        messages = {}
        curr_date = None
        # messages come back oldest-first, so a day bucket is created once
        for message in self._get_messages():
            date = datetime.fromtimestamp(message[1])
            date_day = date.strftime('%Y/%m/%d')
            if date_day != curr_date:
                messages[date_day] = []
                curr_date = date_day
            mess_dict = self.get_message_meta(message[0], parent=True, mess_datetime=date)
            messages[date_day].append(mess_dict)
        return messages
# Zset with ID ??? id -> item id ??? multiple id == media + text
# id -> media id
# How do we handle reply/thread ??? -> separate with new chats name/id ZSET ???
# Handle media ???
# list of message id -> obj_id
# list of obj_id ->
# abuse parent children ???
# def add(self, timestamp, obj_id, mess_id=0, username=None, user_id=None):
# date = # TODO get date from object
# self.update_daterange(date)
# update_obj_date(date, self.type, self.subtype)
#
#
# # daily
# r_object.hincrby(f'{self.type}:{self.subtype}:{date}', self.id, 1)
# # all subtypes
# r_object.zincrby(f'{self.type}_all:{self.subtype}', 1, self.id)
#
# #######################################################################
# #######################################################################
#
# # Correlations
# self.add_correlation('item', '', item_id)
# # domain
# if is_crawled(item_id):
# domain = get_item_domain(item_id)
# self.add_correlation('domain', '', domain)
# TODO kvrocks exception if key don't exists
def get_obj_by_message_id(self, mess_id):
return r_object.hget(f'messages:ids:{self.type}:{self.subtype}:{self.id}', mess_id)
# importer -> use cache for previous reply SET to_add_id: previously_imported : expire SET key -> 30 mn
    def add_message(self, obj_global_id, timestamp, mess_id, reply_id=None):
        """Index a new message in this chat.

        Maps mess_id -> obj_global_id and scores the message by timestamp.
        If the message replies to a not-yet-imported parent, the link is
        parked in cache until the parent arrives.
        """
        r_object.hset(f'messages:ids:{self.type}:{self.subtype}:{self.id}', mess_id, obj_global_id)
        r_object.zadd(f'messages:{self.type}:{self.subtype}:{self.id}', {obj_global_id: timestamp})

        if reply_id:
            reply_obj = self.get_obj_by_message_id(reply_id)
            if reply_obj:
                # parent already imported: link parent -> this message
                self.add_obj_children(reply_obj, obj_global_id)
            else:
                # parent not imported yet: cache the pending reply link
                self.add_message_cached_reply(reply_id, mess_id)

        # ADD cached replies
        for reply_obj in self.get_cached_message_reply(mess_id):
            self.add_obj_children(obj_global_id, reply_obj)
    def _get_message_cached_reply(self, message_id):
        # Raw cached reply ids: messages imported before their parent
        return r_cache.smembers(f'messages:ids:{self.type}:{self.subtype}:{self.id}:{message_id}')

    def get_cached_message_reply(self, message_id):
        # Resolve cached reply message ids to object global ids (if now imported)
        objs_global_id = []
        for mess_id in self._get_message_cached_reply(message_id):
            obj_global_id = self.get_obj_by_message_id(mess_id)
            if obj_global_id:
                objs_global_id.append(obj_global_id)
        return objs_global_id

    def add_message_cached_reply(self, reply_to_id, message_id):
        # Park a reply whose parent is not imported yet; expires after 10 min
        r_cache.sadd(f'messages:ids:{self.type}:{self.subtype}:{self.id}:{reply_to_id}', message_id)
        r_cache.expire(f'messages:ids:{self.type}:{self.subtype}:{self.id}:{reply_to_id}', 600)
# TODO nb replies = nb son ???? what if it create a onion item ??? -> need source filtering
# TODO factorize
def get_all_subtypes():
    """Return every configured chat subtype."""
    return ail_core.get_object_all_subtypes('chat')


def get_all():
    """Return {subtype: [chat ids]} for every chat subtype."""
    return {subtype: get_all_by_subtype(subtype) for subtype in get_all_subtypes()}


def get_all_by_subtype(subtype):
    """Return all chat ids for a given subtype."""
    return get_all_id('chat', subtype)
# # TODO FILTER NAME + Key + mail
# def sanitize_username_name_to_search(name_to_search, subtype): # TODO FILTER NAME
#
# return name_to_search
#
# def search_usernames_by_name(name_to_search, subtype, r_pos=False):
# usernames = {}
# # for subtype in subtypes:
# r_name = sanitize_username_name_to_search(name_to_search, subtype)
# if not name_to_search or isinstance(r_name, dict):
# # break
# return usernames
# r_name = re.compile(r_name)
# for user_name in get_all_usernames_by_subtype(subtype):
# res = re.search(r_name, user_name)
# if res:
# usernames[user_name] = {}
# if r_pos:
# usernames[user_name]['hl-start'] = res.start()
# usernames[user_name]['hl-end'] = res.end()
# return usernames
if __name__ == '__main__':
    # ad-hoc manual test harness: dump the message history of a known chat
    chat = Chat('test', 'telegram')
    r = chat.get_messages()
    print(r)

View file

@ -138,7 +138,7 @@ class Decoded(AbstractDaterangeObject):
with open(filepath, 'rb') as f:
content = f.read()
return content
elif r_str == 'bytesio':
elif r_type == 'bytesio':
with open(filepath, 'rb') as f:
content = BytesIO(f.read())
return content
@ -149,7 +149,7 @@ class Decoded(AbstractDaterangeObject):
with zipfile.ZipFile(zip_content, "w") as zf:
# TODO: Fix password
# zf.setpassword(b"infected")
zf.writestr(self.id, self.get_content().getvalue())
zf.writestr(self.id, self.get_content(r_type='bytesio').getvalue())
zip_content.seek(0)
return zip_content

View file

@ -389,10 +389,10 @@ class Domain(AbstractObject):
har = get_item_har(item_id)
if har:
print(har)
_write_in_zip_buffer(zf, os.path.join(hars_dir, har), f'{basename}.json')
_write_in_zip_buffer(zf, os.path.join(hars_dir, har), f'{basename}.json.gz')
# Screenshot
screenshot = self._get_external_correlation('item', '', item_id, 'screenshot')
if screenshot:
if screenshot and screenshot['screenshot']:
screenshot = screenshot['screenshot'].pop()[1:]
screenshot = os.path.join(screenshot[0:2], screenshot[2:4], screenshot[4:6], screenshot[6:8],
screenshot[8:10], screenshot[10:12], screenshot[12:])
@ -595,21 +595,22 @@ def get_domains_up_by_filers(domain_types, date_from=None, date_to=None, tags=[]
return None
def sanitize_domain_name_to_search(name_to_search, domain_type):
if not name_to_search:
return ""
if domain_type == 'onion':
r_name = r'[a-z0-9\.]+'
else:
r_name = r'[a-zA-Z0-9-_\.]+'
# invalid domain name
if not re.fullmatch(r_name, name_to_search):
res = re.match(r_name, name_to_search)
return {'search': name_to_search, 'error': res.string.replace( res[0], '')}
return ""
return name_to_search.replace('.', '\.')
def search_domain_by_name(name_to_search, domain_types, r_pos=False):
domains = {}
for domain_type in domain_types:
r_name = sanitize_domain_name_to_search(name_to_search, domain_type)
if not name_to_search or isinstance(r_name, dict):
if not r_name:
break
r_name = re.compile(r_name)
for domain in get_domains_up_by_type(domain_type):

121
bin/lib/objects/Etags.py Executable file
View file

@ -0,0 +1,121 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
from hashlib import sha256
from flask import url_for
from pymisp import MISPObject
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.ConfigLoader import ConfigLoader
from lib.objects.abstract_daterange_object import AbstractDaterangeObject, AbstractDaterangeObjects
config_loader = ConfigLoader()
r_objects = config_loader.get_db_conn("Kvrocks_Objects")
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
config_loader = None
# TODO NEW ABSTRACT OBJECT -> daterange for all objects ????
class Etag(AbstractDaterangeObject):
    """
    AIL Etag Object. An HTTP ETag response-header value, identified by
    sha256(value) and correlated with the domains that served it.
    """

    def __init__(self, obj_id):
        # obj_id = sha256 hexdigest of the etag value (see module-level create())
        super(Etag, self).__init__('etag', obj_id)

    # def get_ail_2_ail_payload(self):
    #     payload = {'raw': self.get_gzip_content(b64=True),
    #                 'compress': 'gzip'}
    #     return payload

    # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
    def delete(self):
        """Delete this object. Not implemented yet (no-op stub)."""
        # # TODO:
        pass

    def get_content(self, r_type='str'):
        """Return the raw etag value (only r_type='str' is supported;
        other types implicitly return None)."""
        if r_type == 'str':
            return self._get_field('content')

    def get_link(self, flask_context=False):
        """Return the correlation-show URL (flask route or absolute URL)."""
        if flask_context:
            url = url_for('correlation.show_correlation', type=self.type, id=self.id)
        else:
            url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}'
        return url

    # TODO # CHANGE COLOR
    def get_svg_icon(self):
        """Font-awesome icon descriptor used to render etag nodes."""
        return {'style': 'fas', 'icon': '\uf02b', 'color': '#556F65', 'radius': 5}

    def get_misp_object(self):
        """Export this etag as a MISP 'etag' object with tags and seen dates."""
        obj_attrs = []
        obj = MISPObject('etag')
        first_seen = self.get_first_seen()
        last_seen = self.get_last_seen()
        if first_seen:
            obj.first_seen = first_seen
        if last_seen:
            obj.last_seen = last_seen
        if not first_seen or not last_seen:
            self.logger.warning(
                f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={first_seen}, last={last_seen}')

        obj_attrs.append(obj.add_attribute('etag', value=self.get_content()))
        for obj_attr in obj_attrs:
            for tag in self.get_tags():
                obj_attr.add_tag(tag)
        return obj

    def get_nb_seen(self):
        # number of sightings == number of correlated domains
        return self.get_nb_correlation('domain')

    def get_meta(self, options=set()):
        """Return metadata dict (id, tags, content) for this etag."""
        meta = self._get_meta(options=options)
        meta['id'] = self.id
        meta['tags'] = self.get_tags(r_list=True)
        meta['content'] = self.get_content()
        return meta

    def add(self, date, obj_id):  # date = HAR Date
        """Add a sighting: correlate this etag with a domain at `date`."""
        self._add(date, 'domain', '', obj_id)

    def create(self, content, _first_seen=None, _last_seen=None):
        """Persist the etag value; `content` may be str or bytes."""
        if not isinstance(content, str):
            content = content.decode()
        self._set_field('content', content)
        self._create()
def create(content):
    """Get-or-create the Etag object keyed by sha256 of the etag value."""
    raw = content.encode() if isinstance(content, str) else content
    etag = Etag(sha256(raw).hexdigest())
    if not etag.exists():
        etag.create(raw)
    return etag
class Etags(AbstractDaterangeObjects):
    """
    Etags Objects — collection/search helper over Etag objects.
    """
    def __init__(self):
        super().__init__('etag', Etag)

    def sanitize_id_to_search(self, name_to_search):
        # currently a pass-through; real sanitization still TODO
        return name_to_search  # TODO
# if __name__ == '__main__':
# name_to_search = '98'
# print(search_cves_by_name(name_to_search))

138
bin/lib/objects/HHHashs.py Executable file
View file

@ -0,0 +1,138 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import hashlib
import os
import sys
from flask import url_for
from pymisp import MISPObject
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.ConfigLoader import ConfigLoader
from lib.objects.abstract_daterange_object import AbstractDaterangeObject, AbstractDaterangeObjects
config_loader = ConfigLoader()
r_objects = config_loader.get_db_conn("Kvrocks_Objects")
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
config_loader = None
class HHHash(AbstractDaterangeObject):
    """
    AIL HHHash Object. HTTP Headers Hash ('hhh:1:<sha256>') fingerprinting a
    server by the ordered list of response-header names it sends.
    """

    def __init__(self, obj_id):
        # obj_id is the full 'hhh:1:<sha256>' string (see hhhash_headers())
        super(HHHash, self).__init__('hhhash', obj_id)

    # def get_ail_2_ail_payload(self):
    #     payload = {'raw': self.get_gzip_content(b64=True),
    #                 'compress': 'gzip'}
    #     return payload

    # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
    def delete(self):
        """Delete this object. Not implemented yet (no-op stub)."""
        # # TODO:
        pass

    def get_content(self, r_type='str'):
        """Return the ':'-joined header-name string (only r_type='str'
        is supported; other types implicitly return None)."""
        if r_type == 'str':
            return self._get_field('content')

    def get_link(self, flask_context=False):
        """Return the correlation-show URL (flask route or absolute URL)."""
        if flask_context:
            url = url_for('correlation.show_correlation', type=self.type, id=self.id)
        else:
            url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}'
        return url

    # TODO # CHANGE COLOR
    def get_svg_icon(self):
        """Font-awesome icon descriptor used to render hhhash nodes."""
        return {'style': 'fas', 'icon': '\uf036', 'color': '#71D090', 'radius': 5}

    def get_misp_object(self):
        """Export as a MISP 'hhhash' object (hash, header list, tool, tags)."""
        obj_attrs = []
        obj = MISPObject('hhhash')
        first_seen = self.get_first_seen()
        last_seen = self.get_last_seen()
        if first_seen:
            obj.first_seen = first_seen
        if last_seen:
            obj.last_seen = last_seen
        if not first_seen or not last_seen:
            self.logger.warning(
                f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={first_seen}, last={last_seen}')

        obj_attrs.append(obj.add_attribute('hhhash', value=self.get_id()))
        obj_attrs.append(obj.add_attribute('hhhash-headers', value=self.get_content()))
        obj_attrs.append(obj.add_attribute('hhhash-tool', value='lacus'))
        for obj_attr in obj_attrs:
            for tag in self.get_tags():
                obj_attr.add_tag(tag)
        return obj

    def get_nb_seen(self):
        # number of sightings == number of correlated domains
        return self.get_nb_correlation('domain')

    def get_meta(self, options=set()):
        """Return metadata dict (id, tags, content) for this hhhash."""
        meta = self._get_meta(options=options)
        meta['id'] = self.id
        meta['tags'] = self.get_tags(r_list=True)
        meta['content'] = self.get_content()
        return meta

    def add(self, date, obj_id):  # date = HAR Date
        """Add a sighting: correlate this hhhash with a domain at `date`."""
        self._add(date, 'domain', '', obj_id)

    def create(self, hhhash_header, _first_seen=None, _last_seen=None):  # TODO CREATE ADD FUNCTION -> urls set
        """Persist the header-name string backing this hash."""
        self._set_field('content', hhhash_header)
        self._create()
def create(hhhash_header, hhhash=None):
    """Get-or-create the HHHash object for a header-name string.

    :param hhhash_header: ':'-joined header names (see build_hhhash_headers)
    :param hhhash: precomputed 'hhh:1:<sha256>' id, computed when None
    """
    if not hhhash:
        hhhash = hhhash_headers(hhhash_header)
    obj = HHHash(hhhash)
    if not obj.exists():
        obj.create(hhhash_header)
    return obj
def build_hhhash_headers(dict_headers):  # filter_dup=True
    """Build the ':'-joined header-name string from HAR header dicts.

    Consecutive duplicate names are dropped (filters playwright's invalid
    header splitting); entries without a 'name' key are skipped.
    """
    names = []
    previous_name = ''
    for header in dict_headers:
        header_name = header.get('name')
        if header_name and header_name != previous_name:
            names.append(header_name)
            previous_name = header_name
    return ':'.join(names)
def hhhash_headers(header_hhhash):
    """Hash a ':'-joined header-name string into the 'hhh:1:<sha256>' format."""
    digest = hashlib.sha256(header_hhhash.encode()).hexdigest()
    return f'hhh:1:{digest}'
class HHHashs(AbstractDaterangeObjects):
    """
    HHHashs Objects — collection/search helper over HHHash objects.
    """
    def __init__(self):
        super().__init__('hhhash', HHHash)

    def sanitize_id_to_search(self, name_to_search):
        # currently a pass-through; real sanitization still TODO
        return name_to_search  # TODO
# if __name__ == '__main__':
# name_to_search = '98'
# print(search_cves_by_name(name_to_search))

View file

@ -264,10 +264,9 @@ class Item(AbstractObject):
"""
if options is None:
options = set()
meta = {'id': self.id,
'date': self.get_date(separator=True),
'source': self.get_source(),
'tags': self.get_tags(r_list=True)}
meta = self.get_default_meta(tags=True)
meta['date'] = self.get_date(separator=True)
meta['source'] = self.get_source()
# optional meta fields
if 'content' in options:
meta['content'] = self.get_content()
@ -289,6 +288,8 @@ class Item(AbstractObject):
meta['mimetype'] = self.get_mimetype(content=content)
if 'investigations' in options:
meta['investigations'] = self.get_investigations()
if 'link' in options:
meta['link'] = self.get_link(flask_context=True)
# meta['encoding'] = None
return meta

275
bin/lib/objects/Messages.py Executable file
View file

@ -0,0 +1,275 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import re
import sys
import cld3
import html2text
from datetime import datetime
from pymisp import MISPObject
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.ail_core import get_ail_uuid
from lib.objects.abstract_object import AbstractObject
from lib.ConfigLoader import ConfigLoader
from lib.data_retention_engine import update_obj_date, get_obj_date_first
# TODO Set all messages ???
from flask import url_for
config_loader = ConfigLoader()
r_cache = config_loader.get_redis_conn("Redis_Cache")
r_object = config_loader.get_db_conn("Kvrocks_Objects")
# r_content = config_loader.get_db_conn("Kvrocks_Content")
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
config_loader = None
# TODO SAVE OR EXTRACT MESSAGE SOURCE FOR ICON ?????????
# TODO iterate on all objects
# TODO also add support for small objects ????
# A Message CAN exist without a CHAT -> do not convert it to an object
# ID: source:chat_id:message_id ????
#
# /!\ handle null chat and message id -> chat = uuid and message = timestamp ???
class Message(AbstractObject):
    """
    AIL Message Object. (strings)

    id layout: <source>/<timestamp>/<chat_id>_<message_id>
    (see get_source/get_timestamp/get_basename below)
    """
    def __init__(self, id):  # TODO subtype or use source ????
        super(Message, self).__init__('message', id)  # message::< telegram/1692189934.380827/ChatID_MessageID >
    def exists(self):
        # Key layout: meta:<type>:<id> (no subtype) or meta:<type>:<subtype>:<id>
        if self.subtype is None:
            return r_object.exists(f'meta:{self.type}:{self.id}')
        else:
            return r_object.exists(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}')
def get_source(self):
"""
Returns source/feeder name
"""
l_source = self.id.split('/')[:-2]
return os.path.join(*l_source)
    def get_basename(self):
        # Last path component of the id: <chat_id>_<message_id>
        return os.path.basename(self.id)
def get_content(self, r_type='str'): # TODO ADD cache # TODO Compress content ???????
"""
Returns content
"""
content = self._get_field('content')
if r_type == 'str':
return content
elif r_type == 'bytes':
return content.encode()
    def get_date(self):
        # Derive a YYYYMMDD date string from the timestamp embedded in the id
        # NOTE(review): fromtimestamp uses the local timezone — confirm UTC is intended
        timestamp = self.get_timestamp()
        return datetime.fromtimestamp(float(timestamp)).strftime('%Y%m%d')

    def get_timestamp(self):
        # Second-to-last path component of the id
        dirs = self.id.split('/')
        return dirs[-2]
    def get_message_id(self):  # TODO optimize
        # Part of the basename after the last '_'
        message_id = self.get_basename().rsplit('_', 1)[1]
        # if message_id.endswith('.gz'):
        #     message_id = message_id[:-3]
        return message_id

    def get_chat_id(self):  # TODO optimize -> use me to tag Chat
        # Part of the basename before the last '_'
        chat_id = self.get_basename().rsplit('_', 1)[0]
        # if chat_id.endswith('.gz'):
        #     chat_id = chat_id[:-3]
        return chat_id
    def get_user_account(self):
        # Return one correlated account as 'user-account:<subtype>:<id>', or None
        # if no correlation exists (presumably at most one account per message —
        # pop() picks an arbitrary one; TODO confirm)
        user_account = self.get_correlation('user-account')
        if user_account.get('user-account'):
            return f'user-account:{user_account["user-account"].pop()}'
# Update value on import
# reply to -> parent ?
# reply/comment - > children ?
# nb views
# reactions
# nb forwards
# room ???
# message from channel ???
# message media
    def get_translation(self):  # TODO support multiple translated languages ?????
        """
        Returns translated content (single 'translated' field).
        """
        return self._get_field('translated')  # TODO multiples translation ... -> use set

    def _set_translation(self, translation):
        """
        Set translated content (overwrites any previous translation).
        """
        return self._set_field('translated', translation)  # translation by hash ??? -> avoid translating multiple time
    def get_html2text_content(self, content=None, ignore_links=False):
        # Convert (HTML) content to plain text. ignore_links strips both
        # links and images from the output.
        if not content:
            content = self.get_content()
        h = html2text.HTML2Text()
        h.ignore_links = ignore_links
        h.ignore_images = ignore_links
        return h.handle(content)
# def get_ail_2_ail_payload(self):
# payload = {'raw': self.get_gzip_content(b64=True)}
# return payload
    def get_link(self, flask_context=False):
        # Flask context: relative url_for link; otherwise absolute URL built on baseurl
        if flask_context:
            url = url_for('correlation.show_correlation', type=self.type, id=self.id)
        else:
            url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}'
        return url
    def get_svg_icon(self):
        # Font Awesome comment-dots glyph, fixed color/radius
        return {'style': 'fas', 'icon': '\uf4ad', 'color': '#4dffff', 'radius': 5}
    def get_misp_object(self):  # TODO
        """Build a MISP 'instant-message' object (attributes not yet populated)."""
        obj = MISPObject('instant-message', standalone=True)
        obj_date = self.get_date()
        if obj_date:
            obj.first_seen = obj_date
        else:
            self.logger.warning(
                f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={obj_date}')
        # obj_attrs = [obj.add_attribute('first-seen', value=obj_date),
        #              obj.add_attribute('raw-data', value=self.id, data=self.get_raw_content()),
        #              obj.add_attribute('sensor', value=get_ail_uuid())]
        obj_attrs = []
        # Propagate AIL tags onto each attribute (no-op while obj_attrs is empty)
        for obj_attr in obj_attrs:
            for tag in self.get_tags():
                obj_attr.add_tag(tag)
        return obj
# def get_url(self):
# return r_object.hget(f'meta:item::{self.id}', 'url')
# options: set of optional meta fields
    def get_meta(self, options=None):
        """
        Return the message metadata dict (id/type/subtype/tags + date/source).

        :param options: set of optional meta field names
                        ('content', 'parent', 'investigations', 'link', 'user-account')
        """
        if options is None:
            options = set()
        meta = self.get_default_meta(tags=True)
        meta['date'] = self.get_date()  # TODO replace me by timestamp ??????
        meta['source'] = self.get_source()
        # optional meta fields
        if 'content' in options:
            meta['content'] = self.get_content()
        if 'parent' in options:
            meta['parent'] = self.get_parent()
        if 'investigations' in options:
            meta['investigations'] = self.get_investigations()
        if 'link' in options:
            meta['link'] = self.get_link(flask_context=True)
        if 'user-account' in options:
            meta['user-account'] = self.get_user_account()
        # meta['encoding'] = None
        return meta
def _languages_cleaner(self, content=None):
if not content:
content = self.get_content()
# REMOVE URLS
regex = r'\b(?:http://|https://)?(?:[a-zA-Z\d-]{,63}(?:\.[a-zA-Z\d-]{,63})+)(?:\:[0-9]+)*(?:/(?:$|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*\b'
url_regex = re.compile(regex)
urls = url_regex.findall(content)
urls = sorted(urls, key=len, reverse=True)
for url in urls:
content = content.replace(url, '')
# REMOVE PGP Blocks
regex_pgp_public_blocs = r'-----BEGIN PGP PUBLIC KEY BLOCK-----[\s\S]+?-----END PGP PUBLIC KEY BLOCK-----'
regex_pgp_signature = r'-----BEGIN PGP SIGNATURE-----[\s\S]+?-----END PGP SIGNATURE-----'
regex_pgp_message = r'-----BEGIN PGP MESSAGE-----[\s\S]+?-----END PGP MESSAGE-----'
re.compile(regex_pgp_public_blocs)
re.compile(regex_pgp_signature)
re.compile(regex_pgp_message)
res = re.findall(regex_pgp_public_blocs, content)
for it in res:
content = content.replace(it, '')
res = re.findall(regex_pgp_signature, content)
for it in res:
content = content.replace(it, '')
res = re.findall(regex_pgp_message, content)
for it in res:
content = content.replace(it, '')
return content
    def detect_languages(self, min_len=600, num_langs=3, min_proportion=0.2, min_probability=0.7):
        """
        Detect the languages of this message with cld3.

        :param min_len: minimum cleaned-content length required to attempt detection
        :param num_langs: maximum number of languages requested from cld3
        :param min_proportion: minimum proportion of the text a language must cover
        :param min_probability: minimum cld3 confidence for a language to be kept
        :return: list of reliable cld3 language predictions (possibly empty)
        """
        languages = []
        ## CLEAN CONTENT ##
        content = self.get_html2text_content(ignore_links=True)
        content = self._languages_cleaner(content=content)
        # REMOVE USELESS SPACE
        content = ' '.join(content.split())
        # - CLEAN CONTENT - #
        if len(content) >= min_len:
            for lang in cld3.get_frequent_languages(content, num_langs=num_langs):
                if lang.proportion >= min_proportion and lang.probability >= min_probability and lang.is_reliable:
                    languages.append(lang)
        return languages
# def translate(self, content=None): # TODO translation plugin
# # TODO get text language
# if not content:
# content = self.get_content()
# translated = argostranslate.translate.translate(content, 'ru', 'en')
# # Save translation
# self._set_translation(translated)
# return translated
    def create(self, content, translation, tags):
        # Persist content, optional translation, and apply tags
        self._set_field('content', content)
        # r_content.get(f'content:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', content)
        if translation:
            self._set_translation(translation)
        for tag in tags:
            self.add_tag(tag)
    # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
    def delete(self):
        # Not implemented: message metadata is left in place
        pass
def create_obj_id(source, chat_id, message_id, timestamp):
    """Build a message object id: <source>/<timestamp>/<chat_id>_<message_id>."""
    basename = f'{chat_id}_{message_id}'
    return '/'.join((str(source), str(timestamp), basename))
# TODO Check if already exists
# def create(source, chat_id, message_id, timestamp, content, tags=[]):
def create(obj_id, content, translation=None, tags=None):
    """
    Get or create the Message for obj_id; content/translation/tags are only
    written when the message does not already exist.

    tags defaults to None instead of a shared mutable [] (previous bug-prone default).
    """
    message = Message(obj_id)
    if not message.exists():
        message.create(content, translation, tags if tags is not None else [])
    return message
# TODO Encode translation
if __name__ == '__main__':
r = 'test'
print(r)

View file

@ -88,7 +88,7 @@ class Screenshot(AbstractObject):
return obj
def get_meta(self, options=set()):
meta = {'id': self.id}
meta = self.get_default_meta()
meta['img'] = get_screenshot_rel_path(self.id) ######### # TODO: Rename ME ??????
meta['tags'] = self.get_tags(r_list=True)
if 'tags_safe' in options:

155
bin/lib/objects/UsersAccount.py Executable file
View file

@ -0,0 +1,155 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
# import re
from flask import url_for
from pymisp import MISPObject
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib import ail_core
from lib.ConfigLoader import ConfigLoader
from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id
from lib.timeline_engine import Timeline
config_loader = ConfigLoader()
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
config_loader = None
################################################################################
################################################################################
################################################################################
class UserAccount(AbstractSubtypeObject):
    """
    AIL User Object. (strings)

    Subtyped account object (e.g. 'telegram', 'twitter' — see get_svg_icon).
    """
    def __init__(self, id, subtype):
        super(UserAccount, self).__init__('user-account', id, subtype)
# def get_ail_2_ail_payload(self):
# payload = {'raw': self.get_gzip_content(b64=True),
# 'compress': 'gzip'}
# return payload
    # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
    def delete(self):
        # # TODO: not implemented, account metadata is left in place
        pass
    def get_link(self, flask_context=False):
        # Flask context: relative url_for link; otherwise absolute URL built on baseurl
        if flask_context:
            url = url_for('correlation.show_correlation', type=self.type, subtype=self.subtype, id=self.id)
        else:
            url = f'{baseurl}/correlation/show?type={self.type}&subtype={self.subtype}&id={self.id}'
        return url
def get_svg_icon(self): # TODO change icon/color
if self.subtype == 'telegram':
style = 'fab'
icon = '\uf2c6'
elif self.subtype == 'twitter':
style = 'fab'
icon = '\uf099'
else:
style = 'fas'
icon = '\uf007'
return {'style': style, 'icon': icon, 'color': '#4dffff', 'radius': 5}
    # Simple profile-field accessors backed by the object's meta hash
    def get_first_name(self):
        return self._get_field('firstname')

    def get_last_name(self):
        return self._get_field('lastname')

    def get_phone(self):
        return self._get_field('phone')

    def set_first_name(self, firstname):
        return self._set_field('firstname', firstname)

    def set_last_name(self, lastname):
        return self._set_field('lastname', lastname)

    def set_phone(self, phone):
        return self._set_field('phone', phone)
    def _get_timeline_username(self):
        # Timeline of username objects associated with this account over time
        return Timeline(self.get_global_id(), 'username')

    def get_username(self):
        # Most recent known username (last timeline entry)
        return self._get_timeline_username().get_last_obj_id()

    def get_usernames(self):
        # All usernames ever associated with this account
        return self._get_timeline_username().get_objs_ids()

    def update_username_timeline(self, username_global_id, timestamp):
        # Record that this account used the given username at the given time
        self._get_timeline_username().add_timestamp(timestamp, username_global_id)
def get_meta(self, options=set()):
meta = self._get_meta(options=options)
meta['id'] = self.id
meta['subtype'] = self.subtype
meta['tags'] = self.get_tags(r_list=True)
if 'username' in options:
meta['username'] = self.get_username()
if 'usernames' in options:
meta['usernames'] = self.get_usernames()
return meta
    def get_misp_object(self):
        """Build a platform-specific MISP account object (telegram/twitter/generic)."""
        obj_attrs = []
        if self.subtype == 'telegram':
            obj = MISPObject('telegram-account', standalone=True)
            obj_attrs.append(obj.add_attribute('username', value=self.id))
        elif self.subtype == 'twitter':
            obj = MISPObject('twitter-account', standalone=True)
            obj_attrs.append(obj.add_attribute('name', value=self.id))
        else:
            obj = MISPObject('user-account', standalone=True)
            obj_attrs.append(obj.add_attribute('username', value=self.id))
        first_seen = self.get_first_seen()
        last_seen = self.get_last_seen()
        if first_seen:
            obj.first_seen = first_seen
        if last_seen:
            obj.last_seen = last_seen
        if not first_seen or not last_seen:
            # Missing seen dates are only logged; the export still proceeds
            self.logger.warning(
                f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={first_seen}, last={last_seen}')
        # Propagate AIL tags onto each MISP attribute
        for obj_attr in obj_attrs:
            for tag in self.get_tags():
                obj_attr.add_tag(tag)
        return obj
def get_user_by_username():
    # TODO: not implemented yet
    pass

def get_all_subtypes():
    # All registered subtypes for the 'user-account' object type
    return ail_core.get_object_all_subtypes('user-account')
def get_all():
    """Return all user-account ids, grouped by subtype."""
    return {subtype: get_all_by_subtype(subtype) for subtype in get_all_subtypes()}

def get_all_by_subtype(subtype):
    """Return every user-account id for the given subtype."""
    return get_all_id('user-account', subtype)
# if __name__ == '__main__':
# name_to_search = 'co'
# subtype = 'telegram'
# print(search_usernames_by_name(name_to_search, subtype))

View file

@ -45,10 +45,10 @@ class AbstractDaterangeObject(AbstractObject, ABC):
def exists(self):
return r_object.exists(f'meta:{self.type}:{self.id}')
def _get_field(self, field):
def _get_field(self, field): # TODO remove me (NEW in abstract)
return r_object.hget(f'meta:{self.type}:{self.id}', field)
def _set_field(self, field, value):
def _set_field(self, field, value): # TODO remove me (NEW in abstract)
return r_object.hset(f'meta:{self.type}:{self.id}', field, value)
def get_first_seen(self, r_int=False):
@ -82,9 +82,10 @@ class AbstractDaterangeObject(AbstractObject, ABC):
return int(nb)
def _get_meta(self, options=[]):
meta_dict = {'first_seen': self.get_first_seen(),
'last_seen': self.get_last_seen(),
'nb_seen': self.get_nb_seen()}
meta_dict = self.get_default_meta()
meta_dict['first_seen'] = self.get_first_seen()
meta_dict['last_seen'] = self.get_last_seen()
meta_dict['nb_seen'] = self.get_nb_seen()
if 'sparkline' in options:
meta_dict['sparkline'] = self.get_sparkline()
return meta_dict

View file

@ -20,6 +20,7 @@ sys.path.append(os.environ['AIL_BIN'])
##################################
from lib import ail_logger
from lib import Tag
from lib.ConfigLoader import ConfigLoader
from lib import Duplicate
from lib.correlations_engine import get_nb_correlations, get_correlations, add_obj_correlation, delete_obj_correlation, delete_obj_correlations, exists_obj_correlation, is_obj_correlated, get_nb_correlation_by_correl_type
from lib.Investigations import is_object_investigated, get_obj_investigations, delete_obj_investigations
@ -27,6 +28,11 @@ from lib.Tracker import is_obj_tracked, get_obj_trackers, delete_obj_trackers
logging.config.dictConfig(ail_logger.get_config(name='ail'))
config_loader = ConfigLoader()
# r_cache = config_loader.get_redis_conn("Redis_Cache")
r_object = config_loader.get_db_conn("Kvrocks_Objects")
config_loader = None
class AbstractObject(ABC):
"""
Abstract Object
@ -59,14 +65,28 @@ class AbstractObject(ABC):
def get_global_id(self):
return f'{self.get_type()}:{self.get_subtype(r_str=True)}:{self.get_id()}'
def get_default_meta(self, tags=False):
def get_default_meta(self, tags=False, link=False):
dict_meta = {'id': self.get_id(),
'type': self.get_type(),
'subtype': self.get_subtype()}
'subtype': self.get_subtype(r_str=True)}
if tags:
dict_meta['tags'] = self.get_tags()
if link:
dict_meta['link'] = self.get_link()
return dict_meta
def _get_field(self, field):
if self.subtype is None:
return r_object.hget(f'meta:{self.type}:{self.id}', field)
else:
return r_object.hget(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', field)
def _set_field(self, field, value):
if self.subtype is None:
return r_object.hset(f'meta:{self.type}:{self.id}', field, value)
else:
return r_object.hset(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', field, value)
## Tags ##
def get_tags(self, r_list=False):
tags = Tag.get_object_tags(self.type, self.id, self.get_subtype(r_str=True))
@ -198,6 +218,8 @@ class AbstractObject(ABC):
else:
return []
## Correlation ##
def _get_external_correlation(self, req_type, req_subtype, req_id, obj_type):
"""
Get object correlation
@ -253,3 +275,39 @@ class AbstractObject(ABC):
Get object correlations
"""
delete_obj_correlation(self.type, self.subtype, self.id, type2, subtype2, id2)
## -Correlation- ##
## Parent ##
def is_parent(self):
return r_object.exists(f'child:{self.type}:{self.get_subtype(r_str=True)}:{self.id}')
def is_children(self):
return r_object.hexists(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', 'parent')
def get_parent(self):
return r_object.hget(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', 'parent')
def get_children(self):
return r_object.smembers(f'child:{self.type}:{self.get_subtype(r_str=True)}:{self.id}')
def set_parent(self, obj_type=None, obj_subtype=None, obj_id=None, obj_global_id=None): # TODO ######################
if not obj_global_id:
if obj_subtype is None:
obj_subtype = ''
obj_global_id = f'{obj_type}:{obj_subtype}:{obj_id}'
r_object.hset(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', 'parent', obj_global_id)
def add_children(self, obj_type=None, obj_subtype=None, obj_id=None, obj_global_id=None): # TODO ######################
if not obj_global_id:
if obj_subtype is None:
obj_subtype = ''
obj_global_id = f'{obj_type}:{obj_subtype}:{obj_id}'
r_object.sadd(f'child:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', obj_global_id)
def add_obj_children(self, parent_global_id, son_global_id):
r_object.sadd(f'child:{parent_global_id}', son_global_id)
r_object.hset(f'meta:{son_global_id}', 'parent', parent_global_id)
## Parent ##

View file

@ -151,7 +151,7 @@ class AbstractSubtypeObject(AbstractObject, ABC):
#
#
def add(self, date, item_id):
def add(self, date, obj=None):
self.update_daterange(date)
update_obj_date(date, self.type, self.subtype)
# daily
@ -162,20 +162,22 @@ class AbstractSubtypeObject(AbstractObject, ABC):
#######################################################################
#######################################################################
# Correlations
self.add_correlation('item', '', item_id)
# domain
if is_crawled(item_id):
domain = get_item_domain(item_id)
self.add_correlation('domain', '', domain)
if obj:
# Correlations
self.add_correlation(obj.type, obj.get_subtype(r_str=True), obj.get_id())
if obj.type == 'item': # TODO same for message->chat ???
item_id = obj.get_id()
# domain
if is_crawled(item_id):
domain = get_item_domain(item_id)
self.add_correlation('domain', '', domain)
# TODO:ADD objects + Stats
def create(self, first_seen, last_seen):
self.set_first_seen(first_seen)
self.set_last_seen(last_seen)
def _delete(self):
pass

View file

@ -13,16 +13,21 @@ from lib import correlations_engine
from lib import btc_ail
from lib import Tag
from lib.objects import Chats
from lib.objects import CryptoCurrencies
from lib.objects import CookiesNames
from lib.objects.Cves import Cve
from lib.objects.Decodeds import Decoded, get_all_decodeds_objects, get_nb_decodeds_objects
from lib.objects.Domains import Domain
from lib.objects import Etags
from lib.objects.Favicons import Favicon
from lib.objects import HHHashs
from lib.objects.Items import Item, get_all_items_objects, get_nb_items_objects
from lib.objects.Messages import Message
from lib.objects import Pgps
from lib.objects.Screenshots import Screenshot
from lib.objects import Titles
from lib.objects.UsersAccount import UserAccount
from lib.objects import Usernames
config_loader = ConfigLoader()
@ -53,12 +58,20 @@ def get_object(obj_type, subtype, obj_id):
return Domain(obj_id)
elif obj_type == 'decoded':
return Decoded(obj_id)
elif obj_type == 'chat':
return Chats.Chat(obj_id, subtype)
elif obj_type == 'cookie-name':
return CookiesNames.CookieName(obj_id)
elif obj_type == 'cve':
return Cve(obj_id)
elif obj_type == 'etag':
return Etags.Etag(obj_id)
elif obj_type == 'favicon':
return Favicon(obj_id)
elif obj_type == 'hhhash':
return HHHashs.HHHash(obj_id)
elif obj_type == 'message':
return Message(obj_id)
elif obj_type == 'screenshot':
return Screenshot(obj_id)
elif obj_type == 'cryptocurrency':
@ -67,6 +80,8 @@ def get_object(obj_type, subtype, obj_id):
return Pgps.Pgp(obj_id, subtype)
elif obj_type == 'title':
return Titles.Title(obj_id)
elif obj_type == 'user-account':
return UserAccount(obj_id, subtype)
elif obj_type == 'username':
return Usernames.Username(obj_id, subtype)
@ -101,9 +116,12 @@ def get_obj_global_id(obj_type, subtype, obj_id):
obj = get_object(obj_type, subtype, obj_id)
return obj.get_global_id()
def get_obj_type_subtype_id_from_global_id(global_id):
obj_type, subtype, obj_id = global_id.split(':', 2)
return obj_type, subtype, obj_id
def get_obj_from_global_id(global_id):
obj = global_id.split(':', 3)
obj = get_obj_type_subtype_id_from_global_id(global_id)
return get_object(obj[0], obj[1], obj[2])
@ -159,7 +177,7 @@ def get_objects_meta(objs, options=set(), flask_context=False):
subtype = obj[1]
obj_id = obj[2]
else:
obj_type, subtype, obj_id = obj.split(':', 2)
obj_type, subtype, obj_id = get_obj_type_subtype_id_from_global_id(obj)
metas.append(get_object_meta(obj_type, subtype, obj_id, options=options, flask_context=flask_context))
return metas
@ -168,7 +186,7 @@ def get_object_card_meta(obj_type, subtype, id, related_btc=False):
obj = get_object(obj_type, subtype, id)
meta = obj.get_meta()
meta['icon'] = obj.get_svg_icon()
if subtype or obj_type == 'cookie-name' or obj_type == 'cve' or obj_type == 'title' or obj_type == 'favicon':
if subtype or obj_type == 'cookie-name' or obj_type == 'cve' or obj_type == 'etag' or obj_type == 'title' or obj_type == 'favicon' or obj_type == 'hhhash':
meta['sparkline'] = obj.get_sparkline()
if obj_type == 'cve':
meta['cve_search'] = obj.get_cve_search()
@ -177,6 +195,8 @@ def get_object_card_meta(obj_type, subtype, id, related_btc=False):
if subtype == 'bitcoin' and related_btc:
meta["related_btc"] = btc_ail.get_bitcoin_info(obj.id)
if obj.get_type() == 'decoded':
meta['mimetype'] = obj.get_mimetype()
meta['size'] = obj.get_size()
meta["vt"] = obj.get_meta_vt()
meta["vt"]["status"] = obj.is_vt_enabled()
# TAGS MODAL
@ -333,8 +353,8 @@ def get_obj_correlations(obj_type, subtype, obj_id):
obj = get_object(obj_type, subtype, obj_id)
return obj.get_correlations()
def _get_obj_correlations_objs(objs, obj_type, subtype, obj_id, filter_types, lvl, nb_max):
if len(objs) < nb_max or nb_max == -1:
def _get_obj_correlations_objs(objs, obj_type, subtype, obj_id, filter_types, lvl, nb_max, objs_hidden):
if len(objs) < nb_max or nb_max == 0:
if lvl == 0:
objs.add((obj_type, subtype, obj_id))
@ -346,15 +366,17 @@ def _get_obj_correlations_objs(objs, obj_type, subtype, obj_id, filter_types, lv
for obj2_type in correlations:
for str_obj in correlations[obj2_type]:
obj2_subtype, obj2_id = str_obj.split(':', 1)
_get_obj_correlations_objs(objs, obj2_type, obj2_subtype, obj2_id, filter_types, lvl, nb_max)
if get_obj_global_id(obj2_type, obj2_subtype, obj2_id) in objs_hidden:
continue # filter object to hide
_get_obj_correlations_objs(objs, obj2_type, obj2_subtype, obj2_id, filter_types, lvl, nb_max, objs_hidden)
def get_obj_correlations_objs(obj_type, subtype, obj_id, filter_types=[], lvl=0, nb_max=300):
def get_obj_correlations_objs(obj_type, subtype, obj_id, filter_types=[], lvl=0, nb_max=300, objs_hidden=set()):
objs = set()
_get_obj_correlations_objs(objs, obj_type, subtype, obj_id, filter_types, lvl, nb_max)
_get_obj_correlations_objs(objs, obj_type, subtype, obj_id, filter_types, lvl, nb_max, objs_hidden)
return objs
def obj_correlations_objs_add_tags(obj_type, subtype, obj_id, tags, filter_types=[], lvl=0, nb_max=300):
objs = get_obj_correlations_objs(obj_type, subtype, obj_id, filter_types=filter_types, lvl=lvl, nb_max=nb_max)
def obj_correlations_objs_add_tags(obj_type, subtype, obj_id, tags, filter_types=[], lvl=0, nb_max=300, objs_hidden=set()):
objs = get_obj_correlations_objs(obj_type, subtype, obj_id, filter_types=filter_types, lvl=lvl, nb_max=nb_max, objs_hidden=objs_hidden)
# print(objs)
for obj_tuple in objs:
obj1_type, subtype1, id1 = obj_tuple
@ -395,7 +417,7 @@ def create_correlation_graph_links(links_set):
def create_correlation_graph_nodes(nodes_set, obj_str_id, flask_context=True):
graph_nodes_list = []
for node_id in nodes_set:
obj_type, subtype, obj_id = node_id.split(':', 2)
obj_type, subtype, obj_id = get_obj_type_subtype_id_from_global_id(node_id)
dict_node = {'id': node_id}
dict_node['style'] = get_object_svg(obj_type, subtype, obj_id)
@ -416,10 +438,12 @@ def create_correlation_graph_nodes(nodes_set, obj_str_id, flask_context=True):
def get_correlations_graph_node(obj_type, subtype, obj_id, filter_types=[], max_nodes=300, level=1,
objs_hidden=set(),
flask_context=False):
obj_str_id, nodes, links, meta = correlations_engine.get_correlations_graph_nodes_links(obj_type, subtype, obj_id,
filter_types=filter_types,
max_nodes=max_nodes, level=level,
objs_hidden=objs_hidden,
flask_context=flask_context)
# print(meta)
meta['objs'] = list(meta['objs'])

212
bin/lib/timeline_engine.py Executable file
View file

@ -0,0 +1,212 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
from uuid import uuid4
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.ConfigLoader import ConfigLoader
config_loader = ConfigLoader()
r_meta = config_loader.get_db_conn("Kvrocks_Timeline")
config_loader = None
# CORRELATION_TYPES_BY_OBJ = {
# "chat": ["item", "username"], # item ???
# "cookie-name": ["domain"],
# "cryptocurrency": ["domain", "item"],
# "cve": ["domain", "item"],
# "decoded": ["domain", "item"],
# "domain": ["cve", "cookie-name", "cryptocurrency", "decoded", "etag", "favicon", "hhhash", "item", "pgp", "title", "screenshot", "username"],
# "etag": ["domain"],
# "favicon": ["domain", "item"],
# "hhhash": ["domain"],
# "item": ["chat", "cve", "cryptocurrency", "decoded", "domain", "favicon", "pgp", "screenshot", "title", "username"],
# "pgp": ["domain", "item"],
# "screenshot": ["domain", "item"],
# "title": ["domain", "item"],
# "username": ["chat", "domain", "item"],
# }
#
# def get_obj_correl_types(obj_type):
# return CORRELATION_TYPES_BY_OBJ.get(obj_type)
# def sanityze_obj_correl_types(obj_type, correl_types):
# obj_correl_types = get_obj_correl_types(obj_type)
# if correl_types:
# correl_types = set(correl_types).intersection(obj_correl_types)
# if not correl_types:
# correl_types = obj_correl_types
# if not correl_types:
# return []
# return correl_types
class Timeline:
    """
    Timeline of an attribute of one object (e.g. a user-account's usernames).

    Stored as a sorted set 'line:<id>:<name>' of 'start:<bloc>'/'end:<bloc>'
    markers scored by timestamp, plus a hash 'block:<id>:<name>' mapping each
    bloc uuid to an object global id (see the methods below).
    """
    def __init__(self, global_id, name):
        # global_id: owning object's global id; name: timeline attribute name
        self.id = global_id
        self.name = name
    def _get_block_obj_global_id(self, block):
        # Object global id stored for a bloc uuid
        return r_meta.hget(f'block:{self.id}:{self.name}', block)

    def _set_block_obj_global_id(self, block, global_id):
        # Associate a bloc uuid with an object global id
        return r_meta.hset(f'block:{self.id}:{self.name}', block, global_id)

    def _get_block_timestamp(self, block, position):
        # Timestamp (zset score) of a bloc boundary; position is 'start' or 'end'
        return r_meta.zscore(f'line:{self.id}:{self.name}', f'{position}:{block}')
    def _get_nearest_bloc_inf(self, timestamp):
        # Nearest bloc boundary at or below timestamp, or None if none exists.
        # Returns 'start:<bloc>'/'end:<bloc>'.
        inf = r_meta.zrevrangebyscore(f'line:{self.id}:{self.name}', float(timestamp), 0, start=0, num=1, withscores=True)
        if inf:
            inf, score = inf[0]
            if inf.startswith('end'):
                # Zero-length bloc (start == end score): prefer the 'start' marker
                inf_key = f'start:{inf[4:]}'
                inf_score = r_meta.zscore(f'line:{self.id}:{self.name}', inf_key)
                if inf_score == score:
                    inf = inf_key
            return inf
        else:
            return None
    def _get_nearest_bloc_sup(self, timestamp):
        # Nearest bloc boundary at or above timestamp, or None if none exists.
        # Returns 'start:<bloc>'/'end:<bloc>'.
        sup = r_meta.zrangebyscore(f'line:{self.id}:{self.name}', float(timestamp), '+inf', start=0, num=1, withscores=True)
        if sup:
            sup, score = sup[0]
            if sup.startswith('start'):
                # Zero-length bloc (start == end score): prefer the 'end' marker
                sup_key = f'end:{sup[6:]}'
                sup_score = r_meta.zscore(f'line:{self.id}:{self.name}', sup_key)
                if score == sup_score:
                    sup = sup_key
            return sup
        else:
            return None
    def get_first_obj_id(self):
        # Object global id of the earliest bloc on the timeline (or None)
        first = r_meta.zrange(f'line:{self.id}:{self.name}', 0, 0)
        if first:  # start:block
            first = first[0]
            # Strip the 'start:' or 'end:' prefix to get the bloc uuid
            if first.startswith('start:'):
                first = first[6:]
            else:
                first = first[4:]
        return self._get_block_obj_global_id(first)
    def get_last_obj_id(self):
        # Object global id of the latest bloc on the timeline (or None)
        last = r_meta.zrevrange(f'line:{self.id}:{self.name}', 0, 0)
        if last:  # end:block
            last = last[0]
            # Strip the 'end:' or 'start:' prefix to get the bloc uuid
            if last.startswith('end:'):
                last = last[4:]
            else:
                last = last[6:]
        return self._get_block_obj_global_id(last)
def get_objs_ids(self):
objs = set()
for block in r_meta.zrange(f'line:{self.id}:{self.name}', 0, -1):
if block:
if block.startswith('start:'):
objs.add(self._get_block_obj_global_id(block[6:]))
return objs
# def get_objs_ids(self):
# objs = {}
# last_obj_id = None
# for block, timestamp in r_meta.zrange(f'line:{self.id}:{self.name}', 0, -1, withscores=True):
# if block:
# if block.startswith('start:'):
# last_obj_id = self._get_block_obj_global_id(block[6:])
# objs[last_obj_id] = {'first_seen': timestamp}
# else:
# objs[last_obj_id]['last_seen'] = timestamp
# return objs
    def _update_bloc(self, block, position, timestamp):
        # Move one boundary ('start' or 'end') of an existing bloc to timestamp
        r_meta.zadd(f'line:{self.id}:{self.name}', {f'{position}:{block}': timestamp})

    def _add_bloc(self, obj_global_id, timestamp, end=None):
        # Create a new bloc [timestamp, end or timestamp] mapped to obj_global_id;
        # returns the new bloc uuid
        if end:
            timestamp_end = end
        else:
            timestamp_end = timestamp
        new_bloc = str(uuid4())
        r_meta.zadd(f'line:{self.id}:{self.name}', {f'start:{new_bloc}': timestamp, f'end:{new_bloc}': timestamp_end})
        self._set_block_obj_global_id(new_bloc, obj_global_id)
        return new_bloc
    def add_timestamp(self, timestamp, obj_global_id):
        """
        Insert a (timestamp, object) observation into the timeline.

        Extends an adjacent bloc when the neighbouring bloc already belongs to
        obj_global_id, splits an existing bloc when the timestamp falls inside a
        bloc of a different object, or creates a new bloc otherwise.
        Returns the bloc uuid the timestamp was recorded in (when one is returned).
        """
        inf = self._get_nearest_bloc_inf(timestamp)
        sup = self._get_nearest_bloc_sup(timestamp)
        if not inf and not sup:
            # Empty timeline: create new bloc
            new_bloc = self._add_bloc(obj_global_id, timestamp)
            return new_bloc
        # timestamp < first_seen
        elif not inf:
            sup_pos, sup_id = sup.split(':')
            sup_obj = self._get_block_obj_global_id(sup_id)
            if sup_obj == obj_global_id:
                # Same object: extend the first bloc backwards
                self._update_bloc(sup_id, 'start', timestamp)
            # create new bloc
            else:
                new_bloc = self._add_bloc(obj_global_id, timestamp)
                return new_bloc
        # timestamp > first_seen
        elif not sup:
            inf_pos, inf_id = inf.split(':')
            inf_obj = self._get_block_obj_global_id(inf_id)
            if inf_obj == obj_global_id:
                # Same object: extend the last bloc forwards
                self._update_bloc(inf_id, 'end', timestamp)
            # create new bloc
            else:
                new_bloc = self._add_bloc(obj_global_id, timestamp)
                return new_bloc
        else:
            inf_pos, inf_id = inf.split(':')
            sup_pos, sup_id = sup.split(':')
            inf_obj = self._get_block_obj_global_id(inf_id)
            if inf_id == sup_id:
                # timestamp falls inside one bloc
                # reduce bloc + create two new bloc
                if obj_global_id != inf_obj:
                    # get end timestamp
                    sup_timestamp = self._get_block_timestamp(sup_id, 'end')
                    # reduce original bloc
                    self._update_bloc(inf_id, 'end', timestamp - 1)
                    # Insert new bloc
                    new_bloc = self._add_bloc(obj_global_id, timestamp)
                    # Recreate end of the first bloc by a new bloc
                    self._add_bloc(inf_obj, timestamp + 1, end=sup_timestamp)
                    return new_bloc
                # timestamp in existing bloc of the same object: nothing to change
                else:
                    return inf_id
            # different blocs: expand sup/inf bloc or create a new bloc in the gap
            elif inf_pos == 'end' and sup_pos == 'start':
                # Extend inf bloc
                if obj_global_id == inf_obj:
                    self._update_bloc(inf_id, 'end', timestamp)
                    return inf_id
                sup_obj = self._get_block_obj_global_id(sup_id)
                # Extend sup bloc
                if obj_global_id == sup_obj:
                    self._update_bloc(sup_id, 'start', timestamp)
                    return sup_id
                # create new bloc
                new_bloc = self._add_bloc(obj_global_id, timestamp)
                return new_bloc
            # inf_pos == 'start' and sup_pos == 'end'
            # else raise error ???

View file

@ -130,7 +130,7 @@ class Cryptocurrencies(AbstractModule, ABC):
if crypto.is_valid_address():
# print(address)
is_valid_address = True
crypto.add(date, item_id)
crypto.add(date, item)
# Check private key
if is_valid_address:

View file

@ -131,7 +131,7 @@ class Mixer(AbstractModule):
self.last_refresh = time.time()
self.clear_feeders_stat()
time.sleep(0.5)
time.sleep(0.5)
def computeNone(self):
self.refresh_stats()

View file

@ -42,7 +42,8 @@ class Onion(AbstractModule):
self.faup = crawlers.get_faup()
# activate_crawler = p.config.get("Crawler", "activate_crawler")
self.har = config_loader.get_config_boolean('Crawler', 'default_har')
self.screenshot = config_loader.get_config_boolean('Crawler', 'default_screenshot')
self.onion_regex = r"((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.onion)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
# self.i2p_regex = r"((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.i2p)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
@ -90,8 +91,9 @@ class Onion(AbstractModule):
if onion_urls:
if crawlers.is_crawler_activated():
for domain in domains: # TODO LOAD DEFAULT SCREENSHOT + HAR
task_uuid = crawlers.create_task(domain, parent=item.get_id(), priority=0)
for domain in domains:
task_uuid = crawlers.create_task(domain, parent=item.get_id(), priority=0,
har=self.har, screenshot=self.screenshot)
if task_uuid:
print(f'{domain} added to crawler queue: {task_uuid}')
else:

View file

@ -210,18 +210,18 @@ class PgpDump(AbstractModule):
date = item.get_date()
for key in self.keys:
pgp = Pgps.Pgp(key, 'key')
pgp.add(date, self.item_id)
pgp.add(date, item)
print(f' key: {key}')
for name in self.names:
pgp = Pgps.Pgp(name, 'name')
pgp.add(date, self.item_id)
pgp.add(date, item)
print(f' name: {name}')
self.tracker_term.compute(name, obj_type='pgp', subtype='name')
self.tracker_regex.compute(name, obj_type='pgp', subtype='name')
self.tracker_yara.compute(name, obj_type='pgp', subtype='name')
for mail in self.mails:
pgp = Pgps.Pgp(mail, 'mail')
pgp.add(date, self.item_id)
pgp.add(date, item)
print(f' mail: {mail}')
self.tracker_term.compute(mail, obj_type='pgp', subtype='mail')
self.tracker_regex.compute(mail, obj_type='pgp', subtype='mail')

View file

@ -58,7 +58,7 @@ class Telegram(AbstractModule):
user_id = dict_url.get('username')
if user_id:
username = Username(user_id, 'telegram')
username.add(item_date, item.id)
username.add(item_date, item)
print(f'username: {user_id}')
invite_hash = dict_url.get('invite_hash')
if invite_hash:
@ -73,7 +73,7 @@ class Telegram(AbstractModule):
user_id = dict_url.get('username')
if user_id:
username = Username(user_id, 'telegram')
username.add(item_date, item.id)
username.add(item_date, item)
print(f'username: {user_id}')
invite_hash = dict_url.get('invite_hash')
if invite_hash:

View file

@ -10,6 +10,8 @@ Update AIL in the background
"""
import os
import logging
import logging.config
import sys
import subprocess
@ -17,37 +19,55 @@ sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib import ail_logger
from lib import ail_updates
def launch_background_upgrade(version, l_script_name):
if ail_updates.is_version_in_background_update(version):
ail_updates.start_background_update(version)
logging.config.dictConfig(ail_logger.get_config(name='updates'))
def launch_background_upgrade(version):
logger = logging.getLogger()
logger.warning(f'launching background update {version}')
update = ail_updates.AILBackgroundUpdate(version)
nb_done = update.get_nb_scripts_done()
update.start()
scripts = update.get_scripts()
scripts = scripts[nb_done:]
for script in scripts:
print('launching background script update', script)
# launch script
update.start_script(script)
script_path = update.get_script_path()
if script_path:
try:
process = subprocess.run(['python', script_path])
if process.returncode != 0:
stderr = process.stderr
if stderr:
error = stderr.decode()
logger.error(error)
update.set_error(error)
else:
update.set_error('Error Updater Script')
logger.error('Error Updater Script')
sys.exit(0)
except Exception as e:
update.set_error(str(e))
logger.error(str(e))
sys.exit(0)
for script_name in l_script_name:
ail_updates.set_current_background_update_script(script_name)
update_file = ail_updates.get_current_background_update_script_path(version, script_name)
if not update.get_error():
update.end_script()
else:
logger.warning('Updater exited on error')
sys.exit(0)
# # TODO: Get error output
process = subprocess.run(['python', update_file])
update_progress = ail_updates.get_current_background_update_progress()
if update_progress == 100:
ail_updates.end_background_update_script()
# # TODO: Create Custom error
# 'Please relaunch the bin/update-background.py script'
# # TODO: Create Class background update
ail_updates.end_background_update(version)
update.end()
logger.warning(f'ending background update {version}')
if __name__ == "__main__":
if not ail_updates.exits_background_update_to_launch():
ail_updates.clear_background_update()
if ail_updates.is_update_background_running():
v = ail_updates.get_update_background_version()
launch_background_upgrade(v)
else:
launch_background_upgrade('v1.5', ['Update-ARDB_Onions.py', 'Update-ARDB_Metadata.py', 'Update-ARDB_Tags.py',
'Update-ARDB_Tags_background.py', 'Update-ARDB_Onions_screenshots.py'])
launch_background_upgrade('v2.6', ['Update_screenshots.py'])
launch_background_upgrade('v2.7', ['Update_domain_tags.py'])
launch_background_upgrade('v3.4', ['Update_domain.py'])
launch_background_upgrade('v3.7', ['Update_trackers.py'])
for ver in ail_updates.get_update_background_to_launch():
launch_background_upgrade(ver)

View file

@ -663,6 +663,7 @@ namespace.crawl ail_crawlers
namespace.db ail_datas
namespace.dup ail_dups
namespace.obj ail_objs
namespace.tl ail_tls
namespace.stat ail_stats
namespace.tag ail_tags
namespace.track ail_trackers

View file

@ -45,6 +45,10 @@ sender = sender@example.com
sender_host = smtp.example.com
sender_port = 1337
sender_pw = None
# Only needed for SMTP over SSL if the mail server doesn't support TLS (TLS is used by default). Use this option to validate the server certificate.
cert_required = False
# Only needed for SMTP over SSL if you want to validate your self-signed certificate
ca_file =
# Only needed when the credentials for email server needs a username instead of an email address
#sender_user = sender
sender_user =
@ -191,6 +195,11 @@ host = localhost
port = 6383
password = ail_objs
[Kvrocks_Timeline]
host = localhost
port = 6383
password = ail_tls
[Kvrocks_Stats]
host = localhost
port = 6383

View file

@ -89,12 +89,12 @@ Available Importers:
5. Launch ail-framework, pystemon and PystemonImporter.py (all within the virtual environment):
- Option 1 (recommended):
```
./ail-framework/bin/LAUNCH.py -l #starts ail-framework
./ail-framework/bin/LAUNCH.py -f #starts pystemon and the PystemonImporter.py
./ail-framework/bin/LAUNCH.sh -l #starts ail-framework
./ail-framework/bin/LAUNCH.sh -f #starts pystemon and the PystemonImporter.py
```
- Option 2 (may require two terminal windows):
```
./ail-framework/bin/LAUNCH.py -l #starts ail-framework
./ail-framework/bin/LAUNCH.sh -l #starts ail-framework
./pystemon/pystemon.py
./ail-framework/bin/importer/PystemonImporter.py
```

120
tools/crawler_add_task.py Executable file
View file

@ -0,0 +1,120 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Send an URL to the crawler - Create a crawler task
================
Import URL to be crawled by AIL and then analysed
"""
import argparse
import os
from pyail import PyAIL
import sys
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.ConfigLoader import ConfigLoader
def check_frequency(value):
    """Validate a crawler-frequency CLI value.

    Argparse-style validator: the value must parse as a strictly positive
    integer.  Returns the parsed int so the function can also be used
    directly as an argparse ``type=`` callback (the original returned
    ``None``, which made that impossible).

    :param value: frequency value, in the unit of the related CLI option
    :return: the validated integer frequency
    :raises argparse.ArgumentTypeError: if the value is not > 0
    """
    value = int(value)
    if value <= 0:
        raise argparse.ArgumentTypeError(f'Error: Invalid frequency {value}')
    return value
if __name__ == "__main__":
    # TODO add -c argument for config file
    parser = argparse.ArgumentParser(description='Send an URL to the crawler - Create a crawler task')
    parser.add_argument('-u', '--url', type=str, help='URL to crawl', required=True)
    parser.add_argument('-k', '--key', type=str, help='AIL API Key', required=True)
    parser.add_argument('-a', '--ail', type=str, help='AIL URL')
    parser.add_argument('-d', '--depth', type=int, default=1, help='Depth limit')  # TODO improve me
    parser.add_argument('--cookiejar', type=str, help='Cookiejar uuid')
    parser.add_argument('-p', '--proxy', type=str,
                        help='Proxy address to use, "web" and "tor" can be used as shortcut (web is used by default if the domain isn\'t an onion)')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--har', dest='har', action='store_true', help='Save HAR')
    group.add_argument('--no-har', dest='har', action='store_false', help='Don\'t save HAR')
    parser.set_defaults(har=None)
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--screenshot', dest='screenshot', action='store_true', help='Save screenshot')
    group.add_argument('--no-screenshot', dest='screenshot', action='store_false', help='Don\'t save screenshot')
    parser.set_defaults(screenshot=None)
    group = parser.add_argument_group('Frequency, create a regular crawler/scheduler. one shot if not specified')
    group.add_argument('-f', '--frequency', type=str, choices=['monthly', 'weekly', 'daily', 'hourly'],
                       help='monthly, weekly, daily or hourly frequency or specify a custom one with the others arguments')
    group.add_argument('--minutes', type=int, help='frequency in minutes')
    group.add_argument('--hours', type=int, help='frequency in hours')
    group.add_argument('--days', type=int, help='frequency in days')
    group.add_argument('--weeks', type=int, help='frequency in weeks')
    group.add_argument('--months', type=int, help='frequency in months')
    args = parser.parse_args()

    # Fix: the original tested `not args.url and not args.key`, which can
    # never be True when both arguments are required; `or` keeps the guard
    # meaningful if `required=True` is ever relaxed.
    if not args.url or not args.key:
        parser.print_help()
        sys.exit(0)

    # Load crawler default config: HAR/screenshot defaults come from the AIL
    # configuration when not explicitly set on the command line (tri-state:
    # None means "use the configured default").
    config_loader = ConfigLoader()
    har = args.har
    if har is None:
        har = config_loader.get_config_boolean('Crawler', 'default_har')
    screenshot = args.screenshot
    if screenshot is None:
        screenshot = config_loader.get_config_boolean('Crawler', 'default_screenshot')

    depth = args.depth if args.depth else 1
    if depth < 0:
        raise argparse.ArgumentTypeError(f'Error: Invalid depth {depth}')

    # frequency is either a named preset (str) or a dict of custom time units
    # (argparse `choices` already guarantees -f holds a valid preset, so the
    # original unreachable "Invalid frequency" branch is dropped)
    frequency = {}
    if args.frequency:
        frequency = args.frequency
    elif args.minutes or args.hours or args.days or args.weeks or args.months:
        for unit in ('minutes', 'hours', 'days', 'weeks', 'months'):
            value = getattr(args, unit)
            if value:
                check_frequency(value)  # raises on value <= 0
                frequency[unit] = value
    if not frequency:
        frequency = None  # one-shot crawl

    proxy = args.proxy
    cookiejar = args.cookiejar if args.cookiejar else None
    ail = args.ail if args.ail else 'https://localhost:7000/'

    client = PyAIL(ail, args.key, ssl=False)
    r = client.crawl_url(args.url, har=har, screenshot=screenshot, depth_limit=depth, frequency=frequency,
                         cookiejar=cookiejar, proxy=proxy)
    print(r)

View file

@ -1,18 +0,0 @@
#!/bin/bash
# Rebuild the bundled ARDB database from source (branch 0.10).

echo "Killing all screens ..."
bash -c "bash ../../bin/LAUNCH.sh -k"
echo ""
echo "Updating ARDB ..."
pushd ../../ || exit 1
# Fix: the original removed the directory, then pushd'd into the directory
# it had just deleted and only afterwards cloned — so checkout/make ran in
# the wrong directory. Clone first, then enter the fresh checkout.
rm -rf ardb
git clone https://github.com/yinqiwen/ardb.git || exit 1
pushd ardb/ || exit 1
git checkout 0.10 || exit 1
make || exit 1
popd
popd
echo "ARDB Updated"
echo ""
exit 0

View file

@ -2,13 +2,11 @@
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

View file

@ -20,7 +20,7 @@ class AIL_Updater(object):
self.start_time = time.time()
self.config = ConfigLoader()
self.r_serv = self.config.get_redis_conn("Kvrocks_DB")
self.r_serv = self.config.get_db_conn("Kvrocks_DB")
self.f_version = float(self.version[1:])
self.current_f_version = ail_updates.get_ail_float_version()
@ -35,7 +35,7 @@ class AIL_Updater(object):
"""
Update DB version
"""
ail_updates.add_ail_update(version)
ail_updates.add_ail_update(self.version)
def run_update(self):
self.update()

View file

@ -1,50 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
import datetime
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib import ConfigLoader
class AIL_Updater(object):
    """Base class for one-shot AIL (ARDB) update scripts.

    Holds the target version string (e.g. "v1.5"), opens the ARDB_DB
    connection, and exposes run_update(), which applies update() and then
    records the new version in the database.
    """

    def __init__(self, new_version):
        self.version = new_version
        self.start_time = time.time()
        self.config = ConfigLoader.ConfigLoader()
        self.r_serv = self.config.get_redis_conn("ARDB_DB")

        # Numeric versions ("v1.5" -> 1.5) used to decide whether to bump
        self.f_version = float(self.version[1:])
        stored_version = self.r_serv.get('ail:version')
        self.current_f_version = float(stored_version[1:]) if stored_version else 0

    def update(self):
        """AIL DB update — overridden by concrete updater subclasses."""
        pass

    def end_update(self):
        """Record the update date and bump 'ail:version' if this one is newer."""
        self.r_serv.hset('ail:update_date', self.version, datetime.datetime.now().strftime("%Y%m%d"))
        if self.f_version > self.current_f_version:
            self.r_serv.set('ail:version', self.version)

    def run_update(self):
        self.update()
        self.end_update()

View file

@ -7,13 +7,13 @@ fi
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_KVROCKS" ] && echo "Needs the env var AIL_KVROCKS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=AIL_KVROCKS:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH
@ -25,7 +25,7 @@ bash ${AIL_BIN}/LAUNCH.sh -ks
wait
echo ""
bash ${AIL_BIN}/LAUNCH.sh -lav
bash ${AIL_BIN}/LAUNCH.sh -lkv
wait
echo ""

View file

@ -1,15 +0,0 @@
#!/bin/bash
# Display a "please relaunch AIL" banner, then terminate the parent
# updater process so AIL can be restarted manually.

YELLOW="\\033[1;33m"
DEFAULT="\\033[0;39m"

echo -e $YELLOW"\t"
echo -e "* ------------------------------------------------------------------"
echo -e "\t"
echo -e " - - - - - - - - PLEASE RELAUNCH AIL - - - - - - - - "
echo -e "\t"
echo -e "* ------------------------------------------------------------------"
echo -e "\t"
echo -e "\t"$DEFAULT

# fix invalid Updater version (kill parent):
# sends SIGUSR1 to this script's parent PID (looked up via ps), then exits
kill -SIGUSR1 `ps --pid $$ -oppid=`; exit

View file

@ -1,165 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
from lib import ConfigLoader
def update_tracked_terms(main_key, tracked_container_key):
    """Strip the absolute PASTES_FOLDER prefix from tracked item paths.

    For every tracked term stored under `main_key`, rewrite the members of
    its container set (key built from `tracked_container_key`) so that they
    hold relative item paths instead of absolute ones.
    """
    for term in r_serv_term.smembers(main_key):
        container = tracked_container_key.format(term)
        for path in r_serv_term.smembers(container):
            if PASTES_FOLDER not in path:
                continue
            relative_path = path.replace(PASTES_FOLDER, '', 1)
            r_serv_term.sadd(container, relative_path)
            r_serv_term.srem(container, path)
def update_hash_item(has_type):
    # Migrate decoded-hash metadata keys for one hash type ('base64',
    # 'binary' or 'hexadecimal') from absolute item paths to relative ones.
    # When the relative-path key already exists, the two sets are merged.
    # get all hash items:
    all_hash_items = r_serv_tag.smembers('infoleak:automatic-detection=\"{}\"'.format(has_type))
    for item_path in all_hash_items:
        if PASTES_FOLDER in item_path:
            base64_key = '{}_paste:{}'.format(has_type, item_path)
            hash_key = 'hash_paste:{}'.format(item_path)

            if r_serv_metadata.exists(base64_key):
                new_base64_key = base64_key.replace(PASTES_FOLDER, '', 1)
                # RENAMENX returns 0 when the destination key already exists
                res = r_serv_metadata.renamenx(base64_key, new_base64_key)
                if res == 0:
                    print('same key, double name: {}'.format(item_path))
                    # fusion: merge members of the old set into the new one
                    all_key = r_serv_metadata.smembers(base64_key)
                    for elem in all_key:
                        r_serv_metadata.sadd(new_base64_key, elem)
                        r_serv_metadata.srem(base64_key, elem)

            if r_serv_metadata.exists(hash_key):
                new_hash_key = hash_key.replace(PASTES_FOLDER, '', 1)
                res = r_serv_metadata.renamenx(hash_key, new_hash_key)
                if res == 0:
                    print('same key, double name: {}'.format(item_path))
                    # fusion
                    all_key = r_serv_metadata.smembers(hash_key)
                    for elem in all_key:
                        r_serv_metadata.sadd(new_hash_key, elem)
                        r_serv_metadata.srem(hash_key, elem)
if __name__ == '__main__':
    # One-shot v1.5 background migration: rewrite ARDB_Metadata keys that
    # still contain absolute paste paths into relative item paths, then
    # migrate tracked term/set/regex containers. Progress is exposed to the
    # UI via 'ail:current_background_script[_stat]'.
    start_deb = time.time()

    config_loader = ConfigLoader.ConfigLoader()
    PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
    r_serv = config_loader.get_redis_conn("ARDB_DB")
    r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
    r_serv_tag = config_loader.get_redis_conn("ARDB_Tags")
    r_serv_term = config_loader.get_redis_conn("ARDB_TermFreq")
    r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
    config_loader = None

    r_serv.set('ail:current_background_script', 'metadata')

    ## Update metadata ##
    print('Updating ARDB_Metadata ...')
    index = 0
    start = time.time()

    # update stats
    r_serv.set('ail:current_background_script_stat', 0)
    # Update base64
    update_hash_item('base64')
    # update stats
    r_serv.set('ail:current_background_script_stat', 20)
    # Update binary
    update_hash_item('binary')
    # update stats
    r_serv.set('ail:current_background_script_stat', 40)
    # Update hexadecimal
    update_hash_item('hexadecimal')
    # update stats
    r_serv.set('ail:current_background_script_stat', 60)

    total_onion = r_serv_tag.scard('infoleak:submission=\"crawler\"')
    nb_updated = 0
    last_progress = 0

    # Update onion metadata: rename 'paste_metadata:<abs path>' keys to
    # relative paths and fix domain/father fields in the renamed hash.
    all_crawled_items = r_serv_tag.smembers('infoleak:submission=\"crawler\"')
    for item_path in all_crawled_items:
        domain = None
        if PASTES_FOLDER in item_path:
            old_item_metadata = 'paste_metadata:{}'.format(item_path)
            item_path = item_path.replace(PASTES_FOLDER, '', 1)
            new_item_metadata = 'paste_metadata:{}'.format(item_path)
            res = r_serv_metadata.renamenx(old_item_metadata, new_item_metadata)
            # key already exist
            if res == 0:
                r_serv_metadata.delete(old_item_metadata)
            # update domain port (append default ':80' when missing)
            domain = r_serv_metadata.hget(new_item_metadata, 'domain')
            if domain:
                if domain[-3:] != ':80':
                    r_serv_metadata.hset(new_item_metadata, 'domain', '{}:80'.format(domain))
            super_father = r_serv_metadata.hget(new_item_metadata, 'super_father')
            if super_father:
                if PASTES_FOLDER in super_father:
                    r_serv_metadata.hset(new_item_metadata, 'super_father', super_father.replace(PASTES_FOLDER, '', 1))
            father = r_serv_metadata.hget(new_item_metadata, 'father')
            if father:
                if PASTES_FOLDER in father:
                    r_serv_metadata.hset(new_item_metadata, 'father', father.replace(PASTES_FOLDER, '', 1))
        nb_updated += 1
        # this phase covers the 60-90% range of the progress bar
        progress = int((nb_updated * 30) /total_onion)
        print('{}/{} updated {}%'.format(nb_updated, total_onion, progress + 60))
        # update progress stats
        if progress != last_progress:
            r_serv.set('ail:current_background_script_stat', progress + 60)
            last_progress = progress

    # update stats
    r_serv.set('ail:current_background_script_stat', 90)

    ## update tracked term/set/regex
    # update tracked term
    update_tracked_terms('TrackedSetTermSet', 'tracked_{}')
    # update stats
    r_serv.set('ail:current_background_script_stat', 93)
    # update tracked set
    update_tracked_terms('TrackedSetSet', 'set_{}')
    # update stats
    r_serv.set('ail:current_background_script_stat', 96)
    # update tracked regex
    update_tracked_terms('TrackedRegexSet', 'regex_{}')
    # update stats
    r_serv.set('ail:current_background_script_stat', 100)
    ##

    end = time.time()
    print('Updating ARDB_Metadata Done => {} paths: {} s'.format(index, end - start))
    print()
    # mark this migration phase as done for update-background.py
    r_serv.sadd('ail:update_v1.5', 'metadata')

    ##
    # Key, Dynamic Update
    ##
    # paste_children
    # nb_seen_hash, base64_hash, binary_hash
    # paste_onion_external_links
    # misp_events, hive_cases
    ##

View file

@ -1,129 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
import datetime
sys.path.append(os.environ['AIL_BIN'])
from lib import ConfigLoader
def substract_date(date_from, date_to):
    """Return every date from date_from to date_to (inclusive) as YYYYMMDD strings."""
    start = datetime.date(int(date_from[0:4]), int(date_from[4:6]), int(date_from[6:8]))
    end = datetime.date(int(date_to[0:4]), int(date_to[4:6]), int(date_to[6:8]))
    nb_days = (end - start).days
    return [(start + datetime.timedelta(days=offset)).strftime('%Y%m%d')
            for offset in range(nb_days + 1)]
def get_date_epoch(date):
    """Convert a YYYYMMDD string to a Unix timestamp (local midnight)."""
    day = datetime.datetime(int(date[0:4]), int(date[4:6]), int(date[6:8]))
    return int(day.timestamp())
def get_domain_root_from_paste_childrens(item_father, domain):
    """Return the child item path of `item_father` that belongs to `domain`.

    Side effect: while scanning, rewrites any absolute child path
    (PASTES_FOLDER prefix) into a relative one inside the
    'paste_children' set. Returns '' when no child matches the domain.
    """
    children_key = 'paste_children:{}'.format(item_father)
    domain_root = ''
    for child_path in r_serv_metadata.smembers(children_key):
        # remove absolute_path
        if PASTES_FOLDER in child_path:
            r_serv_metadata.srem(children_key, child_path)
            child_path = child_path.replace(PASTES_FOLDER, '', 1)
            r_serv_metadata.sadd(children_key, child_path)
        if domain in child_path:
            domain_root = child_path
    return domain_root
if __name__ == '__main__':
    # One-shot v1.5 background migration for ARDB_Onion: purge the history
    # of onion domains that never came up, and convert per-date history
    # lists of up domains into the 'crawler_history_onion:<domain>:80'
    # sorted-set format (score = epoch of the crawl date).
    start_deb = time.time()

    config_loader = ConfigLoader.ConfigLoader()
    PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
    r_serv = config_loader.get_redis_conn("ARDB_DB")
    r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
    r_serv_tag = config_loader.get_redis_conn("ARDB_Tags")
    r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
    config_loader = None

    r_serv.set('ail:current_background_script', 'onions')
    r_serv.set('ail:current_background_script_stat', 0)

    ## Update Onion ##
    print('Updating ARDB_Onion ...')
    index = 0
    start = time.time()

    # clean down domain from db: walk every day since the feature first
    # shipped and delete history of domains that were never seen up
    date_from = '20180929'
    date_today = datetime.date.today().strftime("%Y%m%d")
    for date in substract_date(date_from, date_today):
        onion_down = r_serv_onion.smembers('onion_down:{}'.format(date))
        # print(onion_down)
        for onion_domain in onion_down:
            if not r_serv_onion.sismember('full_onion_up', onion_domain):
                # delete history
                all_onion_history = r_serv_onion.lrange('onion_history:{}'.format(onion_domain), 0 ,-1)
                if all_onion_history:
                    for date_history in all_onion_history:
                        # print('onion_history:{}:{}'.format(onion_domain, date_history))
                        r_serv_onion.delete('onion_history:{}:{}'.format(onion_domain, date_history))
                r_serv_onion.delete('onion_history:{}'.format(onion_domain))

    # stats
    total_domain = r_serv_onion.scard('full_onion_up')
    nb_updated = 0
    last_progress = 0

    # clean up domain: migrate the history of every domain that was up
    all_domain_up = r_serv_onion.smembers('full_onion_up')
    for onion_domain in all_domain_up:
        # delete history
        all_onion_history = r_serv_onion.lrange('onion_history:{}'.format(onion_domain), 0 ,-1)
        if all_onion_history:
            for date_history in all_onion_history:
                print('--------')
                print('onion_history:{}:{}'.format(onion_domain, date_history))
                # the first element of the per-date list is the father item
                item_father = r_serv_onion.lrange('onion_history:{}:{}'.format(onion_domain, date_history), 0, 0)
                print('item_father: {}'.format(item_father))
                try:
                    item_father = item_father[0]
                except IndexError:
                    # empty per-date history list: just drop it
                    r_serv_onion.delete('onion_history:{}:{}'.format(onion_domain, date_history))
                    continue
                # print(item_father)
                # delete old history
                r_serv_onion.delete('onion_history:{}:{}'.format(onion_domain, date_history))
                # create new history
                root_key = get_domain_root_from_paste_childrens(item_father, onion_domain)
                if root_key:
                    r_serv_onion.zadd(f'crawler_history_onion:{onion_domain}:80', {root_key: get_date_epoch(date_history)})
                    print('crawler_history_onion:{}:80 {} {}'.format(onion_domain, get_date_epoch(date_history), root_key))
                    # update service metadata: paste_parent
                    r_serv_onion.hset('onion_metadata:{}'.format(onion_domain), 'paste_parent', root_key)
            r_serv_onion.delete('onion_history:{}'.format(onion_domain))
        r_serv_onion.hset('onion_metadata:{}'.format(onion_domain), 'ports', '80')
        r_serv_onion.hdel('onion_metadata:{}'.format(onion_domain), 'last_seen')
        nb_updated += 1
        progress = int((nb_updated * 100) /total_domain)
        print('{}/{} updated {}%'.format(nb_updated, total_domain, progress))
        # update progress stats
        if progress != last_progress:
            r_serv.set('ail:current_background_script_stat', progress)
            last_progress = progress

    end = time.time()
    print('Updating ARDB_Onion Done => {} paths: {} s'.format(index, end - start))
    print()
    print('Done in {} s'.format(end - start_deb))
    # mark this migration phase as done for update-background.py
    r_serv.sadd('ail:update_v1.5', 'onions')

View file

@ -1,117 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
import datetime
from hashlib import sha256
sys.path.append(os.environ['AIL_BIN'])
from lib import ConfigLoader
def rreplace(s, old, new, occurrence):
    """Replace the last `occurrence` occurrences of `old` in `s` with `new`."""
    parts = s.rsplit(old, occurrence)
    return new.join(parts)
def substract_date(date_from, date_to):
    """Return every date from date_from to date_to (inclusive) as YYYYMMDD strings."""
    current = datetime.date(int(date_from[0:4]), int(date_from[4:6]), int(date_from[6:8]))
    last = datetime.date(int(date_to[0:4]), int(date_to[4:6]), int(date_to[6:8]))
    one_day = datetime.timedelta(days=1)
    dates = []
    while current <= last:
        dates.append(current.strftime('%Y%m%d'))
        current += one_day
    return dates
if __name__ == '__main__':
    # One-shot v1.5 background migration: move crawled screenshots from the
    # date-based directory layout into the sha256 content-addressed layout,
    # link them to their items in ARDB, and rename old '.pnghar.txt' HAR
    # files to '.json'.
    start_deb = time.time()

    config_loader = ConfigLoader.ConfigLoader()
    SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot"))
    NEW_SCREENSHOT_FOLDER = config_loader.get_files_directory('screenshot')
    PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
    r_serv = config_loader.get_redis_conn("ARDB_DB")
    r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
    r_serv_tag = config_loader.get_redis_conn("ARDB_Tags")
    r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
    config_loader = None

    r_serv.set('ail:current_background_script', 'crawled_screenshot')
    r_serv.set('ail:current_background_script_stat', 0)

    ## Update Onion ##
    print('Updating ARDB_Onion ...')
    index = 0
    start = time.time()

    # walk every crawl date since screenshots were introduced
    date_from = '20180801'
    date_today = datetime.date.today().strftime("%Y%m%d")
    list_date = substract_date(date_from, date_today)
    nb_done = 0
    last_progress = 0
    total_to_update = len(list_date)
    for date in list_date:
        screenshot_dir = os.path.join(SCREENSHOT_FOLDER, date[0:4], date[4:6], date[6:8])
        if os.path.isdir(screenshot_dir):
            print(screenshot_dir)
            for file in os.listdir(screenshot_dir):
                if file.endswith(".png"):
                    index += 1
                    # print(file)
                    img_path = os.path.join(screenshot_dir, file)
                    with open(img_path, 'br') as f:
                        image_content = f.read()
                    # content-addressed target path: sha256 split into
                    # 2-char directory levels, remainder as filename
                    hash = sha256(image_content).hexdigest()
                    img_dir_path = os.path.join(hash[0:2], hash[2:4], hash[4:6], hash[6:8], hash[8:10], hash[10:12])
                    filename_img = os.path.join(NEW_SCREENSHOT_FOLDER, img_dir_path, hash[12:] +'.png')
                    dirname = os.path.dirname(filename_img)
                    if not os.path.exists(dirname):
                        os.makedirs(dirname)
                    if not os.path.exists(filename_img):
                        os.rename(img_path, filename_img)
                    else:
                        # duplicate content already migrated: drop the old file
                        os.remove(img_path)
                    item = os.path.join('crawled', date[0:4], date[4:6], date[6:8], file[:-4])
                    # add item metadata
                    r_serv_metadata.hset('paste_metadata:{}'.format(item), 'screenshot', hash)
                    # add sha256 metadata
                    r_serv_onion.sadd('screenshot:{}'.format(hash), item)
                if file.endswith('.pnghar.txt'):
                    har_path = os.path.join(screenshot_dir, file)
                    new_file = rreplace(file, '.pnghar.txt', '.json', 1)
                    new_har_path = os.path.join(screenshot_dir, new_file)
                    os.rename(har_path, new_har_path)
        progress = int((nb_done * 100) /total_to_update)
        # update progress stats
        if progress != last_progress:
            r_serv.set('ail:current_background_script_stat', progress)
            print('{}/{} screenshot updated {}%'.format(nb_done, total_to_update, progress))
            last_progress = progress
        nb_done += 1

    r_serv.set('ail:current_background_script_stat', 100)

    end = time.time()
    print('Updating ARDB_Onion Done => {} paths: {} s'.format(index, end - start))
    print()
    print('Done in {} s'.format(end - start_deb))

    r_serv.set('ail:current_background_script_stat', 100)
    # mark this migration phase as done for update-background.py
    r_serv.sadd('ail:update_v1.5', 'crawled_screenshot')

    # all five v1.5 phases must have completed, otherwise flag the update
    # as failed so the UI asks the operator to relaunch it
    if r_serv.scard('ail:update_v1.5') != 5:
        r_serv.set('ail:update_error', 'Update v1.5 Failed, please relaunch the bin/update-background.py script')

View file

@ -1,135 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
import redis
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
from lib import ConfigLoader
if __name__ == '__main__':
    # One-shot v1.5 background migration for ARDB_Tags: move items out of
    # the global per-tag sets into per-date sets ('<tag>:<YYYYMMDD>'),
    # refresh tag first_seen/last_seen metadata and daily counters, then
    # flush the legacy "important pastes" DBs. Runs only after the 'onions'
    # and 'metadata' phases have completed.
    start_deb = time.time()

    config_loader = ConfigLoader.ConfigLoader()
    PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
    r_serv = config_loader.get_redis_conn("ARDB_DB")
    r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
    r_serv_tag = config_loader.get_redis_conn("ARDB_Tags")
    r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")

    # legacy "browse important pastes" databases (one per year), flushed below
    r_important_paste_2018 = redis.StrictRedis(
        host=config_loader.get_config_str("ARDB_Metadata", "host"),
        port=config_loader.get_config_int("ARDB_Metadata", "port"),
        db=2018,
        decode_responses=True)
    r_important_paste_2019 = redis.StrictRedis(
        host=config_loader.get_config_str("ARDB_Metadata", "host"),
        port=config_loader.get_config_int("ARDB_Metadata", "port"),
        db=2019,
        decode_responses=True)
    config_loader = None

    r_serv.set('ail:current_background_script', 'tags')
    r_serv.set('ail:current_background_script_stat', 0)

    if r_serv.sismember('ail:update_v1.5', 'onions') and r_serv.sismember('ail:update_v1.5', 'metadata'):
        print('Updating ARDB_Tags ...')
        index = 0
        nb_tags_to_update = 0
        nb_updated = 0
        last_progress = 0
        start = time.time()

        tags_list = r_serv_tag.smembers('list_tags')
        # create temp tags metadata: cache first_seen/last_seen per tag so
        # the item loop below avoids one hget per item
        tag_metadata = {}
        for tag in tags_list:
            tag_metadata[tag] = {}
            tag_metadata[tag]['first_seen'] = r_serv_tag.hget('tag_metadata:{}'.format(tag), 'first_seen')
            if tag_metadata[tag]['first_seen'] is None:
                tag_metadata[tag]['first_seen'] = 99999999
            else:
                tag_metadata[tag]['first_seen'] = int(tag_metadata[tag]['first_seen'])
            tag_metadata[tag]['last_seen'] = r_serv_tag.hget('tag_metadata:{}'.format(tag), 'last_seen')
            if tag_metadata[tag]['last_seen'] is None:
                tag_metadata[tag]['last_seen'] = 0
            else:
                tag_metadata[tag]['last_seen'] = int(tag_metadata[tag]['last_seen'])
            nb_tags_to_update += r_serv_tag.scard(tag)

        # avoid division by zero in the progress computation
        if nb_tags_to_update == 0:
            nb_tags_to_update = 1

        for tag in tags_list:
            all_item = r_serv_tag.smembers(tag)
            for item_path in all_item:
                splitted_item_path = item_path.split('/')
                # print(tag)
                # print(item_path)
                try:
                    # item paths end with .../YYYY/MM/DD/<name>
                    item_date = int( ''.join([splitted_item_path[-4], splitted_item_path[-3], splitted_item_path[-2]]) )
                except IndexError:
                    # malformed path: drop it from the tag set
                    r_serv_tag.srem(tag, item_path)
                    continue
                # remove absolute path
                new_path = item_path.replace(PASTES_FOLDER, '', 1)
                if new_path != item_path:
                    # save in queue absolute path to remove
                    r_serv_tag.sadd('maj:v1.5:absolute_path_to_rename', item_path)
                # update metadata first_seen
                if item_date < tag_metadata[tag]['first_seen']:
                    tag_metadata[tag]['first_seen'] = item_date
                    r_serv_tag.hset('tag_metadata:{}'.format(tag), 'first_seen', item_date)
                # update metadata last_seen
                if item_date > tag_metadata[tag]['last_seen']:
                    tag_metadata[tag]['last_seen'] = item_date
                    # re-check the DB value in case it moved since caching
                    last_seen_db = r_serv_tag.hget('tag_metadata:{}'.format(tag), 'last_seen')
                    if last_seen_db:
                        if item_date > int(last_seen_db):
                            r_serv_tag.hset('tag_metadata:{}'.format(tag), 'last_seen', item_date)
                    else:
                        # NOTE(review): stores None (missing DB value) back in
                        # the cache here — looks suspicious, preserved as-is
                        tag_metadata[tag]['last_seen'] = last_seen_db
                # move the item into the per-date tag set + daily counter
                r_serv_tag.sadd('{}:{}'.format(tag, item_date), new_path)
                r_serv_tag.hincrby('daily_tags:{}'.format(item_date), tag, 1)
                # clean db
                r_serv_tag.srem(tag, item_path)
                index = index + 1
                nb_updated += 1
                progress = int((nb_updated * 100) /nb_tags_to_update)
                print('{}/{} updated {}%'.format(nb_updated, nb_tags_to_update, progress))
                # update progress stats
                if progress != last_progress:
                    r_serv.set('ail:current_background_script_stat', progress)
                    last_progress = progress

        # flush browse importante pastes db
        try:
            r_important_paste_2018.flushdb()
        except Exception:
            pass
        try:
            r_important_paste_2019.flushdb()
        except Exception:
            pass

        end = time.time()
        print('Updating ARDB_Tags Done => {} paths: {} s'.format(index, end - start))

    # mark this migration phase as done for update-background.py
    r_serv.sadd('ail:update_v1.5', 'tags')

View file

@ -1,70 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
sys.path.append(os.environ['AIL_BIN'])
from lib import ConfigLoader
def tags_key_fusion(old_item_path_key, new_item_path_key):
    """Merge the tag set stored at *old_item_path_key* into *new_item_path_key*.

    Each member is copied to the new key and removed from the old one, so the
    old set is empty once the merge completes.
    """
    print('fusion:')
    print(old_item_path_key)
    print(new_item_path_key)
    tags_to_move = r_serv_metadata.smembers(old_item_path_key)
    for tag_value in tags_to_move:
        r_serv_metadata.sadd(new_item_path_key, tag_value)
        r_serv_metadata.srem(old_item_path_key, tag_value)
if __name__ == '__main__':
    # v1.5 background step: rename absolute-path tag keys ('tag:<abs path>')
    # to relative item ids, merging member-by-member when the destination
    # key already exists. Consumes 'maj:v1.5:absolute_path_to_rename'.
    start_deb = time.time()

    config_loader = ConfigLoader.ConfigLoader()
    PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
    r_serv = config_loader.get_redis_conn("ARDB_DB")
    r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
    r_serv_tag = config_loader.get_redis_conn("ARDB_Tags")
    config_loader = None

    # Only run once the main v1.5 'tags' update has finished.
    if r_serv.sismember('ail:update_v1.5', 'tags'):
        r_serv.set('ail:current_background_script', 'tags_background')
        r_serv.set('ail:current_background_script_stat', 0)
        print('Updating ARDB_Tags ...')
        start = time.time()

        # update item metadata tags
        tag_not_updated = True
        total_to_update = r_serv_tag.scard('maj:v1.5:absolute_path_to_rename')
        nb_updated = 0
        last_progress = 0
        if total_to_update > 0:
            while tag_not_updated:
                # pick any pending absolute path; the set shrinks each iteration
                item_path = r_serv_tag.srandmember('maj:v1.5:absolute_path_to_rename')
                old_tag_item_key = 'tag:{}'.format(item_path)
                new_item_path = item_path.replace(PASTES_FOLDER, '', 1)
                new_tag_item_key = 'tag:{}'.format(new_item_path)
                # RENAMENX returns 0 when the target key already exists:
                # fall back to a member-by-member merge.
                res = r_serv_metadata.renamenx(old_tag_item_key, new_tag_item_key)
                if res == 0:
                    tags_key_fusion(old_tag_item_key, new_tag_item_key)
                nb_updated += 1
                r_serv_tag.srem('maj:v1.5:absolute_path_to_rename', item_path)
                if r_serv_tag.scard('maj:v1.5:absolute_path_to_rename') == 0:
                    tag_not_updated = False
                else:
                    progress = int((nb_updated * 100) / total_to_update)
                    print('{}/{} Tags updated {}%'.format(nb_updated, total_to_update, progress))
                    # update progress stats
                    if progress != last_progress:
                        r_serv.set('ail:current_background_script_stat', progress)
                        last_progress = progress

        end = time.time()
        print('Updating ARDB_Tags Done: {} s'.format(end - start))
        # mark this background step as completed
        r_serv.sadd('ail:update_v1.5', 'tags_background')

View file

@ -1,54 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
import datetime
sys.path.append(os.environ['AIL_BIN'])
from lib import ConfigLoader
if __name__ == '__main__':
    # v1.5 step: strip the absolute PASTES_FOLDER prefix from every entry of
    # the onion crawler queues, then queue the v1.5 background updates and
    # record the new version.
    start_deb = time.time()

    config_loader = ConfigLoader.ConfigLoader()
    PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
    r_serv = config_loader.get_redis_conn("ARDB_DB")
    r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
    config_loader = None

    print()
    print('Updating ARDB_Onion ...')
    index = 0
    start = time.time()

    # update crawler queue
    for elem in r_serv_onion.smembers('onion_crawler_queue'):
        if PASTES_FOLDER in elem:
            r_serv_onion.srem('onion_crawler_queue', elem)
            r_serv_onion.sadd('onion_crawler_queue', elem.replace(PASTES_FOLDER, '', 1))
            index = index + 1
    # BUGFIX: this loop previously removed/re-added entries in
    # 'onion_crawler_queue' instead of 'onion_crawler_priority_queue',
    # leaving absolute paths in the priority queue and polluting the
    # normal queue with priority entries.
    for elem in r_serv_onion.smembers('onion_crawler_priority_queue'):
        if PASTES_FOLDER in elem:
            r_serv_onion.srem('onion_crawler_priority_queue', elem)
            r_serv_onion.sadd('onion_crawler_priority_queue', elem.replace(PASTES_FOLDER, '', 1))
            index = index + 1

    end = time.time()
    print('Updating ARDB_Onion Done => {} paths: {} s'.format(index, end - start))
    print()

    # Add background update
    r_serv.sadd('ail:to_update', 'v1.5')
    # Set current ail version
    r_serv.set('ail:version', 'v1.5')
    # Record the date the v1.5 update was applied
    r_serv.set('ail:update_date_v1.5', datetime.datetime.now().strftime("%Y%m%d"))

    print('Done in {} s'.format(end - start_deb))

View file

@ -1,60 +0,0 @@
#!/bin/bash

# v1.5 update wrapper: stop AIL, update Redis and dependencies, run the
# v1.5 DB migration, then shut ARDB back down.

[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
# BUGFIX: the error message previously named AIL_ARDB while testing AIL_BIN
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -k &
wait
echo ""

bash -c "bash ${AIL_HOME}/update/bin/Update_Redis.sh"
#bash -c "bash ${AIL_HOME}/update/bin/Update_ARDB.sh"

echo ""
echo -e $GREEN"Update DomainClassifier"$DEFAULT
echo ""
# BUGFIX: URL quoted -- the unquoted '&' in '...client&egg=...' backgrounded
# the pip command and dropped the egg fragment
pip3 install --upgrade --force-reinstall "git+https://github.com/D4-project/BGP-Ranking.git/@28013297efb039d2ebbce96ee2d89493f6ae56b0#subdirectory=client&egg=pybgpranking"
pip3 install --upgrade --force-reinstall git+https://github.com/adulau/DomainClassifier.git
wait

echo ""
echo ""
echo -e $GREEN"Update Web thirdparty"$DEFAULT
echo ""
bash -c "(cd ${AIL_FLASK}; ./update_thirdparty.sh &)"
wait
echo ""

# BUGFIX: missing '/' between ${AIL_BIN} and LAUNCH.sh
bash ${AIL_BIN}/LAUNCH.sh -lav &
wait
echo ""
echo ""

echo -e $GREEN"Fixing ARDB ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v1.5/Update.py &
wait
echo ""
echo ""
echo ""

echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks &
wait
echo ""

exit 0

View file

@ -1,25 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
import datetime
sys.path.append(os.environ['AIL_BIN'])
from lib import ConfigLoader
if __name__ == '__main__':
    start_deb = time.time()

    # Open the ARDB_DB connection, then drop the loader reference.
    loader = ConfigLoader.ConfigLoader()
    r_serv = loader.get_redis_conn("ARDB_DB")
    loader = None

    today = datetime.datetime.now().strftime("%Y%m%d")
    # Set current ail version
    r_serv.set('ail:version', 'v1.7')
    # Record the date the v1.7 update was applied
    r_serv.set('ail:update_date_v1.7', today)

View file

@ -1,65 +0,0 @@
#!/bin/bash

# v1.7 update wrapper: stop AIL, build/install pgpdump, install new
# requirements, run the v1.7 migration, then shut ARDB back down.

[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
# BUGFIX: the error message previously named AIL_ARDB while testing AIL_BIN
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks &
wait
echo ""

# BUGFIX: banner said "Update DomainClassifier" but this section builds pgpdump
echo -e $GREEN"Installing pgpdump"$DEFAULT
echo ""
cd $AIL_HOME
git clone https://github.com/kazu-yamamoto/pgpdump.git
cd pgpdump
./configure
make
sudo make install
wait
echo ""
echo ""

echo -e $GREEN"Update requirement"$DEFAULT
echo ""
pip3 install beautifulsoup4

# BUGFIX: missing '/' between ${AIL_BIN} and LAUNCH.sh
bash ${AIL_BIN}/LAUNCH.sh -lav &
wait
echo ""
echo ""

echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v1.7/Update.py &
wait
echo ""
echo ""
echo ""

echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks &
wait
echo ""

echo -e $GREEN"Update thirdparty ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -t &
wait
echo ""

exit 0

View file

@ -1,33 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
import datetime
sys.path.append(os.environ['AIL_BIN'])
from lib import ConfigLoader
if __name__ == '__main__':
    start_deb = time.time()

    loader = ConfigLoader.ConfigLoader()
    r_serv = loader.get_redis_conn("ARDB_DB")
    loader = None

    # Set current ail version
    r_serv.set('ail:version', 'v2.0')

    # Migrate the old per-version date keys ('ail:update_date_vX') into the
    # single 'ail:update_date' hash (new update_date format).
    for version_tag in ('v1.5', 'v1.7'):
        old_key = 'ail:update_date_{}'.format(version_tag)
        if r_serv.exists(old_key):
            r_serv.hset('ail:update_date', version_tag, r_serv.get(old_key))
            r_serv.delete(old_key)

    # Record the date the v2.0 update was applied
    r_serv.hset('ail:update_date', 'v2.0', datetime.datetime.now().strftime("%Y%m%d"))

View file

@ -1,75 +0,0 @@
#!/bin/bash

# v2.0 update wrapper: stop AIL, generate the self-signed certificate,
# install the auth dependencies, run the v2.0 migration, create the
# default user, then shut ARDB back down.

[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
# BUGFIX: the error message previously named AIL_ARDB while testing AIL_BIN
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait
echo ""

echo -e $GREEN"Create Self-Signed Certificate"$DEFAULT
echo ""
pushd ${AIL_BIN}/helper/gen_cert
bash gen_root.sh
wait
bash gen_cert.sh
wait
popd

cp ${AIL_BIN}/helper/gen_cert/server.crt ${AIL_FLASK}/server.crt
cp ${AIL_BIN}/helper/gen_cert/server.key ${AIL_FLASK}/server.key

echo ""
echo -e $GREEN"Update requirement"$DEFAULT
echo ""
pip3 install flask-login
wait
echo ""
pip3 install bcrypt
wait
echo ""
echo ""

bash ${AIL_BIN}/LAUNCH.sh -lav &
wait
echo ""
echo ""

echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v2.0/Update.py
wait
echo ""
echo ""
echo ""

echo -e $GREEN"Update thirdparty ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -t
wait
echo ""
echo ""

echo -e $GREEN"Create Default User"$DEFAULT
echo ""
# BUGFIX: missing '/' between ${AIL_FLASK} and the script name (all other
# ${AIL_FLASK} uses in this script include the separator)
python3 ${AIL_FLASK}/create_default_user.py
echo ""

echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

exit 0

View file

@ -1,118 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
import datetime
sys.path.append(os.environ['AIL_BIN'])
from packages import Term
from lib import ConfigLoader
def rreplace(s, old, new, occurrence):
    """Return *s* with the last *occurrence* occurrences of *old* replaced by *new*.

    Works from the right-hand end of the string (rsplit), unlike str.replace.
    """
    return new.join(s.rsplit(old, occurrence))
def get_item_id(full_path):
    """Convert an absolute paste path into an item id relative to PASTES_FOLDER."""
    relative_id = full_path.replace(PASTES_FOLDER, '', 1)
    return relative_id
def get_item_date(id_item):
    """Extract the YYYYMMDD date embedded in an item id (.../YYYY/MM/DD/<name>)."""
    year, month, day = id_item.split('/')[-4:-1]
    return year + month + day
if __name__ == '__main__':
    start_deb = time.time()

    loader = ConfigLoader.ConfigLoader()
    PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], loader.get_config_str("Directories", "pastes")) + '/'
    PASTES_FOLDER = os.path.join(os.path.realpath(PASTES_FOLDER), '')
    r_serv = loader.get_redis_conn("ARDB_DB")
    r_serv_term_stats = loader.get_redis_conn("ARDB_Trending")
    r_serv_termfreq = loader.get_redis_conn("ARDB_TermFreq")
    loader = None

    # Term statistics are rebuilt from scratch in v2.2.
    r_serv_term_stats.flushdb()

    # The tracked-term (regex / token / set) conversion code that used to
    # live here is disabled. Checkout the v2.2 branch if you need it.

    r_serv_termfreq.flushdb()

    # Set current ail version
    r_serv.set('ail:version', 'v2.2')
    # Record the date the v2.2 update was applied
    r_serv.hset('ail:update_date', 'v2.2', datetime.datetime.now().strftime("%Y%m%d"))

View file

@ -1,39 +0,0 @@
#!/bin/bash

# v2.2 update wrapper: stop AIL, start the DBs, run the v2.2 migration,
# then shut ARDB back down.

[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
# BUGFIX: the error message previously named AIL_ARDB while testing AIL_BIN
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

bash ${AIL_BIN}/LAUNCH.sh -lav &
wait
echo ""
echo ""

echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v2.2/Update.py
wait
echo ""
echo ""
echo ""

echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

exit 0

View file

@ -1,35 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
import datetime
sys.path.append(os.environ['AIL_BIN'])
from lib import ConfigLoader
new_version = 'v2.5'
if __name__ == '__main__':
    start_deb = time.time()

    loader = ConfigLoader.ConfigLoader()
    r_serv = loader.get_redis_conn("ARDB_DB")
    loader = None

    # Register the new roles with their privilege rank (lower = more privileged).
    new_roles = (('user', 3), ('user_no_api', 4), ('read_only', 5))
    for role_name, role_rank in new_roles:
        r_serv.zadd('ail:all_role', {role_name: role_rank})

    # Grant every existing user the new (less privileged) roles.
    for user in r_serv.hkeys('user:all'):
        for role_name, _ in new_roles:
            r_serv.sadd('user_role:{}'.format(role_name), user)

    # Set current ail version
    r_serv.set('ail:version', new_version)
    # Record the date this update was applied
    r_serv.hset('ail:update_date', new_version, datetime.datetime.now().strftime("%Y%m%d"))

View file

@ -1,39 +0,0 @@
#!/bin/bash

# v2.5 update wrapper: stop AIL, start the DBs, run the v2.5 migration,
# then shut ARDB back down.

[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
# BUGFIX: the error message previously named AIL_ARDB while testing AIL_BIN
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

bash ${AIL_BIN}/LAUNCH.sh -lav &
wait
echo ""
echo ""

echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v2.5/Update.py
wait
echo ""
echo ""
echo ""

echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

exit 0

View file

@ -1,27 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
import datetime
sys.path.append(os.environ['AIL_BIN'])
from lib import ConfigLoader
new_version = 'v2.6'
if __name__ == '__main__':
    start_deb = time.time()

    loader = ConfigLoader.ConfigLoader()
    r_serv = loader.get_redis_conn("ARDB_DB")
    loader = None

    # Queue the v2.6 background update.
    r_serv.sadd('ail:to_update', new_version)
    # Set current ail version
    r_serv.set('ail:version', new_version)
    # Record the date this update was applied
    r_serv.hset('ail:update_date', new_version, datetime.datetime.now().strftime("%Y%m%d"))

View file

@ -1,39 +0,0 @@
#!/bin/bash

# v2.6 update wrapper: stop AIL, start the DBs, run the v2.6 migration,
# then shut ARDB back down.

[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
# BUGFIX: the error message previously named AIL_ARDB while testing AIL_BIN
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

bash ${AIL_BIN}/LAUNCH.sh -lav &
wait
echo ""
echo ""

echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v2.6/Update.py
wait
echo ""
echo ""
echo ""

echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

exit 0

View file

@ -1,90 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
from pyfaup.faup import Faup
sys.path.append(os.environ['AIL_BIN'])
from lib import ConfigLoader
def get_domain(item_id):
    """Extract the domain from a crawled item id.

    Crawled item filenames are '<domain><36-char uuid>': take the path
    basename and strip the trailing 36 characters.
    """
    filename = item_id.split('/')[-1]
    return filename[:-36]
def get_all_item(s_sha256):
    """Return every item id referencing the screenshot hash *s_sha256*."""
    screenshot_key = f'screenshot:{s_sha256}'
    return r_serv_onion.smembers(screenshot_key)
def sanitize_domain(domain):
    """Normalize *domain* with faup: registrable domain, lowercased.

    faup may return the domain as bytes or str depending on version, so a
    failed decode is tolerated.
    """
    faup.decode(domain)
    domain_sanitized = faup.get()
    domain_sanitized = domain_sanitized['domain']
    # BUGFIX: narrowed from a bare `except:` that also hid unrelated errors
    # (including SystemExit/KeyboardInterrupt). AttributeError: value is
    # already str; UnicodeDecodeError: keep the raw bytes as before.
    try:
        domain_sanitized = domain_sanitized.decode()
    except (AttributeError, UnicodeDecodeError):
        pass
    return domain_sanitized.lower()
def update_db(s_sha256):
    # Link a screenshot hash to the domain of every item that references it;
    # screenshots referenced by no item are recorded as broken.
    screenshot_items = get_all_item(s_sha256)
    if screenshot_items:
        for item_id in screenshot_items:
            item_id = item_id.replace(PASTES_FOLDER+'/', '', 1) # remove root path
            domain = get_domain(item_id)
            domain_sanitized = sanitize_domain(domain)
            if domain != domain_sanitized:
                # keep a trace of domains whose stored form was not normalized
                r_serv_onion.sadd('incorrect_domain', domain)
                domain = domain_sanitized
            # bidirectional domain <-> screenshot mapping
            r_serv_onion.sadd('domain_screenshot:{}'.format(domain), s_sha256)
            r_serv_onion.sadd('screenshot_domain:{}'.format(s_sha256), domain)
    else:
        # NOTE(review): 'pass' looks redundant before the sadd below --
        # confirm the original intent was to record the hash, not to no-op.
        pass
        # broken screenshot
        r_serv_onion.sadd('broken_screenshot', s_sha256)
if __name__ == '__main__':
    # v2.6 background update: walk the screenshot store and (re)build the
    # domain <-> screenshot mappings in ARDB_Onion.
    start_deb = time.time()

    faup = Faup()
    config_loader = ConfigLoader.ConfigLoader()
    PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes"))
    SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot"), 'screenshot')
    r_serv_db = config_loader.get_redis_conn("ARDB_DB")
    r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
    config_loader = None

    # Expose progress to the web UI.
    r_serv_db.set('ail:update_in_progress', 'v2.6')
    r_serv_db.set('ail:current_background_update', 'v2.6')
    r_serv_db.set('ail:current_background_script_stat', 20)
    r_serv_db.set('ail:current_background_script', 'screenshot update')

    nb = 0
    if os.path.isdir(SCREENSHOT_FOLDER):
        for root, dirs, files in os.walk(SCREENSHOT_FOLDER, topdown=False):
            for name in files:
                nb = nb + 1
                # Screenshots are stored as nested dirs named after their
                # sha256; rebuild the hash from the relative .png path.
                screenshot_sha256 = os.path.join(root, name)
                screenshot_sha256 = screenshot_sha256[:-4] # remove .png
                screenshot_sha256 = screenshot_sha256.replace(SCREENSHOT_FOLDER, '', 1)
                screenshot_sha256 = screenshot_sha256.replace('/', '')
                update_db(screenshot_sha256)
                # periodic progress message (avoid one redis write per file)
                if nb % 1000 == 0:
                    r_serv_db.set('ail:current_background_script', 'screenshot updated: {}'.format(nb))

    r_serv_db.set('ail:current_background_script_stat', 100)
    end = time.time()
    print('ALL screenshot updated: {} in {} s'.format(nb, end - start_deb))

View file

@ -1,37 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
import datetime
sys.path.append(os.environ['AIL_BIN'])
from lib import ConfigLoader
new_version = 'v2.7'
if __name__ == '__main__':
    start_deb = time.time()

    loader = ConfigLoader.ConfigLoader()
    r_serv = loader.get_redis_conn("ARDB_DB")
    r_serv_tags = loader.get_redis_conn("ARDB_Tags")
    r_serv_onion = loader.get_redis_conn("ARDB_Onion")
    loader = None

    # Queue the v2.7 background update.
    r_serv.sadd('ail:to_update', new_version)

    #### Update tags ####
    # Copy the tag list / crawled-domain sets used by the background script.
    r_serv_tags.sunionstore('list_tags:item', 'list_tags', [])
    r_serv_onion.sunionstore('domain_update_v2.7', 'full_onion_up', [])
    r_serv_onion.delete('incorrect_domain')
    # Flag requesting deletion of the deprecated per-day domain tag keys.
    r_serv.set('ail:update_v2.7:deletetagrange', 1)
    #### ####

    # Set current ail version
    r_serv.set('ail:version', new_version)
    # Record the date this update was applied
    r_serv.hset('ail:update_date', new_version, datetime.datetime.now().strftime("%Y%m%d"))

View file

@ -1,39 +0,0 @@
#!/bin/bash

# v2.7 update wrapper: stop AIL, start the DBs, run the v2.7 migration,
# then shut ARDB back down.

[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
# BUGFIX: the error message previously named AIL_ARDB while testing AIL_BIN
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

bash ${AIL_BIN}/LAUNCH.sh -lav &
wait
echo ""
echo ""

echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v2.7/Update.py
wait
echo ""
echo ""
echo ""

echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

exit 0

View file

@ -1,127 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
from pyfaup.faup import Faup
sys.path.append(os.environ['AIL_BIN'])
from packages import Date
from lib import ConfigLoader
def sanitize_domain(domain):
    """Normalize *domain* with faup: registrable domain, lowercased.

    faup may return the domain as bytes or str depending on version, so a
    failed decode is tolerated.
    """
    faup.decode(domain)
    domain_sanitized = faup.get()
    domain_sanitized = domain_sanitized['domain']
    # BUGFIX: narrowed from a bare `except:` that also hid unrelated errors
    # (including SystemExit/KeyboardInterrupt). AttributeError: value is
    # already str; UnicodeDecodeError: keep the raw bytes as before.
    try:
        domain_sanitized = domain_sanitized.decode()
    except (AttributeError, UnicodeDecodeError):
        pass
    return domain_sanitized.lower()
def get_all_obj_tags(obj_type):
    """Return all tags registered for *obj_type* objects as a list."""
    tags_key = f'list_tags:{obj_type}'
    return list(r_serv_tags.smembers(tags_key))
def add_global_tag(tag, object_type=None):
    """Register *tag* in the global tag list, and per object type when given."""
    r_serv_tags.sadd('list_tags', tag)
    if not object_type:
        return
    r_serv_tags.sadd('list_tags:{}'.format(object_type), tag)
def get_obj_tag(object_id):
    """Return the tags of *object_id* as a list (empty when untagged)."""
    tags = r_serv_metadata.smembers('tag:{}'.format(object_id))
    return list(tags) if tags else []
def delete_domain_tag_daterange():
    # Remove the deprecated per-day domain tag keys ('domain:<tag>:<date>')
    # for every date from 2019-10-08 up to today, then clear the flag that
    # requested this cleanup.
    all_domains_tags = get_all_obj_tags('domain')
    nb_updated = 0
    nb_to_update = len(all_domains_tags)
    if nb_to_update == 0:
        nb_to_update = 1  # avoid division by zero in update_progress()
    refresh_time = time.time()
    l_dates = Date.substract_date('20191008', Date.get_today_date_str())
    for tag in all_domains_tags:
        for date_day in l_dates:
            r_serv_tags.delete('domain:{}:{}'.format(tag, date_day))
        nb_updated += 1
        # throttled progress reporting (at most every 10s)
        refresh_time = update_progress(refresh_time, nb_updated, nb_to_update)
    r_serv_db.delete('ail:update_v2.7:deletetagrange')
def update_domain_tags(domain):
    """Re-index the tags of *domain*, dropping deprecated submission tags."""
    sanitized = sanitize_domain(domain)
    if domain != sanitized:
        # keep a trace of domains whose stored form was not normalized
        r_serv_onion.sadd('incorrect_domain', domain)
        domain = sanitized
    for tag in get_obj_tag(domain):
        # delete incorrect tags
        if tag in ('infoleak:submission="crawler"', 'infoleak:submission="manual"'):
            r_serv_metadata.srem('tag:{}'.format(domain), tag)
        else:
            add_global_tag(tag, object_type='domain')
            r_serv_tags.sadd('domain:{}'.format(tag), domain)
def update_progress(refresh_time, nb_updated, nb_elem_to_update):
    """Report progress at most once every 10s; return the (possibly new) timestamp."""
    if time.time() - refresh_time <= 10:
        return refresh_time
    progress = int((nb_updated * 100) / nb_elem_to_update)
    print('{}/{} updated {}%'.format(nb_updated, nb_elem_to_update, progress))
    r_serv_db.set('ail:current_background_script_stat', progress)
    return time.time()
def update_db():
    # Re-index every crawled domain's tags, then (when flagged) delete the
    # deprecated per-day domain tag keys and re-sort the crawled-domain sets.
    nb_updated = 0
    nb_to_update = r_serv_onion.scard('domain_update_v2.7')
    refresh_time = time.time()
    r_serv_db.set('ail:current_background_script_stat', 0)
    r_serv_db.set('ail:current_background_script', 'domain tags update')
    # spop consumes the migration set, so the update is resumable
    domain = r_serv_onion.spop('domain_update_v2.7')
    while domain is not None:
        update_domain_tags(domain)
        nb_updated += 1
        refresh_time = update_progress(refresh_time, nb_updated, nb_to_update)
        domain = r_serv_onion.spop('domain_update_v2.7')
    # flag set by update/v2.7/Update.py when the cleanup is requested
    if r_serv_db.exists('ail:update_v2.7:deletetagrange'):
        r_serv_db.set('ail:current_background_script_stat', 0)
        r_serv_db.set('ail:current_background_script', 'tags: remove deprecated keys')
        delete_domain_tag_daterange()
    # sort all crawled domain
    r_serv_onion.sort('full_onion_up', alpha=True)
    r_serv_onion.sort('full_regular_up', alpha=True)
if __name__ == '__main__':
    # v2.7 background update entry point: re-index all domain tags.
    start_deb = time.time()

    faup = Faup()
    config_loader = ConfigLoader.ConfigLoader()
    r_serv_db = config_loader.get_redis_conn("ARDB_DB")
    r_serv_tags = config_loader.get_redis_conn("ARDB_Tags")
    r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
    r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
    config_loader = None

    update_version = 'v2.7'

    # Expose progress to the web UI.
    r_serv_db.set('ail:update_in_progress', update_version)
    r_serv_db.set('ail:current_background_update', update_version)
    r_serv_db.set('ail:current_background_script_stat', 0)
    r_serv_db.set('ail:current_background_script', 'tags update')

    update_db()

    r_serv_db.set('ail:current_background_script_stat', 100)
    end = time.time()
    print('ALL domains tags updated in {} s'.format(end - start_deb))

View file

@ -1,43 +0,0 @@
#!/bin/bash

# v3.0 update wrapper: stop AIL, start the DBs, run the v3.0 migration,
# update thirdparty assets, then shut ARDB back down.

[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
# BUGFIX: the error message previously named AIL_ARDB while testing AIL_BIN
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

bash ${AIL_BIN}/LAUNCH.sh -lav &
wait
echo ""
echo ""

echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v3.0/Update.py
wait
echo ""
echo ""
echo ""

echo -e $GREEN"Update thirdparty ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -t
echo ""

echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

exit 0

View file

@ -1,22 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
sys.path.append(os.environ['AIL_HOME'])
##################################
# Import Project packages
##################################
from update.bin.old_ail_updater import AIL_Updater
class Updater(AIL_Updater):
    """Default v3.1.1 updater: adds no migration steps of its own.

    All behavior comes from AIL_Updater (presumably version bookkeeping —
    confirm against update/bin/old_ail_updater.py).
    """
    def __init__(self, version):
        # Delegate everything to the generic AIL_Updater.
        super(Updater, self).__init__(version)
if __name__ == '__main__':
    # Apply the (no-op) v3.1.1 update: bump the stored version via AIL_Updater.
    updater = Updater('v3.1.1')
    updater.run_update()

View file

@ -1,52 +0,0 @@
#!/bin/bash

# v3.1.1 update wrapper: stop AIL, start the DBs, sync submodules, install
# YARA, run the v3.1.1 migration, then shut ARDB back down.

[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
# BUGFIX: the error message previously named AIL_ARDB while testing AIL_BIN
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

bash ${AIL_BIN}/LAUNCH.sh -ldbv &
wait
echo ""

# SUBMODULES #
git submodule init
git submodule update

echo -e $GREEN"Installing YARA ..."$DEFAULT
pip3 install yara-python
bash ${AIL_BIN}/LAUNCH.sh -t

# SUBMODULES #
# NOTE: intentionally repeated -- LAUNCH.sh -t may touch the working tree;
# re-syncing is idempotent and cheap.
git submodule init
git submodule update

echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v3.1.1/Update.py
wait
echo ""
echo ""
echo ""

echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

exit 0

View file

@ -1,23 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys

sys.path.append(os.environ['AIL_HOME'])
##################################
# Import Project packages
##################################
from update.bin.old_ail_updater import AIL_Updater


class Updater(AIL_Updater):
    """No-op updater: records the new AIL version via the base class."""

    def __init__(self, version):
        # No migration logic for this release; delegate everything.
        super().__init__(version)


if __name__ == '__main__':
    updater = Updater('v3.1')
    updater.run_update()

View file

@ -1,46 +0,0 @@
#!/bin/bash

# AIL v3.1 update: stop AIL, verify ARDB, install the crawler dependencies
# (scrapy / scrapy-splash), run the migration, refresh thirdparty assets.

[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
# FIX: message previously referenced AIL_ARDB instead of AIL_BIN
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

# Launch ARDB and wait until it is ready before migrating
bash ${AIL_BIN}/LAUNCH.sh -lav &
wait
echo ""

pip3 install scrapy
pip3 install scrapy-splash

echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v3.1/Update.py
wait
echo ""
echo ""
echo ""

echo -e $GREEN"Update thirdparty ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -t
echo ""

echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

exit 0

View file

@ -1,22 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys

sys.path.append(os.environ['AIL_HOME'])
##################################
# Import Project packages
##################################
from update.bin.old_ail_updater import AIL_Updater


class Updater(AIL_Updater):
    """No-op updater: records the new AIL version via the base class."""

    def __init__(self, version):
        # No migration logic for this release; delegate everything.
        super().__init__(version)


if __name__ == '__main__':
    updater = Updater('v3.2')
    updater.run_update()

View file

@ -1,52 +0,0 @@
#!/bin/bash

# AIL v3.2 update: stop AIL, start/verify the databases, refresh submodules
# and thirdparty assets, install YARA, then run the v3.2 DB migration.

[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
# FIX: message previously referenced AIL_ARDB instead of AIL_BIN
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

# Launch the DBs and wait until they are ready before migrating
bash ${AIL_BIN}/LAUNCH.sh -ldbv &
wait
echo ""

# SUBMODULES #
git submodule init
git submodule update

echo -e $GREEN"Installing YARA ..."$DEFAULT
pip3 install yara-python
bash ${AIL_BIN}/LAUNCH.sh -t

# SUBMODULES #
git submodule init
git submodule update

echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v3.2/Update.py
wait
echo ""
echo ""
echo ""

echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

exit 0

View file

@ -1,22 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys

sys.path.append(os.environ['AIL_HOME'])
##################################
# Import Project packages
##################################
from update.bin.old_ail_updater import AIL_Updater


class Updater(AIL_Updater):
    """No-op updater: records the new AIL version via the base class."""

    def __init__(self, version):
        # No migration logic for this release; delegate everything.
        super().__init__(version)


if __name__ == '__main__':
    updater = Updater('v3.3')
    updater.run_update()

View file

@ -1,54 +0,0 @@
#!/bin/bash

# AIL v3.3 update: stop AIL, start/verify the databases, refresh submodules,
# install html2text, then run the v3.3 DB migration.

[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
# FIX: message previously referenced AIL_ARDB instead of AIL_BIN
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

# Launch the DBs and wait until they are ready before migrating
bash ${AIL_BIN}/LAUNCH.sh -ldbv &
wait
echo ""

# SUBMODULES #
git submodule update

# KVROCKS install kept disabled for this release:
# echo ""
# echo -e $GREEN"installing KVORCKS ..."$DEFAULT
# cd ${AIL_HOME}
# test ! -d kvrocks/ && git clone https://github.com/bitleak/kvrocks.git
# pushd kvrocks/
# make -j4
# popd

echo -e $GREEN"Installing html2text ..."$DEFAULT
pip3 install html2text

echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v3.3/Update.py
wait
echo ""
echo ""
echo ""

echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

exit 0

View file

@ -1,34 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
sys.path.append(os.environ['AIL_HOME'])
##################################
# Import Project packages
##################################
from update.bin.old_ail_updater import AIL_Updater

class Updater(AIL_Updater):
    """v3.4 updater: queues every crawled-up domain for the domain-language
    background update and initialises its progress counters."""
    def __init__(self, version):
        super(Updater, self).__init__(version)
        # Extra connection to the Onion DB holding the crawled-domain sets.
        self.r_serv_onion = self.config.get_redis_conn("ARDB_Onion")
    def update(self):
        """
        Queue all up domains (onion + regular) into 'domain_update_v3.4'
        for the v3.4 background script, and register the background update.
        """
        # Union of the onion and regular "up" domain sets becomes the work queue.
        self.r_serv_onion.sunionstore('domain_update_v3.4', 'full_onion_up', 'full_regular_up')
        # Progress counters consumed by the background updater / UI.
        self.r_serv.set('update:nb_elem_to_convert', self.r_serv_onion.scard('domain_update_v3.4'))
        self.r_serv.set('update:nb_elem_converted', 0)
        # Add background update
        self.r_serv.sadd('ail:to_update', self.version)

if __name__ == '__main__':
    updater = Updater('v3.4')
    updater.run_update()

View file

@ -1,54 +0,0 @@
#!/bin/bash

# AIL v3.4 update: stop AIL, refresh submodules, install pycld3 (language
# detection), then run the v3.4 DB migration.

[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
# FIX: message previously referenced AIL_ARDB instead of AIL_BIN
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

# DB verification kept disabled for this release:
# bash ${AIL_BIN}/LAUNCH.sh -ldbv &
# wait
# echo ""

# SUBMODULES #
git submodule update

# KVROCKS install kept disabled for this release:
# echo ""
# echo -e $GREEN"installing KVORCKS ..."$DEFAULT
# cd ${AIL_HOME}
# test ! -d kvrocks/ && git clone https://github.com/bitleak/kvrocks.git
# pushd kvrocks/
# make -j4
# popd

# FIX: message previously said "Installing html2text" while installing pycld3
echo -e $GREEN"Installing pycld3 ..."$DEFAULT
pip3 install pycld3

echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v3.4/Update.py
wait
echo ""
echo ""
echo ""

echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

exit 0

View file

@ -1,121 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib import ConfigLoader
from lib.objects.Items import Item
def get_domain_type(domain_name):
    """Return 'onion' for .onion domains, 'regular' for everything else."""
    return 'onion' if str(domain_name).endswith('.onion') else 'regular'
def add_domain_language(domain_name, language):
    """Register *language* for *domain_name* in the Onion DB.

    Updates the global language sets, the per-type language index and the
    per-domain language set (module-global ``r_serv_onion``).
    """
    # Keep only the primary language subtag ('en-US' -> 'en').
    language = language.split('-')[0]
    domain_type = get_domain_type(domain_name)
    r_serv_onion.sadd('all_domains_languages', language)
    r_serv_onion.sadd(f'all_domains_languages:{domain_type}', language)
    r_serv_onion.sadd(f'language:domains:{domain_type}:{language}', domain_name)
    r_serv_onion.sadd(f'domain:language:{domain_name}', language)
def add_domain_languages_by_item_id(domain_name, item_id):
    """Register every language detected in item *item_id* for *domain_name*."""
    item = Item(item_id)
    for lang in item.get_languages():
        # lang is a detection result exposing a .language code (e.g. 'en-US')
        add_domain_language(domain_name, lang.language)
def update_update_stats():
    """Print and persist background-update progress as a 0-100 percentage.

    Reads the module-global ``r_serv_db`` / ``nb_elem_to_update`` set up in
    the __main__ block below.
    """
    nb_updated = int(r_serv_db.get('update:nb_elem_converted'))
    progress = int((nb_updated * 100) / nb_elem_to_update)
    print(f'{nb_updated}/{nb_elem_to_update} updated {progress}%')
    # Consumed by the web UI to display background-script progress.
    r_serv_db.set('ail:current_background_script_stat', progress)
def update_domain_language(domain_obj, item_id):
    """Add the languages detected in *item_id* to the domain's language sets.

    ``domain_obj`` may be a Domain-like object exposing ``get_domain_name()``
    or a plain domain-name string (the __main__ loop below passes a string).
    """
    # FIX: the __main__ loop passes a str, which has no get_domain_name();
    # accept both forms instead of raising AttributeError.
    if isinstance(domain_obj, str):
        domain_name = domain_obj
    else:
        domain_name = domain_obj.get_domain_name()
    add_domain_languages_by_item_id(domain_name, item_id)
def get_domain_history(domain_type, domain_name):
    """Return the crawl history of *domain_name* (port 80) as a list of
    (root_item, epoch) tuples from the sorted set in the Onion DB."""
    return r_serv_onion.zrange(f'crawler_history_{domain_type}:{domain_name}:80', 0, -1, withscores=True)
def get_item_children(item_id):
    """Return the set of item ids crawled from *item_id* (direct children)."""
    return r_serv_metadata.smembers(f'paste_children:{item_id}')
def get_domain_items(domain_name, root_item_id):
    """Return all item ids of a crawl: the root item plus every descendant
    belonging to *domain_name*."""
    dom_item = get_domain_item_children(domain_name, root_item_id)
    dom_item.append(root_item_id)
    return dom_item
def is_item_in_domain(domain_name, item_id):
    """Return True when *item_id* belongs to *domain_name*.

    Crawled item ids embed the domain name immediately before a fixed
    36-character suffix; the check matches the domain at that position,
    after guarding against ids too short to contain it.
    """
    dom_len = len(domain_name)
    if len(item_id) <= dom_len + 48:
        return False
    return item_id[-36 - dom_len:-36] == domain_name
def get_domain_item_children(domain_name, root_item_id):
    """Recursively collect descendant item ids of *root_item_id* that belong
    to *domain_name*.

    Children crawled from other domains are skipped and their subtrees are
    not explored.
    """
    all_items = []
    for item_id in get_item_children(root_item_id):
        if is_item_in_domain(domain_name, item_id):
            all_items.append(item_id)
            # Depth-first descent within the same domain only.
            all_items.extend(get_domain_item_children(domain_name, item_id))
    return all_items
def get_domain_crawled_item_root(domain_name, domain_type, epoch):
    """Return the root item of the crawl performed at *epoch* (port 80).

    NOTE(review): assumes an entry exists for *epoch* — an empty result
    would raise IndexError on res[0]; callers pass epochs taken from
    get_domain_history(), where the entry is known to exist.
    """
    res = r_serv_onion.zrevrangebyscore(f'crawler_history_{domain_type}:{domain_name}:80', int(epoch), int(epoch))
    return {"root_item": res[0], "epoch": int(epoch)}
def get_domain_items_crawled(domain_name, domain_type, epoch):
    """Return every item id crawled for *domain_name* at crawl *epoch*."""
    item_crawled = []
    item_root = get_domain_crawled_item_root(domain_name, domain_type, epoch)
    if item_root:
        # A root_item equal to the epoch string (or falsy) marks a crawl
        # that produced no items.
        if item_root['root_item'] != str(item_root['epoch']) and item_root['root_item']:
            for item_id in get_domain_items(domain_name, item_root['root_item']):
                item_crawled.append(item_id)
    return item_crawled
if __name__ == '__main__':
    start_deb = time.time()

    config_loader = ConfigLoader.ConfigLoader()
    r_serv_db = config_loader.get_redis_conn("ARDB_DB")
    r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
    r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
    config_loader = None

    # Name shown by the UI while this background script runs.
    r_serv_db.set('ail:current_background_script', 'domain languages update')

    nb_elem_to_update = r_serv_db.get('update:nb_elem_to_convert')
    if not nb_elem_to_update:
        nb_elem_to_update = 1  # avoid division by zero in update_update_stats()
    else:
        nb_elem_to_update = int(nb_elem_to_update)

    # _delete_all_domains_languages()

    # Pop queued domains (filled by update/v3.4/Update.py) until the set is empty.
    while True:
        domain = r_serv_onion.spop('domain_update_v3.4')
        if domain is not None:
            print(domain)
            domain = str(domain)
            domain_t = get_domain_type(domain)
            # Re-index languages for every item of every recorded crawl.
            for domain_history in get_domain_history(domain_t, domain):
                domain_items = get_domain_items_crawled(domain, domain_t, domain_history[1])
                for id_item in domain_items:
                    update_domain_language(domain, id_item)
            r_serv_db.incr('update:nb_elem_converted')
            update_update_stats()
        else:
            # Queue drained: mark the background update as complete.
            r_serv_db.set('ail:current_background_script_stat', 100)
            sys.exit(0)

View file

@ -1,22 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys

sys.path.append(os.environ['AIL_HOME'])
##################################
# Import Project packages
##################################
from update.bin.old_ail_updater import AIL_Updater


class Updater(AIL_Updater):
    """No-op updater: records the new AIL version via the base class."""

    def __init__(self, version):
        # No migration logic for this release; delegate everything.
        super().__init__(version)


if __name__ == '__main__':
    updater = Updater('v3.5')
    updater.run_update()

View file

@ -1,35 +0,0 @@
#!/bin/bash

# AIL v3.5 update: stop AIL, refresh submodules, update the PyAIL,
# d4-pyclient and DomainClassifier python dependencies.

[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
# FIX: message previously referenced AIL_ARDB instead of AIL_BIN
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

# SUBMODULES #
git submodule update

echo -e $GREEN"Installing PyAIL ..."$DEFAULT
pip3 install -U pyail

echo -e $GREEN"Installing D4 CLIENT ..."$DEFAULT
pip3 install -U d4-pyclient

echo ""
echo -e $GREEN"Updating DomainClassifier ..."$DEFAULT
pip3 install -U DomainClassifier

exit 0

View file

@ -1,22 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys

sys.path.append(os.environ['AIL_HOME'])
##################################
# Import Project packages
##################################
from update.bin.old_ail_updater import AIL_Updater


class Updater(AIL_Updater):
    """No-op updater: records the new AIL version via the base class."""

    def __init__(self, version):
        # No migration logic for this release; delegate everything.
        super().__init__(version)


if __name__ == '__main__':
    updater = Updater('v3.6')
    updater.run_update()

View file

@ -1,39 +0,0 @@
#!/bin/bash

# AIL v3.6 update: stop AIL, refresh submodules, update d4-pyclient and
# DomainClassifier, install the test tooling (nose, coverage).

[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
# FIX: message previously referenced AIL_ARDB instead of AIL_BIN
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

# SUBMODULES #
git submodule update

echo -e $GREEN"Updating D4 CLIENT ..."$DEFAULT
pip3 install -U d4-pyclient
echo ""

echo -e $GREEN"Installing nose ..."$DEFAULT
pip3 install -U nose

echo -e $GREEN"Installing coverage ..."$DEFAULT
pip3 install -U coverage

echo ""
echo -e $GREEN"Updating DomainClassifier ..."$DEFAULT
pip3 install -U DomainClassifier

exit 0

View file

@ -1,40 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
sys.path.append(os.environ['AIL_HOME'])
##################################
# Import Project packages
##################################
from update.bin.old_ail_updater import AIL_Updater
from lib import Tracker

class Updater(AIL_Updater):
    """v3.7 updater: repairs the tracker uuid lists and queues every tracker
    for the v3.7 background update (stats + item-link fixes)."""
    def __init__(self, version):
        super(Updater, self).__init__(version)
    def update(self):
        """
        Queue all trackers into 'trackers_update_v3.7' for the background
        script and register the background update.
        """
        print('Fixing Tracker_uuid list ...')
        Tracker.fix_all_tracker_uuid_list()
        nb = 0
        # Enqueue each tracker uuid; the background script pops them one by one.
        for tracker_uuid in Tracker.get_trackers():
            self.r_serv.sadd('trackers_update_v3.7', tracker_uuid)
            nb += 1
        # Progress counters consumed by the background updater / UI.
        self.r_serv.set('update:nb_elem_to_convert', nb)
        self.r_serv.set('update:nb_elem_converted',0)
        # Add background update
        self.r_serv.sadd('ail:to_update', self.version)

if __name__ == '__main__':
    updater = Updater('v3.7')
    updater.run_update()

View file

@ -1,44 +0,0 @@
#!/bin/bash

# AIL v3.7 update: stop AIL, refresh submodules and thirdparty assets,
# then run the v3.7 DB migration.

[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
# FIX: message previously referenced AIL_ARDB instead of AIL_BIN
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

# SUBMODULES #
git submodule update

echo -e $GREEN"Updating thirdparty ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ut
wait

echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v3.7/Update.py
wait
echo ""
echo ""
echo ""

echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

exit 0

View file

@ -1,53 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib import ConfigLoader
from lib import Tracker
def update_update_stats():
    """Print and persist background-update progress as a 0-100 percentage.

    Reads the module-global ``r_serv_db`` / ``nb_elem_to_update`` set up in
    the __main__ block below.
    """
    nb_updated = int(r_serv_db.get('update:nb_elem_converted'))
    progress = int((nb_updated * 100) / nb_elem_to_update)
    print(f'{nb_updated}/{nb_elem_to_update} updated {progress}%')
    # Consumed by the web UI to display background-script progress.
    r_serv_db.set('ail:current_background_script_stat', progress)
if __name__ == '__main__':
    start_deb = time.time()

    config_loader = ConfigLoader.ConfigLoader()
    r_serv_db = config_loader.get_redis_conn("ARDB_DB")
    r_serv_tracker = config_loader.get_db_conn("Kvrocks_DB")
    config_loader = None
    # Point the Tracker lib at the Kvrocks backend before fixing data.
    Tracker.r_serv_tracker = r_serv_tracker

    # Name shown by the UI while this background script runs.
    r_serv_db.set('ail:current_background_script', 'trackers update')

    nb_elem_to_update = r_serv_db.get('update:nb_elem_to_convert')
    if not nb_elem_to_update:
        nb_elem_to_update = 1  # avoid division by zero in update_update_stats()
    else:
        nb_elem_to_update = int(nb_elem_to_update)

    # Pop queued tracker uuids (filled by update/v3.7/Update.py) until empty.
    while True:
        tracker_uuid = r_serv_db.spop('trackers_update_v3.7')
        if tracker_uuid is not None:
            print(tracker_uuid)
            # FIX STATS
            Tracker.fix_tracker_stats_per_day(tracker_uuid)
            # MAP TRACKER - ITEM_ID
            Tracker.fix_tracker_item_link(tracker_uuid)
            r_serv_db.incr('update:nb_elem_converted')
            update_update_stats()
        else:
            # Queue drained: mark the background update as complete.
            r_serv_db.set('ail:current_background_script_stat', 100)
            sys.exit(0)

View file

@ -1,22 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys

sys.path.append(os.environ['AIL_HOME'])
##################################
# Import Project packages
##################################
from update.bin.old_ail_updater import AIL_Updater


class Updater(AIL_Updater):
    """No-op updater: records the new AIL version via the base class."""

    def __init__(self, version):
        # No migration logic for this release; delegate everything.
        super().__init__(version)


if __name__ == '__main__':
    updater = Updater('v4.0')
    updater.run_update()

View file

@ -1,29 +0,0 @@
#!/bin/bash

# AIL v4.0 update: stop AIL, refresh submodules, install websockets.

[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
# FIX: message previously referenced AIL_ARDB instead of AIL_BIN
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

# SUBMODULES #
git submodule update

echo ""
# FIX: message previously said "Installing nose" while installing websockets
echo -e $GREEN"Installing websockets ..."$DEFAULT
pip3 install -U websockets

exit 0

View file

@ -1,31 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import redis
sys.path.append(os.environ['AIL_HOME'])
##################################
# Import Project packages
##################################
from update.bin.old_ail_updater import AIL_Updater

class Updater(AIL_Updater):
    """v4.1 updater: wipes the legacy tracking database, superseded by the
    new backend in this release."""
    def __init__(self, version):
        super(Updater, self).__init__(version)
    def update(self):
        # Direct connection to the old tracking redis (localhost:6382, db 2).
        r_tracking = redis.StrictRedis(host='localhost',
                                       port=6382,
                                       db=2,
                                       decode_responses=True)
        # FLUSH OLD DB
        r_tracking.flushdb()

if __name__ == '__main__':
    updater = Updater('v4.1')
    updater.run_update()

View file

@ -1,22 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys

sys.path.append(os.environ['AIL_HOME'])
##################################
# Import Project packages
##################################
from update.bin.old_ail_updater import AIL_Updater


class Updater(AIL_Updater):
    """No-op updater: records the new AIL version via the base class."""

    def __init__(self, version):
        # No migration logic for this release; delegate everything.
        super().__init__(version)


if __name__ == '__main__':
    updater = Updater('v4.2.1')
    updater.run_update()

View file

@ -1,29 +0,0 @@
#!/bin/bash

# AIL v4.2.1 update: stop AIL, refresh submodules, update pubsublogger.

[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
# FIX: message previously referenced AIL_ARDB instead of AIL_BIN
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

# SUBMODULES #
git submodule update

echo ""
# FIX: typo "pusblogger" -> "pubsublogger"
echo -e $GREEN"Updating pubsublogger ..."$DEFAULT
pip3 install -U pubsublogger

exit 0

View file

@ -1,22 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys

sys.path.append(os.environ['AIL_HOME'])
##################################
# Import Project packages
##################################
from update.bin.old_ail_updater import AIL_Updater


class Updater(AIL_Updater):
    """No-op updater: records the new AIL version via the base class."""

    def __init__(self, version):
        # No migration logic for this release; delegate everything.
        super().__init__(version)


if __name__ == '__main__':
    updater = Updater('v4.2')
    updater.run_update()

View file

@ -1,33 +0,0 @@
#!/bin/bash

# AIL v4.2 update: stop AIL, refresh submodules, install/update
# ail_typo_squatting and d4-pyclient.

[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
# FIX: message previously referenced AIL_ARDB instead of AIL_BIN
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

# SUBMODULES #
git submodule update

echo ""
echo -e $GREEN"Installing typo-squatting ..."$DEFAULT
pip3 install -U ail_typo_squatting

echo ""
echo -e $GREEN"Updating d4-client ..."$DEFAULT
pip3 install -U d4-pyclient

exit 0

View file

@ -8,7 +8,8 @@ sys.path.append(os.environ['AIL_HOME'])
##################################
# Import Project packages
##################################
from update.bin.old_ail_updater import AIL_Updater
from update.bin.ail_updater import AIL_Updater
from lib import ail_updates
class Updater(AIL_Updater):
"""default Updater."""
@ -18,5 +19,6 @@ class Updater(AIL_Updater):
if __name__ == '__main__':
updater = Updater('v3.0')
updater = Updater('v5.2')
updater.run_update()
ail_updates.add_background_update('v5.2')

View file

@ -2,13 +2,11 @@
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH
@ -22,4 +20,12 @@ wait
# SUBMODULES #
git submodule update
echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v5.2/Update.py
wait
echo ""
echo ""
exit 0

27
update/v5.2/compress_har.py Executable file
View file

@ -0,0 +1,27 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import gzip
import os
import sys
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib import ail_updates
from lib import crawlers
if __name__ == '__main__':
    # v5.2 background update: gzip-compress every stored HAR capture,
    # reporting progress every 100 files.
    update = ail_updates.AILBackgroundUpdate('v5.2')
    HAR_DIR = crawlers.HAR_DIR
    hars_ids = crawlers.get_all_har_ids()
    update.set_nb_to_update(len(hars_ids))
    n = 0
    for har_id in hars_ids:
        crawlers._gzip_har(har_id)
        update.inc_nb_updated()
        # FIX: n was never incremented, so "n % 100 == 0" held on every
        # iteration and progress was rewritten once per HAR.
        n += 1
        if n % 100 == 0:
            update.update_progress()
    # Catch any HARs added while the loop was running.
    crawlers._gzip_all_hars()

View file

@ -17,9 +17,6 @@ from flask_login import LoginManager, current_user, login_user, logout_user, log
import importlib
from os.path import join
# # TODO: put me in lib/Tag
from pytaxonomies import Taxonomies
sys.path.append('./modules/')
sys.path.append(os.environ['AIL_BIN'])
@ -51,6 +48,9 @@ from blueprints.objects_decoded import objects_decoded
from blueprints.objects_subtypes import objects_subtypes
from blueprints.objects_title import objects_title
from blueprints.objects_cookie_name import objects_cookie_name
from blueprints.objects_etag import objects_etag
from blueprints.objects_hhhash import objects_hhhash
from blueprints.objects_chat import objects_chat
Flask_dir = os.environ['AIL_FLASK']
@ -106,6 +106,9 @@ app.register_blueprint(objects_decoded, url_prefix=baseUrl)
app.register_blueprint(objects_subtypes, url_prefix=baseUrl)
app.register_blueprint(objects_title, url_prefix=baseUrl)
app.register_blueprint(objects_cookie_name, url_prefix=baseUrl)
app.register_blueprint(objects_etag, url_prefix=baseUrl)
app.register_blueprint(objects_hhhash, url_prefix=baseUrl)
app.register_blueprint(objects_chat, url_prefix=baseUrl)
# ========= =========#
@ -250,16 +253,6 @@ default_taxonomies = ["infoleak", "gdpr", "fpf", "dark-web"]
for taxonomy in default_taxonomies:
Tag.enable_taxonomy_tags(taxonomy)
# ========== INITIAL tags auto export ============
# taxonomies = Taxonomies()
#
# infoleak_tags = taxonomies.get('infoleak').machinetags()
# infoleak_automatic_tags = []
# for tag in taxonomies.get('infoleak').machinetags():
# if tag.split('=')[0][:] == 'infoleak:automatic-detection':
# r_serv_db.sadd('list_export_tags', tag)
#
# r_serv_db.sadd('list_export_tags', 'infoleak:submission="manual"')
# ============ MAIN ============
if __name__ == "__main__":

Some files were not shown because too many files have changed in this diff. Show more