mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-26 15:57:16 +00:00
chg: [ail queues] merge
This commit is contained in:
commit
c19b1f34e3
131 changed files with 4383 additions and 3039 deletions
|
@ -27,7 +27,7 @@ fi
|
||||||
export PATH=$AIL_VENV/bin:$PATH
|
export PATH=$AIL_VENV/bin:$PATH
|
||||||
export PATH=$AIL_HOME:$PATH
|
export PATH=$AIL_HOME:$PATH
|
||||||
export PATH=$AIL_REDIS:$PATH
|
export PATH=$AIL_REDIS:$PATH
|
||||||
export PATH=$AIL_ARDB:$PATH
|
export PATH=$AIL_KVROCKS:$PATH
|
||||||
export PATH=$AIL_BIN:$PATH
|
export PATH=$AIL_BIN:$PATH
|
||||||
export PATH=$AIL_FLASK:$PATH
|
export PATH=$AIL_FLASK:$PATH
|
||||||
|
|
||||||
|
@ -685,9 +685,6 @@ while [ "$1" != "" ]; do
|
||||||
-lrv | --launchRedisVerify ) launch_redis;
|
-lrv | --launchRedisVerify ) launch_redis;
|
||||||
wait_until_redis_is_ready;
|
wait_until_redis_is_ready;
|
||||||
;;
|
;;
|
||||||
-lav | --launchARDBVerify ) launch_ardb;
|
|
||||||
wait_until_ardb_is_ready;
|
|
||||||
;;
|
|
||||||
-lkv | --launchKVORCKSVerify ) launch_kvrocks;
|
-lkv | --launchKVORCKSVerify ) launch_kvrocks;
|
||||||
wait_until_kvrocks_is_ready;
|
wait_until_kvrocks_is_ready;
|
||||||
;;
|
;;
|
||||||
|
|
|
@ -17,6 +17,7 @@ from lib import ail_logger
|
||||||
from lib import crawlers
|
from lib import crawlers
|
||||||
from lib.ConfigLoader import ConfigLoader
|
from lib.ConfigLoader import ConfigLoader
|
||||||
from lib.objects import CookiesNames
|
from lib.objects import CookiesNames
|
||||||
|
from lib.objects import Etags
|
||||||
from lib.objects.Domains import Domain
|
from lib.objects.Domains import Domain
|
||||||
from lib.objects.Items import Item
|
from lib.objects.Items import Item
|
||||||
from lib.objects import Screenshots
|
from lib.objects import Screenshots
|
||||||
|
@ -59,6 +60,7 @@ class Crawler(AbstractModule):
|
||||||
self.root_item = None
|
self.root_item = None
|
||||||
self.date = None
|
self.date = None
|
||||||
self.items_dir = None
|
self.items_dir = None
|
||||||
|
self.original_domain = None
|
||||||
self.domain = None
|
self.domain = None
|
||||||
|
|
||||||
# TODO Replace with warning list ???
|
# TODO Replace with warning list ???
|
||||||
|
@ -98,7 +100,7 @@ class Crawler(AbstractModule):
|
||||||
self.crawler_scheduler.update_queue()
|
self.crawler_scheduler.update_queue()
|
||||||
self.crawler_scheduler.process_queue()
|
self.crawler_scheduler.process_queue()
|
||||||
|
|
||||||
self.refresh_lacus_status() # TODO LOG ERROR
|
self.refresh_lacus_status() # TODO LOG ERROR
|
||||||
if not self.is_lacus_up:
|
if not self.is_lacus_up:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
@ -121,11 +123,19 @@ class Crawler(AbstractModule):
|
||||||
if capture:
|
if capture:
|
||||||
try:
|
try:
|
||||||
status = self.lacus.get_capture_status(capture.uuid)
|
status = self.lacus.get_capture_status(capture.uuid)
|
||||||
if status != crawlers.CaptureStatus.DONE: # TODO ADD GLOBAL TIMEOUT-> Save start time ### print start time
|
if status == crawlers.CaptureStatus.DONE:
|
||||||
|
return capture
|
||||||
|
elif status == crawlers.CaptureStatus.UNKNOWN:
|
||||||
|
capture_start = capture.get_start_time(r_str=False)
|
||||||
|
if int(time.time()) - capture_start > 600: # TODO ADD in new crawler config
|
||||||
|
task = capture.get_task()
|
||||||
|
task.reset()
|
||||||
|
capture.delete()
|
||||||
|
else:
|
||||||
|
capture.update(status)
|
||||||
|
else:
|
||||||
capture.update(status)
|
capture.update(status)
|
||||||
print(capture.uuid, crawlers.CaptureStatus(status).name, int(time.time()))
|
print(capture.uuid, crawlers.CaptureStatus(status).name, int(time.time()))
|
||||||
else:
|
|
||||||
return capture
|
|
||||||
|
|
||||||
except ConnectionError:
|
except ConnectionError:
|
||||||
print(capture.uuid)
|
print(capture.uuid)
|
||||||
|
@ -181,6 +191,7 @@ class Crawler(AbstractModule):
|
||||||
print(domain)
|
print(domain)
|
||||||
|
|
||||||
self.domain = Domain(domain)
|
self.domain = Domain(domain)
|
||||||
|
self.original_domain = Domain(domain)
|
||||||
|
|
||||||
epoch = int(time.time())
|
epoch = int(time.time())
|
||||||
parent_id = task.get_parent()
|
parent_id = task.get_parent()
|
||||||
|
@ -203,12 +214,20 @@ class Crawler(AbstractModule):
|
||||||
# Origin + History + tags
|
# Origin + History + tags
|
||||||
if self.root_item:
|
if self.root_item:
|
||||||
self.domain.set_last_origin(parent_id)
|
self.domain.set_last_origin(parent_id)
|
||||||
self.domain.add_history(epoch, root_item=self.root_item)
|
|
||||||
# Tags
|
# Tags
|
||||||
for tag in task.get_tags():
|
for tag in task.get_tags():
|
||||||
self.domain.add_tag(tag)
|
self.domain.add_tag(tag)
|
||||||
elif self.domain.was_up():
|
self.domain.add_history(epoch, root_item=self.root_item)
|
||||||
self.domain.add_history(epoch, root_item=epoch)
|
|
||||||
|
if self.domain != self.original_domain:
|
||||||
|
self.original_domain.update_daterange(self.date.replace('/', ''))
|
||||||
|
if self.root_item:
|
||||||
|
self.original_domain.set_last_origin(parent_id)
|
||||||
|
# Tags
|
||||||
|
for tag in task.get_tags():
|
||||||
|
self.domain.add_tag(tag)
|
||||||
|
self.original_domain.add_history(epoch, root_item=self.root_item)
|
||||||
|
crawlers.update_last_crawled_domain(self.original_domain.get_domain_type(), self.original_domain.id, epoch)
|
||||||
|
|
||||||
crawlers.update_last_crawled_domain(self.domain.get_domain_type(), self.domain.id, epoch)
|
crawlers.update_last_crawled_domain(self.domain.get_domain_type(), self.domain.id, epoch)
|
||||||
print('capture:', capture.uuid, 'completed')
|
print('capture:', capture.uuid, 'completed')
|
||||||
|
@ -263,7 +282,7 @@ class Crawler(AbstractModule):
|
||||||
title_content = crawlers.extract_title_from_html(entries['html'])
|
title_content = crawlers.extract_title_from_html(entries['html'])
|
||||||
if title_content:
|
if title_content:
|
||||||
title = Titles.create_title(title_content)
|
title = Titles.create_title(title_content)
|
||||||
title.add(item.get_date(), item_id)
|
title.add(item.get_date(), item)
|
||||||
|
|
||||||
# SCREENSHOT
|
# SCREENSHOT
|
||||||
if self.screenshot:
|
if self.screenshot:
|
||||||
|
@ -287,7 +306,12 @@ class Crawler(AbstractModule):
|
||||||
for cookie_name in crawlers.extract_cookies_names_from_har(entries['har']):
|
for cookie_name in crawlers.extract_cookies_names_from_har(entries['har']):
|
||||||
print(cookie_name)
|
print(cookie_name)
|
||||||
cookie = CookiesNames.create(cookie_name)
|
cookie = CookiesNames.create(cookie_name)
|
||||||
cookie.add(self.date.replace('/', ''), self.domain.id)
|
cookie.add(self.date.replace('/', ''), self.domain)
|
||||||
|
for etag_content in crawlers.extract_etag_from_har(entries['har']):
|
||||||
|
print(etag_content)
|
||||||
|
etag = Etags.create(etag_content)
|
||||||
|
etag.add(self.date.replace('/', ''), self.domain)
|
||||||
|
crawlers.extract_hhhash(entries['har'], self.domain.id, self.date.replace('/', ''))
|
||||||
|
|
||||||
# Next Children
|
# Next Children
|
||||||
entries_children = entries.get('children')
|
entries_children = entries.get('children')
|
||||||
|
|
|
@ -8,9 +8,12 @@ Import Content
|
||||||
|
|
||||||
"""
|
"""
|
||||||
import os
|
import os
|
||||||
|
import logging
|
||||||
|
import logging.config
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from abc import ABC
|
from abc import ABC
|
||||||
|
from ssl import create_default_context
|
||||||
|
|
||||||
import smtplib
|
import smtplib
|
||||||
from email.mime.multipart import MIMEMultipart
|
from email.mime.multipart import MIMEMultipart
|
||||||
|
@ -22,17 +25,22 @@ sys.path.append(os.environ['AIL_BIN'])
|
||||||
##################################
|
##################################
|
||||||
# Import Project packages
|
# Import Project packages
|
||||||
##################################
|
##################################
|
||||||
|
from lib import ail_logger
|
||||||
from exporter.abstract_exporter import AbstractExporter
|
from exporter.abstract_exporter import AbstractExporter
|
||||||
from lib.ConfigLoader import ConfigLoader
|
from lib.ConfigLoader import ConfigLoader
|
||||||
# from lib.objects.abstract_object import AbstractObject
|
# from lib.objects.abstract_object import AbstractObject
|
||||||
# from lib.Tracker import Tracker
|
# from lib.Tracker import Tracker
|
||||||
|
|
||||||
|
logging.config.dictConfig(ail_logger.get_config(name='modules'))
|
||||||
|
|
||||||
|
|
||||||
class MailExporter(AbstractExporter, ABC):
|
class MailExporter(AbstractExporter, ABC):
|
||||||
def __init__(self, host=None, port=None, password=None, user='', sender=''):
|
def __init__(self, host=None, port=None, password=None, user='', sender='', cert_required=None, ca_file=None):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
config_loader = ConfigLoader()
|
config_loader = ConfigLoader()
|
||||||
|
|
||||||
|
self.logger = logging.getLogger(f'{self.__class__.__name__}')
|
||||||
|
|
||||||
if host:
|
if host:
|
||||||
self.host = host
|
self.host = host
|
||||||
self.port = port
|
self.port = port
|
||||||
|
@ -45,6 +53,15 @@ class MailExporter(AbstractExporter, ABC):
|
||||||
self.pw = config_loader.get_config_str("Notifications", "sender_pw")
|
self.pw = config_loader.get_config_str("Notifications", "sender_pw")
|
||||||
if self.pw == 'None':
|
if self.pw == 'None':
|
||||||
self.pw = None
|
self.pw = None
|
||||||
|
if cert_required is not None:
|
||||||
|
self.cert_required = bool(cert_required)
|
||||||
|
self.ca_file = ca_file
|
||||||
|
else:
|
||||||
|
self.cert_required = config_loader.get_config_boolean("Notifications", "cert_required")
|
||||||
|
if self.cert_required:
|
||||||
|
self.ca_file = config_loader.get_config_str("Notifications", "ca_file")
|
||||||
|
else:
|
||||||
|
self.ca_file = None
|
||||||
if user:
|
if user:
|
||||||
self.user = user
|
self.user = user
|
||||||
else:
|
else:
|
||||||
|
@ -67,8 +84,12 @@ class MailExporter(AbstractExporter, ABC):
|
||||||
smtp_server = smtplib.SMTP(self.host, self.port)
|
smtp_server = smtplib.SMTP(self.host, self.port)
|
||||||
smtp_server.starttls()
|
smtp_server.starttls()
|
||||||
except smtplib.SMTPNotSupportedError:
|
except smtplib.SMTPNotSupportedError:
|
||||||
print("The server does not support the STARTTLS extension.")
|
self.logger.info(f"The server {self.host}:{self.port} does not support the STARTTLS extension.")
|
||||||
smtp_server = smtplib.SMTP_SSL(self.host, self.port)
|
if self.cert_required:
|
||||||
|
context = create_default_context(cafile=self.ca_file)
|
||||||
|
else:
|
||||||
|
context = None
|
||||||
|
smtp_server = smtplib.SMTP_SSL(self.host, self.port, context=context)
|
||||||
|
|
||||||
smtp_server.ehlo()
|
smtp_server.ehlo()
|
||||||
if self.user is not None:
|
if self.user is not None:
|
||||||
|
@ -80,7 +101,7 @@ class MailExporter(AbstractExporter, ABC):
|
||||||
return smtp_server
|
return smtp_server
|
||||||
# except Exception as err:
|
# except Exception as err:
|
||||||
# traceback.print_tb(err.__traceback__)
|
# traceback.print_tb(err.__traceback__)
|
||||||
# logger.warning(err)
|
# self.logger.warning(err)
|
||||||
|
|
||||||
def _export(self, recipient, subject, body):
|
def _export(self, recipient, subject, body):
|
||||||
mime_msg = MIMEMultipart()
|
mime_msg = MIMEMultipart()
|
||||||
|
@ -95,8 +116,8 @@ class MailExporter(AbstractExporter, ABC):
|
||||||
smtp_client.quit()
|
smtp_client.quit()
|
||||||
# except Exception as err:
|
# except Exception as err:
|
||||||
# traceback.print_tb(err.__traceback__)
|
# traceback.print_tb(err.__traceback__)
|
||||||
# logger.warning(err)
|
# self.logger.warning(err)
|
||||||
print(f'Send notification: {subject} to {recipient}')
|
self.logger.info(f'Send notification: {subject} to {recipient}')
|
||||||
|
|
||||||
class MailExporterTracker(MailExporter):
|
class MailExporterTracker(MailExporter):
|
||||||
|
|
||||||
|
|
|
@ -87,13 +87,16 @@ class FeederImporter(AbstractImporter):
|
||||||
feeder_name = feeder.get_name()
|
feeder_name = feeder.get_name()
|
||||||
print(f'importing: {feeder_name} feeder')
|
print(f'importing: {feeder_name} feeder')
|
||||||
|
|
||||||
item_id = feeder.get_item_id()
|
item_id = feeder.get_item_id() # TODO replace me with object global id
|
||||||
# process meta
|
# process meta
|
||||||
if feeder.get_json_meta():
|
if feeder.get_json_meta():
|
||||||
feeder.process_meta()
|
feeder.process_meta()
|
||||||
gzip64_content = feeder.get_gzip64_content()
|
|
||||||
|
|
||||||
return f'{feeder_name} {item_id} {gzip64_content}'
|
if feeder_name == 'telegram':
|
||||||
|
return item_id # TODO support UI dashboard
|
||||||
|
else:
|
||||||
|
gzip64_content = feeder.get_gzip64_content()
|
||||||
|
return f'{feeder_name} {item_id} {gzip64_content}'
|
||||||
|
|
||||||
|
|
||||||
class FeederModuleImporter(AbstractModule):
|
class FeederModuleImporter(AbstractModule):
|
||||||
|
|
|
@ -35,7 +35,7 @@ class PystemonImporter(AbstractImporter):
|
||||||
print(item_id)
|
print(item_id)
|
||||||
if item_id:
|
if item_id:
|
||||||
print(item_id)
|
print(item_id)
|
||||||
full_item_path = os.path.join(self.dir_pystemon, item_id) # TODO SANITIZE PATH
|
full_item_path = os.path.join(self.dir_pystemon, item_id) # TODO SANITIZE PATH
|
||||||
# Check if pystemon file exists
|
# Check if pystemon file exists
|
||||||
if not os.path.isfile(full_item_path):
|
if not os.path.isfile(full_item_path):
|
||||||
print(f'Error: {full_item_path}, file not found')
|
print(f'Error: {full_item_path}, file not found')
|
||||||
|
@ -47,7 +47,12 @@ class PystemonImporter(AbstractImporter):
|
||||||
if not content:
|
if not content:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
return self.create_message(item_id, content, source='pystemon')
|
if full_item_path[-3:] == '.gz':
|
||||||
|
gzipped = True
|
||||||
|
else:
|
||||||
|
gzipped = False
|
||||||
|
|
||||||
|
return self.create_message(item_id, content, gzipped=gzipped, source='pystemon')
|
||||||
|
|
||||||
except IOError as e:
|
except IOError as e:
|
||||||
print(f'Error: {full_item_path}, IOError')
|
print(f'Error: {full_item_path}, IOError')
|
||||||
|
|
|
@ -89,7 +89,7 @@ class AbstractImporter(ABC): # TODO ail queues
|
||||||
if not gzipped:
|
if not gzipped:
|
||||||
content = self.b64_gzip(content)
|
content = self.b64_gzip(content)
|
||||||
elif not b64:
|
elif not b64:
|
||||||
content = self.b64(gzipped)
|
content = self.b64(content)
|
||||||
if not content:
|
if not content:
|
||||||
return None
|
return None
|
||||||
if isinstance(content, bytes):
|
if isinstance(content, bytes):
|
||||||
|
|
|
@ -17,7 +17,7 @@ sys.path.append(os.environ['AIL_BIN'])
|
||||||
##################################
|
##################################
|
||||||
from importer.feeders.Default import DefaultFeeder
|
from importer.feeders.Default import DefaultFeeder
|
||||||
from lib.objects.Usernames import Username
|
from lib.objects.Usernames import Username
|
||||||
from lib import item_basic
|
from lib.objects.Items import Item
|
||||||
|
|
||||||
|
|
||||||
class JabberFeeder(DefaultFeeder):
|
class JabberFeeder(DefaultFeeder):
|
||||||
|
@ -36,7 +36,7 @@ class JabberFeeder(DefaultFeeder):
|
||||||
self.item_id = f'{item_id}.gz'
|
self.item_id = f'{item_id}.gz'
|
||||||
return self.item_id
|
return self.item_id
|
||||||
|
|
||||||
def process_meta(self):
|
def process_meta(self): # TODO replace me by message
|
||||||
"""
|
"""
|
||||||
Process JSON meta field.
|
Process JSON meta field.
|
||||||
"""
|
"""
|
||||||
|
@ -44,10 +44,12 @@ class JabberFeeder(DefaultFeeder):
|
||||||
# item_basic.add_map_obj_id_item_id(jabber_id, item_id, 'jabber_id') ##############################################
|
# item_basic.add_map_obj_id_item_id(jabber_id, item_id, 'jabber_id') ##############################################
|
||||||
to = str(self.json_data['meta']['jabber:to'])
|
to = str(self.json_data['meta']['jabber:to'])
|
||||||
fr = str(self.json_data['meta']['jabber:from'])
|
fr = str(self.json_data['meta']['jabber:from'])
|
||||||
date = item_basic.get_item_date(item_id)
|
|
||||||
|
item = Item(self.item_id)
|
||||||
|
date = item.get_date()
|
||||||
|
|
||||||
user_to = Username(to, 'jabber')
|
user_to = Username(to, 'jabber')
|
||||||
user_fr = Username(fr, 'jabber')
|
user_fr = Username(fr, 'jabber')
|
||||||
user_to.add(date, self.item_id)
|
user_to.add(date, item)
|
||||||
user_fr.add(date, self.item_id)
|
user_fr.add(date, item)
|
||||||
return None
|
return None
|
||||||
|
|
|
@ -16,8 +16,28 @@ sys.path.append(os.environ['AIL_BIN'])
|
||||||
# Import Project packages
|
# Import Project packages
|
||||||
##################################
|
##################################
|
||||||
from importer.feeders.Default import DefaultFeeder
|
from importer.feeders.Default import DefaultFeeder
|
||||||
|
from lib.ConfigLoader import ConfigLoader
|
||||||
|
from lib.objects.Chats import Chat
|
||||||
|
from lib.objects import Messages
|
||||||
|
from lib.objects import UsersAccount
|
||||||
from lib.objects.Usernames import Username
|
from lib.objects.Usernames import Username
|
||||||
from lib import item_basic
|
|
||||||
|
import base64
|
||||||
|
import io
|
||||||
|
import gzip
|
||||||
|
def gunzip_bytes_obj(bytes_obj):
    """
    Decompress a gzip-compressed bytes object held in memory.

    :param bytes_obj: gzip-compressed payload
    :return: the decompressed bytes, or None if the payload could not be
             decompressed (the error is printed, never raised)
    """
    decompressed = None
    try:
        # Stage the payload in an in-memory file so GzipFile can read from it
        buffer = io.BytesIO()
        buffer.write(bytes_obj)
        buffer.seek(0)

        with gzip.GzipFile(fileobj=buffer, mode='rb') as gz_stream:
            decompressed = gz_stream.read()
    except Exception as e:
        # Best effort: any failure is reported and turned into a None result
        print(f'Global; Invalid Gzip file: {e}')

    return decompressed
|
||||||
|
|
||||||
class TelegramFeeder(DefaultFeeder):
|
class TelegramFeeder(DefaultFeeder):
|
||||||
|
|
||||||
|
@ -26,31 +46,90 @@ class TelegramFeeder(DefaultFeeder):
|
||||||
self.name = 'telegram'
|
self.name = 'telegram'
|
||||||
|
|
||||||
# define item id
|
# define item id
|
||||||
def get_item_id(self):  # TODO rename self.item_id
    """
    Build the message object id from the feeder meta, store it in
    self.item_id and return it.

    Reads meta['date']['timestamp'], meta['chat']['id'] and meta['id'];
    a missing key raises KeyError.
    """
    meta = self.json_data['meta']
    # Message date
    # TODO CREATE DEFAULT TIMESTAMP (fall back to the current date when meta has no 'date')
    timestamp = meta['date']['timestamp']
    chat_id = str(meta['chat']['id'])
    message_id = str(meta['id'])
    obj_id = Messages.create_obj_id('telegram', chat_id, message_id, timestamp)
    self.item_id = obj_id  # TODO rename self.item_id
    return self.item_id
|
||||||
|
|
||||||
def process_meta(self):
    """
    Process JSON meta field.

    Creates the Message object from the feeder payload, then links it to
    its Chat, to the sender UserAccount and to the related Username objects.

    :return: None
    """
    meta = self.json_data['meta']
    mess_id = meta['id']

    # Id of the message this one replies to, if any
    reply_to = meta.get('reply_to')
    reply_to_id = reply_to['id'] if reply_to else None

    # fromtimestamp() without tz yields a local-time date
    timestamp = meta['date']['timestamp']
    date = datetime.datetime.fromtimestamp(timestamp).strftime('%Y%m%d')

    translation = self.json_data.get('translation') or None

    decoded = base64.standard_b64decode(self.json_data['data'])
    # NOTE(review): gunzip_bytes_obj returns None on invalid gzip data;
    # confirm that Messages.create accepts a None content
    content = gunzip_bytes_obj(decoded)
    message = Messages.create(self.item_id, content, translation=translation)

    # message chat
    chat = None
    if meta.get('chat'):
        chat = Chat(meta['chat']['id'], 'telegram')

        if meta['chat'].get('username'):
            chat_username = Username(meta['chat']['username'], 'telegram')
            chat.update_username_timeline(chat_username.get_global_id(), timestamp)

        # Chat---Message
        chat.add(date)
        chat.add_message(message.get_global_id(), timestamp, mess_id, reply_id=reply_to_id)

    # message sender
    if meta.get('sender'):  # TODO handle message channel forward - check if is user
        sender = meta['sender']
        user_account = UsersAccount.UserAccount(sender['id'], 'telegram')
        # UserAccount---Message
        user_account.add(date, obj=message)
        # UserAccount---Chat
        # chat is None when the meta carries no chat: skip the correlation
        # instead of failing on a None attribute access
        if chat is not None:
            user_account.add_correlation(chat.type, chat.get_subtype(r_str=True), chat.id)

        if sender.get('firstname'):
            user_account.set_first_name(sender['firstname'])
        if sender.get('lastname'):
            user_account.set_last_name(sender['lastname'])
        if sender.get('phone'):
            user_account.set_phone(sender['phone'])

        if sender.get('username'):
            username = Username(sender['username'], 'telegram')
            # TODO timeline or/and correlation ????
            user_account.add_correlation(username.type, username.get_subtype(r_str=True), username.id)
            user_account.update_username_timeline(username.get_global_id(), timestamp)

            # Username---Message
            username.add(date)  # TODO # correlation message ???

        # TODO Chat---Username correlation ???

    # TODO handle meta['fwd_from'] (post_author: user first name)
    # TODO reply threads ????
    # message edit ????

    return None
|
||||||
|
|
|
@ -17,7 +17,7 @@ sys.path.append(os.environ['AIL_BIN'])
|
||||||
##################################
|
##################################
|
||||||
from importer.feeders.Default import DefaultFeeder
|
from importer.feeders.Default import DefaultFeeder
|
||||||
from lib.objects.Usernames import Username
|
from lib.objects.Usernames import Username
|
||||||
from lib import item_basic
|
from lib.objects.Items import Item
|
||||||
|
|
||||||
class TwitterFeeder(DefaultFeeder):
|
class TwitterFeeder(DefaultFeeder):
|
||||||
|
|
||||||
|
@ -40,9 +40,9 @@ class TwitterFeeder(DefaultFeeder):
|
||||||
'''
|
'''
|
||||||
# tweet_id = str(self.json_data['meta']['twitter:tweet_id'])
|
# tweet_id = str(self.json_data['meta']['twitter:tweet_id'])
|
||||||
# item_basic.add_map_obj_id_item_id(tweet_id, item_id, 'twitter_id') ############################################
|
# item_basic.add_map_obj_id_item_id(tweet_id, item_id, 'twitter_id') ############################################
|
||||||
|
item = Item(self.item_id)
|
||||||
date = item_basic.get_item_date(self.item_id)
|
date = item.get_date()
|
||||||
user = str(self.json_data['meta']['twitter:id'])
|
user = str(self.json_data['meta']['twitter:id'])
|
||||||
username = Username(user, 'twitter')
|
username = Username(user, 'twitter')
|
||||||
username.add(date, item_id)
|
username.add(date, item)
|
||||||
return None
|
return None
|
||||||
|
|
|
@ -235,18 +235,27 @@ class Investigation(object):
|
||||||
objs.append(dict_obj)
|
objs.append(dict_obj)
|
||||||
return objs
|
return objs
|
||||||
|
|
||||||
|
def get_objects_comment(self, obj_global_id):
    """
    Return the comment stored for an object of this investigation.

    :param obj_global_id: field name in the investigation comments hash
    :return: the value returned by the hash lookup
    """
    comments_key = f'investigations:objs:comment:{self.uuid}'
    return r_tracking.hget(comments_key, obj_global_id)
|
||||||
|
|
||||||
|
def set_objects_comment(self, obj_global_id, comment):
    """
    Store a comment for an object of this investigation.

    Nothing is written when the comment is empty.

    :param obj_global_id: field name in the investigation comments hash
    :param comment: comment text
    """
    if not comment:
        return
    comments_key = f'investigations:objs:comment:{self.uuid}'
    r_tracking.hset(comments_key, obj_global_id, comment)
|
||||||
|
|
||||||
# # TODO: def register_object(self, Object): in OBJECT CLASS
|
# # TODO: def register_object(self, Object): in OBJECT CLASS
|
||||||
|
|
||||||
def register_object(self, obj_id, obj_type, subtype, comment=''):
    """
    Register an object in this investigation.

    Adds the object to the investigation set and the investigation to the
    object set, stores the optional comment, then refreshes the last
    change timestamp.

    :param obj_id: object id
    :param obj_type: object type
    :param subtype: object subtype
    :param comment: optional comment saved for this object
    """
    obj_global_id = f'{obj_type}:{subtype}:{obj_id}'
    r_tracking.sadd(f'investigations:objs:{self.uuid}', obj_global_id)
    r_tracking.sadd(f'obj:investigations:{obj_global_id}', self.uuid)
    if comment:
        self.set_objects_comment(obj_global_id, comment)
    self.set_last_change(int(time.time()))
|
||||||
|
|
||||||
|
|
||||||
def unregister_object(self, obj_id, obj_type, subtype):
|
def unregister_object(self, obj_id, obj_type, subtype):
|
||||||
r_tracking.srem(f'investigations:objs:{self.uuid}', f'{obj_type}:{subtype}:{obj_id}')
|
r_tracking.srem(f'investigations:objs:{self.uuid}', f'{obj_type}:{subtype}:{obj_id}')
|
||||||
r_tracking.srem(f'obj:investigations:{obj_type}:{subtype}:{obj_id}', self.uuid)
|
r_tracking.srem(f'obj:investigations:{obj_type}:{subtype}:{obj_id}', self.uuid)
|
||||||
|
r_tracking.hdel(f'investigations:objs:comment:{self.uuid}', f'{obj_type}:{subtype}:{obj_id}')
|
||||||
timestamp = int(time.time())
|
timestamp = int(time.time())
|
||||||
self.set_last_change(timestamp)
|
self.set_last_change(timestamp)
|
||||||
|
|
||||||
|
@ -351,7 +360,7 @@ def get_investigations_selector():
|
||||||
for investigation_uuid in get_all_investigations():
|
for investigation_uuid in get_all_investigations():
|
||||||
investigation = Investigation(investigation_uuid)
|
investigation = Investigation(investigation_uuid)
|
||||||
name = investigation.get_info()
|
name = investigation.get_info()
|
||||||
l_investigations.append({"id":investigation_uuid, "name": name})
|
l_investigations.append({"id": investigation_uuid, "name": name})
|
||||||
return l_investigations
|
return l_investigations
|
||||||
|
|
||||||
#{id:'8dc4b81aeff94a9799bd70ba556fa345',name:"Paris"}
|
#{id:'8dc4b81aeff94a9799bd70ba556fa345',name:"Paris"}
|
||||||
|
@ -453,7 +462,11 @@ def api_register_object(json_dict):
|
||||||
if subtype == 'None':
|
if subtype == 'None':
|
||||||
subtype = ''
|
subtype = ''
|
||||||
obj_id = json_dict.get('id', '').replace(' ', '')
|
obj_id = json_dict.get('id', '').replace(' ', '')
|
||||||
res = investigation.register_object(obj_id, obj_type, subtype)
|
|
||||||
|
comment = json_dict.get('comment', '')
|
||||||
|
# if comment:
|
||||||
|
# comment = escape(comment)
|
||||||
|
res = investigation.register_object(obj_id, obj_type, subtype, comment=comment)
|
||||||
return res, 200
|
return res, 200
|
||||||
|
|
||||||
def api_unregister_object(json_dict):
|
def api_unregister_object(json_dict):
|
||||||
|
|
|
@ -338,7 +338,7 @@ def get_galaxy_meta(galaxy_name, nb_active_tags=False):
|
||||||
else:
|
else:
|
||||||
meta['icon'] = f'fas fa-{icon}'
|
meta['icon'] = f'fas fa-{icon}'
|
||||||
if nb_active_tags:
|
if nb_active_tags:
|
||||||
meta['nb_active_tags'] = get_galaxy_nb_tags_enabled(galaxy)
|
meta['nb_active_tags'] = get_galaxy_nb_tags_enabled(galaxy.type)
|
||||||
meta['nb_tags'] = len(get_galaxy_tags(galaxy.type))
|
meta['nb_tags'] = len(get_galaxy_tags(galaxy.type))
|
||||||
return meta
|
return meta
|
||||||
|
|
||||||
|
|
|
@ -207,6 +207,13 @@ class Tracker:
|
||||||
if filters:
|
if filters:
|
||||||
self._set_field('filters', json.dumps(filters))
|
self._set_field('filters', json.dumps(filters))
|
||||||
|
|
||||||
|
def del_filters(self, tracker_type, to_track):
    """
    Remove this tracker from the per-object-type sets of every filtered
    object type, then delete its 'filters' field.

    :param tracker_type: tracker type used to build the set keys
    :param to_track: tracked value used to build the set keys
    """
    current_filters = self.get_filters()
    uuid_key = f'trackers:uuid:{tracker_type}:{to_track}'
    for filtered_type in current_filters:
        r_tracker.srem(f'trackers:objs:{tracker_type}:{filtered_type}', to_track)
        r_tracker.srem(uuid_key, f'{self.uuid}:{filtered_type}')
    # Drop the stored filters once the sets are cleaned
    r_tracker.hdel(f'tracker:{self.uuid}', 'filters')
|
||||||
|
|
||||||
def get_tracked(self):
    """Return the 'tracked' field of this tracker."""
    tracked = self._get_field('tracked')
    return tracked
|
||||||
|
|
||||||
|
@ -513,6 +520,7 @@ class Tracker:
|
||||||
self._set_mails(mails)
|
self._set_mails(mails)
|
||||||
|
|
||||||
# Filters
|
# Filters
|
||||||
|
self.del_filters(old_type, old_to_track)
|
||||||
if not filters:
|
if not filters:
|
||||||
filters = {}
|
filters = {}
|
||||||
for obj_type in get_objects_tracked():
|
for obj_type in get_objects_tracked():
|
||||||
|
@ -522,9 +530,6 @@ class Tracker:
|
||||||
for obj_type in filters:
|
for obj_type in filters:
|
||||||
r_tracker.sadd(f'trackers:objs:{tracker_type}:{obj_type}', to_track)
|
r_tracker.sadd(f'trackers:objs:{tracker_type}:{obj_type}', to_track)
|
||||||
r_tracker.sadd(f'trackers:uuid:{tracker_type}:{to_track}', f'{self.uuid}:{obj_type}')
|
r_tracker.sadd(f'trackers:uuid:{tracker_type}:{to_track}', f'{self.uuid}:{obj_type}')
|
||||||
if tracker_type != old_type:
|
|
||||||
r_tracker.srem(f'trackers:objs:{old_type}:{obj_type}', old_to_track)
|
|
||||||
r_tracker.srem(f'trackers:uuid:{old_type}:{old_to_track}', f'{self.uuid}:{obj_type}')
|
|
||||||
|
|
||||||
# Refresh Trackers
|
# Refresh Trackers
|
||||||
trigger_trackers_refresh(tracker_type)
|
trigger_trackers_refresh(tracker_type)
|
||||||
|
@ -650,14 +655,14 @@ def get_user_trackers_meta(user_id, tracker_type=None):
|
||||||
metas = []
|
metas = []
|
||||||
for tracker_uuid in get_user_trackers(user_id, tracker_type=tracker_type):
|
for tracker_uuid in get_user_trackers(user_id, tracker_type=tracker_type):
|
||||||
tracker = Tracker(tracker_uuid)
|
tracker = Tracker(tracker_uuid)
|
||||||
metas.append(tracker.get_meta(options={'mails', 'sparkline', 'tags'}))
|
metas.append(tracker.get_meta(options={'description', 'mails', 'sparkline', 'tags'}))
|
||||||
return metas
|
return metas
|
||||||
|
|
||||||
def get_global_trackers_meta(tracker_type=None):
    """
    Return the metadata of every global tracker.

    :param tracker_type: optional tracker type passed to get_global_trackers
    :return: list with one get_meta() result per tracker
    """
    return [
        Tracker(tracker_uuid).get_meta(options={'description', 'mails', 'sparkline', 'tags'})
        for tracker_uuid in get_global_trackers(tracker_type=tracker_type)
    ]
|
||||||
|
|
||||||
def get_users_trackers_meta():
|
def get_users_trackers_meta():
|
||||||
|
|
|
@ -247,7 +247,10 @@ class User(UserMixin):
|
||||||
self.id = "__anonymous__"
|
self.id = "__anonymous__"
|
||||||
|
|
||||||
def exists(self):
    """
    Check if this user exists.

    The anonymous user never exists; any other id is looked up in the
    user metadata store.
    """
    if self.id != "__anonymous__":
        return r_serv_db.exists(f'ail:user:metadata:{self.id}')
    return False
|
||||||
|
|
||||||
# return True or False
|
# return True or False
|
||||||
# def is_authenticated():
|
# def is_authenticated():
|
||||||
|
@ -287,3 +290,6 @@ class User(UserMixin):
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def get_role(self):
    """Return the 'role' field stored in this user's metadata hash in r_serv_db."""
    metadata_key = f'ail:user:metadata:{self.id}'
    return r_serv_db.hget(metadata_key, 'role')
|
||||||
|
|
|
@ -15,8 +15,8 @@ config_loader = ConfigLoader()
|
||||||
r_serv_db = config_loader.get_db_conn("Kvrocks_DB")
|
r_serv_db = config_loader.get_db_conn("Kvrocks_DB")
|
||||||
config_loader = None
|
config_loader = None
|
||||||
|
|
||||||
AIL_OBJECTS = sorted({'cookie-name', 'cve', 'cryptocurrency', 'decoded', 'domain', 'favicon', 'item', 'pgp',
|
AIL_OBJECTS = sorted({'chat', 'cookie-name', 'cve', 'cryptocurrency', 'decoded', 'domain', 'etag', 'favicon', 'hhhash', 'item',
|
||||||
'screenshot', 'title', 'username'})
|
'pgp', 'screenshot', 'title', 'user-account', 'username'})
|
||||||
|
|
||||||
def get_ail_uuid():
|
def get_ail_uuid():
|
||||||
ail_uuid = r_serv_db.get('ail:uuid')
|
ail_uuid = r_serv_db.get('ail:uuid')
|
||||||
|
@ -38,9 +38,11 @@ def get_all_objects():
|
||||||
return AIL_OBJECTS
|
return AIL_OBJECTS
|
||||||
|
|
||||||
def get_objects_with_subtypes():
|
def get_objects_with_subtypes():
|
||||||
return ['cryptocurrency', 'pgp', 'username']
|
return ['chat', 'cryptocurrency', 'pgp', 'username']
|
||||||
|
|
||||||
def get_object_all_subtypes(obj_type):
|
def get_object_all_subtypes(obj_type):
|
||||||
|
if obj_type == 'chat':
|
||||||
|
return ['discord', 'jabber', 'telegram']
|
||||||
if obj_type == 'cryptocurrency':
|
if obj_type == 'cryptocurrency':
|
||||||
return ['bitcoin', 'bitcoin-cash', 'dash', 'ethereum', 'litecoin', 'monero', 'zcash']
|
return ['bitcoin', 'bitcoin-cash', 'dash', 'ethereum', 'litecoin', 'monero', 'zcash']
|
||||||
if obj_type == 'pgp':
|
if obj_type == 'pgp':
|
||||||
|
@ -66,6 +68,14 @@ def get_all_objects_with_subtypes_tuple():
|
||||||
str_objs.append((obj_type, ''))
|
str_objs.append((obj_type, ''))
|
||||||
return str_objs
|
return str_objs
|
||||||
|
|
||||||
|
def unpack_obj_global_id(global_id, r_type='tuple'):
    """
    Split an object global id of the form 'type:subtype:id' into its parts.

    Only the first two ':' are used as separators, so an id that itself
    contains ':' is kept intact.

    :param global_id: string formatted as 'type:subtype:id'
    :param r_type: 'dict' to get a dict with the keys 'type', 'subtype' and 'id';
                   any other value returns the sequence [type, subtype, id]
    :return: dict or list, depending on r_type
    :raises IndexError: in 'dict' mode, if global_id holds fewer than two ':'
    """
    obj = global_id.split(':', 2)
    if r_type == 'dict':
        # obj is a list: it must be indexed with the integer 2, not the string '2'
        return {'type': obj[0], 'subtype': obj[1], 'id': obj[2]}
    else:  # tuple(type, subtype, id)
        return obj
|
||||||
|
|
||||||
|
|
||||||
##-- AIL OBJECTS --##
|
##-- AIL OBJECTS --##
|
||||||
|
|
||||||
#### Redis ####
|
#### Redis ####
|
||||||
|
|
|
@ -15,38 +15,15 @@ config_loader = ConfigLoader()
|
||||||
r_db = config_loader.get_db_conn("Kvrocks_DB")
|
r_db = config_loader.get_db_conn("Kvrocks_DB")
|
||||||
config_loader = None
|
config_loader = None
|
||||||
|
|
||||||
BACKGROUND_UPDATES = {
|
# # # # # # # #
|
||||||
'v1.5': {
|
# #
|
||||||
'nb_updates': 5,
|
# UPDATE #
|
||||||
'message': 'Tags and Screenshots'
|
# #
|
||||||
},
|
# # # # # # # #
|
||||||
'v2.4': {
|
|
||||||
'nb_updates': 1,
|
|
||||||
'message': ' Domains Tags and Correlations'
|
|
||||||
},
|
|
||||||
'v2.6': {
|
|
||||||
'nb_updates': 1,
|
|
||||||
'message': 'Domains Tags and Correlations'
|
|
||||||
},
|
|
||||||
'v2.7': {
|
|
||||||
'nb_updates': 1,
|
|
||||||
'message': 'Domains Tags'
|
|
||||||
},
|
|
||||||
'v3.4': {
|
|
||||||
'nb_updates': 1,
|
|
||||||
'message': 'Domains Languages'
|
|
||||||
},
|
|
||||||
'v3.7': {
|
|
||||||
'nb_updates': 1,
|
|
||||||
'message': 'Trackers first_seen/last_seen'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def get_ail_version():
|
def get_ail_version():
|
||||||
return r_db.get('ail:version')
|
return r_db.get('ail:version')
|
||||||
|
|
||||||
|
|
||||||
def get_ail_float_version():
|
def get_ail_float_version():
|
||||||
version = get_ail_version()
|
version = get_ail_version()
|
||||||
if version:
|
if version:
|
||||||
|
@ -55,6 +32,179 @@ def get_ail_float_version():
|
||||||
version = 0
|
version = 0
|
||||||
return version
|
return version
|
||||||
|
|
||||||
|
# # # - - # # #
|
||||||
|
|
||||||
|
# # # # # # # # # # # #
|
||||||
|
# #
|
||||||
|
# UPDATE BACKGROUND #
|
||||||
|
# #
|
||||||
|
# # # # # # # # # # # #
|
||||||
|
|
||||||
|
|
||||||
|
# Registry of the background updates, keyed by AIL version tag.
# Each entry holds a human readable 'message' and the list of 'scripts'
# (file names) attached to that version.
BACKGROUND_UPDATES = {
    'v5.2': {'message': 'Compress HAR', 'scripts': ['compress_har.py']},
}
|
||||||
|
|
||||||
|
class AILBackgroundUpdate:
    """
    AIL Background Update.

    Accessor for the state of the background update of one AIL version.
    Apart from the version tag kept on the instance, every value is read from
    and written to the 'ail:update:background' hash of r_db. Static
    information (message, scripts) comes from BACKGROUND_UPDATES.
    """

    # Hash holding the state of the background update
    _KEY = 'ail:update:background'

    def __init__(self, version):
        """
        :param version: AIL version tag, used as key in BACKGROUND_UPDATES
        """
        self.version = version

    def _get_field(self, field):
        """Return the value of a field of the state hash (None if unset)."""
        return r_db.hget(self._KEY, field)

    def _set_field(self, field, value):
        """Set a field of the state hash."""
        r_db.hset(self._KEY, field, value)

    def get_version(self):
        """Return the version tag this instance was created with."""
        return self.version

    def get_message(self):
        """Return the registry message of this version, '' if there is none."""
        return BACKGROUND_UPDATES.get(self.version, {}).get('message', '')

    def get_error(self):
        """Return the stored 'error' field."""
        return self._get_field('error')

    def set_error(self, error):  # TODO ADD LOGS
        """Store an error in the 'error' field."""
        self._set_field('error', error)

    def get_nb_scripts(self):
        """
        Return the number of scripts registered for this version.

        A version without a 'scripts' entry counts as one script.
        """
        return len(BACKGROUND_UPDATES.get(self.version, {}).get('scripts', ['']))

    def get_scripts(self):
        """Return the list of scripts registered for this version ([] if none)."""
        return BACKGROUND_UPDATES.get(self.version, {}).get('scripts', [])

    def get_nb_scripts_done(self):
        """Return the 'done' counter as an int, 0 if it is unset or not a number."""
        done = self._get_field('done')
        try:
            done = int(done)
        except (TypeError, ValueError):
            done = 0
        return done

    def inc_nb_scripts_done(self):
        """Increase the 'done' counter by one."""
        self._set_field('done', self.get_nb_scripts_done() + 1)

    def get_script(self):
        """Return the stored 'script' field (None if no script was started)."""
        return self._get_field('script')

    def get_script_path(self):
        """
        Return the path of the current script: $AIL_HOME/update/<version>/<script>.

        Only the base name of the stored script is used.

        :return: the path, or None if no script is recorded
        :raises KeyError: if a script is recorded and AIL_HOME is not in the environment
        """
        script = self.get_script()
        # No script started yet: os.path.basename(None) would raise a TypeError
        if not script:
            return None
        path = os.path.basename(script)
        if path:
            return os.path.join(os.environ['AIL_HOME'], 'update', self.version, path)
        return None

    def get_nb_to_update(self):  # TODO use cache ?????
        """Return the 'nb_to_update' field as an int, 1 if it is unset or empty."""
        nb_to_update = self._get_field('nb_to_update')
        if not nb_to_update:
            nb_to_update = 1
        return int(nb_to_update)

    def set_nb_to_update(self, nb):
        """Store the 'nb_to_update' field, converted to int."""
        self._set_field('nb_to_update', int(nb))

    def get_nb_updated(self):  # TODO use cache ?????
        """Return the 'nb_updated' field as an int, 0 if it is unset or empty."""
        nb_updated = self._get_field('nb_updated')
        if not nb_updated:
            nb_updated = 0
        return int(nb_updated)

    def inc_nb_updated(self):  # TODO use cache ?????
        """Increase the 'nb_updated' field by one with HINCRBY."""
        r_db.hincrby(self._KEY, 'nb_updated', 1)

    def get_progress(self):  # TODO use cache ?????
        """Return the stored 'progress' field of the current script."""
        return self._get_field('progress')

    def set_progress(self, progress):
        """Store the 'progress' field of the current script."""
        self._set_field('progress', progress)

    def update_progress(self):
        """
        Compute the progress of the current script from nb_updated / nb_to_update,
        store it, print it and return it.

        :return: progress as an int percentage
        """
        nb_updated = self.get_nb_updated()
        nb_to_update = self.get_nb_to_update()
        if nb_updated == nb_to_update:
            progress = 100
        elif nb_updated > nb_to_update:
            # more updated than expected: nb_to_update can't be trusted, stay below 100
            progress = 99
        else:
            progress = int((nb_updated * 100) / nb_to_update)
        self.set_progress(progress)
        print(f'{nb_updated}/{nb_to_update} updated {progress}%')
        return progress

    def is_running(self):
        """Return True if the 'version' field of the state hash is this version."""
        return self._get_field('version') == self.version

    def get_meta(self, options=None):
        """
        Return the metadata of this background update.

        :param options: optional collection of extra fields; 'message' adds the
                        registry message
        :return: dict with the keys 'version', 'error', 'script', 'script_progress',
                 'nb_update', 'nb_completed', 'progress' (and 'message' on request)
        """
        meta = {'version': self.get_version(),
                'error': self.get_error(),
                'script': self.get_script(),
                'script_progress': self.get_progress(),
                'nb_update': self.get_nb_scripts(),
                'nb_completed': self.get_nb_scripts_done()}
        if meta['nb_update']:
            meta['progress'] = int(meta['nb_completed'] * 100 / meta['nb_update'])
        else:
            # version registered with an empty scripts list: nothing to run,
            # avoid a ZeroDivisionError
            meta['progress'] = 100
        if options and 'message' in options:
            meta['message'] = self.get_message()
        return meta

    def start(self):
        """Record this version as the running one and drop any previous error."""
        self._set_field('version', self.version)
        r_db.hdel(self._KEY, 'error')

    def start_script(self, script):
        """Reset the per-script fields, record the script name and set its progress to 0."""
        self.clear()
        self._set_field('script', script)
        self.set_progress(0)

    def end_script(self):
        """Set the script progress to 100 and increase the 'done' counter."""
        self.set_progress(100)
        self.inc_nb_scripts_done()

    def clear(self):
        """Delete the per-script fields: error, progress, nb_updated, nb_to_update."""
        # HDEL accepts several fields: one round trip instead of four
        r_db.hdel(self._KEY, 'error', 'progress', 'nb_updated', 'nb_to_update')

    def end(self):
        """Delete the state hash and remove this version from the 'ail:updates:background' set."""
        r_db.delete(self._KEY)
        r_db.srem('ail:updates:background', self.version)
|
||||||
|
|
||||||
|
|
||||||
|
# To Add in update script
|
||||||
|
def add_background_update(version):
    """Add a version to the 'ail:updates:background' set of r_db."""
    updates_key = 'ail:updates:background'
    r_db.sadd(updates_key, version)
|
||||||
|
|
||||||
|
def is_update_background_running():
    """Return the result of EXISTS on the 'ail:update:background' key (truthy if it exists)."""
    state_key = 'ail:update:background'
    return r_db.exists(state_key)
|
||||||
|
|
||||||
|
def get_update_background_version():
    """Return the 'version' field of the 'ail:update:background' hash."""
    state_key = 'ail:update:background'
    return r_db.hget(state_key, 'version')
|
||||||
|
|
||||||
|
def get_update_background_meta(options=set()):
    """
    Return the metadata of the background update recorded in r_db.

    :param options: forwarded to AILBackgroundUpdate.get_meta
    :return: metadata dict, {} if no version is recorded
    """
    version = get_update_background_version()
    if not version:
        return {}
    update = AILBackgroundUpdate(version)
    return update.get_meta(options=options)
|
||||||
|
|
||||||
|
def get_update_background_to_launch():
    """
    Return the versions to launch: the keys of BACKGROUND_UPDATES that are also
    members of the 'ail:updates:background' set, in BACKGROUND_UPDATES order.
    """
    pending = r_db.smembers('ail:updates:background')
    return [version for version in BACKGROUND_UPDATES if version in pending]
|
||||||
|
|
||||||
|
# # # - - # # #
|
||||||
|
|
||||||
|
##########################################################################################
|
||||||
|
##########################################################################################
|
||||||
|
##########################################################################################
|
||||||
|
|
||||||
def get_ail_all_updates(date_separator='-'):
|
def get_ail_all_updates(date_separator='-'):
|
||||||
dict_update = r_db.hgetall('ail:update_date')
|
dict_update = r_db.hgetall('ail:update_date')
|
||||||
|
@ -87,111 +237,6 @@ def check_version(version):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
#### UPDATE BACKGROUND ####
|
|
||||||
|
|
||||||
def exits_background_update_to_launch():
|
|
||||||
return r_db.scard('ail:update:to_update') != 0
|
|
||||||
|
|
||||||
|
|
||||||
def is_version_in_background_update(version):
|
|
||||||
return r_db.sismember('ail:update:to_update', version)
|
|
||||||
|
|
||||||
|
|
||||||
def get_all_background_updates_to_launch():
|
|
||||||
return r_db.smembers('ail:update:to_update')
|
|
||||||
|
|
||||||
|
|
||||||
def get_current_background_update():
|
|
||||||
return r_db.get('ail:update:update_in_progress')
|
|
||||||
|
|
||||||
|
|
||||||
def get_current_background_update_script():
|
|
||||||
return r_db.get('ail:update:current_background_script')
|
|
||||||
|
|
||||||
|
|
||||||
def get_current_background_update_script_path(version, script_name):
|
|
||||||
return os.path.join(os.environ['AIL_HOME'], 'update', version, script_name)
|
|
||||||
|
|
||||||
|
|
||||||
def get_current_background_nb_update_completed():
|
|
||||||
return r_db.scard('ail:update:update_in_progress:completed')
|
|
||||||
|
|
||||||
|
|
||||||
def get_current_background_update_progress():
|
|
||||||
progress = r_db.get('ail:update:current_background_script_stat')
|
|
||||||
if not progress:
|
|
||||||
progress = 0
|
|
||||||
return int(progress)
|
|
||||||
|
|
||||||
|
|
||||||
def get_background_update_error():
|
|
||||||
return r_db.get('ail:update:error')
|
|
||||||
|
|
||||||
|
|
||||||
def add_background_updates_to_launch(version):
|
|
||||||
return r_db.sadd('ail:update:to_update', version)
|
|
||||||
|
|
||||||
|
|
||||||
def start_background_update(version):
|
|
||||||
r_db.delete('ail:update:error')
|
|
||||||
r_db.set('ail:update:update_in_progress', version)
|
|
||||||
|
|
||||||
|
|
||||||
def set_current_background_update_script(script_name):
|
|
||||||
r_db.set('ail:update:current_background_script', script_name)
|
|
||||||
r_db.set('ail:update:current_background_script_stat', 0)
|
|
||||||
|
|
||||||
|
|
||||||
def set_current_background_update_progress(progress):
|
|
||||||
r_db.set('ail:update:current_background_script_stat', progress)
|
|
||||||
|
|
||||||
|
|
||||||
def set_background_update_error(error):
|
|
||||||
r_db.set('ail:update:error', error)
|
|
||||||
|
|
||||||
|
|
||||||
def end_background_update_script():
|
|
||||||
r_db.sadd('ail:update:update_in_progress:completed')
|
|
||||||
|
|
||||||
|
|
||||||
def end_background_update(version):
|
|
||||||
r_db.delete('ail:update:update_in_progress')
|
|
||||||
r_db.delete('ail:update:current_background_script')
|
|
||||||
r_db.delete('ail:update:current_background_script_stat')
|
|
||||||
r_db.delete('ail:update:update_in_progress:completed')
|
|
||||||
r_db.srem('ail:update:to_update', version)
|
|
||||||
|
|
||||||
|
|
||||||
def clear_background_update():
|
|
||||||
r_db.delete('ail:update:error')
|
|
||||||
r_db.delete('ail:update:update_in_progress')
|
|
||||||
r_db.delete('ail:update:current_background_script')
|
|
||||||
r_db.delete('ail:update:current_background_script_stat')
|
|
||||||
r_db.delete('ail:update:update_in_progress:completed')
|
|
||||||
|
|
||||||
|
|
||||||
def get_update_background_message(version):
|
|
||||||
return BACKGROUND_UPDATES[version]['message']
|
|
||||||
|
|
||||||
|
|
||||||
# TODO: Detect error in subprocess
|
|
||||||
def get_update_background_metadata():
|
|
||||||
dict_update = {}
|
|
||||||
version = get_current_background_update()
|
|
||||||
if version:
|
|
||||||
dict_update['version'] = version
|
|
||||||
dict_update['script'] = get_current_background_update_script()
|
|
||||||
dict_update['script_progress'] = get_current_background_update_progress()
|
|
||||||
dict_update['nb_update'] = BACKGROUND_UPDATES[dict_update['version']]['nb_updates']
|
|
||||||
dict_update['nb_completed'] = get_current_background_nb_update_completed()
|
|
||||||
dict_update['progress'] = int(dict_update['nb_completed'] * 100 / dict_update['nb_update'])
|
|
||||||
dict_update['error'] = get_background_update_error()
|
|
||||||
return dict_update
|
|
||||||
|
|
||||||
|
|
||||||
##-- UPDATE BACKGROUND --##
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
res = check_version('v3.1..1')
|
res = check_version('v3.1..1')
|
||||||
print(res)
|
print(res)
|
||||||
|
|
|
@ -41,17 +41,22 @@ config_loader = None
|
||||||
##################################
|
##################################
|
||||||
|
|
||||||
CORRELATION_TYPES_BY_OBJ = {
|
CORRELATION_TYPES_BY_OBJ = {
|
||||||
|
"chat": ["user-account"], # message or direct correlation like cve, bitcoin, ... ???
|
||||||
"cookie-name": ["domain"],
|
"cookie-name": ["domain"],
|
||||||
"cryptocurrency": ["domain", "item"],
|
"cryptocurrency": ["domain", "item", "message"],
|
||||||
"cve": ["domain", "item"],
|
"cve": ["domain", "item", "message"],
|
||||||
"decoded": ["domain", "item"],
|
"decoded": ["domain", "item", "message"],
|
||||||
"domain": ["cve", "cookie-name", "cryptocurrency", "decoded", "favicon", "item", "pgp", "title", "screenshot", "username"],
|
"domain": ["cve", "cookie-name", "cryptocurrency", "decoded", "etag", "favicon", "hhhash", "item", "pgp", "title", "screenshot", "username"],
|
||||||
|
"etag": ["domain"],
|
||||||
"favicon": ["domain", "item"], # TODO Decoded
|
"favicon": ["domain", "item"], # TODO Decoded
|
||||||
"item": ["cve", "cryptocurrency", "decoded", "domain", "favicon", "pgp", "screenshot", "title", "username"],
|
"hhhash": ["domain"],
|
||||||
"pgp": ["domain", "item"],
|
"item": ["cve", "cryptocurrency", "decoded", "domain", "favicon", "pgp", "screenshot", "title", "username"], # chat ???
|
||||||
|
"message": ["cve", "cryptocurrency", "decoded", "pgp", "user-account"], # chat ??
|
||||||
|
"pgp": ["domain", "item", "message"],
|
||||||
"screenshot": ["domain", "item"],
|
"screenshot": ["domain", "item"],
|
||||||
"title": ["domain", "item"],
|
"title": ["domain", "item"],
|
||||||
"username": ["domain", "item"],
|
"user-account": ["chat", "message"],
|
||||||
|
"username": ["domain", "item", "message"], # TODO chat-user/account
|
||||||
}
|
}
|
||||||
|
|
||||||
def get_obj_correl_types(obj_type):
|
def get_obj_correl_types(obj_type):
|
||||||
|
@ -63,6 +68,8 @@ def sanityze_obj_correl_types(obj_type, correl_types):
|
||||||
correl_types = set(correl_types).intersection(obj_correl_types)
|
correl_types = set(correl_types).intersection(obj_correl_types)
|
||||||
if not correl_types:
|
if not correl_types:
|
||||||
correl_types = obj_correl_types
|
correl_types = obj_correl_types
|
||||||
|
if not correl_types:
|
||||||
|
return []
|
||||||
return correl_types
|
return correl_types
|
||||||
|
|
||||||
def get_nb_correlation_by_correl_type(obj_type, subtype, obj_id, correl_type):
|
def get_nb_correlation_by_correl_type(obj_type, subtype, obj_id, correl_type):
|
||||||
|
@ -169,18 +176,18 @@ def get_obj_str_id(obj_type, subtype, obj_id):
|
||||||
subtype = ''
|
subtype = ''
|
||||||
return f'{obj_type}:{subtype}:{obj_id}'
|
return f'{obj_type}:{subtype}:{obj_id}'
|
||||||
|
|
||||||
def get_correlations_graph_nodes_links(obj_type, subtype, obj_id, filter_types=[], max_nodes=300, level=1, flask_context=False):
|
def get_correlations_graph_nodes_links(obj_type, subtype, obj_id, filter_types=[], max_nodes=300, level=1, objs_hidden=set(), flask_context=False):
|
||||||
links = set()
|
links = set()
|
||||||
nodes = set()
|
nodes = set()
|
||||||
meta = {'complete': True, 'objs': set()}
|
meta = {'complete': True, 'objs': set()}
|
||||||
|
|
||||||
obj_str_id = get_obj_str_id(obj_type, subtype, obj_id)
|
obj_str_id = get_obj_str_id(obj_type, subtype, obj_id)
|
||||||
|
|
||||||
_get_correlations_graph_node(links, nodes, meta, obj_type, subtype, obj_id, level, max_nodes, filter_types=filter_types, previous_str_obj='')
|
_get_correlations_graph_node(links, nodes, meta, obj_type, subtype, obj_id, level, max_nodes, filter_types=filter_types, objs_hidden=objs_hidden, previous_str_obj='')
|
||||||
return obj_str_id, nodes, links, meta
|
return obj_str_id, nodes, links, meta
|
||||||
|
|
||||||
|
|
||||||
def _get_correlations_graph_node(links, nodes, meta, obj_type, subtype, obj_id, level, max_nodes, filter_types=[], previous_str_obj=''):
|
def _get_correlations_graph_node(links, nodes, meta, obj_type, subtype, obj_id, level, max_nodes, filter_types=[], objs_hidden=set(), previous_str_obj=''):
|
||||||
obj_str_id = get_obj_str_id(obj_type, subtype, obj_id)
|
obj_str_id = get_obj_str_id(obj_type, subtype, obj_id)
|
||||||
meta['objs'].add(obj_str_id)
|
meta['objs'].add(obj_str_id)
|
||||||
nodes.add(obj_str_id)
|
nodes.add(obj_str_id)
|
||||||
|
@ -191,6 +198,10 @@ def _get_correlations_graph_node(links, nodes, meta, obj_type, subtype, obj_id,
|
||||||
for str_obj in obj_correlations[correl_type]:
|
for str_obj in obj_correlations[correl_type]:
|
||||||
subtype2, obj2_id = str_obj.split(':', 1)
|
subtype2, obj2_id = str_obj.split(':', 1)
|
||||||
obj2_str_id = get_obj_str_id(correl_type, subtype2, obj2_id)
|
obj2_str_id = get_obj_str_id(correl_type, subtype2, obj2_id)
|
||||||
|
# filter objects to hide
|
||||||
|
if obj2_str_id in objs_hidden:
|
||||||
|
continue
|
||||||
|
|
||||||
meta['objs'].add(obj2_str_id)
|
meta['objs'].add(obj2_str_id)
|
||||||
|
|
||||||
if obj2_str_id == previous_str_obj:
|
if obj2_str_id == previous_str_obj:
|
||||||
|
@ -204,5 +215,5 @@ def _get_correlations_graph_node(links, nodes, meta, obj_type, subtype, obj_id,
|
||||||
|
|
||||||
if level > 0:
|
if level > 0:
|
||||||
next_level = level - 1
|
next_level = level - 1
|
||||||
_get_correlations_graph_node(links, nodes, meta, correl_type, subtype2, obj2_id, next_level, max_nodes, filter_types=filter_types, previous_str_obj=obj_str_id)
|
_get_correlations_graph_node(links, nodes, meta, correl_type, subtype2, obj2_id, next_level, max_nodes, filter_types=filter_types, objs_hidden=objs_hidden, previous_str_obj=obj_str_id)
|
||||||
|
|
||||||
|
|
|
@ -39,6 +39,7 @@ from packages import git_status
|
||||||
from packages import Date
|
from packages import Date
|
||||||
from lib.ConfigLoader import ConfigLoader
|
from lib.ConfigLoader import ConfigLoader
|
||||||
from lib.objects.Domains import Domain
|
from lib.objects.Domains import Domain
|
||||||
|
from lib.objects import HHHashs
|
||||||
from lib.objects.Items import Item
|
from lib.objects.Items import Item
|
||||||
|
|
||||||
config_loader = ConfigLoader()
|
config_loader = ConfigLoader()
|
||||||
|
@ -134,7 +135,7 @@ def unpack_url(url):
|
||||||
# # # # # # # # TODO CREATE NEW OBJECT
|
# # # # # # # # TODO CREATE NEW OBJECT
|
||||||
|
|
||||||
def get_favicon_from_html(html, domain, url):
|
def get_favicon_from_html(html, domain, url):
|
||||||
favicon_urls = extract_favicon_from_html(html, url)
|
favicon_urls, favicons = extract_favicon_from_html(html, url)
|
||||||
# add root favicon
|
# add root favicon
|
||||||
if not favicon_urls:
|
if not favicon_urls:
|
||||||
favicon_urls.add(f'{urlparse(url).scheme}://{domain}/favicon.ico')
|
favicon_urls.add(f'{urlparse(url).scheme}://{domain}/favicon.ico')
|
||||||
|
@ -162,7 +163,6 @@ def extract_favicon_from_html(html, url):
|
||||||
# - <meta name="msapplication-TileColor" content="#aaaaaa"> <meta name="theme-color" content="#ffffff">
|
# - <meta name="msapplication-TileColor" content="#aaaaaa"> <meta name="theme-color" content="#ffffff">
|
||||||
# - <meta name="msapplication-config" content="/icons/browserconfig.xml">
|
# - <meta name="msapplication-config" content="/icons/browserconfig.xml">
|
||||||
|
|
||||||
|
|
||||||
# Root Favicon
|
# Root Favicon
|
||||||
f = get_faup()
|
f = get_faup()
|
||||||
f.decode(url)
|
f.decode(url)
|
||||||
|
@ -244,13 +244,6 @@ def extract_description_from_html(html):
|
||||||
return description['content']
|
return description['content']
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
def extract_description_from_html(html):
|
|
||||||
soup = BeautifulSoup(html, 'html.parser')
|
|
||||||
description = soup.find('meta', attrs={'name': 'description'})
|
|
||||||
if description:
|
|
||||||
return description['content']
|
|
||||||
return ''
|
|
||||||
|
|
||||||
def extract_keywords_from_html(html):
|
def extract_keywords_from_html(html):
|
||||||
soup = BeautifulSoup(html, 'html.parser')
|
soup = BeautifulSoup(html, 'html.parser')
|
||||||
keywords = soup.find('meta', attrs={'name': 'keywords'})
|
keywords = soup.find('meta', attrs={'name': 'keywords'})
|
||||||
|
@ -264,6 +257,7 @@ def extract_author_from_html(html):
|
||||||
if keywords:
|
if keywords:
|
||||||
return keywords['content']
|
return keywords['content']
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
# # # - - # # #
|
# # # - - # # #
|
||||||
|
|
||||||
|
|
||||||
|
@ -275,7 +269,7 @@ def extract_author_from_html(html):
|
||||||
|
|
||||||
def create_har_id(date, item_id):
|
def create_har_id(date, item_id):
|
||||||
item_id = item_id.split('/')[-1]
|
item_id = item_id.split('/')[-1]
|
||||||
return os.path.join(date, f'{item_id}.json')
|
return os.path.join(date, f'{item_id}.json.gz')
|
||||||
|
|
||||||
def save_har(har_id, har_content):
|
def save_har(har_id, har_content):
|
||||||
# create dir
|
# create dir
|
||||||
|
@ -284,8 +278,8 @@ def save_har(har_id, har_content):
|
||||||
os.makedirs(har_dir)
|
os.makedirs(har_dir)
|
||||||
# save HAR
|
# save HAR
|
||||||
filename = os.path.join(get_har_dir(), har_id)
|
filename = os.path.join(get_har_dir(), har_id)
|
||||||
with open(filename, 'w') as f:
|
with gzip.open(filename, 'wb') as f:
|
||||||
f.write(json.dumps(har_content))
|
f.write(json.dumps(har_content).encode())
|
||||||
|
|
||||||
def get_all_har_ids():
|
def get_all_har_ids():
|
||||||
har_ids = []
|
har_ids = []
|
||||||
|
@ -299,9 +293,10 @@ def get_all_har_ids():
|
||||||
except (TypeError, ValueError):
|
except (TypeError, ValueError):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
for file in [f for f in os.listdir(today_root_dir) if os.path.isfile(os.path.join(today_root_dir, f))]:
|
if os.path.exists(today_root_dir):
|
||||||
har_id = os.path.relpath(os.path.join(today_root_dir, file), HAR_DIR)
|
for file in [f for f in os.listdir(today_root_dir) if os.path.isfile(os.path.join(today_root_dir, f))]:
|
||||||
har_ids.append(har_id)
|
har_id = os.path.relpath(os.path.join(today_root_dir, file), HAR_DIR)
|
||||||
|
har_ids.append(har_id)
|
||||||
|
|
||||||
for ydir in sorted(dirs_year, reverse=False):
|
for ydir in sorted(dirs_year, reverse=False):
|
||||||
search_dear = os.path.join(HAR_DIR, ydir)
|
search_dear = os.path.join(HAR_DIR, ydir)
|
||||||
|
@ -312,14 +307,17 @@ def get_all_har_ids():
|
||||||
har_ids.append(har_id)
|
har_ids.append(har_id)
|
||||||
return har_ids
|
return har_ids
|
||||||
|
|
||||||
def extract_cookies_names_from_har_by_har_id(har_id):
|
def get_har_content(har_id):
|
||||||
har_path = os.path.join(HAR_DIR, har_id)
|
har_path = os.path.join(HAR_DIR, har_id)
|
||||||
with open(har_path) as f:
|
try:
|
||||||
try:
|
with gzip.open(har_path) as f:
|
||||||
har_content = json.loads(f.read())
|
try:
|
||||||
except json.decoder.JSONDecodeError:
|
return json.loads(f.read())
|
||||||
har_content = {}
|
except json.decoder.JSONDecodeError:
|
||||||
return extract_cookies_names_from_har(har_content)
|
return {}
|
||||||
|
except Exception as e:
|
||||||
|
print(e) # TODO LOGS
|
||||||
|
return {}
|
||||||
|
|
||||||
def extract_cookies_names_from_har(har):
|
def extract_cookies_names_from_har(har):
|
||||||
cookies = set()
|
cookies = set()
|
||||||
|
@ -334,17 +332,110 @@ def extract_cookies_names_from_har(har):
|
||||||
cookies.add(name)
|
cookies.add(name)
|
||||||
return cookies
|
return cookies
|
||||||
|
|
||||||
def _reprocess_all_hars():
|
def _reprocess_all_hars_cookie_name():
|
||||||
from lib.objects import CookiesNames
|
from lib.objects import CookiesNames
|
||||||
for har_id in get_all_har_ids():
|
for har_id in get_all_har_ids():
|
||||||
domain = har_id.split('/')[-1]
|
domain = har_id.split('/')[-1]
|
||||||
domain = domain[:-41]
|
domain = domain[:-44]
|
||||||
date = har_id.split('/')
|
date = har_id.split('/')
|
||||||
date = f'{date[-4]}{date[-3]}{date[-2]}'
|
date = f'{date[-4]}{date[-3]}{date[-2]}'
|
||||||
for cookie_name in extract_cookies_names_from_har_by_har_id(har_id):
|
for cookie_name in extract_cookies_names_from_har(get_har_content(har_id)):
|
||||||
print(domain, date, cookie_name)
|
print(domain, date, cookie_name)
|
||||||
cookie = CookiesNames.create(cookie_name)
|
cookie = CookiesNames.create(cookie_name)
|
||||||
cookie.add(date, domain)
|
cookie.add(date, Domain(domain))
|
||||||
|
|
||||||
|
def extract_etag_from_har(har):  # TODO check response url
    """
    Collect the ETag values found in the response headers of a HAR.

    :param har: HAR content as a dict, read as log -> entries -> response -> headers,
                each header being a dict with a 'name' and a 'value'
    :return: set of non-empty etag values
    """
    etags = set()
    for entrie in har.get('log', {}).get('entries', []):
        for header in entrie.get('response', {}).get('headers', []):
            # HTTP field names are case-insensitive: accept 'ETag', 'etag', ...
            name = header.get('name') or ''
            if name.lower() == 'etag':
                etag = header.get('value')
                if etag:
                    etags.add(etag)
    return etags
|
||||||
|
|
||||||
|
def _reprocess_all_hars_etag():
    """Extract the etags of every stored HAR again and attach them to the HAR domain."""
    from lib.objects import Etags
    for har_id in get_all_har_ids():
        parts = har_id.split('/')
        # drop the last 44 characters of the file name
        # NOTE(review): presumably a 36 chars uuid + '.json.gz' - confirm
        domain = parts[-1][:-44]
        # the three path components before the file name are joined as the date
        year, month, day = parts[-4], parts[-3], parts[-2]
        date = f'{year}{month}{day}'
        har = get_har_content(har_id)
        for etag_content in extract_etag_from_har(har):
            print(domain, date, etag_content)
            etag = Etags.create(etag_content)
            etag.add(date, Domain(domain))
|
||||||
|
|
||||||
|
def extract_hhhash_by_id(har_id, domain, date):
    """Load the HAR identified by har_id and run extract_hhhash on its content."""
    har = get_har_content(har_id)
    return extract_hhhash(har, domain, date)
|
||||||
|
|
||||||
|
def extract_hhhash(har, domain, date):
    """
    Compute the HHHash of the responses of a HAR and save them.

    Each url is handled once. Only the entries answered with a status 200,
    whose url domain (as decoded by faup) equals `domain`, are hashed.
    A hash seen for the first time is created with HHHashs.create and linked
    to Domain(domain) at `date`. The collected hashes are printed.

    :param har: HAR content as a dict (log -> entries)
    :param domain: domain the entries must belong to
    :param date: date passed to the HHHash object when adding the domain
    :return: set of hhhash
    """
    # NOTE(review): nesting rebuilt from a flattened listing - confirm that a url
    # is marked as seen whatever its response status.
    found = set()
    seen_urls = set()
    for entrie in har.get('log', {}).get('entries', []):
        url = entrie.get('request').get('url')
        if url in seen_urls:
            continue
        seen_urls.add(url)

        # filter redirect
        if entrie.get('response').get('status') != 200:
            continue

        faup = get_faup()
        faup.decode(url)
        if faup.get().get('domain') != domain:
            continue

        headers = entrie.get('response').get('headers')
        hhhash_header = HHHashs.build_hhhash_headers(headers)
        hhhash = HHHashs.hhhash_headers(hhhash_header)

        if hhhash not in found:
            print('', url, hhhash)
            obj = HHHashs.create(hhhash_header, hhhash)
            obj.add(date, Domain(domain))
            found.add(hhhash)

    print()
    print()
    print('HHHASH:')
    for value in found:
        print(value)
    return found
|
||||||
|
|
||||||
|
def _reprocess_all_hars_hhhashs():
    """Run the hhhash extraction again on every stored HAR."""
    for har_id in get_all_har_ids():
        print()
        print(har_id)
        parts = har_id.split('/')
        # drop the last 44 characters of the file name
        # NOTE(review): presumably a 36 chars uuid + '.json.gz' - confirm
        domain = parts[-1][:-44]
        # the three path components before the file name are joined as the date
        year, month, day = parts[-4], parts[-3], parts[-2]
        date = f'{year}{month}{day}'
        extract_hhhash_by_id(har_id, domain, date)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def _gzip_har(har_id):
    """
    Compress a stored HAR to '<har>.gz' and delete the uncompressed file.

    Nothing is written if har_id already ends with '.gz', if the '.gz' file
    already exists or if the HAR is empty. The source file is removed only
    when both it and the '.gz' file exist.

    :param har_id: HAR path relative to HAR_DIR
    """
    har_path = os.path.join(HAR_DIR, har_id)
    new_id = f'{har_path}.gz'
    to_compress = not har_id.endswith('.gz') and not os.path.exists(new_id)
    if to_compress:
        with open(har_path, 'rb') as f_in:
            content = f_in.read()
        if content:
            with gzip.open(new_id, 'wb') as f_out:
                r = f_out.write(content)
                print(r)
    # NOTE(review): nesting rebuilt from a flattened listing - this check is
    # assumed to sit at function level.
    if os.path.exists(new_id) and os.path.exists(har_path):
        os.remove(har_path)
        print('delete:', har_path)
|
||||||
|
|
||||||
|
def _gzip_all_hars():
    """Call _gzip_har on every HAR id returned by get_all_har_ids."""
    har_ids = get_all_har_ids()
    for har_id in har_ids:
        _gzip_har(har_id)
|
||||||
|
|
||||||
# # # - - # # #
|
# # # - - # # #
|
||||||
|
|
||||||
|
@ -662,8 +753,7 @@ class Cookie:
|
||||||
meta[field] = value
|
meta[field] = value
|
||||||
if r_json:
|
if r_json:
|
||||||
data = json.dumps(meta, indent=4, sort_keys=True)
|
data = json.dumps(meta, indent=4, sort_keys=True)
|
||||||
meta = {'data': data}
|
meta = {'data': data, 'uuid': self.uuid}
|
||||||
meta['uuid'] = self.uuid
|
|
||||||
return meta
|
return meta
|
||||||
|
|
||||||
def edit(self, cookie_dict):
|
def edit(self, cookie_dict):
|
||||||
|
@ -775,7 +865,7 @@ def unpack_imported_json_cookie(json_cookie):
|
||||||
|
|
||||||
## - - ##
|
## - - ##
|
||||||
#### COOKIEJAR API ####
|
#### COOKIEJAR API ####
|
||||||
def api_import_cookies_from_json(user_id, cookiejar_uuid, json_cookies_str): # # TODO: add catch
|
def api_import_cookies_from_json(user_id, cookiejar_uuid, json_cookies_str): # # TODO: add catch
|
||||||
resp = api_verify_cookiejar_acl(cookiejar_uuid, user_id)
|
resp = api_verify_cookiejar_acl(cookiejar_uuid, user_id)
|
||||||
if resp:
|
if resp:
|
||||||
return resp
|
return resp
|
||||||
|
@ -944,8 +1034,8 @@ class CrawlerScheduler:
|
||||||
minutes = 0
|
minutes = 0
|
||||||
current_time = datetime.now().timestamp()
|
current_time = datetime.now().timestamp()
|
||||||
time_next_run = (datetime.now() + relativedelta(months=int(months), weeks=int(weeks),
|
time_next_run = (datetime.now() + relativedelta(months=int(months), weeks=int(weeks),
|
||||||
days=int(days), hours=int(hours),
|
days=int(days), hours=int(hours),
|
||||||
minutes=int(minutes))).timestamp()
|
minutes=int(minutes))).timestamp()
|
||||||
# Make sure the next capture is not scheduled for in a too short interval
|
# Make sure the next capture is not scheduled for in a too short interval
|
||||||
interval_next_capture = time_next_run - current_time
|
interval_next_capture = time_next_run - current_time
|
||||||
if interval_next_capture < self.min_frequency:
|
if interval_next_capture < self.min_frequency:
|
||||||
|
@ -1225,8 +1315,13 @@ class CrawlerCapture:
|
||||||
if task_uuid:
|
if task_uuid:
|
||||||
return CrawlerTask(task_uuid)
|
return CrawlerTask(task_uuid)
|
||||||
|
|
||||||
def get_start_time(self, r_str=True):
    """
    Return the start time of the task linked to this capture.

    :param r_str: if True (default), return the start time exactly as the
        task stores it; otherwise parse it with the
        "%Y/%m/%d - %H:%M.%S" format and return an integer Unix timestamp.
    """
    raw_start = self.get_task().get_start_time()
    if not r_str:
        parsed = datetime.strptime(raw_start, "%Y/%m/%d - %H:%M.%S")
        return int(parsed.timestamp())
    return raw_start
|
||||||
|
|
||||||
def get_status(self):
|
def get_status(self):
|
||||||
status = r_cache.hget(f'crawler:capture:{self.uuid}', 'status')
|
status = r_cache.hget(f'crawler:capture:{self.uuid}', 'status')
|
||||||
|
@ -1239,7 +1334,8 @@ class CrawlerCapture:
|
||||||
|
|
||||||
def create(self, task_uuid):
|
def create(self, task_uuid):
|
||||||
if self.exists():
|
if self.exists():
|
||||||
raise Exception(f'Error: Capture {self.uuid} already exists')
|
print(f'Capture {self.uuid} already exists') # TODO LOGS
|
||||||
|
return None
|
||||||
launch_time = int(time.time())
|
launch_time = int(time.time())
|
||||||
r_crawler.hset(f'crawler:task:{task_uuid}', 'capture', self.uuid)
|
r_crawler.hset(f'crawler:task:{task_uuid}', 'capture', self.uuid)
|
||||||
r_crawler.hset('crawler:captures:tasks', self.uuid, task_uuid)
|
r_crawler.hset('crawler:captures:tasks', self.uuid, task_uuid)
|
||||||
|
@ -1492,6 +1588,11 @@ class CrawlerTask:
|
||||||
def start(self):
|
def start(self):
|
||||||
self._set_field('start_time', datetime.now().strftime("%Y/%m/%d - %H:%M.%S"))
|
self._set_field('start_time', datetime.now().strftime("%Y/%m/%d - %H:%M.%S"))
|
||||||
|
|
||||||
|
def reset(self):
    """
    Reset this task: drop its recorded start time and queue it again.

    The 'start_time' field is removed from the 'crawler:task:<uuid>' hash,
    then the task is added back to the DB crawler queue with priority 49.
    """
    task_key = f'crawler:task:{self.uuid}'
    r_crawler.hdel(task_key, 'start_time')
    self.add_to_db_crawler_queue(49)
|
||||||
|
|
||||||
# Crawler
|
# Crawler
|
||||||
def remove(self): # zrem cache + DB
|
def remove(self): # zrem cache + DB
|
||||||
capture_uuid = self.get_capture()
|
capture_uuid = self.get_capture()
|
||||||
|
@ -1622,14 +1723,16 @@ def api_add_crawler_task(data, user_id=None):
|
||||||
|
|
||||||
if frequency:
|
if frequency:
|
||||||
# TODO verify user
|
# TODO verify user
|
||||||
return create_schedule(frequency, user_id, url, depth=depth_limit, har=har, screenshot=screenshot, header=None,
|
task_uuid = create_schedule(frequency, user_id, url, depth=depth_limit, har=har, screenshot=screenshot, header=None,
|
||||||
cookiejar=cookiejar_uuid, proxy=proxy, user_agent=None, tags=tags), 200
|
cookiejar=cookiejar_uuid, proxy=proxy, user_agent=None, tags=tags)
|
||||||
else:
|
else:
|
||||||
# TODO HEADERS
|
# TODO HEADERS
|
||||||
# TODO USER AGENT
|
# TODO USER AGENT
|
||||||
return create_task(url, depth=depth_limit, har=har, screenshot=screenshot, header=None,
|
task_uuid = create_task(url, depth=depth_limit, har=har, screenshot=screenshot, header=None,
|
||||||
cookiejar=cookiejar_uuid, proxy=proxy, user_agent=None, tags=tags,
|
cookiejar=cookiejar_uuid, proxy=proxy, user_agent=None, tags=tags,
|
||||||
parent='manual', priority=90), 200
|
parent='manual', priority=90)
|
||||||
|
|
||||||
|
return {'uuid': task_uuid}, 200
|
||||||
|
|
||||||
|
|
||||||
#### ####
|
#### ####
|
||||||
|
@ -1702,13 +1805,13 @@ class CrawlerProxy:
|
||||||
self.uuid = proxy_uuid
|
self.uuid = proxy_uuid
|
||||||
|
|
||||||
def get_description(self):
    """Return the 'description' field stored in this proxy's 'crawler:proxy:<uuid>' hash."""
    proxy_key = f'crawler:proxy:{self.uuid}'
    return r_crawler.hget(proxy_key, 'description')
|
||||||
|
|
||||||
# Host
|
# Host
|
||||||
# Port
|
# Port
|
||||||
# Type -> need test
|
# Type -> need test
|
||||||
def get_url(self):
    """Return the 'url' field stored in this proxy's 'crawler:proxy:<uuid>' hash."""
    proxy_key = f'crawler:proxy:{self.uuid}'
    return r_crawler.hget(proxy_key, 'url')
|
||||||
|
|
||||||
#### CRAWLER LACUS ####
|
#### CRAWLER LACUS ####
|
||||||
|
|
||||||
|
@ -1770,7 +1873,11 @@ def ping_lacus():
|
||||||
ping = False
|
ping = False
|
||||||
req_error = {'error': 'Lacus URL undefined', 'status_code': 400}
|
req_error = {'error': 'Lacus URL undefined', 'status_code': 400}
|
||||||
else:
|
else:
|
||||||
ping = lacus.is_up
|
try:
|
||||||
|
ping = lacus.is_up
|
||||||
|
except:
|
||||||
|
req_error = {'error': 'Failed to connect Lacus URL', 'status_code': 400}
|
||||||
|
ping = False
|
||||||
update_lacus_connection_status(ping, req_error=req_error)
|
update_lacus_connection_status(ping, req_error=req_error)
|
||||||
return ping
|
return ping
|
||||||
|
|
||||||
|
@ -1787,7 +1894,7 @@ def api_save_lacus_url_key(data):
|
||||||
# unpack json
|
# unpack json
|
||||||
manager_url = data.get('url', None)
|
manager_url = data.get('url', None)
|
||||||
api_key = data.get('api_key', None)
|
api_key = data.get('api_key', None)
|
||||||
if not manager_url: # or not api_key:
|
if not manager_url: # or not api_key:
|
||||||
return {'status': 'error', 'reason': 'No url or API key supplied'}, 400
|
return {'status': 'error', 'reason': 'No url or API key supplied'}, 400
|
||||||
# check if is valid url
|
# check if is valid url
|
||||||
try:
|
try:
|
||||||
|
@ -1830,7 +1937,7 @@ def api_set_crawler_max_captures(data):
|
||||||
save_nb_max_captures(nb_captures)
|
save_nb_max_captures(nb_captures)
|
||||||
return nb_captures, 200
|
return nb_captures, 200
|
||||||
|
|
||||||
## TEST ##
|
## TEST ##
|
||||||
|
|
||||||
def is_test_ail_crawlers_successful():
|
def is_test_ail_crawlers_successful():
|
||||||
return r_db.hget('crawler:tor:test', 'success') == 'True'
|
return r_db.hget('crawler:tor:test', 'success') == 'True'
|
||||||
|
@ -1903,14 +2010,16 @@ def test_ail_crawlers():
|
||||||
# TODO MOVE ME IN CRAWLER OR FLASK
|
# TODO MOVE ME IN CRAWLER OR FLASK
|
||||||
load_blacklist()
|
load_blacklist()
|
||||||
|
|
||||||
# if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
# delete_captures()
|
# delete_captures()
|
||||||
|
|
||||||
# item_id = 'crawled/2023/02/20/data.gz'
|
|
||||||
# item = Item(item_id)
|
|
||||||
# content = item.get_content()
|
|
||||||
# temp_url = ''
|
|
||||||
# r = extract_favicon_from_html(content, temp_url)
|
|
||||||
# print(r)
|
|
||||||
# _reprocess_all_hars()
|
|
||||||
|
|
||||||
|
# item_id = 'crawled/2023/02/20/data.gz'
|
||||||
|
# item = Item(item_id)
|
||||||
|
# content = item.get_content()
|
||||||
|
# temp_url = ''
|
||||||
|
# r = extract_favicon_from_html(content, temp_url)
|
||||||
|
# print(r)
|
||||||
|
# _reprocess_all_hars_cookie_name()
|
||||||
|
# _reprocess_all_hars_etag()
|
||||||
|
# _gzip_all_hars()
|
||||||
|
_reprocess_all_hars_hhhashs()
|
||||||
|
|
|
@ -129,7 +129,7 @@ def get_item_url(item_id):
|
||||||
|
|
||||||
def get_item_har(item_id):
|
def get_item_har(item_id):
|
||||||
har = '/'.join(item_id.rsplit('/')[-4:])
|
har = '/'.join(item_id.rsplit('/')[-4:])
|
||||||
har = f'{har}.json'
|
har = f'{har}.json.gz'
|
||||||
path = os.path.join(ConfigLoader.get_hars_dir(), har)
|
path = os.path.join(ConfigLoader.get_hars_dir(), har)
|
||||||
if os.path.isfile(path):
|
if os.path.isfile(path):
|
||||||
return har
|
return har
|
||||||
|
|
|
@ -104,9 +104,13 @@ def _get_word_regex(word):
|
||||||
|
|
||||||
def convert_byte_offset_to_string(b_content, offset):
    """
    Convert a byte offset in UTF-8 encoded content into a character offset.

    The bytes up to and including ``offset`` are decoded and the index of
    the last decoded character is returned.

    If that prefix cannot be decoded (for example ``offset`` falls inside a
    multi-byte character), the error is logged once and the prefix is cut
    right before the first undecodable byte. This is the longest prefix
    that decodes cleanly, i.e. the same result as backing off one byte at a
    time, but without recursion (no RecursionError on long undecodable
    tails).

    :param b_content: content as bytes
    :param offset: byte offset inside ``b_content``
    :return: character offset; -1 if no byte before the failure decodes
    """
    byte_chunk = b_content[:offset + 1]
    try:
        string_chunk = byte_chunk.decode()
    except UnicodeDecodeError as e:
        logger.error(f'Yara offset converter error, {str(e)}\n{offset}/{len(b_content)}')
        # Every byte before e.start was decoded successfully by the codec,
        # so this second decode cannot fail.
        string_chunk = byte_chunk[:e.start].decode()
    return len(string_chunk) - 1
|
||||||
|
|
||||||
|
|
||||||
# TODO RETRO HUNTS
|
# TODO RETRO HUNTS
|
||||||
|
|
309
bin/lib/objects/Chats.py
Executable file
309
bin/lib/objects/Chats.py
Executable file
|
@ -0,0 +1,309 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from flask import url_for
|
||||||
|
# from pymisp import MISPObject
|
||||||
|
|
||||||
|
sys.path.append(os.environ['AIL_BIN'])
|
||||||
|
##################################
|
||||||
|
# Import Project packages
|
||||||
|
##################################
|
||||||
|
from lib import ail_core
|
||||||
|
from lib.ConfigLoader import ConfigLoader
|
||||||
|
from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id
|
||||||
|
from lib.data_retention_engine import update_obj_date
|
||||||
|
from lib.objects import ail_objects
|
||||||
|
from lib.timeline_engine import Timeline
|
||||||
|
|
||||||
|
from lib.correlations_engine import get_correlation_by_correl_type
|
||||||
|
|
||||||
|
config_loader = ConfigLoader()
|
||||||
|
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
|
||||||
|
r_object = config_loader.get_db_conn("Kvrocks_Objects")
|
||||||
|
r_cache = config_loader.get_redis_conn("Redis_Cache")
|
||||||
|
config_loader = None
|
||||||
|
|
||||||
|
|
||||||
|
################################################################################
|
||||||
|
################################################################################
|
||||||
|
################################################################################
|
||||||
|
|
||||||
|
class Chat(AbstractSubtypeObject):  # TODO # ID == username ?????
    """
    AIL Chat Object. (strings)

    Messages of a chat are indexed in two keys of the objects DB (r_object):

    - ``messages:<type>:<subtype>:<id>``: sorted set, message global ID
      scored by the message timestamp
    - ``messages:ids:<type>:<subtype>:<id>``: hash, message ID -> message
      global ID

    Replies received before the message they answer are kept temporarily in
    the cache (r_cache), see ``add_message_cached_reply``.
    """

    def __init__(self, id, subtype):
        super(Chat, self).__init__('chat', id, subtype)

    # def get_ail_2_ail_payload(self):
    #     payload = {'raw': self.get_gzip_content(b64=True),
    #                 'compress': 'gzip'}
    #     return payload

    # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
    def delete(self):
        """Not implemented yet (no-op)."""
        # # TODO:
        pass

    def get_link(self, flask_context=False):
        """
        Return the URL of the correlation page of this chat.

        :param flask_context: if True, build the URL with flask ``url_for``;
            otherwise build it from the configured ``baseurl``.
        """
        if flask_context:
            url = url_for('correlation.show_correlation', type=self.type, subtype=self.subtype, id=self.id)
        else:
            url = f'{baseurl}/correlation/show?type={self.type}&subtype={self.subtype}&id={self.id}'
        return url

    def get_svg_icon(self):  # TODO
        """Return the icon description (style, icon, color, radius) of this object."""
        # if self.subtype == 'telegram':
        #     style = 'fab'
        #     icon = '\uf2c6'
        # elif self.subtype == 'discord':
        #     style = 'fab'
        #     icon = '\uf099'
        # else:
        #     style = 'fas'
        #     icon = '\uf007'
        style = 'fas'
        icon = '\uf086'
        return {'style': style, 'icon': icon, 'color': '#4dffff', 'radius': 5}

    def get_meta(self, options=None):
        """
        Return the metadata dict of this chat.

        The dict built by ``_get_meta`` is completed with the 'id',
        'subtype' and 'tags' keys.

        :param options: set of options forwarded to ``_get_meta``
            (defaults to an empty set)
        """
        # A fresh set per call: avoids sharing one mutable default between calls
        if options is None:
            options = set()
        meta = self._get_meta(options=options)
        meta['id'] = self.id
        meta['subtype'] = self.subtype
        meta['tags'] = self.get_tags(r_list=True)
        return meta

    def get_misp_object(self):
        """MISP export is not implemented yet: always returns None."""
        # obj_attrs = []
        # if self.subtype == 'telegram':
        #     obj = MISPObject('telegram-account', standalone=True)
        #     obj_attrs.append(obj.add_attribute('username', value=self.id))
        #
        # elif self.subtype == 'twitter':
        #     obj = MISPObject('twitter-account', standalone=True)
        #     obj_attrs.append(obj.add_attribute('name', value=self.id))
        #
        # else:
        #     obj = MISPObject('user-account', standalone=True)
        #     obj_attrs.append(obj.add_attribute('username', value=self.id))
        #
        # first_seen = self.get_first_seen()
        # last_seen = self.get_last_seen()
        # if first_seen:
        #     obj.first_seen = first_seen
        # if last_seen:
        #     obj.last_seen = last_seen
        # if not first_seen or not last_seen:
        #     self.logger.warning(
        #         f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={first_seen}, last={last_seen}')
        #
        # for obj_attr in obj_attrs:
        #     for tag in self.get_tags():
        #         obj_attr.add_tag(tag)
        # return obj
        return

    ############################################################################
    ############################################################################

    # others optional metas, ... -> # TODO ALL meta in hset

    def get_name(self):  # get username ????
        """Not implemented yet (returns None)."""
        pass

    # users that send at least a message else participants/spectator
    # correlation created by messages
    def get_users(self):
        """
        Return the set of user accounts correlated with this chat.

        The first character of each correlation entry is dropped
        (presumably a subtype separator - TODO confirm).
        """
        users = set()
        accounts = self.get_correlation('user-account').get('user-account', [])
        for account in accounts:
            users.add(account[1:])
        return users

    def _get_timeline_username(self):
        """Return the 'username' Timeline of this chat."""
        return Timeline(self.get_global_id(), 'username')

    def get_username(self):
        """Return the last object ID of the username timeline."""
        return self._get_timeline_username().get_last_obj_id()

    def get_usernames(self):
        """Return the object IDs of the username timeline."""
        return self._get_timeline_username().get_objs_ids()

    def update_username_timeline(self, username_global_id, timestamp):
        """Add a username (global ID) to the username timeline at the given timestamp."""
        self._get_timeline_username().add_timestamp(timestamp, username_global_id)

    # def get_last_message_id(self):
    #
    #     return r_object.hget(f'meta:{self.type}:{self.subtype}:{self.id}', 'last:message:id')

    def get_obj_message_id(self, obj_id):
        """
        Extract the message ID from an object ID.

        An optional '.gz' suffix is removed, then the part after the last
        '_' is returned as an int.
        """
        if obj_id.endswith('.gz'):
            obj_id = obj_id[:-3]
        return int(obj_id.split('_')[-1])

    def _get_message_timestamp(self, obj_global_id):
        """Return the score (timestamp) of a message in the chat sorted set."""
        return r_object.zscore(f'messages:{self.type}:{self.subtype}:{self.id}', obj_global_id)

    def _get_messages(self):
        """Return all (message global ID, timestamp) pairs of the chat sorted set."""
        return r_object.zrange(f'messages:{self.type}:{self.subtype}:{self.id}', 0, -1, withscores=True)

    def get_message_meta(self, obj_global_id, parent=True, mess_datetime=None):
        """
        Build the metadata dict of one message of this chat.

        :param obj_global_id: global ID of the message object
        :param parent: if True and the message has a parent, the parent
            metadata is added under 'reply_to' (one level only)
        :param mess_datetime: datetime of the message; if not provided, it
            is built from the timestamp stored in the chat sorted set
        :return: message metadata, completed with 'user-account', 'date'
            and 'hour'
        """
        obj = ail_objects.get_obj_from_global_id(obj_global_id)
        mess_dict = obj.get_meta(options={'content', 'link', 'parent', 'user-account'})
        if mess_dict.get('parent') and parent:
            mess_dict['reply_to'] = self.get_message_meta(mess_dict['parent'], parent=False)
        if mess_dict.get('user-account'):
            user_account = ail_objects.get_obj_from_global_id(mess_dict['user-account'])
            mess_dict['user-account'] = {}
            mess_dict['user-account']['type'] = user_account.get_type()
            mess_dict['user-account']['subtype'] = user_account.get_subtype(r_str=True)
            mess_dict['user-account']['id'] = user_account.get_id()
            username = user_account.get_username()
            if username:
                username = ail_objects.get_obj_from_global_id(username).get_default_meta(link=False)
            mess_dict['user-account']['username'] = username  # TODO get username at the given timestamp ???
        else:
            # No user account linked to the message: the entry is missing or
            # empty here, so it must be created, not indexed
            mess_dict['user-account'] = {'id': 'UNKNOWN'}

        if not mess_datetime:
            mess_timestamp = self._get_message_timestamp(obj_global_id)
            mess_datetime = datetime.fromtimestamp(mess_timestamp)
        mess_dict['date'] = mess_datetime.isoformat(' ')
        mess_dict['hour'] = mess_datetime.strftime('%H:%M:%S')
        return mess_dict

    def get_messages(self, start=0, page=1, nb=500):  # TODO limit nb returned, # TODO add replies
        """
        Return all the messages of this chat, grouped by day.

        NOTE: ``start``, ``page`` and ``nb`` are currently ignored, every
        message of the chat is returned.

        :return: dict, 'YYYY/MM/DD' -> list of message metadata dicts, in
            the order returned by the chat sorted set
        """
        # r_object.delete(f'messages:{self.type}:{self.subtype}:{self.id}')

        # TODO chat without username ???? -> chat ID ????

        messages = {}
        for message in self._get_messages():
            date = datetime.fromtimestamp(message[1])
            date_day = date.strftime('%Y/%m/%d')
            mess_dict = self.get_message_meta(message[0], parent=True, mess_datetime=date)
            # setdefault: an already filled day bucket is never overwritten
            messages.setdefault(date_day, []).append(mess_dict)
        return messages

    # Zset with ID ??? id -> item id ??? multiple id == media + text
    # id -> media id
    # How do we handle reply/thread ??? -> separate with new chats name/id ZSET ???
    # Handle media ???

    # list of message id -> obj_id
    # list of obj_id ->
    # abuse parent children ???

    # def add(self, timestamp, obj_id, mess_id=0, username=None, user_id=None):
    #     date = # TODO get date from object
    #     self.update_daterange(date)
    #     update_obj_date(date, self.type, self.subtype)
    #
    #
    #     # daily
    #     r_object.hincrby(f'{self.type}:{self.subtype}:{date}', self.id, 1)
    #     # all subtypes
    #     r_object.zincrby(f'{self.type}_all:{self.subtype}', 1, self.id)
    #
    #     #######################################################################
    #     #######################################################################
    #
    #     # Correlations
    #     self.add_correlation('item', '', item_id)
    #     # domain
    #     if is_crawled(item_id):
    #         domain = get_item_domain(item_id)
    #         self.add_correlation('domain', '', domain)

    # TODO kvrocks exception if key don't exists
    def get_obj_by_message_id(self, mess_id):
        """Return the global ID of the object registered for a message ID."""
        return r_object.hget(f'messages:ids:{self.type}:{self.subtype}:{self.id}', mess_id)

    # importer -> use cache for previous reply SET to_add_id: previously_imported : expire SET key -> 30 mn
    def add_message(self, obj_global_id, timestamp, mess_id, reply_id=None):
        """
        Register a message in this chat.

        :param obj_global_id: global ID of the message object
        :param timestamp: message timestamp, used as score in the sorted set
        :param mess_id: message ID
        :param reply_id: ID of the message this one replies to, if any
        """
        r_object.hset(f'messages:ids:{self.type}:{self.subtype}:{self.id}', mess_id, obj_global_id)
        r_object.zadd(f'messages:{self.type}:{self.subtype}:{self.id}', {obj_global_id: timestamp})

        if reply_id:
            reply_obj = self.get_obj_by_message_id(reply_id)
            if reply_obj:
                self.add_obj_children(reply_obj, obj_global_id)
            else:
                # The replied-to message is not registered yet:
                # remember this reply until it is
                self.add_message_cached_reply(reply_id, mess_id)

        # ADD cached replies
        for reply_obj in self.get_cached_message_reply(mess_id):
            self.add_obj_children(obj_global_id, reply_obj)

    def _get_message_cached_reply(self, message_id):
        """Return the cached IDs of the messages replying to ``message_id``."""
        return r_cache.smembers(f'messages:ids:{self.type}:{self.subtype}:{self.id}:{message_id}')

    def get_cached_message_reply(self, message_id):
        """
        Return the global IDs of the cached replies to ``message_id``.

        Cached replies without a registered object are skipped.
        """
        objs_global_id = []
        for mess_id in self._get_message_cached_reply(message_id):
            obj_global_id = self.get_obj_by_message_id(mess_id)
            if obj_global_id:
                objs_global_id.append(obj_global_id)
        return objs_global_id

    def add_message_cached_reply(self, reply_to_id, message_id):
        """Cache ``message_id`` as a reply to ``reply_to_id`` (the cache key expires after 600 seconds)."""
        r_cache.sadd(f'messages:ids:{self.type}:{self.subtype}:{self.id}:{reply_to_id}', message_id)
        r_cache.expire(f'messages:ids:{self.type}:{self.subtype}:{self.id}:{reply_to_id}', 600)
|
||||||
|
|
||||||
|
# TODO nb replies = nb son ???? what if it create a onion item ??? -> need source filtering
|
||||||
|
|
||||||
|
|
||||||
|
# TODO factorize
|
||||||
|
def get_all_subtypes():
    """Return the subtypes that ail_core declares for the 'chat' object type."""
    chat_subtypes = ail_core.get_object_all_subtypes('chat')
    return chat_subtypes
|
||||||
|
|
||||||
|
def get_all():
    """Return a dict mapping each chat subtype to the result of get_all_by_subtype for it."""
    return {subtype: get_all_by_subtype(subtype) for subtype in get_all_subtypes()}
|
||||||
|
|
||||||
|
def get_all_by_subtype(subtype):
    """Return the result of get_all_id for the 'chat' type and the given subtype."""
    chat_ids = get_all_id('chat', subtype)
    return chat_ids
|
||||||
|
|
||||||
|
# # TODO FILTER NAME + Key + mail
|
||||||
|
# def sanitize_username_name_to_search(name_to_search, subtype): # TODO FILTER NAME
|
||||||
|
#
|
||||||
|
# return name_to_search
|
||||||
|
#
|
||||||
|
# def search_usernames_by_name(name_to_search, subtype, r_pos=False):
|
||||||
|
# usernames = {}
|
||||||
|
# # for subtype in subtypes:
|
||||||
|
# r_name = sanitize_username_name_to_search(name_to_search, subtype)
|
||||||
|
# if not name_to_search or isinstance(r_name, dict):
|
||||||
|
# # break
|
||||||
|
# return usernames
|
||||||
|
# r_name = re.compile(r_name)
|
||||||
|
# for user_name in get_all_usernames_by_subtype(subtype):
|
||||||
|
# res = re.search(r_name, user_name)
|
||||||
|
# if res:
|
||||||
|
# usernames[user_name] = {}
|
||||||
|
# if r_pos:
|
||||||
|
# usernames[user_name]['hl-start'] = res.start()
|
||||||
|
# usernames[user_name]['hl-end'] = res.end()
|
||||||
|
# return usernames
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Manual check: print the messages of the 'test' telegram chat
    print(Chat('test', 'telegram').get_messages())
|
|
@ -138,7 +138,7 @@ class Decoded(AbstractDaterangeObject):
|
||||||
with open(filepath, 'rb') as f:
|
with open(filepath, 'rb') as f:
|
||||||
content = f.read()
|
content = f.read()
|
||||||
return content
|
return content
|
||||||
elif r_str == 'bytesio':
|
elif r_type == 'bytesio':
|
||||||
with open(filepath, 'rb') as f:
|
with open(filepath, 'rb') as f:
|
||||||
content = BytesIO(f.read())
|
content = BytesIO(f.read())
|
||||||
return content
|
return content
|
||||||
|
@ -149,7 +149,7 @@ class Decoded(AbstractDaterangeObject):
|
||||||
with zipfile.ZipFile(zip_content, "w") as zf:
|
with zipfile.ZipFile(zip_content, "w") as zf:
|
||||||
# TODO: Fix password
|
# TODO: Fix password
|
||||||
# zf.setpassword(b"infected")
|
# zf.setpassword(b"infected")
|
||||||
zf.writestr(self.id, self.get_content().getvalue())
|
zf.writestr(self.id, self.get_content(r_type='bytesio').getvalue())
|
||||||
zip_content.seek(0)
|
zip_content.seek(0)
|
||||||
return zip_content
|
return zip_content
|
||||||
|
|
||||||
|
|
|
@ -389,10 +389,10 @@ class Domain(AbstractObject):
|
||||||
har = get_item_har(item_id)
|
har = get_item_har(item_id)
|
||||||
if har:
|
if har:
|
||||||
print(har)
|
print(har)
|
||||||
_write_in_zip_buffer(zf, os.path.join(hars_dir, har), f'{basename}.json')
|
_write_in_zip_buffer(zf, os.path.join(hars_dir, har), f'{basename}.json.gz')
|
||||||
# Screenshot
|
# Screenshot
|
||||||
screenshot = self._get_external_correlation('item', '', item_id, 'screenshot')
|
screenshot = self._get_external_correlation('item', '', item_id, 'screenshot')
|
||||||
if screenshot:
|
if screenshot and screenshot['screenshot']:
|
||||||
screenshot = screenshot['screenshot'].pop()[1:]
|
screenshot = screenshot['screenshot'].pop()[1:]
|
||||||
screenshot = os.path.join(screenshot[0:2], screenshot[2:4], screenshot[4:6], screenshot[6:8],
|
screenshot = os.path.join(screenshot[0:2], screenshot[2:4], screenshot[4:6], screenshot[6:8],
|
||||||
screenshot[8:10], screenshot[10:12], screenshot[12:])
|
screenshot[8:10], screenshot[10:12], screenshot[12:])
|
||||||
|
@ -595,21 +595,22 @@ def get_domains_up_by_filers(domain_types, date_from=None, date_to=None, tags=[]
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def sanitize_domain_name_to_search(name_to_search, domain_type):
    """
    Validate a domain name search string and escape it for use in a regex.

    :param name_to_search: string to search, as supplied by the caller
    :param domain_type: 'onion' restricts the allowed characters to
        lowercase letters, digits and dots; any other type also accepts
        uppercase letters, '-' and '_'
    :return: the search string with every '.' escaped as a literal dot, or
        an empty string if the input is empty or contains a character that
        is not allowed
    """
    if not name_to_search:
        return ""
    if domain_type == 'onion':
        r_name = r'[a-z0-9\.]+'
    else:
        r_name = r'[a-zA-Z0-9-_\.]+'
    # invalid domain name
    if not re.fullmatch(r_name, name_to_search):
        return ""
    # raw string: '\.' in a normal string literal is an invalid escape sequence
    return name_to_search.replace('.', r'\.')
|
||||||
|
|
||||||
def search_domain_by_name(name_to_search, domain_types, r_pos=False):
|
def search_domain_by_name(name_to_search, domain_types, r_pos=False):
|
||||||
domains = {}
|
domains = {}
|
||||||
for domain_type in domain_types:
|
for domain_type in domain_types:
|
||||||
r_name = sanitize_domain_name_to_search(name_to_search, domain_type)
|
r_name = sanitize_domain_name_to_search(name_to_search, domain_type)
|
||||||
if not name_to_search or isinstance(r_name, dict):
|
if not r_name:
|
||||||
break
|
break
|
||||||
r_name = re.compile(r_name)
|
r_name = re.compile(r_name)
|
||||||
for domain in get_domains_up_by_type(domain_type):
|
for domain in get_domains_up_by_type(domain_type):
|
||||||
|
|
121
bin/lib/objects/Etags.py
Executable file
121
bin/lib/objects/Etags.py
Executable file
|
@ -0,0 +1,121 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from hashlib import sha256
|
||||||
|
from flask import url_for
|
||||||
|
|
||||||
|
from pymisp import MISPObject
|
||||||
|
|
||||||
|
sys.path.append(os.environ['AIL_BIN'])
|
||||||
|
##################################
|
||||||
|
# Import Project packages
|
||||||
|
##################################
|
||||||
|
from lib.ConfigLoader import ConfigLoader
|
||||||
|
from lib.objects.abstract_daterange_object import AbstractDaterangeObject, AbstractDaterangeObjects
|
||||||
|
|
||||||
|
config_loader = ConfigLoader()
|
||||||
|
r_objects = config_loader.get_db_conn("Kvrocks_Objects")
|
||||||
|
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
|
||||||
|
config_loader = None
|
||||||
|
|
||||||
|
# TODO NEW ABSTRACT OBJECT -> daterange for all objects ????
|
||||||
|
|
||||||
|
class Etag(AbstractDaterangeObject):
    """
    AIL Etag Object.

    The object ID is the sha256 hex digest of the etag content (see the
    module-level ``create`` function); the raw etag value is stored in the
    'content' field.
    """

    def __init__(self, obj_id):
        super(Etag, self).__init__('etag', obj_id)

    # def get_ail_2_ail_payload(self):
    #     payload = {'raw': self.get_gzip_content(b64=True),
    #                 'compress': 'gzip'}
    #     return payload

    # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
    def delete(self):
        """Not implemented yet (no-op)."""
        # # TODO:
        pass

    def get_content(self, r_type='str'):
        """
        Return the etag value stored in the 'content' field.

        :param r_type: only 'str' is supported; None is returned for any
            other value
        """
        if r_type == 'str':
            return self._get_field('content')
        return None

    def get_link(self, flask_context=False):
        """
        Return the URL of the correlation page of this etag.

        :param flask_context: if True, build the URL with flask ``url_for``;
            otherwise build it from the configured ``baseurl``.
        """
        if flask_context:
            url = url_for('correlation.show_correlation', type=self.type, id=self.id)
        else:
            url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}'
        return url

    # TODO # CHANGE COLOR
    def get_svg_icon(self):
        """Return the icon description (style, icon, color, radius) of this object."""
        return {'style': 'fas', 'icon': '\uf02b', 'color': '#556F65', 'radius': 5}

    def get_misp_object(self):
        """
        Export this etag as a MISP 'etag' object.

        first_seen / last_seen are set when available (a warning is logged
        if one of them is missing); the object tags are added to each
        attribute.
        """
        obj_attrs = []
        obj = MISPObject('etag')
        first_seen = self.get_first_seen()
        last_seen = self.get_last_seen()
        if first_seen:
            obj.first_seen = first_seen
        if last_seen:
            obj.last_seen = last_seen
        if not first_seen or not last_seen:
            self.logger.warning(
                f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={first_seen}, last={last_seen}')

        obj_attrs.append(obj.add_attribute('etag', value=self.get_content()))
        for obj_attr in obj_attrs:
            for tag in self.get_tags():
                obj_attr.add_tag(tag)
        return obj

    def get_nb_seen(self):
        """Return the number of correlations of this etag with 'domain' objects."""
        return self.get_nb_correlation('domain')

    def get_meta(self, options=None):
        """
        Return the metadata dict of this etag.

        The dict built by ``_get_meta`` is completed with the 'id', 'tags'
        and 'content' keys.

        :param options: set of options forwarded to ``_get_meta``
            (defaults to an empty set)
        """
        # A fresh set per call: avoids sharing one mutable default between calls
        if options is None:
            options = set()
        meta = self._get_meta(options=options)
        meta['id'] = self.id
        meta['tags'] = self.get_tags(r_list=True)
        meta['content'] = self.get_content()
        return meta

    def add(self, date, obj_id):  # date = HAR Date
        """Register a sighting of this etag for the domain ``obj_id`` at ``date``."""
        self._add(date, 'domain', '', obj_id)

    def create(self, content, _first_seen=None, _last_seen=None):
        """
        Create the object and store its content.

        :param content: etag value; bytes are decoded to str before storage
        :param _first_seen: unused
        :param _last_seen: unused
        """
        if not isinstance(content, str):
            content = content.decode()
        self._set_field('content', content)
        self._create()
|
||||||
|
|
||||||
|
|
||||||
|
def create(content):
    """
    Return the Etag object matching ``content``, creating it if needed.

    The object ID is the sha256 hex digest of the content bytes (a str is
    encoded first).
    """
    raw_content = content.encode() if isinstance(content, str) else content
    etag = Etag(sha256(raw_content).hexdigest())
    if etag.exists():
        return etag
    etag.create(raw_content)
    return etag
|
||||||
|
|
||||||
|
|
||||||
|
class Etags(AbstractDaterangeObjects):
    """
    Etags Objects
    """

    def __init__(self):
        """Bind the collection to the 'etag' type and the Etag class."""
        super().__init__('etag', Etag)

    def sanitize_id_to_search(self, name_to_search):
        """Return the search string unchanged (no sanitization implemented yet)."""
        # TODO
        return name_to_search
|
||||||
|
|
||||||
|
|
||||||
|
# if __name__ == '__main__':
|
||||||
|
# name_to_search = '98'
|
||||||
|
# print(search_cves_by_name(name_to_search))
|
138
bin/lib/objects/HHHashs.py
Executable file
138
bin/lib/objects/HHHashs.py
Executable file
|
@ -0,0 +1,138 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from flask import url_for
|
||||||
|
|
||||||
|
from pymisp import MISPObject
|
||||||
|
|
||||||
|
sys.path.append(os.environ['AIL_BIN'])
|
||||||
|
##################################
|
||||||
|
# Import Project packages
|
||||||
|
##################################
|
||||||
|
from lib.ConfigLoader import ConfigLoader
|
||||||
|
from lib.objects.abstract_daterange_object import AbstractDaterangeObject, AbstractDaterangeObjects
|
||||||
|
|
||||||
|
config_loader = ConfigLoader()
|
||||||
|
r_objects = config_loader.get_db_conn("Kvrocks_Objects")
|
||||||
|
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
|
||||||
|
config_loader = None
|
||||||
|
|
||||||
|
|
||||||
|
class HHHash(AbstractDaterangeObject):
    """
    AIL HHHash Object.

    The object ID is the hhhash value; the headers string it was built
    from is stored in the 'content' field.
    """

    def __init__(self, obj_id):
        super(HHHash, self).__init__('hhhash', obj_id)

    # def get_ail_2_ail_payload(self):
    #     payload = {'raw': self.get_gzip_content(b64=True),
    #                 'compress': 'gzip'}
    #     return payload

    # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
    def delete(self):
        """Not implemented yet (no-op)."""
        # # TODO:
        pass

    def get_content(self, r_type='str'):
        """
        Return the headers string stored in the 'content' field.

        :param r_type: only 'str' is supported; None is returned for any
            other value
        """
        if r_type == 'str':
            return self._get_field('content')
        return None

    def get_link(self, flask_context=False):
        """
        Return the URL of the correlation page of this hhhash.

        :param flask_context: if True, build the URL with flask ``url_for``;
            otherwise build it from the configured ``baseurl``.
        """
        if flask_context:
            url = url_for('correlation.show_correlation', type=self.type, id=self.id)
        else:
            url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}'
        return url

    # TODO # CHANGE COLOR
    def get_svg_icon(self):
        """Return the icon description (style, icon, color, radius) of this object."""
        return {'style': 'fas', 'icon': '\uf036', 'color': '#71D090', 'radius': 5}

    def get_misp_object(self):
        """
        Export this hhhash as a MISP 'hhhash' object.

        first_seen / last_seen are set when available (a warning is logged
        if one of them is missing); the object tags are added to each
        attribute.
        """
        obj_attrs = []
        obj = MISPObject('hhhash')
        first_seen = self.get_first_seen()
        last_seen = self.get_last_seen()
        if first_seen:
            obj.first_seen = first_seen
        if last_seen:
            obj.last_seen = last_seen
        if not first_seen or not last_seen:
            self.logger.warning(
                f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={first_seen}, last={last_seen}')

        obj_attrs.append(obj.add_attribute('hhhash', value=self.get_id()))
        obj_attrs.append(obj.add_attribute('hhhash-headers', value=self.get_content()))
        obj_attrs.append(obj.add_attribute('hhhash-tool', value='lacus'))
        for obj_attr in obj_attrs:
            for tag in self.get_tags():
                obj_attr.add_tag(tag)
        return obj

    def get_nb_seen(self):
        """Return the number of correlations of this hhhash with 'domain' objects."""
        return self.get_nb_correlation('domain')

    def get_meta(self, options=None):
        """
        Return the metadata dict of this hhhash.

        The dict built by ``_get_meta`` is completed with the 'id', 'tags'
        and 'content' keys.

        :param options: set of options forwarded to ``_get_meta``
            (defaults to an empty set)
        """
        # A fresh set per call: avoids sharing one mutable default between calls
        if options is None:
            options = set()
        meta = self._get_meta(options=options)
        meta['id'] = self.id
        meta['tags'] = self.get_tags(r_list=True)
        meta['content'] = self.get_content()
        return meta

    def add(self, date, obj_id):  # date = HAR Date
        """Register a sighting of this hhhash for the domain ``obj_id`` at ``date``."""
        self._add(date, 'domain', '', obj_id)

    def create(self, hhhash_header, _first_seen=None, _last_seen=None):  # TODO CREATE ADD FUNCTION -> urls set
        """
        Create the object and store the headers string it was built from.

        :param hhhash_header: headers string, stored in the 'content' field
        :param _first_seen: unused
        :param _last_seen: unused
        """
        self._set_field('content', hhhash_header)
        self._create()
|
||||||
|
|
||||||
|
|
||||||
|
def create(hhhash_header, hhhash=None):
    """
    Return the HHHash object for a header string, creating it if needed.

    :param hhhash_header: header string stored as the object's content on creation.
    :param hhhash: optional precomputed id; when falsy it is computed with hhhash_headers().
    :return: the HHHash object (existing or newly created).
    """
    obj_id = hhhash if hhhash else hhhash_headers(hhhash_header)
    obj = HHHash(obj_id)
    if obj.exists():
        return obj
    obj.create(hhhash_header)
    return obj
|
||||||
|
|
||||||
|
def build_hhhash_headers(dict_headers):  # filter_dup=True
    """
    Build the ':'-separated string of header names used as HHHash input.

    :param dict_headers: iterable of header entries, each providing ``.get('name')``
                         (presumably HAR-style header dicts - confirm against callers).
    :return: header names joined with ':' in their original order. Entries with a
             missing/empty name are skipped and a name identical to the previously
             kept one is dropped (remove dup headers, filter playwright invalid splitting).
             Returns '' when no name is kept.
    """
    names = []
    previous_header = ''
    for header in dict_headers:
        header_name = header.get('name')
        # keep only named headers that differ from the last kept one
        if header_name and header_name != previous_header:
            names.append(str(header_name))
            previous_header = header_name
    return ':'.join(names)
|
||||||
|
|
||||||
|
def hhhash_headers(header_hhhash):
    """
    Compute the HHHash identifier of a header string.

    :param header_hhhash: header string (str), encoded with the default str.encode() codec.
    :return: 'hhh:1:' followed by the hexadecimal SHA-256 digest of the encoded string.
    """
    digest = hashlib.sha256(header_hhhash.encode()).hexdigest()
    return f"hhh:1:{digest}"
|
||||||
|
|
||||||
|
|
||||||
|
class HHHashs(AbstractDaterangeObjects):
    """
    HHHashs Objects

    Collection wrapper registered for object type 'hhhash' with HHHash as object class.
    """

    def __init__(self):
        super().__init__('hhhash', HHHash)

    def sanitize_id_to_search(self, name_to_search):
        """Return the search string unchanged (no sanitization implemented yet)."""
        # TODO
        return name_to_search
|
||||||
|
|
||||||
|
|
||||||
|
# if __name__ == '__main__':
|
||||||
|
# name_to_search = '98'
|
||||||
|
# print(search_cves_by_name(name_to_search))
|
|
@ -264,10 +264,9 @@ class Item(AbstractObject):
|
||||||
"""
|
"""
|
||||||
if options is None:
|
if options is None:
|
||||||
options = set()
|
options = set()
|
||||||
meta = {'id': self.id,
|
meta = self.get_default_meta(tags=True)
|
||||||
'date': self.get_date(separator=True),
|
meta['date'] = self.get_date(separator=True)
|
||||||
'source': self.get_source(),
|
meta['source'] = self.get_source()
|
||||||
'tags': self.get_tags(r_list=True)}
|
|
||||||
# optional meta fields
|
# optional meta fields
|
||||||
if 'content' in options:
|
if 'content' in options:
|
||||||
meta['content'] = self.get_content()
|
meta['content'] = self.get_content()
|
||||||
|
@ -289,6 +288,8 @@ class Item(AbstractObject):
|
||||||
meta['mimetype'] = self.get_mimetype(content=content)
|
meta['mimetype'] = self.get_mimetype(content=content)
|
||||||
if 'investigations' in options:
|
if 'investigations' in options:
|
||||||
meta['investigations'] = self.get_investigations()
|
meta['investigations'] = self.get_investigations()
|
||||||
|
if 'link' in options:
|
||||||
|
meta['link'] = self.get_link(flask_context=True)
|
||||||
|
|
||||||
# meta['encoding'] = None
|
# meta['encoding'] = None
|
||||||
return meta
|
return meta
|
||||||
|
|
275
bin/lib/objects/Messages.py
Executable file
275
bin/lib/objects/Messages.py
Executable file
|
@ -0,0 +1,275 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import cld3
|
||||||
|
import html2text
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from pymisp import MISPObject
|
||||||
|
|
||||||
|
sys.path.append(os.environ['AIL_BIN'])
|
||||||
|
##################################
|
||||||
|
# Import Project packages
|
||||||
|
##################################
|
||||||
|
from lib.ail_core import get_ail_uuid
|
||||||
|
from lib.objects.abstract_object import AbstractObject
|
||||||
|
from lib.ConfigLoader import ConfigLoader
|
||||||
|
from lib.data_retention_engine import update_obj_date, get_obj_date_first
|
||||||
|
# TODO Set all messages ???
|
||||||
|
|
||||||
|
|
||||||
|
from flask import url_for
|
||||||
|
|
||||||
|
config_loader = ConfigLoader()
|
||||||
|
r_cache = config_loader.get_redis_conn("Redis_Cache")
|
||||||
|
r_object = config_loader.get_db_conn("Kvrocks_Objects")
|
||||||
|
# r_content = config_loader.get_db_conn("Kvrocks_Content")
|
||||||
|
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
|
||||||
|
config_loader = None
|
||||||
|
|
||||||
|
|
||||||
|
# TODO SAVE OR EXTRACT MESSAGE SOURCE FOR ICON ?????????
|
||||||
|
# TODO iterate on all objects
|
||||||
|
# TODO also add support for small objects ????
|
||||||
|
|
||||||
|
# Can a Message exist without a Chat? -> no: convert it to an object
|
||||||
|
|
||||||
|
# ID: source:chat_id:message_id ????
|
||||||
|
#
|
||||||
|
# /!\ handle null chat and message id -> chat = uuid and message = timestamp ???
|
||||||
|
|
||||||
|
|
||||||
|
class Message(AbstractObject):
    """
    AIL Message Object. (strings)

    The object id has the form ``<source>/<timestamp>/<chat_id>_<message_id>``
    (see create_obj_id()), e.g. telegram/1692189934.380827/ChatID_MessageID.
    Source, timestamp, chat id and message id are all parsed back from it.
    """

    # Patterns used by _languages_cleaner(), compiled once at class creation.
    _URL_REGEX = re.compile(
        r'\b(?:http://|https://)?(?:[a-zA-Z\d-]{,63}(?:\.[a-zA-Z\d-]{,63})+)(?:\:[0-9]+)*(?:/(?:$|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*\b')
    _PGP_REGEXES = (
        re.compile(r'-----BEGIN PGP PUBLIC KEY BLOCK-----[\s\S]+?-----END PGP PUBLIC KEY BLOCK-----'),
        re.compile(r'-----BEGIN PGP SIGNATURE-----[\s\S]+?-----END PGP SIGNATURE-----'),
        re.compile(r'-----BEGIN PGP MESSAGE-----[\s\S]+?-----END PGP MESSAGE-----'),
    )

    def __init__(self, id):  # TODO subtype or use source ????
        super().__init__('message', id)  # message::< telegram/1692189934.380827/ChatID_MessageID >

    def exists(self):
        """Return whether the meta key of this object exists in the objects DB."""
        if self.subtype is None:
            return r_object.exists(f'meta:{self.type}:{self.id}')
        else:
            return r_object.exists(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}')

    def get_source(self):
        """
        Returns source/feeder name

        Everything in the id before the last two '/'-separated parts.
        Raises TypeError if the id has fewer than three parts (nothing to join).
        """
        l_source = self.id.split('/')[:-2]
        return os.path.join(*l_source)

    def get_basename(self):
        """Return the last path component of the id (``<chat_id>_<message_id>``)."""
        return os.path.basename(self.id)

    def get_content(self, r_type='str'):  # TODO ADD cache # TODO Compress content ???????
        """
        Returns content

        :param r_type: 'str' returns the stored 'content' field as is,
                       'bytes' returns it encoded; any other value returns None.
        """
        content = self._get_field('content')
        if r_type == 'str':
            return content
        elif r_type == 'bytes':
            return content.encode()
        return None

    def get_date(self):
        """Return the message date as 'YYYYMMDD', derived from the id timestamp (local time)."""
        timestamp = self.get_timestamp()
        return datetime.fromtimestamp(float(timestamp)).strftime('%Y%m%d')

    def get_timestamp(self):
        """Return the timestamp part of the id (second to last '/'-separated part), as a string."""
        dirs = self.id.split('/')
        return dirs[-2]

    def get_message_id(self):  # TODO optimize
        """Return the part of the basename after its last '_'."""
        message_id = self.get_basename().rsplit('_', 1)[1]
        # if message_id.endswith('.gz'):
        #     message_id = message_id[:-3]
        return message_id

    def get_chat_id(self):  # TODO optimize -> use me to tag Chat
        """Return the part of the basename before its last '_'."""
        chat_id = self.get_basename().rsplit('_', 1)[0]
        # if chat_id.endswith('.gz'):
        #     chat_id = chat_id[:-3]
        return chat_id

    def get_user_account(self):
        """
        Return 'user-account:<correlated value>' for one correlated user-account,
        or None when the message has no 'user-account' correlation.
        """
        user_account = self.get_correlation('user-account')
        if user_account.get('user-account'):
            return f'user-account:{user_account["user-account"].pop()}'
        return None

    # Update value on import
    # reply to -> parent ?
    # reply/comment -> children ?
    # nb views
    # reactions
    # nb forwards
    # room ???
    # message from channel ???
    # message media

    def get_translation(self):  # TODO support multiple translated languages ?????
        """
        Returns translated content
        """
        return self._get_field('translated')  # TODO multiples translation ... -> use set

    def _set_translation(self, translation):
        """
        Set translated content
        """
        return self._set_field('translated', translation)  # translation by hash ??? -> avoid translating multiple time

    def get_html2text_content(self, content=None, ignore_links=False):
        """
        Convert HTML content to text with html2text.

        :param content: HTML to convert; when falsy the stored content is used.
        :param ignore_links: when True both links and images are ignored.
        """
        if not content:
            content = self.get_content()
        h = html2text.HTML2Text()
        h.ignore_links = ignore_links
        h.ignore_images = ignore_links
        return h.handle(content)

    # def get_ail_2_ail_payload(self):
    #     payload = {'raw': self.get_gzip_content(b64=True)}
    #     return payload

    def get_link(self, flask_context=False):
        """
        Return the URL of the correlation view for this object.

        :param flask_context: if True build the URL with flask's url_for,
                              otherwise prefix it with the configured baseurl.
        """
        if flask_context:
            url = url_for('correlation.show_correlation', type=self.type, id=self.id)
        else:
            url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}'
        return url

    def get_svg_icon(self):
        """Return the icon description (style, glyph, color, radius) for this object type."""
        return {'style': 'fas', 'icon': '\uf4ad', 'color': '#4dffff', 'radius': 5}

    def get_misp_object(self):  # TODO
        """
        Build the MISP 'instant-message' object.

        Only first_seen is set for now; no attribute is added yet, so the
        tagging loop below currently does nothing.
        """
        obj = MISPObject('instant-message', standalone=True)
        obj_date = self.get_date()
        if obj_date:
            obj.first_seen = obj_date
        else:
            self.logger.warning(
                f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={obj_date}')

        # obj_attrs = [obj.add_attribute('first-seen', value=obj_date),
        #              obj.add_attribute('raw-data', value=self.id, data=self.get_raw_content()),
        #              obj.add_attribute('sensor', value=get_ail_uuid())]
        obj_attrs = []
        for obj_attr in obj_attrs:
            for tag in self.get_tags():
                obj_attr.add_tag(tag)
        return obj

    # def get_url(self):
    #     return r_object.hget(f'meta:item::{self.id}', 'url')

    # options: set of optional meta fields
    def get_meta(self, options=None):
        """
        Return the object metadata: default meta (with tags), 'date' and 'source',
        plus the optional fields requested in options
        ('content', 'parent', 'investigations', 'link', 'user-account').

        :type options: set
        """
        if options is None:
            options = set()
        meta = self.get_default_meta(tags=True)
        meta['date'] = self.get_date()  # TODO replace me by timestamp ??????
        meta['source'] = self.get_source()
        # optional meta fields
        if 'content' in options:
            meta['content'] = self.get_content()
        if 'parent' in options:
            meta['parent'] = self.get_parent()
        if 'investigations' in options:
            meta['investigations'] = self.get_investigations()
        if 'link' in options:
            meta['link'] = self.get_link(flask_context=True)
        if 'user-account' in options:
            meta['user-account'] = self.get_user_account()

        # meta['encoding'] = None
        return meta

    def _languages_cleaner(self, content=None):
        """
        Strip URLs and PGP blocks (public key, signature, message) from a text.

        :param content: text to clean; when falsy the stored content is used.
        :return: the cleaned text.
        """
        if not content:
            content = self.get_content()
        # REMOVE URLS
        # longest match first, presumably so a short URL cannot cut a longer one
        urls = sorted(self._URL_REGEX.findall(content), key=len, reverse=True)
        for url in urls:
            content = content.replace(url, '')
        # REMOVE PGP Blocks
        # each pattern is searched in the text already cleaned by the previous one
        for pgp_regex in self._PGP_REGEXES:
            for it in pgp_regex.findall(content):
                content = content.replace(it, '')
        return content

    def detect_languages(self, min_len=600, num_langs=3, min_proportion=0.2, min_probability=0.7):
        """
        Detect the languages of the message with cld3.

        The content is converted to text (links ignored), cleaned from URLs and
        PGP blocks, and its whitespace is collapsed before detection.

        :param min_len: minimum length of the cleaned text; shorter texts return [].
        :param num_langs: maximum number of languages requested from cld3.
        :param min_proportion: minimum proportion of the text for a language to be kept.
        :param min_probability: minimum probability for a language to be kept.
        :return: list of cld3 results that are reliable and pass both thresholds.
        """
        languages = []
        ## CLEAN CONTENT ##
        content = self.get_html2text_content(ignore_links=True)
        content = self._languages_cleaner(content=content)
        # REMOVE USELESS SPACE
        content = ' '.join(content.split())
        # - CLEAN CONTENT - #
        if len(content) >= min_len:
            for lang in cld3.get_frequent_languages(content, num_langs=num_langs):
                if lang.proportion >= min_proportion and lang.probability >= min_probability and lang.is_reliable:
                    languages.append(lang)
        return languages

    # def translate(self, content=None):  # TODO translation plugin
    #     # TODO get text language
    #     if not content:
    #         content = self.get_content()
    #     translated = argostranslate.translate.translate(content, 'ru', 'en')
    #     # Save translation
    #     self._set_translation(translated)
    #     return translated

    def create(self, content, translation, tags):
        """
        Create the message: store the content, the translation (if any) and add the tags.

        :param content: message content, saved in the 'content' field.
        :param translation: translated content; saved only when truthy.
        :param tags: iterable of tags added to the object.
        """
        self._set_field('content', content)
        # r_content.get(f'content:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', content)
        if translation:
            self._set_translation(translation)
        for tag in tags:
            self.add_tag(tag)

    # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
    def delete(self):
        """Not implemented yet: currently a no-op."""
        pass
|
||||||
|
|
||||||
|
def create_obj_id(source, chat_id, message_id, timestamp):
    """
    Build a message object id: ``<source>/<timestamp>/<chat_id>_<message_id>``.

    All parts are formatted with their default string representation.
    """
    basename = f'{chat_id}_{message_id}'
    return f'{source}/{timestamp}/{basename}'
|
||||||
|
|
||||||
|
# TODO Check if already exists
|
||||||
|
# def create(source, chat_id, message_id, timestamp, content, tags=[]):
|
||||||
|
def create(obj_id, content, translation=None, tags=None):
    """
    Return the Message with this id, creating it first if it does not exist.

    :param obj_id: message object id (see create_obj_id()).
    :param content: message content, stored only on creation.
    :param translation: optional translated content, stored only on creation.
    :param tags: optional iterable of tags added on creation; None means no tags.
    :return: the Message object. An already existing message is returned untouched.
    """
    if tags is None:  # avoid a shared mutable default argument
        tags = []
    message = Message(obj_id)
    if not message.exists():
        message.create(content, translation, tags)
    return message
|
||||||
|
|
||||||
|
|
||||||
|
# TODO Encode translation
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
r = 'test'
|
||||||
|
print(r)
|
|
@ -88,7 +88,7 @@ class Screenshot(AbstractObject):
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
def get_meta(self, options=set()):
|
def get_meta(self, options=set()):
|
||||||
meta = {'id': self.id}
|
meta = self.get_default_meta()
|
||||||
meta['img'] = get_screenshot_rel_path(self.id) ######### # TODO: Rename ME ??????
|
meta['img'] = get_screenshot_rel_path(self.id) ######### # TODO: Rename ME ??????
|
||||||
meta['tags'] = self.get_tags(r_list=True)
|
meta['tags'] = self.get_tags(r_list=True)
|
||||||
if 'tags_safe' in options:
|
if 'tags_safe' in options:
|
||||||
|
|
155
bin/lib/objects/UsersAccount.py
Executable file
155
bin/lib/objects/UsersAccount.py
Executable file
|
@ -0,0 +1,155 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
# import re
|
||||||
|
|
||||||
|
from flask import url_for
|
||||||
|
from pymisp import MISPObject
|
||||||
|
|
||||||
|
sys.path.append(os.environ['AIL_BIN'])
|
||||||
|
##################################
|
||||||
|
# Import Project packages
|
||||||
|
##################################
|
||||||
|
from lib import ail_core
|
||||||
|
from lib.ConfigLoader import ConfigLoader
|
||||||
|
from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id
|
||||||
|
from lib.timeline_engine import Timeline
|
||||||
|
|
||||||
|
config_loader = ConfigLoader()
|
||||||
|
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
|
||||||
|
config_loader = None
|
||||||
|
|
||||||
|
|
||||||
|
################################################################################
|
||||||
|
################################################################################
|
||||||
|
################################################################################
|
||||||
|
|
||||||
|
class UserAccount(AbstractSubtypeObject):
    """
    AIL User Object. (strings)

    Subtype object of type 'user-account'. First name, last name and phone are
    stored as object fields; usernames are tracked in a 'username' timeline.
    """

    # subtype -> (icon style, icon glyph); other subtypes use _DEFAULT_SVG_ICON
    _SVG_ICONS = {'telegram': ('fab', '\uf2c6'),
                  'twitter': ('fab', '\uf099')}
    _DEFAULT_SVG_ICON = ('fas', '\uf007')

    def __init__(self, id, subtype):
        super().__init__('user-account', id, subtype)

    # def get_ail_2_ail_payload(self):
    #     payload = {'raw': self.get_gzip_content(b64=True),
    #                 'compress': 'gzip'}
    #     return payload

    # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
    def delete(self):
        """Not implemented yet: currently a no-op."""
        # # TODO:
        pass

    def get_link(self, flask_context=False):
        """
        Return the URL of the correlation view for this object.

        :param flask_context: if True build the URL with flask's url_for,
                              otherwise prefix it with the configured baseurl.
        """
        if flask_context:
            url = url_for('correlation.show_correlation', type=self.type, subtype=self.subtype, id=self.id)
        else:
            url = f'{baseurl}/correlation/show?type={self.type}&subtype={self.subtype}&id={self.id}'
        return url

    def get_svg_icon(self):  # TODO change icon/color
        """Return the icon description; style and glyph depend on the subtype."""
        style, icon = self._SVG_ICONS.get(self.subtype, self._DEFAULT_SVG_ICON)
        return {'style': style, 'icon': icon, 'color': '#4dffff', 'radius': 5}

    def get_first_name(self):
        """Return the 'firstname' field."""
        return self._get_field('firstname')

    def get_last_name(self):
        """Return the 'lastname' field."""
        return self._get_field('lastname')

    def get_phone(self):
        """Return the 'phone' field."""
        return self._get_field('phone')

    def set_first_name(self, firstname):
        """Set the 'firstname' field."""
        return self._set_field('firstname', firstname)

    def set_last_name(self, lastname):
        """Set the 'lastname' field."""
        return self._set_field('lastname', lastname)

    def set_phone(self, phone):
        """Set the 'phone' field."""
        return self._set_field('phone', phone)

    def _get_timeline_username(self):
        """Return the 'username' Timeline of this object (keyed by its global id)."""
        return Timeline(self.get_global_id(), 'username')

    def get_username(self):
        """Return the last object id of the username timeline."""
        return self._get_timeline_username().get_last_obj_id()

    def get_usernames(self):
        """Return all object ids of the username timeline."""
        return self._get_timeline_username().get_objs_ids()

    def update_username_timeline(self, username_global_id, timestamp):
        """Add a username (global id) to the username timeline at the given timestamp."""
        self._get_timeline_username().add_timestamp(timestamp, username_global_id)

    def get_meta(self, options=None):
        """
        Return the object metadata.

        :param options: optional set of extra meta fields, forwarded to _get_meta();
                        'username' and 'usernames' are handled here.
                        None (default) is treated as an empty set.
        :return: dict from _get_meta() extended with 'id', 'subtype', 'tags'
                 and the requested optional fields.
        """
        if options is None:  # avoid a shared mutable default argument
            options = set()
        meta = self._get_meta(options=options)
        meta['id'] = self.id
        meta['subtype'] = self.subtype
        meta['tags'] = self.get_tags(r_list=True)
        if 'username' in options:
            meta['username'] = self.get_username()
        if 'usernames' in options:
            meta['usernames'] = self.get_usernames()
        return meta

    def get_misp_object(self):
        """
        Build the MISP object for this account.

        The MISP template and the attribute holding the id depend on the subtype
        ('telegram-account', 'twitter-account', or the generic 'user-account').
        first_seen / last_seen are only set when available; a warning is logged
        if either one is missing. Every attribute is tagged with the object's tags.
        """
        obj_attrs = []
        if self.subtype == 'telegram':
            obj = MISPObject('telegram-account', standalone=True)
            obj_attrs.append(obj.add_attribute('username', value=self.id))

        elif self.subtype == 'twitter':
            obj = MISPObject('twitter-account', standalone=True)
            obj_attrs.append(obj.add_attribute('name', value=self.id))

        else:
            obj = MISPObject('user-account', standalone=True)
            obj_attrs.append(obj.add_attribute('username', value=self.id))

        first_seen = self.get_first_seen()
        last_seen = self.get_last_seen()
        if first_seen:
            obj.first_seen = first_seen
        if last_seen:
            obj.last_seen = last_seen
        if not first_seen or not last_seen:
            self.logger.warning(
                f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={first_seen}, last={last_seen}')

        # Fetch the tags once instead of once per attribute
        tags = self.get_tags()
        for obj_attr in obj_attrs:
            for tag in tags:
                obj_attr.add_tag(tag)
        return obj
|
||||||
|
|
||||||
|
def get_user_by_username():
    """Placeholder: not implemented yet, always returns None."""
    return None
|
||||||
|
|
||||||
|
def get_all_subtypes():
    """Return all subtypes registered in ail_core for the 'user-account' object type."""
    subtypes = ail_core.get_object_all_subtypes('user-account')
    return subtypes
|
||||||
|
|
||||||
|
def get_all():
    """Return a dict mapping each user-account subtype to all of its ids."""
    return {subtype: get_all_by_subtype(subtype) for subtype in get_all_subtypes()}
|
||||||
|
|
||||||
|
def get_all_by_subtype(subtype):
    """Return all 'user-account' ids of the given subtype."""
    ids = get_all_id('user-account', subtype)
    return ids
|
||||||
|
|
||||||
|
|
||||||
|
# if __name__ == '__main__':
|
||||||
|
# name_to_search = 'co'
|
||||||
|
# subtype = 'telegram'
|
||||||
|
# print(search_usernames_by_name(name_to_search, subtype))
|
|
@ -45,10 +45,10 @@ class AbstractDaterangeObject(AbstractObject, ABC):
|
||||||
def exists(self):
|
def exists(self):
|
||||||
return r_object.exists(f'meta:{self.type}:{self.id}')
|
return r_object.exists(f'meta:{self.type}:{self.id}')
|
||||||
|
|
||||||
def _get_field(self, field):
|
def _get_field(self, field): # TODO remove me (NEW in abstract)
|
||||||
return r_object.hget(f'meta:{self.type}:{self.id}', field)
|
return r_object.hget(f'meta:{self.type}:{self.id}', field)
|
||||||
|
|
||||||
def _set_field(self, field, value):
|
def _set_field(self, field, value): # TODO remove me (NEW in abstract)
|
||||||
return r_object.hset(f'meta:{self.type}:{self.id}', field, value)
|
return r_object.hset(f'meta:{self.type}:{self.id}', field, value)
|
||||||
|
|
||||||
def get_first_seen(self, r_int=False):
|
def get_first_seen(self, r_int=False):
|
||||||
|
@ -82,9 +82,10 @@ class AbstractDaterangeObject(AbstractObject, ABC):
|
||||||
return int(nb)
|
return int(nb)
|
||||||
|
|
||||||
def _get_meta(self, options=[]):
|
def _get_meta(self, options=[]):
|
||||||
meta_dict = {'first_seen': self.get_first_seen(),
|
meta_dict = self.get_default_meta()
|
||||||
'last_seen': self.get_last_seen(),
|
meta_dict['first_seen'] = self.get_first_seen()
|
||||||
'nb_seen': self.get_nb_seen()}
|
meta_dict['last_seen'] = self.get_last_seen()
|
||||||
|
meta_dict['nb_seen'] = self.get_nb_seen()
|
||||||
if 'sparkline' in options:
|
if 'sparkline' in options:
|
||||||
meta_dict['sparkline'] = self.get_sparkline()
|
meta_dict['sparkline'] = self.get_sparkline()
|
||||||
return meta_dict
|
return meta_dict
|
||||||
|
|
|
@ -20,6 +20,7 @@ sys.path.append(os.environ['AIL_BIN'])
|
||||||
##################################
|
##################################
|
||||||
from lib import ail_logger
|
from lib import ail_logger
|
||||||
from lib import Tag
|
from lib import Tag
|
||||||
|
from lib.ConfigLoader import ConfigLoader
|
||||||
from lib import Duplicate
|
from lib import Duplicate
|
||||||
from lib.correlations_engine import get_nb_correlations, get_correlations, add_obj_correlation, delete_obj_correlation, delete_obj_correlations, exists_obj_correlation, is_obj_correlated, get_nb_correlation_by_correl_type
|
from lib.correlations_engine import get_nb_correlations, get_correlations, add_obj_correlation, delete_obj_correlation, delete_obj_correlations, exists_obj_correlation, is_obj_correlated, get_nb_correlation_by_correl_type
|
||||||
from lib.Investigations import is_object_investigated, get_obj_investigations, delete_obj_investigations
|
from lib.Investigations import is_object_investigated, get_obj_investigations, delete_obj_investigations
|
||||||
|
@ -27,6 +28,11 @@ from lib.Tracker import is_obj_tracked, get_obj_trackers, delete_obj_trackers
|
||||||
|
|
||||||
logging.config.dictConfig(ail_logger.get_config(name='ail'))
|
logging.config.dictConfig(ail_logger.get_config(name='ail'))
|
||||||
|
|
||||||
|
config_loader = ConfigLoader()
|
||||||
|
# r_cache = config_loader.get_redis_conn("Redis_Cache")
|
||||||
|
r_object = config_loader.get_db_conn("Kvrocks_Objects")
|
||||||
|
config_loader = None
|
||||||
|
|
||||||
class AbstractObject(ABC):
|
class AbstractObject(ABC):
|
||||||
"""
|
"""
|
||||||
Abstract Object
|
Abstract Object
|
||||||
|
@ -59,14 +65,28 @@ class AbstractObject(ABC):
|
||||||
def get_global_id(self):
|
def get_global_id(self):
|
||||||
return f'{self.get_type()}:{self.get_subtype(r_str=True)}:{self.get_id()}'
|
return f'{self.get_type()}:{self.get_subtype(r_str=True)}:{self.get_id()}'
|
||||||
|
|
||||||
def get_default_meta(self, tags=False):
|
def get_default_meta(self, tags=False, link=False):
|
||||||
dict_meta = {'id': self.get_id(),
|
dict_meta = {'id': self.get_id(),
|
||||||
'type': self.get_type(),
|
'type': self.get_type(),
|
||||||
'subtype': self.get_subtype()}
|
'subtype': self.get_subtype(r_str=True)}
|
||||||
if tags:
|
if tags:
|
||||||
dict_meta['tags'] = self.get_tags()
|
dict_meta['tags'] = self.get_tags()
|
||||||
|
if link:
|
||||||
|
dict_meta['link'] = self.get_link()
|
||||||
return dict_meta
|
return dict_meta
|
||||||
|
|
||||||
|
def _get_field(self, field):
|
||||||
|
if self.subtype is None:
|
||||||
|
return r_object.hget(f'meta:{self.type}:{self.id}', field)
|
||||||
|
else:
|
||||||
|
return r_object.hget(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', field)
|
||||||
|
|
||||||
|
def _set_field(self, field, value):
|
||||||
|
if self.subtype is None:
|
||||||
|
return r_object.hset(f'meta:{self.type}:{self.id}', field, value)
|
||||||
|
else:
|
||||||
|
return r_object.hset(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', field, value)
|
||||||
|
|
||||||
## Tags ##
|
## Tags ##
|
||||||
def get_tags(self, r_list=False):
|
def get_tags(self, r_list=False):
|
||||||
tags = Tag.get_object_tags(self.type, self.id, self.get_subtype(r_str=True))
|
tags = Tag.get_object_tags(self.type, self.id, self.get_subtype(r_str=True))
|
||||||
|
@ -198,6 +218,8 @@ class AbstractObject(ABC):
|
||||||
else:
|
else:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
## Correlation ##
|
||||||
|
|
||||||
def _get_external_correlation(self, req_type, req_subtype, req_id, obj_type):
|
def _get_external_correlation(self, req_type, req_subtype, req_id, obj_type):
|
||||||
"""
|
"""
|
||||||
Get object correlation
|
Get object correlation
|
||||||
|
@ -253,3 +275,39 @@ class AbstractObject(ABC):
|
||||||
Get object correlations
|
Get object correlations
|
||||||
"""
|
"""
|
||||||
delete_obj_correlation(self.type, self.subtype, self.id, type2, subtype2, id2)
|
delete_obj_correlation(self.type, self.subtype, self.id, type2, subtype2, id2)
|
||||||
|
|
||||||
|
## -Correlation- ##
|
||||||
|
|
||||||
|
## Parent ##
|
||||||
|
|
||||||
|
def is_parent(self):
|
||||||
|
return r_object.exists(f'child:{self.type}:{self.get_subtype(r_str=True)}:{self.id}')
|
||||||
|
|
||||||
|
def is_children(self):
|
||||||
|
return r_object.hexists(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', 'parent')
|
||||||
|
|
||||||
|
def get_parent(self):
|
||||||
|
return r_object.hget(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', 'parent')
|
||||||
|
|
||||||
|
def get_children(self):
|
||||||
|
return r_object.smembers(f'child:{self.type}:{self.get_subtype(r_str=True)}:{self.id}')
|
||||||
|
|
||||||
|
def set_parent(self, obj_type=None, obj_subtype=None, obj_id=None, obj_global_id=None): # TODO ######################
|
||||||
|
if not obj_global_id:
|
||||||
|
if obj_subtype is None:
|
||||||
|
obj_subtype = ''
|
||||||
|
obj_global_id = f'{obj_type}:{obj_subtype}:{obj_id}'
|
||||||
|
r_object.hset(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', 'parent', obj_global_id)
|
||||||
|
|
||||||
|
def add_children(self, obj_type=None, obj_subtype=None, obj_id=None, obj_global_id=None): # TODO ######################
|
||||||
|
if not obj_global_id:
|
||||||
|
if obj_subtype is None:
|
||||||
|
obj_subtype = ''
|
||||||
|
obj_global_id = f'{obj_type}:{obj_subtype}:{obj_id}'
|
||||||
|
r_object.sadd(f'child:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', obj_global_id)
|
||||||
|
|
||||||
|
def add_obj_children(self, parent_global_id, son_global_id):
|
||||||
|
r_object.sadd(f'child:{parent_global_id}', son_global_id)
|
||||||
|
r_object.hset(f'meta:{son_global_id}', 'parent', parent_global_id)
|
||||||
|
|
||||||
|
## Parent ##
|
||||||
|
|
|
@ -151,7 +151,7 @@ class AbstractSubtypeObject(AbstractObject, ABC):
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
|
|
||||||
def add(self, date, item_id):
|
def add(self, date, obj=None):
|
||||||
self.update_daterange(date)
|
self.update_daterange(date)
|
||||||
update_obj_date(date, self.type, self.subtype)
|
update_obj_date(date, self.type, self.subtype)
|
||||||
# daily
|
# daily
|
||||||
|
@ -162,20 +162,22 @@ class AbstractSubtypeObject(AbstractObject, ABC):
|
||||||
#######################################################################
|
#######################################################################
|
||||||
#######################################################################
|
#######################################################################
|
||||||
|
|
||||||
# Correlations
|
if obj:
|
||||||
self.add_correlation('item', '', item_id)
|
# Correlations
|
||||||
# domain
|
self.add_correlation(obj.type, obj.get_subtype(r_str=True), obj.get_id())
|
||||||
if is_crawled(item_id):
|
|
||||||
domain = get_item_domain(item_id)
|
|
||||||
self.add_correlation('domain', '', domain)
|
|
||||||
|
|
||||||
|
if obj.type == 'item': # TODO same for message->chat ???
|
||||||
|
item_id = obj.get_id()
|
||||||
|
# domain
|
||||||
|
if is_crawled(item_id):
|
||||||
|
domain = get_item_domain(item_id)
|
||||||
|
self.add_correlation('domain', '', domain)
|
||||||
|
|
||||||
# TODO:ADD objects + Stats
|
# TODO:ADD objects + Stats
|
||||||
def create(self, first_seen, last_seen):
|
def create(self, first_seen, last_seen):
|
||||||
self.set_first_seen(first_seen)
|
self.set_first_seen(first_seen)
|
||||||
self.set_last_seen(last_seen)
|
self.set_last_seen(last_seen)
|
||||||
|
|
||||||
|
|
||||||
def _delete(self):
|
def _delete(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
|
@ -13,16 +13,21 @@ from lib import correlations_engine
|
||||||
from lib import btc_ail
|
from lib import btc_ail
|
||||||
from lib import Tag
|
from lib import Tag
|
||||||
|
|
||||||
|
from lib.objects import Chats
|
||||||
from lib.objects import CryptoCurrencies
|
from lib.objects import CryptoCurrencies
|
||||||
from lib.objects import CookiesNames
|
from lib.objects import CookiesNames
|
||||||
from lib.objects.Cves import Cve
|
from lib.objects.Cves import Cve
|
||||||
from lib.objects.Decodeds import Decoded, get_all_decodeds_objects, get_nb_decodeds_objects
|
from lib.objects.Decodeds import Decoded, get_all_decodeds_objects, get_nb_decodeds_objects
|
||||||
from lib.objects.Domains import Domain
|
from lib.objects.Domains import Domain
|
||||||
|
from lib.objects import Etags
|
||||||
from lib.objects.Favicons import Favicon
|
from lib.objects.Favicons import Favicon
|
||||||
|
from lib.objects import HHHashs
|
||||||
from lib.objects.Items import Item, get_all_items_objects, get_nb_items_objects
|
from lib.objects.Items import Item, get_all_items_objects, get_nb_items_objects
|
||||||
|
from lib.objects.Messages import Message
|
||||||
from lib.objects import Pgps
|
from lib.objects import Pgps
|
||||||
from lib.objects.Screenshots import Screenshot
|
from lib.objects.Screenshots import Screenshot
|
||||||
from lib.objects import Titles
|
from lib.objects import Titles
|
||||||
|
from lib.objects.UsersAccount import UserAccount
|
||||||
from lib.objects import Usernames
|
from lib.objects import Usernames
|
||||||
|
|
||||||
config_loader = ConfigLoader()
|
config_loader = ConfigLoader()
|
||||||
|
@ -53,12 +58,20 @@ def get_object(obj_type, subtype, obj_id):
|
||||||
return Domain(obj_id)
|
return Domain(obj_id)
|
||||||
elif obj_type == 'decoded':
|
elif obj_type == 'decoded':
|
||||||
return Decoded(obj_id)
|
return Decoded(obj_id)
|
||||||
|
elif obj_type == 'chat':
|
||||||
|
return Chats.Chat(obj_id, subtype)
|
||||||
elif obj_type == 'cookie-name':
|
elif obj_type == 'cookie-name':
|
||||||
return CookiesNames.CookieName(obj_id)
|
return CookiesNames.CookieName(obj_id)
|
||||||
elif obj_type == 'cve':
|
elif obj_type == 'cve':
|
||||||
return Cve(obj_id)
|
return Cve(obj_id)
|
||||||
|
elif obj_type == 'etag':
|
||||||
|
return Etags.Etag(obj_id)
|
||||||
elif obj_type == 'favicon':
|
elif obj_type == 'favicon':
|
||||||
return Favicon(obj_id)
|
return Favicon(obj_id)
|
||||||
|
elif obj_type == 'hhhash':
|
||||||
|
return HHHashs.HHHash(obj_id)
|
||||||
|
elif obj_type == 'message':
|
||||||
|
return Message(obj_id)
|
||||||
elif obj_type == 'screenshot':
|
elif obj_type == 'screenshot':
|
||||||
return Screenshot(obj_id)
|
return Screenshot(obj_id)
|
||||||
elif obj_type == 'cryptocurrency':
|
elif obj_type == 'cryptocurrency':
|
||||||
|
@ -67,6 +80,8 @@ def get_object(obj_type, subtype, obj_id):
|
||||||
return Pgps.Pgp(obj_id, subtype)
|
return Pgps.Pgp(obj_id, subtype)
|
||||||
elif obj_type == 'title':
|
elif obj_type == 'title':
|
||||||
return Titles.Title(obj_id)
|
return Titles.Title(obj_id)
|
||||||
|
elif obj_type == 'user-account':
|
||||||
|
return UserAccount(obj_id, subtype)
|
||||||
elif obj_type == 'username':
|
elif obj_type == 'username':
|
||||||
return Usernames.Username(obj_id, subtype)
|
return Usernames.Username(obj_id, subtype)
|
||||||
|
|
||||||
|
@ -101,9 +116,12 @@ def get_obj_global_id(obj_type, subtype, obj_id):
|
||||||
obj = get_object(obj_type, subtype, obj_id)
|
obj = get_object(obj_type, subtype, obj_id)
|
||||||
return obj.get_global_id()
|
return obj.get_global_id()
|
||||||
|
|
||||||
|
def get_obj_type_subtype_id_from_global_id(global_id):
    """Split a global id of the form ``type:subtype:id`` into its three parts.

    Only the first two colons are used as separators, so the object id itself
    may contain colons. A ``ValueError`` is raised (by the unpacking) when
    fewer than three parts are present.
    """
    parts = global_id.split(':', 2)
    obj_type, subtype, obj_id = parts
    return obj_type, subtype, obj_id
|
||||||
|
|
||||||
def get_obj_from_global_id(global_id):
|
def get_obj_from_global_id(global_id):
|
||||||
obj = global_id.split(':', 3)
|
obj = get_obj_type_subtype_id_from_global_id(global_id)
|
||||||
return get_object(obj[0], obj[1], obj[2])
|
return get_object(obj[0], obj[1], obj[2])
|
||||||
|
|
||||||
|
|
||||||
|
@ -159,7 +177,7 @@ def get_objects_meta(objs, options=set(), flask_context=False):
|
||||||
subtype = obj[1]
|
subtype = obj[1]
|
||||||
obj_id = obj[2]
|
obj_id = obj[2]
|
||||||
else:
|
else:
|
||||||
obj_type, subtype, obj_id = obj.split(':', 2)
|
obj_type, subtype, obj_id = get_obj_type_subtype_id_from_global_id(obj)
|
||||||
metas.append(get_object_meta(obj_type, subtype, obj_id, options=options, flask_context=flask_context))
|
metas.append(get_object_meta(obj_type, subtype, obj_id, options=options, flask_context=flask_context))
|
||||||
return metas
|
return metas
|
||||||
|
|
||||||
|
@ -168,7 +186,7 @@ def get_object_card_meta(obj_type, subtype, id, related_btc=False):
|
||||||
obj = get_object(obj_type, subtype, id)
|
obj = get_object(obj_type, subtype, id)
|
||||||
meta = obj.get_meta()
|
meta = obj.get_meta()
|
||||||
meta['icon'] = obj.get_svg_icon()
|
meta['icon'] = obj.get_svg_icon()
|
||||||
if subtype or obj_type == 'cookie-name' or obj_type == 'cve' or obj_type == 'title' or obj_type == 'favicon':
|
if subtype or obj_type == 'cookie-name' or obj_type == 'cve' or obj_type == 'etag' or obj_type == 'title' or obj_type == 'favicon' or obj_type == 'hhhash':
|
||||||
meta['sparkline'] = obj.get_sparkline()
|
meta['sparkline'] = obj.get_sparkline()
|
||||||
if obj_type == 'cve':
|
if obj_type == 'cve':
|
||||||
meta['cve_search'] = obj.get_cve_search()
|
meta['cve_search'] = obj.get_cve_search()
|
||||||
|
@ -177,6 +195,8 @@ def get_object_card_meta(obj_type, subtype, id, related_btc=False):
|
||||||
if subtype == 'bitcoin' and related_btc:
|
if subtype == 'bitcoin' and related_btc:
|
||||||
meta["related_btc"] = btc_ail.get_bitcoin_info(obj.id)
|
meta["related_btc"] = btc_ail.get_bitcoin_info(obj.id)
|
||||||
if obj.get_type() == 'decoded':
|
if obj.get_type() == 'decoded':
|
||||||
|
meta['mimetype'] = obj.get_mimetype()
|
||||||
|
meta['size'] = obj.get_size()
|
||||||
meta["vt"] = obj.get_meta_vt()
|
meta["vt"] = obj.get_meta_vt()
|
||||||
meta["vt"]["status"] = obj.is_vt_enabled()
|
meta["vt"]["status"] = obj.is_vt_enabled()
|
||||||
# TAGS MODAL
|
# TAGS MODAL
|
||||||
|
@ -333,8 +353,8 @@ def get_obj_correlations(obj_type, subtype, obj_id):
|
||||||
obj = get_object(obj_type, subtype, obj_id)
|
obj = get_object(obj_type, subtype, obj_id)
|
||||||
return obj.get_correlations()
|
return obj.get_correlations()
|
||||||
|
|
||||||
def _get_obj_correlations_objs(objs, obj_type, subtype, obj_id, filter_types, lvl, nb_max):
|
def _get_obj_correlations_objs(objs, obj_type, subtype, obj_id, filter_types, lvl, nb_max, objs_hidden):
|
||||||
if len(objs) < nb_max or nb_max == -1:
|
if len(objs) < nb_max or nb_max == 0:
|
||||||
if lvl == 0:
|
if lvl == 0:
|
||||||
objs.add((obj_type, subtype, obj_id))
|
objs.add((obj_type, subtype, obj_id))
|
||||||
|
|
||||||
|
@ -346,15 +366,17 @@ def _get_obj_correlations_objs(objs, obj_type, subtype, obj_id, filter_types, lv
|
||||||
for obj2_type in correlations:
|
for obj2_type in correlations:
|
||||||
for str_obj in correlations[obj2_type]:
|
for str_obj in correlations[obj2_type]:
|
||||||
obj2_subtype, obj2_id = str_obj.split(':', 1)
|
obj2_subtype, obj2_id = str_obj.split(':', 1)
|
||||||
_get_obj_correlations_objs(objs, obj2_type, obj2_subtype, obj2_id, filter_types, lvl, nb_max)
|
if get_obj_global_id(obj2_type, obj2_subtype, obj2_id) in objs_hidden:
|
||||||
|
continue # filter object to hide
|
||||||
|
_get_obj_correlations_objs(objs, obj2_type, obj2_subtype, obj2_id, filter_types, lvl, nb_max, objs_hidden)
|
||||||
|
|
||||||
def get_obj_correlations_objs(obj_type, subtype, obj_id, filter_types=[], lvl=0, nb_max=300):
|
def get_obj_correlations_objs(obj_type, subtype, obj_id, filter_types=[], lvl=0, nb_max=300, objs_hidden=set()):
|
||||||
objs = set()
|
objs = set()
|
||||||
_get_obj_correlations_objs(objs, obj_type, subtype, obj_id, filter_types, lvl, nb_max)
|
_get_obj_correlations_objs(objs, obj_type, subtype, obj_id, filter_types, lvl, nb_max, objs_hidden)
|
||||||
return objs
|
return objs
|
||||||
|
|
||||||
def obj_correlations_objs_add_tags(obj_type, subtype, obj_id, tags, filter_types=[], lvl=0, nb_max=300):
|
def obj_correlations_objs_add_tags(obj_type, subtype, obj_id, tags, filter_types=[], lvl=0, nb_max=300, objs_hidden=set()):
|
||||||
objs = get_obj_correlations_objs(obj_type, subtype, obj_id, filter_types=filter_types, lvl=lvl, nb_max=nb_max)
|
objs = get_obj_correlations_objs(obj_type, subtype, obj_id, filter_types=filter_types, lvl=lvl, nb_max=nb_max, objs_hidden=objs_hidden)
|
||||||
# print(objs)
|
# print(objs)
|
||||||
for obj_tuple in objs:
|
for obj_tuple in objs:
|
||||||
obj1_type, subtype1, id1 = obj_tuple
|
obj1_type, subtype1, id1 = obj_tuple
|
||||||
|
@ -395,7 +417,7 @@ def create_correlation_graph_links(links_set):
|
||||||
def create_correlation_graph_nodes(nodes_set, obj_str_id, flask_context=True):
|
def create_correlation_graph_nodes(nodes_set, obj_str_id, flask_context=True):
|
||||||
graph_nodes_list = []
|
graph_nodes_list = []
|
||||||
for node_id in nodes_set:
|
for node_id in nodes_set:
|
||||||
obj_type, subtype, obj_id = node_id.split(':', 2)
|
obj_type, subtype, obj_id = get_obj_type_subtype_id_from_global_id(node_id)
|
||||||
dict_node = {'id': node_id}
|
dict_node = {'id': node_id}
|
||||||
dict_node['style'] = get_object_svg(obj_type, subtype, obj_id)
|
dict_node['style'] = get_object_svg(obj_type, subtype, obj_id)
|
||||||
|
|
||||||
|
@ -416,10 +438,12 @@ def create_correlation_graph_nodes(nodes_set, obj_str_id, flask_context=True):
|
||||||
|
|
||||||
|
|
||||||
def get_correlations_graph_node(obj_type, subtype, obj_id, filter_types=[], max_nodes=300, level=1,
|
def get_correlations_graph_node(obj_type, subtype, obj_id, filter_types=[], max_nodes=300, level=1,
|
||||||
|
objs_hidden=set(),
|
||||||
flask_context=False):
|
flask_context=False):
|
||||||
obj_str_id, nodes, links, meta = correlations_engine.get_correlations_graph_nodes_links(obj_type, subtype, obj_id,
|
obj_str_id, nodes, links, meta = correlations_engine.get_correlations_graph_nodes_links(obj_type, subtype, obj_id,
|
||||||
filter_types=filter_types,
|
filter_types=filter_types,
|
||||||
max_nodes=max_nodes, level=level,
|
max_nodes=max_nodes, level=level,
|
||||||
|
objs_hidden=objs_hidden,
|
||||||
flask_context=flask_context)
|
flask_context=flask_context)
|
||||||
# print(meta)
|
# print(meta)
|
||||||
meta['objs'] = list(meta['objs'])
|
meta['objs'] = list(meta['objs'])
|
||||||
|
|
212
bin/lib/timeline_engine.py
Executable file
212
bin/lib/timeline_engine.py
Executable file
|
@ -0,0 +1,212 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from uuid import uuid4
|
||||||
|
|
||||||
|
sys.path.append(os.environ['AIL_BIN'])
|
||||||
|
##################################
|
||||||
|
# Import Project packages
|
||||||
|
##################################
|
||||||
|
from lib.ConfigLoader import ConfigLoader
|
||||||
|
|
||||||
|
config_loader = ConfigLoader()
|
||||||
|
r_meta = config_loader.get_db_conn("Kvrocks_Timeline")
|
||||||
|
config_loader = None
|
||||||
|
|
||||||
|
# CORRELATION_TYPES_BY_OBJ = {
|
||||||
|
# "chat": ["item", "username"], # item ???
|
||||||
|
# "cookie-name": ["domain"],
|
||||||
|
# "cryptocurrency": ["domain", "item"],
|
||||||
|
# "cve": ["domain", "item"],
|
||||||
|
# "decoded": ["domain", "item"],
|
||||||
|
# "domain": ["cve", "cookie-name", "cryptocurrency", "decoded", "etag", "favicon", "hhhash", "item", "pgp", "title", "screenshot", "username"],
|
||||||
|
# "etag": ["domain"],
|
||||||
|
# "favicon": ["domain", "item"],
|
||||||
|
# "hhhash": ["domain"],
|
||||||
|
# "item": ["chat", "cve", "cryptocurrency", "decoded", "domain", "favicon", "pgp", "screenshot", "title", "username"],
|
||||||
|
# "pgp": ["domain", "item"],
|
||||||
|
# "screenshot": ["domain", "item"],
|
||||||
|
# "title": ["domain", "item"],
|
||||||
|
# "username": ["chat", "domain", "item"],
|
||||||
|
# }
|
||||||
|
#
|
||||||
|
# def get_obj_correl_types(obj_type):
|
||||||
|
# return CORRELATION_TYPES_BY_OBJ.get(obj_type)
|
||||||
|
|
||||||
|
# def sanityze_obj_correl_types(obj_type, correl_types):
|
||||||
|
# obj_correl_types = get_obj_correl_types(obj_type)
|
||||||
|
# if correl_types:
|
||||||
|
# correl_types = set(correl_types).intersection(obj_correl_types)
|
||||||
|
# if not correl_types:
|
||||||
|
# correl_types = obj_correl_types
|
||||||
|
# if not correl_types:
|
||||||
|
# return []
|
||||||
|
# return correl_types
|
||||||
|
|
||||||
|
class Timeline:
    """Timeline of objects, stored as blocks in the ``r_meta`` database.

    Storage layout (as used by the methods below):

    * sorted set ``line:<id>:<name>``: two members per block,
      ``start:<block>`` and ``end:<block>``, scored with the block's first
      and last timestamp;
    * hash ``block:<id>:<name>``: maps a block identifier (a random UUID
      string) to the global id of the object that owns the block.

    NOTE(review): the string operations on values read back from ``r_meta``
    (``startswith``, slicing, ``split``) assume the connection returns ``str``
    rather than ``bytes`` - confirm against the connection settings.
    """

    def __init__(self, global_id, name):
        self.id = global_id
        self.name = name

    def _get_block_obj_global_id(self, block):
        """Return the global id of the object owning ``block``."""
        return r_meta.hget(f'block:{self.id}:{self.name}', block)

    def _set_block_obj_global_id(self, block, global_id):
        """Record ``global_id`` as the owner of ``block``."""
        return r_meta.hset(f'block:{self.id}:{self.name}', block, global_id)

    def _get_block_timestamp(self, block, position):
        """Return the score of the ``position`` ('start' or 'end') member of ``block``."""
        return r_meta.zscore(f'line:{self.id}:{self.name}', f'{position}:{block}')

    def _get_nearest_bloc_inf(self, timestamp):
        """Return the member with the highest score <= ``timestamp``, or None.

        When the member found is an ``end:`` member whose block starts at the
        very same score, the ``start:`` member of that block is returned
        instead.
        """
        inf = r_meta.zrevrangebyscore(f'line:{self.id}:{self.name}', float(timestamp), 0, start=0, num=1, withscores=True)
        if not inf:
            return None
        inf, score = inf[0]
        if inf.startswith('end'):
            inf_key = f'start:{inf[4:]}'
            inf_score = r_meta.zscore(f'line:{self.id}:{self.name}', inf_key)
            if inf_score == score:
                inf = inf_key
        return inf

    def _get_nearest_bloc_sup(self, timestamp):
        """Return the member with the lowest score >= ``timestamp``, or None.

        When the member found is a ``start:`` member whose block ends at the
        very same score, the ``end:`` member of that block is returned
        instead.
        """
        sup = r_meta.zrangebyscore(f'line:{self.id}:{self.name}', float(timestamp), '+inf', start=0, num=1, withscores=True)
        if not sup:
            return None
        sup, score = sup[0]
        if sup.startswith('start'):
            sup_key = f'end:{sup[6:]}'
            sup_score = r_meta.zscore(f'line:{self.id}:{self.name}', sup_key)
            if score == sup_score:
                sup = sup_key
        return sup

    def get_first_obj_id(self):
        """Return the global id owning the first block, or None if the timeline is empty."""
        first = r_meta.zrange(f'line:{self.id}:{self.name}', 0, 0)
        if not first:
            return None
        first = first[0]  # start:block
        if first.startswith('start:'):
            first = first[6:]
        else:
            first = first[4:]
        return self._get_block_obj_global_id(first)

    def get_last_obj_id(self):
        """Return the global id owning the last block, or None if the timeline is empty."""
        last = r_meta.zrevrange(f'line:{self.id}:{self.name}', 0, 0)
        if not last:
            return None
        last = last[0]  # end:block
        if last.startswith('end:'):
            last = last[4:]
        else:
            last = last[6:]
        return self._get_block_obj_global_id(last)

    def get_objs_ids(self):
        """Return the set of global ids owning at least one block of the timeline."""
        objs = set()
        for block in r_meta.zrange(f'line:{self.id}:{self.name}', 0, -1):
            # each block is counted once, through its 'start:' member
            if block and block.startswith('start:'):
                objs.add(self._get_block_obj_global_id(block[6:]))
        return objs

    # def get_objs_ids(self):
    #     objs = {}
    #     last_obj_id = None
    #     for block, timestamp in r_meta.zrange(f'line:{self.id}:{self.name}', 0, -1, withscores=True):
    #         if block:
    #             if block.startswith('start:'):
    #                 last_obj_id = self._get_block_obj_global_id(block[6:])
    #                 objs[last_obj_id] = {'first_seen': timestamp}
    #             else:
    #                 objs[last_obj_id]['last_seen'] = timestamp
    #     return objs

    def _update_bloc(self, block, position, timestamp):
        """Move the ``position`` ('start' or 'end') boundary of ``block`` to ``timestamp``."""
        r_meta.zadd(f'line:{self.id}:{self.name}', {f'{position}:{block}': timestamp})

    def _add_bloc(self, obj_global_id, timestamp, end=None):
        """Create a new block owned by ``obj_global_id`` and return its identifier.

        The block starts at ``timestamp`` and ends at ``end``; when ``end`` is
        not provided (or falsy) the block starts and ends at ``timestamp``.
        """
        timestamp_end = end if end else timestamp
        new_bloc = str(uuid4())
        r_meta.zadd(f'line:{self.id}:{self.name}', {f'start:{new_bloc}': timestamp, f'end:{new_bloc}': timestamp_end})
        self._set_block_obj_global_id(new_bloc, obj_global_id)
        return new_bloc

    def add_timestamp(self, timestamp, obj_global_id):
        """Register that ``obj_global_id`` was seen at ``timestamp``.

        Depending on the neighbouring blocks, an existing block is extended,
        an existing block is split in two around a new one, or a new block is
        created.

        Returns the identifier of the block that now contains ``timestamp``.
        ``None`` is only returned for the unexpected configuration noted at
        the end of the method.
        """
        inf = self._get_nearest_bloc_inf(timestamp)
        sup = self._get_nearest_bloc_sup(timestamp)

        # empty timeline: create the first bloc
        if not inf and not sup:
            return self._add_bloc(obj_global_id, timestamp)

        # timestamp < first_seen
        if not inf:
            sup_pos, sup_id = sup.split(':', 1)
            sup_obj = self._get_block_obj_global_id(sup_id)
            if sup_obj == obj_global_id:
                self._update_bloc(sup_id, 'start', timestamp)
                # return the extended bloc, like every other branch does
                return sup_id
            # create new bloc
            return self._add_bloc(obj_global_id, timestamp)

        # timestamp > last_seen
        if not sup:
            inf_pos, inf_id = inf.split(':', 1)
            inf_obj = self._get_block_obj_global_id(inf_id)
            if inf_obj == obj_global_id:
                self._update_bloc(inf_id, 'end', timestamp)
                # return the extended bloc, like every other branch does
                return inf_id
            # create new bloc
            return self._add_bloc(obj_global_id, timestamp)

        inf_pos, inf_id = inf.split(':', 1)
        sup_pos, sup_id = sup.split(':', 1)
        inf_obj = self._get_block_obj_global_id(inf_id)

        if inf_id == sup_id:
            # timestamp in existing bloc
            if obj_global_id == inf_obj:
                return inf_id

            # reduce bloc + create two new bloc
            # get end timestamp
            sup_timestamp = self._get_block_timestamp(sup_id, 'end')
            # reduce original bloc
            self._update_bloc(inf_id, 'end', timestamp - 1)
            # Insert new bloc
            new_bloc = self._add_bloc(obj_global_id, timestamp)
            # Recreate end of the first bloc by a new bloc
            self._add_bloc(inf_obj, timestamp + 1, end=sup_timestamp)
            return new_bloc

        # different blocs: extend the inf/sup bloc, or create a new bloc
        # between them when neither belongs to the object
        if inf_pos == 'end' and sup_pos == 'start':
            # Extend inf bloc
            if obj_global_id == inf_obj:
                self._update_bloc(inf_id, 'end', timestamp)
                return inf_id

            sup_obj = self._get_block_obj_global_id(sup_id)
            # Extend sup bloc
            if obj_global_id == sup_obj:
                self._update_bloc(sup_id, 'start', timestamp)
                return sup_id

            # create new bloc
            return self._add_bloc(obj_global_id, timestamp)

        # inf_pos == 'start' and sup_pos == 'end' on two different blocs
        # else raise error ???
        return None
|
|
@ -130,7 +130,7 @@ class Cryptocurrencies(AbstractModule, ABC):
|
||||||
if crypto.is_valid_address():
|
if crypto.is_valid_address():
|
||||||
# print(address)
|
# print(address)
|
||||||
is_valid_address = True
|
is_valid_address = True
|
||||||
crypto.add(date, item_id)
|
crypto.add(date, item)
|
||||||
|
|
||||||
# Check private key
|
# Check private key
|
||||||
if is_valid_address:
|
if is_valid_address:
|
||||||
|
|
|
@ -131,7 +131,7 @@ class Mixer(AbstractModule):
|
||||||
|
|
||||||
self.last_refresh = time.time()
|
self.last_refresh = time.time()
|
||||||
self.clear_feeders_stat()
|
self.clear_feeders_stat()
|
||||||
time.sleep(0.5)
|
time.sleep(0.5)
|
||||||
|
|
||||||
def computeNone(self):
|
def computeNone(self):
|
||||||
self.refresh_stats()
|
self.refresh_stats()
|
||||||
|
|
|
@ -42,7 +42,8 @@ class Onion(AbstractModule):
|
||||||
self.faup = crawlers.get_faup()
|
self.faup = crawlers.get_faup()
|
||||||
|
|
||||||
# activate_crawler = p.config.get("Crawler", "activate_crawler")
|
# activate_crawler = p.config.get("Crawler", "activate_crawler")
|
||||||
|
self.har = config_loader.get_config_boolean('Crawler', 'default_har')
|
||||||
|
self.screenshot = config_loader.get_config_boolean('Crawler', 'default_screenshot')
|
||||||
|
|
||||||
self.onion_regex = r"((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.onion)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
|
self.onion_regex = r"((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.onion)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
|
||||||
# self.i2p_regex = r"((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.i2p)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
|
# self.i2p_regex = r"((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.i2p)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
|
||||||
|
@ -90,8 +91,9 @@ class Onion(AbstractModule):
|
||||||
|
|
||||||
if onion_urls:
|
if onion_urls:
|
||||||
if crawlers.is_crawler_activated():
|
if crawlers.is_crawler_activated():
|
||||||
for domain in domains: # TODO LOAD DEFAULT SCREENSHOT + HAR
|
for domain in domains:
|
||||||
task_uuid = crawlers.create_task(domain, parent=item.get_id(), priority=0)
|
task_uuid = crawlers.create_task(domain, parent=item.get_id(), priority=0,
|
||||||
|
har=self.har, screenshot=self.screenshot)
|
||||||
if task_uuid:
|
if task_uuid:
|
||||||
print(f'{domain} added to crawler queue: {task_uuid}')
|
print(f'{domain} added to crawler queue: {task_uuid}')
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -210,18 +210,18 @@ class PgpDump(AbstractModule):
|
||||||
date = item.get_date()
|
date = item.get_date()
|
||||||
for key in self.keys:
|
for key in self.keys:
|
||||||
pgp = Pgps.Pgp(key, 'key')
|
pgp = Pgps.Pgp(key, 'key')
|
||||||
pgp.add(date, self.item_id)
|
pgp.add(date, item)
|
||||||
print(f' key: {key}')
|
print(f' key: {key}')
|
||||||
for name in self.names:
|
for name in self.names:
|
||||||
pgp = Pgps.Pgp(name, 'name')
|
pgp = Pgps.Pgp(name, 'name')
|
||||||
pgp.add(date, self.item_id)
|
pgp.add(date, item)
|
||||||
print(f' name: {name}')
|
print(f' name: {name}')
|
||||||
self.tracker_term.compute(name, obj_type='pgp', subtype='name')
|
self.tracker_term.compute(name, obj_type='pgp', subtype='name')
|
||||||
self.tracker_regex.compute(name, obj_type='pgp', subtype='name')
|
self.tracker_regex.compute(name, obj_type='pgp', subtype='name')
|
||||||
self.tracker_yara.compute(name, obj_type='pgp', subtype='name')
|
self.tracker_yara.compute(name, obj_type='pgp', subtype='name')
|
||||||
for mail in self.mails:
|
for mail in self.mails:
|
||||||
pgp = Pgps.Pgp(mail, 'mail')
|
pgp = Pgps.Pgp(mail, 'mail')
|
||||||
pgp.add(date, self.item_id)
|
pgp.add(date, item)
|
||||||
print(f' mail: {mail}')
|
print(f' mail: {mail}')
|
||||||
self.tracker_term.compute(mail, obj_type='pgp', subtype='mail')
|
self.tracker_term.compute(mail, obj_type='pgp', subtype='mail')
|
||||||
self.tracker_regex.compute(mail, obj_type='pgp', subtype='mail')
|
self.tracker_regex.compute(mail, obj_type='pgp', subtype='mail')
|
||||||
|
|
|
@ -58,7 +58,7 @@ class Telegram(AbstractModule):
|
||||||
user_id = dict_url.get('username')
|
user_id = dict_url.get('username')
|
||||||
if user_id:
|
if user_id:
|
||||||
username = Username(user_id, 'telegram')
|
username = Username(user_id, 'telegram')
|
||||||
username.add(item_date, item.id)
|
username.add(item_date, item)
|
||||||
print(f'username: {user_id}')
|
print(f'username: {user_id}')
|
||||||
invite_hash = dict_url.get('invite_hash')
|
invite_hash = dict_url.get('invite_hash')
|
||||||
if invite_hash:
|
if invite_hash:
|
||||||
|
@ -73,7 +73,7 @@ class Telegram(AbstractModule):
|
||||||
user_id = dict_url.get('username')
|
user_id = dict_url.get('username')
|
||||||
if user_id:
|
if user_id:
|
||||||
username = Username(user_id, 'telegram')
|
username = Username(user_id, 'telegram')
|
||||||
username.add(item_date, item.id)
|
username.add(item_date, item)
|
||||||
print(f'username: {user_id}')
|
print(f'username: {user_id}')
|
||||||
invite_hash = dict_url.get('invite_hash')
|
invite_hash = dict_url.get('invite_hash')
|
||||||
if invite_hash:
|
if invite_hash:
|
||||||
|
|
|
@ -10,6 +10,8 @@ Update AIL in the background
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import logging
|
||||||
|
import logging.config
|
||||||
import sys
|
import sys
|
||||||
import subprocess
|
import subprocess
|
||||||
|
|
||||||
|
@ -17,37 +19,55 @@ sys.path.append(os.environ['AIL_BIN'])
|
||||||
##################################
|
##################################
|
||||||
# Import Project packages
|
# Import Project packages
|
||||||
##################################
|
##################################
|
||||||
|
from lib import ail_logger
|
||||||
from lib import ail_updates
|
from lib import ail_updates
|
||||||
|
|
||||||
def launch_background_upgrade(version, l_script_name):
|
logging.config.dictConfig(ail_logger.get_config(name='updates'))
|
||||||
if ail_updates.is_version_in_background_update(version):
|
def launch_background_upgrade(version):
|
||||||
ail_updates.start_background_update(version)
|
logger = logging.getLogger()
|
||||||
|
logger.warning(f'launching background update {version}')
|
||||||
|
update = ail_updates.AILBackgroundUpdate(version)
|
||||||
|
nb_done = update.get_nb_scripts_done()
|
||||||
|
update.start()
|
||||||
|
scripts = update.get_scripts()
|
||||||
|
scripts = scripts[nb_done:]
|
||||||
|
for script in scripts:
|
||||||
|
print('launching background script update', script)
|
||||||
|
# launch script
|
||||||
|
update.start_script(script)
|
||||||
|
script_path = update.get_script_path()
|
||||||
|
if script_path:
|
||||||
|
try:
|
||||||
|
process = subprocess.run(['python', script_path])
|
||||||
|
if process.returncode != 0:
|
||||||
|
stderr = process.stderr
|
||||||
|
if stderr:
|
||||||
|
error = stderr.decode()
|
||||||
|
logger.error(error)
|
||||||
|
update.set_error(error)
|
||||||
|
else:
|
||||||
|
update.set_error('Error Updater Script')
|
||||||
|
logger.error('Error Updater Script')
|
||||||
|
sys.exit(0)
|
||||||
|
except Exception as e:
|
||||||
|
update.set_error(str(e))
|
||||||
|
logger.error(str(e))
|
||||||
|
sys.exit(0)
|
||||||
|
|
||||||
for script_name in l_script_name:
|
if not update.get_error():
|
||||||
ail_updates.set_current_background_update_script(script_name)
|
update.end_script()
|
||||||
update_file = ail_updates.get_current_background_update_script_path(version, script_name)
|
else:
|
||||||
|
logger.warning('Updater exited on error')
|
||||||
|
sys.exit(0)
|
||||||
|
|
||||||
# # TODO: Get error output
|
update.end()
|
||||||
process = subprocess.run(['python', update_file])
|
logger.warning(f'ending background update {version}')
|
||||||
|
|
||||||
update_progress = ail_updates.get_current_background_update_progress()
|
|
||||||
if update_progress == 100:
|
|
||||||
ail_updates.end_background_update_script()
|
|
||||||
# # TODO: Create Custom error
|
|
||||||
# 'Please relaunch the bin/update-background.py script'
|
|
||||||
# # TODO: Create Class background update
|
|
||||||
|
|
||||||
ail_updates.end_background_update(version)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
if ail_updates.is_update_background_running():
|
||||||
if not ail_updates.exits_background_update_to_launch():
|
v = ail_updates.get_update_background_version()
|
||||||
ail_updates.clear_background_update()
|
launch_background_upgrade(v)
|
||||||
else:
|
else:
|
||||||
launch_background_upgrade('v1.5', ['Update-ARDB_Onions.py', 'Update-ARDB_Metadata.py', 'Update-ARDB_Tags.py',
|
for ver in ail_updates.get_update_background_to_launch():
|
||||||
'Update-ARDB_Tags_background.py', 'Update-ARDB_Onions_screenshots.py'])
|
launch_background_upgrade(ver)
|
||||||
launch_background_upgrade('v2.6', ['Update_screenshots.py'])
|
|
||||||
launch_background_upgrade('v2.7', ['Update_domain_tags.py'])
|
|
||||||
launch_background_upgrade('v3.4', ['Update_domain.py'])
|
|
||||||
launch_background_upgrade('v3.7', ['Update_trackers.py'])
|
|
||||||
|
|
|
@ -663,6 +663,7 @@ namespace.crawl ail_crawlers
|
||||||
namespace.db ail_datas
|
namespace.db ail_datas
|
||||||
namespace.dup ail_dups
|
namespace.dup ail_dups
|
||||||
namespace.obj ail_objs
|
namespace.obj ail_objs
|
||||||
|
namespace.tl ail_tls
|
||||||
namespace.stat ail_stats
|
namespace.stat ail_stats
|
||||||
namespace.tag ail_tags
|
namespace.tag ail_tags
|
||||||
namespace.track ail_trackers
|
namespace.track ail_trackers
|
||||||
|
|
|
@ -45,6 +45,10 @@ sender = sender@example.com
|
||||||
sender_host = smtp.example.com
|
sender_host = smtp.example.com
|
||||||
sender_port = 1337
|
sender_port = 1337
|
||||||
sender_pw = None
|
sender_pw = None
|
||||||
|
# Only needed for SMTP over SSL, if the mail server doesn't support TLS (used by default). Use this option to validate the server certificate.
|
||||||
|
cert_required = False
|
||||||
|
# Only needed for SMTP over SSL if you want to validate your self-signed certificate
|
||||||
|
ca_file =
|
||||||
# Only needed when the credentials for email server needs a username instead of an email address
|
# Only needed when the credentials for email server needs a username instead of an email address
|
||||||
#sender_user = sender
|
#sender_user = sender
|
||||||
sender_user =
|
sender_user =
|
||||||
|
@ -191,6 +195,11 @@ host = localhost
|
||||||
port = 6383
|
port = 6383
|
||||||
password = ail_objs
|
password = ail_objs
|
||||||
|
|
||||||
|
[Kvrocks_Timeline]
|
||||||
|
host = localhost
|
||||||
|
port = 6383
|
||||||
|
password = ail_tls
|
||||||
|
|
||||||
[Kvrocks_Stats]
|
[Kvrocks_Stats]
|
||||||
host = localhost
|
host = localhost
|
||||||
port = 6383
|
port = 6383
|
||||||
|
|
|
@ -89,12 +89,12 @@ Available Importers:
|
||||||
5. Launch ail-framework, pystemon and PystemonImporter.py (all within the virtual environment):
|
5. Launch ail-framework, pystemon and PystemonImporter.py (all within the virtual environment):
|
||||||
- Option 1 (recommended):
|
- Option 1 (recommended):
|
||||||
```
|
```
|
||||||
./ail-framework/bin/LAUNCH.py -l #starts ail-framework
|
./ail-framework/bin/LAUNCH.sh -l #starts ail-framework
|
||||||
./ail-framework/bin/LAUNCH.py -f #starts pystemon and the PystemonImporter.py
|
./ail-framework/bin/LAUNCH.sh -f #starts pystemon and the PystemonImporter.py
|
||||||
```
|
```
|
||||||
- Option 2 (may require two terminal windows):
|
- Option 2 (may require two terminal windows):
|
||||||
```
|
```
|
||||||
./ail-framework/bin/LAUNCH.py -l #starts ail-framework
|
./ail-framework/bin/LAUNCH.sh -l #starts ail-framework
|
||||||
./pystemon/pystemon.py
|
./pystemon/pystemon.py
|
||||||
./ail-framework/bin/importer/PystemonImporter.py
|
./ail-framework/bin/importer/PystemonImporter.py
|
||||||
```
|
```
|
||||||
|
|
120
tools/crawler_add_task.py
Executable file
120
tools/crawler_add_task.py
Executable file
|
@ -0,0 +1,120 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
Send an URL to the crawler - Create a crawler task
|
||||||
|
================
|
||||||
|
|
||||||
|
Import URL to be crawled by AIL and then analysed
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
from pyail import PyAIL
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.append(os.environ['AIL_BIN'])
|
||||||
|
##################################
|
||||||
|
# Import Project packages
|
||||||
|
##################################
|
||||||
|
from lib.ConfigLoader import ConfigLoader
|
||||||
|
|
||||||
|
def check_frequency(value):
    """Validate one component of a custom crawl frequency.

    :param value: amount of minutes/hours/days/weeks/months, as an int or a
        string that can be converted to an int.
    :return: the validated amount as an ``int`` (so the function can also be
        used directly as an argparse ``type=`` callable).
    :raises argparse.ArgumentTypeError: if the value is not an integer or is
        not strictly positive.
    """
    try:
        frequency = int(value)
    except (TypeError, ValueError):
        # Report non-numeric input the same way as out-of-range input.
        raise argparse.ArgumentTypeError(f'Error: Invalid frequency {value}') from None
    if frequency <= 0:
        raise argparse.ArgumentTypeError(f'Error: Invalid frequency {frequency}')
    return frequency
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point: parse the task options, fill the missing ones
    # from the AIL crawler configuration and submit the task through PyAIL.

    # TODO add c argument for config file
    parser = argparse.ArgumentParser(description='Send an URL to the crawler - Create a crawler task')
    parser.add_argument('-u', '--url', type=str, help='URL to crawl', required=True)
    parser.add_argument('-k', '--key', type=str, help='AIL API Key', required=True)
    parser.add_argument('-a', '--ail', type=str, help='AIL URL')
    parser.add_argument('-d', '--depth', type=int, default=1, help='Depth limit')  # TODO improve me
    parser.add_argument('--cookiejar', type=str, help='Cookiejar uuid')
    parser.add_argument('-p', '--proxy', type=str, help='Proxy address to use, "web" and "tor" can be used as shortcut (web is used by default if the domain isn\'t an onion)')

    # --har / --no-har: tri-state, None means "use the configured default"
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--har', dest='har', action='store_true', help='Save HAR')
    group.add_argument('--no-har', dest='har', action='store_false', help='Don\'t save HAR')
    parser.set_defaults(har=None)

    # --screenshot / --no-screenshot: same tri-state handling as HAR
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--screenshot', dest='screenshot', action='store_true', help='Save screenshot')
    group.add_argument('--no-screenshot', dest='screenshot', action='store_false', help='Don\'t save screenshot')
    parser.set_defaults(screenshot=None)

    group = parser.add_argument_group('Frequency, create a regular crawler/scheduler. one shot if not specified')
    group.add_argument('-f', '--frequency', type=str, choices=['monthly', 'weekly', 'daily', 'hourly'],
                       help='monthly, weekly, daily or hourly frequency or specify a custom one with the others arguments')
    group.add_argument('--minutes', type=int, help='frequency in minutes')
    group.add_argument('--hours', type=int, help='frequency in hours')
    group.add_argument('--days', type=int, help='frequency in days')
    group.add_argument('--weeks', type=int, help='frequency in weeks')
    group.add_argument('--months', type=int, help='frequency in months')

    # --url and --key are required: argparse exits with a usage message by
    # itself when one of them is missing, no extra check is needed here.
    args = parser.parse_args()

    # Load crawler default config for the options left unspecified
    config_loader = ConfigLoader()
    har = args.har
    if har is None:
        har = config_loader.get_config_boolean('Crawler', 'default_har')
    screenshot = args.screenshot
    if screenshot is None:
        screenshot = config_loader.get_config_boolean('Crawler', 'default_screenshot')

    # argparse already supplies the default (1). Do not test truthiness here,
    # otherwise an explicit "-d 0" would silently be replaced by 1.
    depth = args.depth
    if depth < 0:
        parser.error(f'Error: Invalid depth {depth}')

    # frequency: a preset name takes precedence, otherwise build a custom
    # {unit: amount} dict; None means a one shot crawl.
    if args.frequency:
        # the value is already restricted by the argparse `choices`
        frequency = args.frequency
    else:
        frequency = {}
        for unit in ('minutes', 'hours', 'days', 'weeks', 'months'):
            amount = getattr(args, unit)
            if amount is None:
                continue
            try:
                check_frequency(amount)
            except argparse.ArgumentTypeError as e:
                # exit with a usage error instead of an uncaught traceback
                parser.error(f'--{unit}: {e}')
            frequency[unit] = amount
        if not frequency:
            frequency = None

    proxy = args.proxy
    cookiejar = args.cookiejar or None
    ail = args.ail or 'https://localhost:7000/'

    # NOTE(review): ssl=False is presumably there for the self-signed
    # certificate of a local AIL instance - confirm before pointing this
    # script at a remote server.
    client = PyAIL(ail, args.key, ssl=False)
    r = client.crawl_url(args.url, har=har, screenshot=screenshot, depth_limit=depth, frequency=frequency,
                         cookiejar=cookiejar, proxy=proxy)
    print(r)
|
|
@ -1,18 +0,0 @@
|
||||||
#!/bin/bash
|
|
||||||
|
|
||||||
echo "Killing all screens ..."
|
|
||||||
bash -c "bash ../../bin/LAUNCH.sh -k"
|
|
||||||
echo ""
|
|
||||||
echo "Updating ARDB ..."
|
|
||||||
pushd ../../
|
|
||||||
rm -r ardb
|
|
||||||
pushd ardb/
|
|
||||||
git clone https://github.com/yinqiwen/ardb.git
|
|
||||||
git checkout 0.10 || exit 1
|
|
||||||
make || exit 1
|
|
||||||
popd
|
|
||||||
popd
|
|
||||||
echo "ARDB Updated"
|
|
||||||
echo ""
|
|
||||||
|
|
||||||
exit 0
|
|
|
@ -2,13 +2,11 @@
|
||||||
|
|
||||||
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
|
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
|
||||||
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
|
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
|
||||||
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
|
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
|
||||||
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
|
|
||||||
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
|
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
|
||||||
|
|
||||||
export PATH=$AIL_HOME:$PATH
|
export PATH=$AIL_HOME:$PATH
|
||||||
export PATH=$AIL_REDIS:$PATH
|
export PATH=$AIL_REDIS:$PATH
|
||||||
export PATH=$AIL_ARDB:$PATH
|
|
||||||
export PATH=$AIL_BIN:$PATH
|
export PATH=$AIL_BIN:$PATH
|
||||||
export PATH=$AIL_FLASK:$PATH
|
export PATH=$AIL_FLASK:$PATH
|
||||||
|
|
||||||
|
|
|
@ -20,7 +20,7 @@ class AIL_Updater(object):
|
||||||
self.start_time = time.time()
|
self.start_time = time.time()
|
||||||
|
|
||||||
self.config = ConfigLoader()
|
self.config = ConfigLoader()
|
||||||
self.r_serv = self.config.get_redis_conn("Kvrocks_DB")
|
self.r_serv = self.config.get_db_conn("Kvrocks_DB")
|
||||||
|
|
||||||
self.f_version = float(self.version[1:])
|
self.f_version = float(self.version[1:])
|
||||||
self.current_f_version = ail_updates.get_ail_float_version()
|
self.current_f_version = ail_updates.get_ail_float_version()
|
||||||
|
@ -35,7 +35,7 @@ class AIL_Updater(object):
|
||||||
"""
|
"""
|
||||||
Update DB version
|
Update DB version
|
||||||
"""
|
"""
|
||||||
ail_updates.add_ail_update(version)
|
ail_updates.add_ail_update(self.version)
|
||||||
|
|
||||||
def run_update(self):
|
def run_update(self):
|
||||||
self.update()
|
self.update()
|
||||||
|
|
|
@ -1,50 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
import datetime
|
|
||||||
|
|
||||||
sys.path.append(os.environ['AIL_BIN'])
|
|
||||||
##################################
|
|
||||||
# Import Project packages
|
|
||||||
##################################
|
|
||||||
from lib import ConfigLoader
|
|
||||||
|
|
||||||
class AIL_Updater(object):
|
|
||||||
"""docstring for AIL_Updater."""
|
|
||||||
|
|
||||||
def __init__(self, new_version):
|
|
||||||
self.version = new_version
|
|
||||||
self.start_time = time.time()
|
|
||||||
|
|
||||||
self.config = ConfigLoader.ConfigLoader()
|
|
||||||
self.r_serv = self.config.get_redis_conn("ARDB_DB")
|
|
||||||
|
|
||||||
self.f_version = float(self.version[1:])
|
|
||||||
self.current_f_version = self.r_serv.get('ail:version')
|
|
||||||
if self.current_f_version:
|
|
||||||
self.current_f_version = float(self.current_f_version[1:])
|
|
||||||
else:
|
|
||||||
self.current_f_version = 0
|
|
||||||
|
|
||||||
def update(self):
|
|
||||||
"""
|
|
||||||
AIL DB update
|
|
||||||
"""
|
|
||||||
pass
|
|
||||||
|
|
||||||
def end_update(self):
|
|
||||||
"""
|
|
||||||
Update DB version
|
|
||||||
"""
|
|
||||||
# Set current ail version
|
|
||||||
self.r_serv.hset('ail:update_date', self.version, datetime.datetime.now().strftime("%Y%m%d"))
|
|
||||||
# Set current ail version
|
|
||||||
if self.f_version > self.current_f_version:
|
|
||||||
self.r_serv.set('ail:version', self.version)
|
|
||||||
|
|
||||||
def run_update(self):
|
|
||||||
self.update()
|
|
||||||
self.end_update()
|
|
|
@ -7,13 +7,13 @@ fi
|
||||||
|
|
||||||
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
|
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
|
||||||
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
|
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
|
||||||
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
|
[ -z "$AIL_KVROCKS" ] && echo "Needs the env var AIL_KVROCKS. Run the script from the virtual environment." && exit 1;
|
||||||
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
|
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
|
||||||
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
|
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
|
||||||
|
|
||||||
export PATH=$AIL_HOME:$PATH
|
export PATH=$AIL_HOME:$PATH
|
||||||
export PATH=$AIL_REDIS:$PATH
|
export PATH=$AIL_REDIS:$PATH
|
||||||
export PATH=$AIL_ARDB:$PATH
|
# Prepend the Kvrocks directory to PATH (the variable must be expanded with `$`)
export PATH=$AIL_KVROCKS:$PATH
|
||||||
export PATH=$AIL_BIN:$PATH
|
export PATH=$AIL_BIN:$PATH
|
||||||
export PATH=$AIL_FLASK:$PATH
|
export PATH=$AIL_FLASK:$PATH
|
||||||
|
|
||||||
|
@ -25,7 +25,7 @@ bash ${AIL_BIN}/LAUNCH.sh -ks
|
||||||
wait
|
wait
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
bash ${AIL_BIN}/LAUNCH.sh -lav
|
bash ${AIL_BIN}/LAUNCH.sh -lkv
|
||||||
wait
|
wait
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
|
|
|
@ -1,15 +0,0 @@
|
||||||
#!/bin/bash
|
|
||||||
YELLOW="\\033[1;33m"
|
|
||||||
DEFAULT="\\033[0;39m"
|
|
||||||
|
|
||||||
echo -e $YELLOW"\t"
|
|
||||||
echo -e "* ------------------------------------------------------------------"
|
|
||||||
echo -e "\t"
|
|
||||||
echo -e " - - - - - - - - PLEASE RELAUNCH AIL - - - - - - - - "
|
|
||||||
echo -e "\t"
|
|
||||||
echo -e "* ------------------------------------------------------------------"
|
|
||||||
echo -e "\t"
|
|
||||||
echo -e "\t"$DEFAULT
|
|
||||||
|
|
||||||
# fix invalid Updater version (kill parent):
|
|
||||||
kill -SIGUSR1 `ps --pid $$ -oppid=`; exit
|
|
|
@ -1,165 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
|
|
||||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
|
|
||||||
from lib import ConfigLoader
|
|
||||||
|
|
||||||
def update_tracked_terms(main_key, tracked_container_key):
|
|
||||||
for tracked_item in r_serv_term.smembers(main_key):
|
|
||||||
all_items = r_serv_term.smembers(tracked_container_key.format(tracked_item))
|
|
||||||
for item_path in all_items:
|
|
||||||
if PASTES_FOLDER in item_path:
|
|
||||||
new_item_path = item_path.replace(PASTES_FOLDER, '', 1)
|
|
||||||
r_serv_term.sadd(tracked_container_key.format(tracked_item), new_item_path)
|
|
||||||
r_serv_term.srem(tracked_container_key.format(tracked_item), item_path)
|
|
||||||
|
|
||||||
def update_hash_item(has_type):
|
|
||||||
#get all hash items:
|
|
||||||
all_hash_items = r_serv_tag.smembers('infoleak:automatic-detection=\"{}\"'.format(has_type))
|
|
||||||
for item_path in all_hash_items:
|
|
||||||
if PASTES_FOLDER in item_path:
|
|
||||||
base64_key = '{}_paste:{}'.format(has_type, item_path)
|
|
||||||
hash_key = 'hash_paste:{}'.format(item_path)
|
|
||||||
|
|
||||||
if r_serv_metadata.exists(base64_key):
|
|
||||||
new_base64_key = base64_key.replace(PASTES_FOLDER, '', 1)
|
|
||||||
res = r_serv_metadata.renamenx(base64_key, new_base64_key)
|
|
||||||
if res == 0:
|
|
||||||
print('same key, double name: {}'.format(item_path))
|
|
||||||
# fusion
|
|
||||||
all_key = r_serv_metadata.smembers(base64_key)
|
|
||||||
for elem in all_key:
|
|
||||||
r_serv_metadata.sadd(new_base64_key, elem)
|
|
||||||
r_serv_metadata.srem(base64_key, elem)
|
|
||||||
|
|
||||||
if r_serv_metadata.exists(hash_key):
|
|
||||||
new_hash_key = hash_key.replace(PASTES_FOLDER, '', 1)
|
|
||||||
res = r_serv_metadata.renamenx(hash_key, new_hash_key)
|
|
||||||
if res == 0:
|
|
||||||
print('same key, double name: {}'.format(item_path))
|
|
||||||
# fusion
|
|
||||||
all_key = r_serv_metadata.smembers(hash_key)
|
|
||||||
for elem in all_key:
|
|
||||||
r_serv_metadata.sadd(new_hash_key, elem)
|
|
||||||
r_serv_metadata.srem(hash_key, elem)
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
|
|
||||||
start_deb = time.time()
|
|
||||||
|
|
||||||
config_loader = ConfigLoader.ConfigLoader()
|
|
||||||
|
|
||||||
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
|
|
||||||
|
|
||||||
r_serv = config_loader.get_redis_conn("ARDB_DB")
|
|
||||||
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
|
|
||||||
r_serv_tag = config_loader.get_redis_conn("ARDB_Tags")
|
|
||||||
r_serv_term = config_loader.get_redis_conn("ARDB_TermFreq")
|
|
||||||
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
|
|
||||||
config_loader = None
|
|
||||||
|
|
||||||
r_serv.set('ail:current_background_script', 'metadata')
|
|
||||||
|
|
||||||
## Update metadata ##
|
|
||||||
print('Updating ARDB_Metadata ...')
|
|
||||||
index = 0
|
|
||||||
start = time.time()
|
|
||||||
|
|
||||||
#update stats
|
|
||||||
r_serv.set('ail:current_background_script_stat', 0)
|
|
||||||
|
|
||||||
# Update base64
|
|
||||||
update_hash_item('base64')
|
|
||||||
|
|
||||||
#update stats
|
|
||||||
r_serv.set('ail:current_background_script_stat', 20)
|
|
||||||
# Update binary
|
|
||||||
update_hash_item('binary')
|
|
||||||
|
|
||||||
#update stats
|
|
||||||
r_serv.set('ail:current_background_script_stat', 40)
|
|
||||||
# Update binary
|
|
||||||
update_hash_item('hexadecimal')
|
|
||||||
|
|
||||||
#update stats
|
|
||||||
r_serv.set('ail:current_background_script_stat', 60)
|
|
||||||
|
|
||||||
total_onion = r_serv_tag.scard('infoleak:submission=\"crawler\"')
|
|
||||||
nb_updated = 0
|
|
||||||
last_progress = 0
|
|
||||||
|
|
||||||
# Update onion metadata
|
|
||||||
all_crawled_items = r_serv_tag.smembers('infoleak:submission=\"crawler\"')
|
|
||||||
for item_path in all_crawled_items:
|
|
||||||
domain = None
|
|
||||||
if PASTES_FOLDER in item_path:
|
|
||||||
old_item_metadata = 'paste_metadata:{}'.format(item_path)
|
|
||||||
item_path = item_path.replace(PASTES_FOLDER, '', 1)
|
|
||||||
new_item_metadata = 'paste_metadata:{}'.format(item_path)
|
|
||||||
res = r_serv_metadata.renamenx(old_item_metadata, new_item_metadata)
|
|
||||||
#key already exist
|
|
||||||
if res == 0:
|
|
||||||
r_serv_metadata.delete(old_item_metadata)
|
|
||||||
|
|
||||||
# update domain port
|
|
||||||
domain = r_serv_metadata.hget(new_item_metadata, 'domain')
|
|
||||||
if domain:
|
|
||||||
if domain[-3:] != ':80':
|
|
||||||
r_serv_metadata.hset(new_item_metadata, 'domain', '{}:80'.format(domain))
|
|
||||||
super_father = r_serv_metadata.hget(new_item_metadata, 'super_father')
|
|
||||||
if super_father:
|
|
||||||
if PASTES_FOLDER in super_father:
|
|
||||||
r_serv_metadata.hset(new_item_metadata, 'super_father', super_father.replace(PASTES_FOLDER, '', 1))
|
|
||||||
father = r_serv_metadata.hget(new_item_metadata, 'father')
|
|
||||||
if father:
|
|
||||||
if PASTES_FOLDER in father:
|
|
||||||
r_serv_metadata.hset(new_item_metadata, 'father', father.replace(PASTES_FOLDER, '', 1))
|
|
||||||
|
|
||||||
nb_updated += 1
|
|
||||||
progress = int((nb_updated * 30) /total_onion)
|
|
||||||
print('{}/{} updated {}%'.format(nb_updated, total_onion, progress + 60))
|
|
||||||
# update progress stats
|
|
||||||
if progress != last_progress:
|
|
||||||
r_serv.set('ail:current_background_script_stat', progress + 60)
|
|
||||||
last_progress = progress
|
|
||||||
|
|
||||||
#update stats
|
|
||||||
r_serv.set('ail:current_background_script_stat', 90)
|
|
||||||
|
|
||||||
## update tracked term/set/regex
|
|
||||||
# update tracked term
|
|
||||||
update_tracked_terms('TrackedSetTermSet', 'tracked_{}')
|
|
||||||
|
|
||||||
#update stats
|
|
||||||
r_serv.set('ail:current_background_script_stat', 93)
|
|
||||||
# update tracked set
|
|
||||||
update_tracked_terms('TrackedSetSet', 'set_{}')
|
|
||||||
|
|
||||||
#update stats
|
|
||||||
r_serv.set('ail:current_background_script_stat', 96)
|
|
||||||
# update tracked regex
|
|
||||||
update_tracked_terms('TrackedRegexSet', 'regex_{}')
|
|
||||||
|
|
||||||
#update stats
|
|
||||||
r_serv.set('ail:current_background_script_stat', 100)
|
|
||||||
##
|
|
||||||
|
|
||||||
end = time.time()
|
|
||||||
|
|
||||||
print('Updating ARDB_Metadata Done => {} paths: {} s'.format(index, end - start))
|
|
||||||
print()
|
|
||||||
|
|
||||||
r_serv.sadd('ail:update_v1.5', 'metadata')
|
|
||||||
|
|
||||||
##
|
|
||||||
#Key, Dynamic Update
|
|
||||||
##
|
|
||||||
#paste_children
|
|
||||||
#nb_seen_hash, base64_hash, binary_hash
|
|
||||||
#paste_onion_external_links
|
|
||||||
#misp_events, hive_cases
|
|
||||||
##
|
|
|
@ -1,129 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
import datetime
|
|
||||||
|
|
||||||
sys.path.append(os.environ['AIL_BIN'])
|
|
||||||
from lib import ConfigLoader
|
|
||||||
|
|
||||||
def substract_date(date_from, date_to):
|
|
||||||
date_from = datetime.date(int(date_from[0:4]), int(date_from[4:6]), int(date_from[6:8]))
|
|
||||||
date_to = datetime.date(int(date_to[0:4]), int(date_to[4:6]), int(date_to[6:8]))
|
|
||||||
delta = date_to - date_from # timedelta
|
|
||||||
l_date = []
|
|
||||||
for i in range(delta.days + 1):
|
|
||||||
date = date_from + datetime.timedelta(i)
|
|
||||||
l_date.append( date.strftime('%Y%m%d') )
|
|
||||||
return l_date
|
|
||||||
|
|
||||||
def get_date_epoch(date):
|
|
||||||
return int(datetime.datetime(int(date[0:4]), int(date[4:6]), int(date[6:8])).timestamp())
|
|
||||||
|
|
||||||
def get_domain_root_from_paste_childrens(item_father, domain):
|
|
||||||
item_children = r_serv_metadata.smembers('paste_children:{}'.format(item_father))
|
|
||||||
domain_root = ''
|
|
||||||
for item_path in item_children:
|
|
||||||
# remove absolute_path
|
|
||||||
if PASTES_FOLDER in item_path:
|
|
||||||
r_serv_metadata.srem('paste_children:{}'.format(item_father), item_path)
|
|
||||||
item_path = item_path.replace(PASTES_FOLDER, '', 1)
|
|
||||||
r_serv_metadata.sadd('paste_children:{}'.format(item_father), item_path)
|
|
||||||
if domain in item_path:
|
|
||||||
domain_root = item_path
|
|
||||||
return domain_root
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
|
|
||||||
start_deb = time.time()
|
|
||||||
|
|
||||||
config_loader = ConfigLoader.ConfigLoader()
|
|
||||||
|
|
||||||
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
|
|
||||||
|
|
||||||
r_serv = config_loader.get_redis_conn("ARDB_DB")
|
|
||||||
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
|
|
||||||
r_serv_tag = config_loader.get_redis_conn("ARDB_Tags")
|
|
||||||
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
|
|
||||||
config_loader = None
|
|
||||||
|
|
||||||
r_serv.set('ail:current_background_script', 'onions')
|
|
||||||
r_serv.set('ail:current_background_script_stat', 0)
|
|
||||||
|
|
||||||
## Update Onion ##
|
|
||||||
print('Updating ARDB_Onion ...')
|
|
||||||
index = 0
|
|
||||||
start = time.time()
|
|
||||||
|
|
||||||
# clean down domain from db
|
|
||||||
date_from = '20180929'
|
|
||||||
date_today = datetime.date.today().strftime("%Y%m%d")
|
|
||||||
for date in substract_date(date_from, date_today):
|
|
||||||
|
|
||||||
onion_down = r_serv_onion.smembers('onion_down:{}'.format(date))
|
|
||||||
#print(onion_down)
|
|
||||||
for onion_domain in onion_down:
|
|
||||||
if not r_serv_onion.sismember('full_onion_up', onion_domain):
|
|
||||||
# delete history
|
|
||||||
all_onion_history = r_serv_onion.lrange('onion_history:{}'.format(onion_domain), 0 ,-1)
|
|
||||||
if all_onion_history:
|
|
||||||
for date_history in all_onion_history:
|
|
||||||
#print('onion_history:{}:{}'.format(onion_domain, date_history))
|
|
||||||
r_serv_onion.delete('onion_history:{}:{}'.format(onion_domain, date_history))
|
|
||||||
r_serv_onion.delete('onion_history:{}'.format(onion_domain))
|
|
||||||
|
|
||||||
#stats
|
|
||||||
total_domain = r_serv_onion.scard('full_onion_up')
|
|
||||||
nb_updated = 0
|
|
||||||
last_progress = 0
|
|
||||||
|
|
||||||
# clean up domain
|
|
||||||
all_domain_up = r_serv_onion.smembers('full_onion_up')
|
|
||||||
for onion_domain in all_domain_up:
|
|
||||||
# delete history
|
|
||||||
all_onion_history = r_serv_onion.lrange('onion_history:{}'.format(onion_domain), 0 ,-1)
|
|
||||||
if all_onion_history:
|
|
||||||
for date_history in all_onion_history:
|
|
||||||
print('--------')
|
|
||||||
print('onion_history:{}:{}'.format(onion_domain, date_history))
|
|
||||||
item_father = r_serv_onion.lrange('onion_history:{}:{}'.format(onion_domain, date_history), 0, 0)
|
|
||||||
print('item_father: {}'.format(item_father))
|
|
||||||
try:
|
|
||||||
item_father = item_father[0]
|
|
||||||
except IndexError:
|
|
||||||
r_serv_onion.delete('onion_history:{}:{}'.format(onion_domain, date_history))
|
|
||||||
continue
|
|
||||||
#print(item_father)
|
|
||||||
# delete old history
|
|
||||||
r_serv_onion.delete('onion_history:{}:{}'.format(onion_domain, date_history))
|
|
||||||
# create new history
|
|
||||||
root_key = get_domain_root_from_paste_childrens(item_father, onion_domain)
|
|
||||||
if root_key:
|
|
||||||
r_serv_onion.zadd(f'crawler_history_onion:{onion_domain}:80', {root_key: get_date_epoch(date_history)})
|
|
||||||
print('crawler_history_onion:{}:80 {} {}'.format(onion_domain, get_date_epoch(date_history), root_key))
|
|
||||||
#update service metadata: paste_parent
|
|
||||||
r_serv_onion.hset('onion_metadata:{}'.format(onion_domain), 'paste_parent', root_key)
|
|
||||||
|
|
||||||
r_serv_onion.delete('onion_history:{}'.format(onion_domain))
|
|
||||||
|
|
||||||
r_serv_onion.hset('onion_metadata:{}'.format(onion_domain), 'ports', '80')
|
|
||||||
r_serv_onion.hdel('onion_metadata:{}'.format(onion_domain), 'last_seen')
|
|
||||||
|
|
||||||
nb_updated += 1
|
|
||||||
progress = int((nb_updated * 100) /total_domain)
|
|
||||||
print('{}/{} updated {}%'.format(nb_updated, total_domain, progress))
|
|
||||||
# update progress stats
|
|
||||||
if progress != last_progress:
|
|
||||||
r_serv.set('ail:current_background_script_stat', progress)
|
|
||||||
last_progress = progress
|
|
||||||
|
|
||||||
|
|
||||||
end = time.time()
|
|
||||||
print('Updating ARDB_Onion Done => {} paths: {} s'.format(index, end - start))
|
|
||||||
print()
|
|
||||||
print('Done in {} s'.format(end - start_deb))
|
|
||||||
|
|
||||||
r_serv.sadd('ail:update_v1.5', 'onions')
|
|
|
@ -1,117 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
import datetime
|
|
||||||
|
|
||||||
from hashlib import sha256
|
|
||||||
|
|
||||||
sys.path.append(os.environ['AIL_BIN'])
|
|
||||||
from lib import ConfigLoader
|
|
||||||
|
|
||||||
def rreplace(s, old, new, occurrence):
|
|
||||||
li = s.rsplit(old, occurrence)
|
|
||||||
return new.join(li)
|
|
||||||
|
|
||||||
def substract_date(date_from, date_to):
|
|
||||||
date_from = datetime.date(int(date_from[0:4]), int(date_from[4:6]), int(date_from[6:8]))
|
|
||||||
date_to = datetime.date(int(date_to[0:4]), int(date_to[4:6]), int(date_to[6:8]))
|
|
||||||
delta = date_to - date_from # timedelta
|
|
||||||
l_date = []
|
|
||||||
for i in range(delta.days + 1):
|
|
||||||
date = date_from + datetime.timedelta(i)
|
|
||||||
l_date.append( date.strftime('%Y%m%d') )
|
|
||||||
return l_date
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
|
|
||||||
start_deb = time.time()
|
|
||||||
|
|
||||||
config_loader = ConfigLoader.ConfigLoader()
|
|
||||||
|
|
||||||
SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot"))
|
|
||||||
NEW_SCREENSHOT_FOLDER = config_loader.get_files_directory('screenshot')
|
|
||||||
|
|
||||||
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
|
|
||||||
|
|
||||||
r_serv = config_loader.get_redis_conn("ARDB_DB")
|
|
||||||
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
|
|
||||||
r_serv_tag = config_loader.get_redis_conn("ARDB_Tags")
|
|
||||||
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
|
|
||||||
config_loader = None
|
|
||||||
|
|
||||||
r_serv.set('ail:current_background_script', 'crawled_screenshot')
|
|
||||||
r_serv.set('ail:current_background_script_stat', 0)
|
|
||||||
|
|
||||||
## Update Onion ##
|
|
||||||
print('Updating ARDB_Onion ...')
|
|
||||||
index = 0
|
|
||||||
start = time.time()
|
|
||||||
|
|
||||||
# clean down domain from db
|
|
||||||
date_from = '20180801'
|
|
||||||
date_today = datetime.date.today().strftime("%Y%m%d")
|
|
||||||
list_date = substract_date(date_from, date_today)
|
|
||||||
nb_done = 0
|
|
||||||
last_progress = 0
|
|
||||||
total_to_update = len(list_date)
|
|
||||||
for date in list_date:
|
|
||||||
screenshot_dir = os.path.join(SCREENSHOT_FOLDER, date[0:4], date[4:6], date[6:8])
|
|
||||||
if os.path.isdir(screenshot_dir):
|
|
||||||
print(screenshot_dir)
|
|
||||||
for file in os.listdir(screenshot_dir):
|
|
||||||
if file.endswith(".png"):
|
|
||||||
index += 1
|
|
||||||
#print(file)
|
|
||||||
|
|
||||||
img_path = os.path.join(screenshot_dir, file)
|
|
||||||
with open(img_path, 'br') as f:
|
|
||||||
image_content = f.read()
|
|
||||||
|
|
||||||
hash = sha256(image_content).hexdigest()
|
|
||||||
img_dir_path = os.path.join(hash[0:2], hash[2:4], hash[4:6], hash[6:8], hash[8:10], hash[10:12])
|
|
||||||
filename_img = os.path.join(NEW_SCREENSHOT_FOLDER, img_dir_path, hash[12:] +'.png')
|
|
||||||
dirname = os.path.dirname(filename_img)
|
|
||||||
if not os.path.exists(dirname):
|
|
||||||
os.makedirs(dirname)
|
|
||||||
if not os.path.exists(filename_img):
|
|
||||||
os.rename(img_path, filename_img)
|
|
||||||
else:
|
|
||||||
os.remove(img_path)
|
|
||||||
|
|
||||||
item = os.path.join('crawled', date[0:4], date[4:6], date[6:8], file[:-4])
|
|
||||||
# add item metadata
|
|
||||||
r_serv_metadata.hset('paste_metadata:{}'.format(item), 'screenshot', hash)
|
|
||||||
# add sha256 metadata
|
|
||||||
r_serv_onion.sadd('screenshot:{}'.format(hash), item)
|
|
||||||
|
|
||||||
if file.endswith('.pnghar.txt'):
|
|
||||||
har_path = os.path.join(screenshot_dir, file)
|
|
||||||
new_file = rreplace(file, '.pnghar.txt', '.json', 1)
|
|
||||||
new_har_path = os.path.join(screenshot_dir, new_file)
|
|
||||||
os.rename(har_path, new_har_path)
|
|
||||||
|
|
||||||
progress = int((nb_done * 100) /total_to_update)
|
|
||||||
# update progress stats
|
|
||||||
if progress != last_progress:
|
|
||||||
r_serv.set('ail:current_background_script_stat', progress)
|
|
||||||
print('{}/{} screenshot updated {}%'.format(nb_done, total_to_update, progress))
|
|
||||||
last_progress = progress
|
|
||||||
|
|
||||||
nb_done += 1
|
|
||||||
|
|
||||||
r_serv.set('ail:current_background_script_stat', 100)
|
|
||||||
|
|
||||||
|
|
||||||
end = time.time()
|
|
||||||
print('Updating ARDB_Onion Done => {} paths: {} s'.format(index, end - start))
|
|
||||||
print()
|
|
||||||
print('Done in {} s'.format(end - start_deb))
|
|
||||||
|
|
||||||
r_serv.set('ail:current_background_script_stat', 100)
|
|
||||||
r_serv.sadd('ail:update_v1.5', 'crawled_screenshot')
|
|
||||||
if r_serv.scard('ail:update_v1.5') != 5:
|
|
||||||
r_serv.set('ail:update_error', 'Update v1.5 Failed, please relaunch the bin/update-background.py script')
|
|
|
@ -1,135 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
import redis
|
|
||||||
|
|
||||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
|
|
||||||
from lib import ConfigLoader
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
|
|
||||||
start_deb = time.time()
|
|
||||||
|
|
||||||
config_loader = ConfigLoader.ConfigLoader()
|
|
||||||
|
|
||||||
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
|
|
||||||
|
|
||||||
r_serv = config_loader.get_redis_conn("ARDB_DB")
|
|
||||||
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
|
|
||||||
r_serv_tag = config_loader.get_redis_conn("ARDB_Tags")
|
|
||||||
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
|
|
||||||
r_important_paste_2018 = redis.StrictRedis(
|
|
||||||
host=config_loader.get_config_str("ARDB_Metadata", "host"),
|
|
||||||
port=config_loader.get_config_int("ARDB_Metadata", "port"),
|
|
||||||
db=2018,
|
|
||||||
decode_responses=True)
|
|
||||||
|
|
||||||
r_important_paste_2019 = redis.StrictRedis(
|
|
||||||
host=config_loader.get_config_str("ARDB_Metadata", "host"),
|
|
||||||
port=config_loader.get_config_int("ARDB_Metadata", "port"),
|
|
||||||
db=2019,
|
|
||||||
decode_responses=True)
|
|
||||||
|
|
||||||
config_loader = None
|
|
||||||
|
|
||||||
r_serv.set('ail:current_background_script', 'tags')
|
|
||||||
r_serv.set('ail:current_background_script_stat', 0)
|
|
||||||
|
|
||||||
if r_serv.sismember('ail:update_v1.5', 'onions') and r_serv.sismember('ail:update_v1.5', 'metadata'):
|
|
||||||
|
|
||||||
print('Updating ARDB_Tags ...')
|
|
||||||
index = 0
|
|
||||||
nb_tags_to_update = 0
|
|
||||||
nb_updated = 0
|
|
||||||
last_progress = 0
|
|
||||||
start = time.time()
|
|
||||||
|
|
||||||
tags_list = r_serv_tag.smembers('list_tags')
|
|
||||||
# create temp tags metadata
|
|
||||||
tag_metadata = {}
|
|
||||||
for tag in tags_list:
|
|
||||||
tag_metadata[tag] = {}
|
|
||||||
tag_metadata[tag]['first_seen'] = r_serv_tag.hget('tag_metadata:{}'.format(tag), 'first_seen')
|
|
||||||
if tag_metadata[tag]['first_seen'] is None:
|
|
||||||
tag_metadata[tag]['first_seen'] = 99999999
|
|
||||||
else:
|
|
||||||
tag_metadata[tag]['first_seen'] = int(tag_metadata[tag]['first_seen'])
|
|
||||||
|
|
||||||
tag_metadata[tag]['last_seen'] = r_serv_tag.hget('tag_metadata:{}'.format(tag), 'last_seen')
|
|
||||||
if tag_metadata[tag]['last_seen'] is None:
|
|
||||||
tag_metadata[tag]['last_seen'] = 0
|
|
||||||
else:
|
|
||||||
tag_metadata[tag]['last_seen'] = int(tag_metadata[tag]['last_seen'])
|
|
||||||
nb_tags_to_update += r_serv_tag.scard(tag)
|
|
||||||
|
|
||||||
if nb_tags_to_update == 0:
|
|
||||||
nb_tags_to_update = 1
|
|
||||||
|
|
||||||
for tag in tags_list:
|
|
||||||
|
|
||||||
all_item = r_serv_tag.smembers(tag)
|
|
||||||
for item_path in all_item:
|
|
||||||
splitted_item_path = item_path.split('/')
|
|
||||||
#print(tag)
|
|
||||||
#print(item_path)
|
|
||||||
try:
|
|
||||||
item_date = int( ''.join([splitted_item_path[-4], splitted_item_path[-3], splitted_item_path[-2]]) )
|
|
||||||
except IndexError:
|
|
||||||
r_serv_tag.srem(tag, item_path)
|
|
||||||
continue
|
|
||||||
|
|
||||||
# remove absolute path
|
|
||||||
new_path = item_path.replace(PASTES_FOLDER, '', 1)
|
|
||||||
if new_path != item_path:
|
|
||||||
# save in queue absolute path to remove
|
|
||||||
r_serv_tag.sadd('maj:v1.5:absolute_path_to_rename', item_path)
|
|
||||||
|
|
||||||
# update metadata first_seen
|
|
||||||
if item_date < tag_metadata[tag]['first_seen']:
|
|
||||||
tag_metadata[tag]['first_seen'] = item_date
|
|
||||||
r_serv_tag.hset('tag_metadata:{}'.format(tag), 'first_seen', item_date)
|
|
||||||
|
|
||||||
# update metadata last_seen
|
|
||||||
if item_date > tag_metadata[tag]['last_seen']:
|
|
||||||
tag_metadata[tag]['last_seen'] = item_date
|
|
||||||
last_seen_db = r_serv_tag.hget('tag_metadata:{}'.format(tag), 'last_seen')
|
|
||||||
if last_seen_db:
|
|
||||||
if item_date > int(last_seen_db):
|
|
||||||
r_serv_tag.hset('tag_metadata:{}'.format(tag), 'last_seen', item_date)
|
|
||||||
else:
|
|
||||||
tag_metadata[tag]['last_seen'] = last_seen_db
|
|
||||||
|
|
||||||
r_serv_tag.sadd('{}:{}'.format(tag, item_date), new_path)
|
|
||||||
r_serv_tag.hincrby('daily_tags:{}'.format(item_date), tag, 1)
|
|
||||||
|
|
||||||
# clean db
|
|
||||||
r_serv_tag.srem(tag, item_path)
|
|
||||||
index = index + 1
|
|
||||||
|
|
||||||
nb_updated += 1
|
|
||||||
progress = int((nb_updated * 100) /nb_tags_to_update)
|
|
||||||
print('{}/{} updated {}%'.format(nb_updated, nb_tags_to_update, progress))
|
|
||||||
# update progress stats
|
|
||||||
if progress != last_progress:
|
|
||||||
r_serv.set('ail:current_background_script_stat', progress)
|
|
||||||
last_progress = progress
|
|
||||||
|
|
||||||
#flush browse importante pastes db
|
|
||||||
try:
|
|
||||||
r_important_paste_2018.flushdb()
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
try:
|
|
||||||
r_important_paste_2019.flushdb()
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
end = time.time()
|
|
||||||
|
|
||||||
print('Updating ARDB_Tags Done => {} paths: {} s'.format(index, end - start))
|
|
||||||
|
|
||||||
r_serv.sadd('ail:update_v1.5', 'tags')
|
|
|
@ -1,70 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
|
|
||||||
sys.path.append(os.environ['AIL_BIN'])
|
|
||||||
from lib import ConfigLoader
|
|
||||||
|
|
||||||
def tags_key_fusion(old_item_path_key, new_item_path_key):
    """Merge the tag set stored under the old key into the new key.

    Prints both keys, then moves every member of the set at
    ``old_item_path_key`` into the set at ``new_item_path_key`` one
    member at a time (add to the new set, remove from the old one).
    Uses the module-level ``r_serv_metadata`` connection.
    """
    for line in ('fusion:', old_item_path_key, new_item_path_key):
        print(line)
    for member in r_serv_metadata.smembers(old_item_path_key):
        r_serv_metadata.sadd(new_item_path_key, member)
        r_serv_metadata.srem(old_item_path_key, member)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Background step of the v1.5 update: item tag keys that still embed the
    # absolute pastes folder are renamed to their relative-path form.

    start_deb = time.time()
    config_loader = ConfigLoader.ConfigLoader()

    PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'

    r_serv = config_loader.get_redis_conn("ARDB_DB")
    r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
    r_serv_tag = config_loader.get_redis_conn("ARDB_Tags")
    config_loader = None

    # Run only after the 'tags' step has been recorded as done.
    if r_serv.sismember('ail:update_v1.5', 'tags'):

        r_serv.set('ail:current_background_script', 'tags_background')
        r_serv.set('ail:current_background_script_stat', 0)

        print('Updating ARDB_Tags ...')
        start = time.time()

        # update item metadata tags
        rename_queue = 'maj:v1.5:absolute_path_to_rename'
        total_to_update = r_serv_tag.scard(rename_queue)
        nb_updated = 0
        last_progress = 0
        while total_to_update > 0:
            item_path = r_serv_tag.srandmember(rename_queue)
            old_tag_item_key = 'tag:{}'.format(item_path)
            new_tag_item_key = 'tag:{}'.format(item_path.replace(PASTES_FOLDER, '', 1))

            # renamenx reports 0 when the rename did not happen; the two sets
            # are then merged member by member instead.
            if r_serv_metadata.renamenx(old_tag_item_key, new_tag_item_key) == 0:
                tags_key_fusion(old_tag_item_key, new_tag_item_key)
            nb_updated += 1
            r_serv_tag.srem(rename_queue, item_path)

            # Stop as soon as the queue is drained.
            if r_serv_tag.scard(rename_queue) == 0:
                break

            progress = int((nb_updated * 100) / total_to_update)
            print('{}/{} Tags updated {}%'.format(nb_updated, total_to_update, progress))
            # update progress stats (only written when the percentage changes)
            if progress != last_progress:
                r_serv.set('ail:current_background_script_stat', progress)
                last_progress = progress

        end = time.time()

        print('Updating ARDB_Tags Done: {} s'.format(end - start))

        r_serv.sadd('ail:update_v1.5', 'tags_background')
|
|
|
@ -1,54 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
import datetime
|
|
||||||
|
|
||||||
sys.path.append(os.environ['AIL_BIN'])
|
|
||||||
from lib import ConfigLoader
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # v1.5 update: strip the absolute pastes folder from entries of the onion
    # crawler queues, then register the v1.5 background update and version.

    start_deb = time.time()

    config_loader = ConfigLoader.ConfigLoader()

    PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'

    r_serv = config_loader.get_redis_conn("ARDB_DB")
    r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
    config_loader = None

    print()
    print('Updating ARDB_Onion ...')
    index = 0
    start = time.time()

    # update crawler queues
    # Each queue is rewritten in place. The priority queue used to be read
    # but its entries were removed from / added to 'onion_crawler_queue',
    # which left the priority queue untouched and polluted the normal one.
    for queue_name in ('onion_crawler_queue', 'onion_crawler_priority_queue'):
        for elem in r_serv_onion.smembers(queue_name):
            if PASTES_FOLDER in elem:
                r_serv_onion.srem(queue_name, elem)
                r_serv_onion.sadd(queue_name, elem.replace(PASTES_FOLDER, '', 1))
                index = index + 1

    end = time.time()
    print('Updating ARDB_Onion Done => {} paths: {} s'.format(index, end - start))
    print()

    # Add background update
    r_serv.sadd('ail:to_update', 'v1.5')

    # Set current ail version
    r_serv.set('ail:version', 'v1.5')

    # Record the date this update was applied (YYYYMMDD)
    r_serv.set('ail:update_date_v1.5', datetime.datetime.now().strftime("%Y%m%d"))

    print('Done in {} s'.format(end - start_deb))
|
|
|
@ -1,60 +0,0 @@
|
||||||
#!/bin/bash
# AIL v1.5 update: stop AIL, run the Redis update script, reinstall
# pybgpranking and DomainClassifier, refresh the web third-party assets,
# start the database (LAUNCH.sh -lav), run update/v1.5/Update.py, then stop.

# Abort early when a required environment variable is missing.
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -k &
wait

echo ""
bash -c "bash ${AIL_HOME}/update/bin/Update_Redis.sh"
#bash -c "bash ${AIL_HOME}/update/bin/Update_ARDB.sh"

echo ""
echo -e $GREEN"Update DomainClassifier"$DEFAULT
echo ""
# The URL must be quoted: an unquoted '&' ends the command, runs pip in the
# background and turns 'egg=pybgpranking' into a shell variable assignment.
pip3 install --upgrade --force-reinstall "git+https://github.com/D4-project/BGP-Ranking.git/@28013297efb039d2ebbce96ee2d89493f6ae56b0#subdirectory=client&egg=pybgpranking"
pip3 install --upgrade --force-reinstall git+https://github.com/adulau/DomainClassifier.git
wait
echo ""

echo ""
echo -e $GREEN"Update Web thirdparty"$DEFAULT
echo ""
bash -c "(cd ${AIL_FLASK}; ./update_thirdparty.sh &)"
wait
echo ""

# Explicit '/' so the path is valid whether or not AIL_BIN ends with one.
bash ${AIL_BIN}/LAUNCH.sh -lav &
wait
echo ""

echo ""
echo -e $GREEN"Fixing ARDB ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v1.5/Update.py &
wait
echo ""
echo ""

echo ""
echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks &
wait

echo ""

exit 0
|
|
|
@ -1,25 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
import datetime
|
|
||||||
|
|
||||||
sys.path.append(os.environ['AIL_BIN'])
|
|
||||||
from lib import ConfigLoader
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # v1.7 update: only records the new version and the update date.

    start_deb = time.time()

    config_loader = ConfigLoader.ConfigLoader()
    r_serv = config_loader.get_redis_conn("ARDB_DB")
    config_loader = None

    # Set current ail version
    r_serv.set('ail:version', 'v1.7')

    # Record the date this update was applied (YYYYMMDD)
    update_date = datetime.datetime.now().strftime("%Y%m%d")
    r_serv.set('ail:update_date_v1.7', update_date)
|
|
|
@ -1,65 +0,0 @@
|
||||||
#!/bin/bash
# AIL v1.7 update: stop AIL, build and install pgpdump, install
# beautifulsoup4, start the database (LAUNCH.sh -lav), run
# update/v1.7/Update.py, stop again and refresh third-party assets.

# Abort early when a required environment variable is missing.
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks &
wait

echo ""
echo -e $GREEN"Install pgpdump"$DEFAULT
echo ""

# Stop if a directory change fails, otherwise configure/make would run in
# whatever directory the script happens to be in.
cd "$AIL_HOME" || exit 1
git clone https://github.com/kazu-yamamoto/pgpdump.git
cd pgpdump || exit 1
./configure
make
sudo make install
wait
echo ""

echo ""
echo -e $GREEN"Update requirement"$DEFAULT
echo ""
pip3 install beautifulsoup4

# Explicit '/' so the path is valid whether or not AIL_BIN ends with one.
bash ${AIL_BIN}/LAUNCH.sh -lav &
wait
echo ""

echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v1.7/Update.py &
wait
echo ""
echo ""

echo ""
echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks &
wait

echo ""
echo -e $GREEN"Update thirdparty ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -t &
wait


echo ""

exit 0
|
|
|
@ -1,33 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
import datetime
|
|
||||||
|
|
||||||
sys.path.append(os.environ['AIL_BIN'])
|
|
||||||
from lib import ConfigLoader
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # v2.0 update: records the new version and moves the per-version
    # 'ail:update_date_<version>' string keys into the 'ail:update_date' hash.

    start_deb = time.time()

    config_loader = ConfigLoader.ConfigLoader()
    r_serv = config_loader.get_redis_conn("ARDB_DB")
    config_loader = None

    # Set current ail version
    r_serv.set('ail:version', 'v2.0')

    # use new update_date format
    for tag in ['v1.5', 'v1.7']:
        legacy_key = 'ail:update_date_{}'.format(tag)
        if not r_serv.exists(legacy_key):
            continue
        # Copy the stored date into the hash, then drop the legacy key.
        r_serv.hset('ail:update_date', tag, r_serv.get(legacy_key))
        r_serv.delete(legacy_key)

    # Record the date this update was applied (YYYYMMDD)
    update_date = datetime.datetime.now().strftime("%Y%m%d")
    r_serv.hset('ail:update_date', 'v2.0', update_date)
|
|
|
@ -1,75 +0,0 @@
|
||||||
#!/bin/bash
# AIL v2.0 update: stop AIL, generate a self-signed certificate and copy it
# to the Flask directory, install flask-login and bcrypt, start the database
# (LAUNCH.sh -lav), run update/v2.0/Update.py, refresh third-party assets,
# create the default user and stop again.

# Abort early when a required environment variable is missing.
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

echo ""
echo -e $GREEN"Create Self-Signed Certificate"$DEFAULT
echo ""
pushd ${AIL_BIN}/helper/gen_cert
bash gen_root.sh
wait
bash gen_cert.sh
wait
popd

cp ${AIL_BIN}/helper/gen_cert/server.crt ${AIL_FLASK}/server.crt
cp ${AIL_BIN}/helper/gen_cert/server.key ${AIL_FLASK}/server.key

echo ""
echo -e $GREEN"Update requirement"$DEFAULT
echo ""
pip3 install flask-login
wait
echo ""
pip3 install bcrypt
wait
echo ""
echo ""

bash ${AIL_BIN}/LAUNCH.sh -lav &
wait
echo ""

echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v2.0/Update.py
wait
echo ""
echo ""

echo ""
echo -e $GREEN"Update thirdparty ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -t
wait
echo ""

echo ""
echo -e $GREEN"Create Default User"$DEFAULT
echo ""
# Explicit '/' so the path is valid whether or not AIL_FLASK ends with one.
python3 ${AIL_FLASK}/create_default_user.py


echo ""
echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

exit 0
|
|
|
@ -1,118 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
import datetime
|
|
||||||
|
|
||||||
sys.path.append(os.environ['AIL_BIN'])
|
|
||||||
from packages import Term
|
|
||||||
|
|
||||||
from lib import ConfigLoader
|
|
||||||
|
|
||||||
|
|
||||||
def rreplace(s, old, new, occurrence):
    """Replace the last ``occurrence`` occurrences of ``old`` in ``s`` with ``new``.

    Works like ``str.replace`` with a count, but counting from the right
    end of the string.
    """
    return new.join(s.rsplit(old, occurrence))
|
|
||||||
|
|
||||||
def get_item_id(full_path):
    """Return ``full_path`` with the first occurrence of ``PASTES_FOLDER`` removed.

    ``PASTES_FOLDER`` is the module-level pastes directory; a path that
    does not contain it is returned unchanged.
    """
    item_id = full_path.replace(PASTES_FOLDER, '', 1)
    return item_id
|
|
||||||
|
|
||||||
def get_item_date(id_item):
    """Return the date embedded in an item id as one concatenated string.

    The id is split on '/', and the three components preceding the last
    one are joined (e.g. '.../2019/05/12/name' -> '20190512').
    Raises IndexError when the id has fewer than four components.
    """
    parts = id_item.split('/')
    return ''.join((parts[-4], parts[-3], parts[-2]))
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # v2.2 update: flushes the two term-tracking databases and records the
    # new version and update date.

    start_deb = time.time()

    config_loader = ConfigLoader.ConfigLoader()
    pastes_dir = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
    # Resolve symlinks and keep exactly one trailing separator.
    PASTES_FOLDER = os.path.join(os.path.realpath(pastes_dir), '')

    r_serv = config_loader.get_redis_conn("ARDB_DB")
    r_serv_term_stats = config_loader.get_redis_conn("ARDB_Trending")
    r_serv_termfreq = config_loader.get_redis_conn("ARDB_TermFreq")
    config_loader = None

    r_serv_term_stats.flushdb()

    # Disabled. Checkout the v2.2 branch if you need it:
    # conversion of the legacy trackers ('TrackedRegexSet', 'TrackedSetTermSet',
    # 'TrackedSetSet') into the new tracker format through
    # Term.parse_json_term_to_add / Term.add_tracked_item. Without it, the
    # legacy tracker data is dropped by the flush below.

    r_serv_termfreq.flushdb()

    # Set current ail version
    r_serv.set('ail:version', 'v2.2')

    # Record the date this update was applied (YYYYMMDD)
    update_date = datetime.datetime.now().strftime("%Y%m%d")
    r_serv.hset('ail:update_date', 'v2.2', update_date)
|
|
|
@ -1,39 +0,0 @@
|
||||||
#!/bin/bash
# AIL v2.2 update: stop AIL, start the database (LAUNCH.sh -lav), run
# update/v2.2/Update.py, then stop again.

# Abort early when a required environment variable is missing.
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

bash ${AIL_BIN}/LAUNCH.sh -lav &
wait
echo ""

echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v2.2/Update.py
wait
echo ""
echo ""

echo ""
echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

exit 0
|
|
|
@ -1,35 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
import datetime
|
|
||||||
|
|
||||||
sys.path.append(os.environ['AIL_BIN'])
|
|
||||||
from lib import ConfigLoader
|
|
||||||
|
|
||||||
new_version = 'v2.5'

if __name__ == '__main__':
    # v2.5 update: registers three additional roles and grants them to every
    # existing user, then records the new version and update date.

    start_deb = time.time()

    config_loader = ConfigLoader.ConfigLoader()
    r_serv = config_loader.get_redis_conn("ARDB_DB")
    config_loader = None

    # Add the new roles to the sorted set of roles with their scores.
    for role_score in ({'user': 3}, {'user_no_api': 4}, {'read_only': 5}):
        r_serv.zadd('ail:all_role', role_score)

    # Every user listed in the 'user:all' hash receives all three roles.
    for user in r_serv.hkeys('user:all'):
        for role_key in ('user_role:user', 'user_role:user_no_api', 'user_role:read_only'):
            r_serv.sadd(role_key, user)

    # Set current ail version
    r_serv.set('ail:version', new_version)

    # Record the date this update was applied (YYYYMMDD)
    update_date = datetime.datetime.now().strftime("%Y%m%d")
    r_serv.hset('ail:update_date', new_version, update_date)
|
|
|
@ -1,39 +0,0 @@
|
||||||
#!/bin/bash
# AIL v2.5 update: stop AIL, start the database (LAUNCH.sh -lav), run
# update/v2.5/Update.py, then stop again.

# Abort early when a required environment variable is missing.
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

bash ${AIL_BIN}/LAUNCH.sh -lav &
wait
echo ""

echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v2.5/Update.py
wait
echo ""
echo ""

echo ""
echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

exit 0
|
|
|
@ -1,27 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
import datetime
|
|
||||||
|
|
||||||
sys.path.append(os.environ['AIL_BIN'])
|
|
||||||
from lib import ConfigLoader
|
|
||||||
|
|
||||||
new_version = 'v2.6'

if __name__ == '__main__':
    # v2.6 update: queues the v2.6 background update and records the new
    # version and update date.
    start_deb = time.time()

    config_loader = ConfigLoader.ConfigLoader()
    r_serv = config_loader.get_redis_conn("ARDB_DB")
    config_loader = None

    # Add background update
    r_serv.sadd('ail:to_update', new_version)

    # Set current ail version
    r_serv.set('ail:version', new_version)

    # Record the date this update was applied (YYYYMMDD)
    update_date = datetime.datetime.now().strftime("%Y%m%d")
    r_serv.hset('ail:update_date', new_version, update_date)
|
|
|
@ -1,39 +0,0 @@
|
||||||
#!/bin/bash
# AIL v2.6 update: stop AIL, start the database (LAUNCH.sh -lav), run
# update/v2.6/Update.py, then stop again.

# Abort early when a required environment variable is missing.
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

bash ${AIL_BIN}/LAUNCH.sh -lav &
wait
echo ""

echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v2.6/Update.py
wait
echo ""
echo ""

echo ""
echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

exit 0
|
|
|
@ -1,90 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
|
|
||||||
from pyfaup.faup import Faup
|
|
||||||
|
|
||||||
sys.path.append(os.environ['AIL_BIN'])
|
|
||||||
from lib import ConfigLoader
|
|
||||||
|
|
||||||
def get_domain(item_id):
    """Return the domain part of an item id.

    Takes the last '/'-separated component of ``item_id`` and drops its
    final 36 characters (presumably a UUID suffix -- confirm against the
    code that builds crawled item ids).
    """
    basename = item_id.rsplit('/', 1)[-1]
    return basename[:-36]
|
|
||||||
|
|
||||||
def get_all_item(s_sha256):
    """Return the members of the 'screenshot:<sha256>' set.

    Reads from the module-level ``r_serv_onion`` connection; the caller
    treats the members as item ids.
    """
    screenshot_key = f'screenshot:{s_sha256}'
    return r_serv_onion.smembers(screenshot_key)
|
|
||||||
|
|
||||||
def sanitize_domain(domain):
    """Return the lower-cased 'domain' field extracted from ``domain``.

    The value is parsed with the module-level ``faup`` instance. When the
    extracted field is bytes it is decoded to ``str`` first; bytes that
    cannot be decoded are kept as-is, matching the previous best-effort
    behaviour.
    """
    faup.decode(domain)
    domain_sanitized = faup.get()['domain']
    # Only bytes need decoding. This replaces a bare ``except:`` that also
    # swallowed KeyboardInterrupt and SystemExit.
    if isinstance(domain_sanitized, bytes):
        try:
            domain_sanitized = domain_sanitized.decode()
        except UnicodeDecodeError:
            pass
    return domain_sanitized.lower()
|
|
||||||
|
|
||||||
def update_db(s_sha256):
    """Index one screenshot by the domains of the items that reference it.

    For each item attached to the screenshot, the domain is taken from
    the item id and sanitized; a domain that changes under sanitizing is
    recorded in 'incorrect_domain'. The screenshot and the sanitized
    domain are then linked in both directions. A screenshot with no item
    is recorded in 'broken_screenshot'.
    """
    screenshot_items = get_all_item(s_sha256)
    if not screenshot_items:
        # broken screenshot
        r_serv_onion.sadd('broken_screenshot', s_sha256)
        return

    for item_id in screenshot_items:
        relative_id = item_id.replace(PASTES_FOLDER+'/', '', 1)  # remove root path
        raw_domain = get_domain(relative_id)
        domain = sanitize_domain(raw_domain)
        if raw_domain != domain:
            r_serv_onion.sadd('incorrect_domain', raw_domain)

        r_serv_onion.sadd('domain_screenshot:{}'.format(domain), s_sha256)
        r_serv_onion.sadd('screenshot_domain:{}'.format(s_sha256), domain)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # v2.6 background update: walks the screenshot directory and indexes
    # every screenshot file through update_db().

    start_deb = time.time()
    faup = Faup()

    config_loader = ConfigLoader.ConfigLoader()

    ail_home = os.environ['AIL_HOME']
    PASTES_FOLDER = os.path.join(ail_home, config_loader.get_config_str("Directories", "pastes"))
    SCREENSHOT_FOLDER = os.path.join(ail_home, config_loader.get_config_str("Directories", "crawled_screenshot"), 'screenshot')

    r_serv_db = config_loader.get_redis_conn("ARDB_DB")
    r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
    config_loader = None

    r_serv_db.set('ail:update_in_progress', 'v2.6')
    r_serv_db.set('ail:current_background_update', 'v2.6')

    r_serv_db.set('ail:current_background_script_stat', 20)
    r_serv_db.set('ail:current_background_script', 'screenshot update')

    nb = 0

    if os.path.isdir(SCREENSHOT_FOLDER):
        for root, dirs, files in os.walk(SCREENSHOT_FOLDER, topdown=False):
            for name in files:
                nb += 1
                # The identifier is the file path relative to the screenshot
                # folder, without the 4-character extension (.png) and
                # without any '/'.
                file_path = os.path.join(root, name)[:-4]
                screenshot_sha256 = file_path.replace(SCREENSHOT_FOLDER, '', 1).replace('/', '')
                update_db(screenshot_sha256)
                # Publish the running count every 1000 files.
                if nb % 1000 == 0:
                    r_serv_db.set('ail:current_background_script', 'screenshot updated: {}'.format(nb))

    r_serv_db.set('ail:current_background_script_stat', 100)

    end = time.time()
    print('ALL screenshot updated: {} in {} s'.format(nb, end - start_deb))
|
|
|
@ -1,37 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
import datetime
|
|
||||||
|
|
||||||
sys.path.append(os.environ['AIL_BIN'])
|
|
||||||
from lib import ConfigLoader
|
|
||||||
|
|
||||||
new_version = 'v2.7'
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # v2.7 update entry point: registers the background update, prepares
    # the tag/domain work sets and records the new version and update date.

    start_deb = time.time()

    config_loader = ConfigLoader.ConfigLoader()
    r_serv = config_loader.get_redis_conn("ARDB_DB")
    r_serv_tags = config_loader.get_redis_conn("ARDB_Tags")
    r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
    config_loader = None

    r_serv.sadd('ail:to_update', new_version)

    #### Update tags ####
    # SUNIONSTORE with a single source key copies that set. The former
    # trailing `[]` argument was forwarded as an extra key, which recent
    # redis-py versions reject as an invalid argument type.
    r_serv_tags.sunionstore('list_tags:item', 'list_tags')
    r_serv_onion.sunionstore('domain_update_v2.7', 'full_onion_up')
    r_serv_onion.delete('incorrect_domain')
    # Marker checked (and deleted) by the background script once the
    # deprecated per-day tag keys have been removed.
    r_serv.set('ail:update_v2.7:deletetagrange', 1)
    #### ####

    # Set current ail version
    r_serv.set('ail:version', new_version)

    # Set the date this version was installed (YYYYMMDD)
    r_serv.hset('ail:update_date', new_version, datetime.datetime.now().strftime("%Y%m%d"))
|
|
|
@ -1,39 +0,0 @@
|
||||||
#!/bin/bash
# AIL v2.7 update: stop AIL, launch ARDB, run update/v2.7/Update.py, shut down again.

# Abort early when a required AIL environment variable is missing.
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

# ANSI colour escapes, interpreted by `echo -e`.
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash "${AIL_BIN}/LAUNCH.sh" -ks
wait

# Launched in the background; `wait` blocks until the launcher returns.
bash "${AIL_BIN}/LAUNCH.sh" -lav &
wait
echo ""

echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python "${AIL_HOME}/update/v2.7/Update.py"
wait
echo ""
echo ""

echo ""
echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash "${AIL_BIN}/LAUNCH.sh" -ks
wait

exit 0
|
|
|
@ -1,127 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
|
|
||||||
from pyfaup.faup import Faup
|
|
||||||
|
|
||||||
sys.path.append(os.environ['AIL_BIN'])
|
|
||||||
from packages import Date
|
|
||||||
|
|
||||||
from lib import ConfigLoader
|
|
||||||
|
|
||||||
def sanitize_domain(domain):
    """Return the lower-cased ``'domain'`` field Faup extracts from *domain*.

    Uses the module-level ``faup`` parser created in the ``__main__`` block.

    :param domain: value handed to ``faup.decode``
    :return: lower-cased domain as returned by ``faup.get()['domain']``
    """
    faup.decode(domain)
    domain_sanitized = faup.get()['domain']
    # NOTE(review): presumably some pyfaup versions return bytes here - confirm.
    try:
        domain_sanitized = domain_sanitized.decode()
    except (AttributeError, UnicodeDecodeError):
        # Already a str (no .decode) or undecodable bytes: keep the value
        # unchanged, as before, but no longer hide unrelated exceptions.
        pass
    return domain_sanitized.lower()
|
|
||||||
|
|
||||||
def get_all_obj_tags(obj_type):
    """Return, as a list, every member of the ``list_tags:<obj_type>`` set."""
    tags_key = f'list_tags:{obj_type}'
    members = r_serv_tags.smembers(tags_key)
    return list(members)
|
|
||||||
|
|
||||||
def add_global_tag(tag, object_type=None):
    """Add *tag* to ``list_tags`` and, when given, to ``list_tags:<object_type>``.

    :param tag: tag to register
    :param object_type: optional object type selecting the per-type tag set
    """
    target_sets = ['list_tags']
    if object_type:
        target_sets.append('list_tags:{}'.format(object_type))
    for set_name in target_sets:
        r_serv_tags.sadd(set_name, tag)
|
|
||||||
|
|
||||||
def get_obj_tag(object_id):
    """Return the members of ``tag:<object_id>`` as a list (empty list when none)."""
    tags = r_serv_metadata.smembers('tag:{}'.format(object_id))
    return list(tags) if tags else []
|
|
||||||
|
|
||||||
def delete_domain_tag_daterange():
    """Delete the ``domain:<tag>:<date>`` keys of every domain tag.

    Covers each date returned by ``Date.substract_date`` between 20191008
    and today, reports progress once per tag, and finally removes the
    ``ail:update_v2.7:deletetagrange`` marker.
    """
    domain_tags = get_all_obj_tags('domain')
    # A total of 0 would break the percentage computation; use 1 instead.
    total_tags = len(domain_tags) or 1
    refresh_time = time.time()
    l_dates = Date.substract_date('20191008', Date.get_today_date_str())
    for processed, tag in enumerate(domain_tags, start=1):
        for date_day in l_dates:
            r_serv_tags.delete('domain:{}:{}'.format(tag, date_day))
        refresh_time = update_progress(refresh_time, processed, total_tags)
    r_serv_db.delete('ail:update_v2.7:deletetagrange')
|
|
||||||
|
|
||||||
def update_domain_tags(domain):
    """Re-index the tags of *domain* under its sanitized name.

    A domain that differs from its sanitized form is recorded in the
    ``incorrect_domain`` set. The two ``infoleak:submission`` tags are
    removed from the domain; every other tag is registered globally and
    the domain is added to that tag's ``domain:<tag>`` set.
    """
    clean_domain = sanitize_domain(domain)
    if clean_domain != domain:
        r_serv_onion.sadd('incorrect_domain', domain)
        domain = clean_domain

    incorrect_tags = ('infoleak:submission="crawler"', 'infoleak:submission="manual"')
    for tag in get_obj_tag(domain):
        if tag in incorrect_tags:
            # delete incorrect tags
            r_serv_metadata.srem('tag:{}'.format(domain), tag)
            continue
        add_global_tag(tag, object_type='domain')
        r_serv_tags.sadd('{}:{}'.format('domain', tag), domain)
|
|
||||||
|
|
||||||
def update_progress(refresh_time, nb_updated, nb_elem_to_update):
    """Publish the update progress at most once every 10 seconds.

    :param refresh_time: timestamp (``time.time()``) of the last report
    :param nb_updated: number of elements processed so far
    :param nb_elem_to_update: total number of elements to process
    :return: *refresh_time* unchanged when no report was due, otherwise
        the timestamp of the report just made
    """
    if time.time() - refresh_time <= 10:
        return refresh_time

    # An empty work set used to raise ZeroDivisionError here; report it
    # as complete instead.
    if nb_elem_to_update:
        progress = int((nb_updated * 100) / nb_elem_to_update)
    else:
        progress = 100
    print('{}/{} updated {}%'.format(nb_updated, nb_elem_to_update, progress))
    r_serv_db.set('ail:current_background_script_stat', progress)
    return time.time()
|
|
||||||
|
|
||||||
def update_db():
    """Drain the ``domain_update_v2.7`` set, updating the tags of each domain.

    Afterwards, when the ``ail:update_v2.7:deletetagrange`` marker exists,
    the deprecated per-day tag keys are removed as well.
    """
    queue_key = 'domain_update_v2.7'
    nb_to_update = r_serv_onion.scard(queue_key)
    refresh_time = time.time()
    r_serv_db.set('ail:current_background_script_stat', 0)
    r_serv_db.set('ail:current_background_script', 'domain tags update')

    nb_updated = 0
    # iter(callable, sentinel): pop domains until spop() returns None.
    for domain in iter(lambda: r_serv_onion.spop(queue_key), None):
        update_domain_tags(domain)
        nb_updated += 1
        refresh_time = update_progress(refresh_time, nb_updated, nb_to_update)

    if r_serv_db.exists('ail:update_v2.7:deletetagrange'):
        r_serv_db.set('ail:current_background_script_stat', 0)
        r_serv_db.set('ail:current_background_script', 'tags: remove deprecated keys')
        delete_domain_tag_daterange()

    # sort all crawled domain
    # NOTE(review): the sorted results are not stored or used here - confirm intent.
    for crawled_key in ('full_onion_up', 'full_regular_up'):
        r_serv_onion.sort(crawled_key, alpha=True)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # v2.7 background update: rebuild the domain tag indexes.
    start_deb = time.time()
    faup = Faup()

    config_loader = ConfigLoader.ConfigLoader()

    r_serv_db = config_loader.get_redis_conn("ARDB_DB")
    r_serv_tags = config_loader.get_redis_conn("ARDB_Tags")
    r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
    r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
    config_loader = None

    update_version = 'v2.7'

    # Flag this version as the update currently running in the background.
    for status_key in ('ail:update_in_progress', 'ail:current_background_update'):
        r_serv_db.set(status_key, update_version)

    r_serv_db.set('ail:current_background_script_stat', 0)
    r_serv_db.set('ail:current_background_script', 'tags update')

    update_db()

    r_serv_db.set('ail:current_background_script_stat', 100)

    elapsed = time.time() - start_deb
    print('ALL domains tags updated in {} s'.format(elapsed))
|
|
|
@ -1,43 +0,0 @@
|
||||||
#!/bin/bash
# AIL v3.0 update: stop AIL, launch ARDB, run update/v3.0/Update.py,
# update the third-party components, then shut down again.

# Abort early when a required AIL environment variable is missing.
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

# ANSI colour escapes, interpreted by `echo -e`.
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash "${AIL_BIN}/LAUNCH.sh" -ks
wait

# Launched in the background; `wait` blocks until the launcher returns.
bash "${AIL_BIN}/LAUNCH.sh" -lav &
wait
echo ""

echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python "${AIL_HOME}/update/v3.0/Update.py"
wait
echo ""
echo ""

echo ""
echo -e $GREEN"Update thirdparty ..."$DEFAULT
bash "${AIL_BIN}/LAUNCH.sh" -t

echo ""
echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash "${AIL_BIN}/LAUNCH.sh" -ks
wait

exit 0
|
|
|
@ -1,22 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
|
|
||||||
sys.path.append(os.environ['AIL_HOME'])
|
|
||||||
##################################
|
|
||||||
# Import Project packages
|
|
||||||
##################################
|
|
||||||
from update.bin.old_ail_updater import AIL_Updater
|
|
||||||
|
|
||||||
class Updater(AIL_Updater):
    """Updater for this release; defines no behaviour beyond AIL_Updater."""

    def __init__(self, version):
        # Nothing release-specific to set up: defer to the base class.
        super().__init__(version)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Run the v3.1.1 update through the AIL_Updater workflow.
    Updater('v3.1.1').run_update()
|
|
|
@ -1,52 +0,0 @@
|
||||||
#!/bin/bash
# AIL v3.1.1 update: refresh submodules, install yara-python, update the
# third-party components and run update/v3.1.1/Update.py.

# Abort early when a required AIL environment variable is missing.
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

# ANSI colour escapes, interpreted by `echo -e`.
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash "${AIL_BIN}/LAUNCH.sh" -ks
wait

# NOTE(review): -ldbv presumably launches the databases and waits for them - confirm in LAUNCH.sh.
bash "${AIL_BIN}/LAUNCH.sh" -ldbv &
wait
echo ""

# SUBMODULES #
git submodule init
git submodule update

echo -e $GREEN"Installing YARA ..."$DEFAULT
pip3 install yara-python
bash "${AIL_BIN}/LAUNCH.sh" -t

# SUBMODULES #
# Run a second time, after the third-party step.
git submodule init
git submodule update

echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python "${AIL_HOME}/update/v3.1.1/Update.py"
wait
echo ""
echo ""


echo ""
echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash "${AIL_BIN}/LAUNCH.sh" -ks
wait

exit 0
|
|
|
@ -1,23 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
|
|
||||||
sys.path.append(os.environ['AIL_HOME'])
|
|
||||||
##################################
|
|
||||||
# Import Project packages
|
|
||||||
##################################
|
|
||||||
from update.bin.old_ail_updater import AIL_Updater
|
|
||||||
|
|
||||||
class Updater(AIL_Updater):
    """Updater for this release; defines no behaviour beyond AIL_Updater."""

    def __init__(self, version):
        # Nothing release-specific to set up: defer to the base class.
        super().__init__(version)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Run the v3.1 update through the AIL_Updater workflow.
    Updater('v3.1').run_update()
|
|
||||||
|
|
|
@ -1,46 +0,0 @@
|
||||||
#!/bin/bash
# AIL v3.1 update: install scrapy packages, run update/v3.1/Update.py and
# update the third-party components.

# Abort early when a required AIL environment variable is missing.
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

# ANSI colour escapes, interpreted by `echo -e`.
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash "${AIL_BIN}/LAUNCH.sh" -ks
wait

# Launched in the background; `wait` blocks until the launcher returns.
bash "${AIL_BIN}/LAUNCH.sh" -lav &
wait
echo ""

pip3 install scrapy
pip3 install scrapy-splash

echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python "${AIL_HOME}/update/v3.1/Update.py"
wait
echo ""
echo ""

echo ""
echo -e $GREEN"Update thirdparty ..."$DEFAULT
bash "${AIL_BIN}/LAUNCH.sh" -t

echo ""
echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash "${AIL_BIN}/LAUNCH.sh" -ks
wait

exit 0
|
|
|
@ -1,22 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
|
|
||||||
sys.path.append(os.environ['AIL_HOME'])
|
|
||||||
##################################
|
|
||||||
# Import Project packages
|
|
||||||
##################################
|
|
||||||
from update.bin.old_ail_updater import AIL_Updater
|
|
||||||
|
|
||||||
class Updater(AIL_Updater):
    """Updater for this release; defines no behaviour beyond AIL_Updater."""

    def __init__(self, version):
        # Nothing release-specific to set up: defer to the base class.
        super().__init__(version)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Run the v3.2 update through the AIL_Updater workflow.
    Updater('v3.2').run_update()
|
|
|
@ -1,52 +0,0 @@
|
||||||
#!/bin/bash
# AIL v3.2 update: refresh submodules, install yara-python, update the
# third-party components and run update/v3.2/Update.py.

# Abort early when a required AIL environment variable is missing.
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

# ANSI colour escapes, interpreted by `echo -e`.
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash "${AIL_BIN}/LAUNCH.sh" -ks
wait

# NOTE(review): -ldbv presumably launches the databases and waits for them - confirm in LAUNCH.sh.
bash "${AIL_BIN}/LAUNCH.sh" -ldbv &
wait
echo ""

# SUBMODULES #
git submodule init
git submodule update

echo -e $GREEN"Installing YARA ..."$DEFAULT
pip3 install yara-python
bash "${AIL_BIN}/LAUNCH.sh" -t

# SUBMODULES #
# Run a second time, after the third-party step.
git submodule init
git submodule update

echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python "${AIL_HOME}/update/v3.2/Update.py"
wait
echo ""
echo ""


echo ""
echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash "${AIL_BIN}/LAUNCH.sh" -ks
wait

exit 0
|
|
|
@ -1,22 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
|
|
||||||
sys.path.append(os.environ['AIL_HOME'])
|
|
||||||
##################################
|
|
||||||
# Import Project packages
|
|
||||||
##################################
|
|
||||||
from update.bin.old_ail_updater import AIL_Updater
|
|
||||||
|
|
||||||
class Updater(AIL_Updater):
    """Updater for this release; defines no behaviour beyond AIL_Updater."""

    def __init__(self, version):
        # Nothing release-specific to set up: defer to the base class.
        super().__init__(version)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Run the v3.3 update through the AIL_Updater workflow.
    Updater('v3.3').run_update()
|
|
|
@ -1,54 +0,0 @@
|
||||||
#!/bin/bash
# AIL v3.3 update: refresh submodules, install html2text and run
# update/v3.3/Update.py.

# Abort early when a required AIL environment variable is missing.
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

# ANSI colour escapes, interpreted by `echo -e`.
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash "${AIL_BIN}/LAUNCH.sh" -ks
wait

# NOTE(review): -ldbv presumably launches the databases and waits for them - confirm in LAUNCH.sh.
bash "${AIL_BIN}/LAUNCH.sh" -ldbv &
wait
echo ""

# SUBMODULES #
git submodule update

# Disabled KVROCKS installation steps, kept for reference.
# echo ""
# echo -e $GREEN"installing KVORCKS ..."$DEFAULT
# cd ${AIL_HOME}
# test ! -d kvrocks/ && git clone https://github.com/bitleak/kvrocks.git
# pushd kvrocks/
# make -j4
# popd

echo -e $GREEN"Installing html2text ..."$DEFAULT
pip3 install html2text

echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python "${AIL_HOME}/update/v3.3/Update.py"
wait
echo ""
echo ""


echo ""
echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash "${AIL_BIN}/LAUNCH.sh" -ks
wait

exit 0
|
|
|
@ -1,34 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
|
|
||||||
sys.path.append(os.environ['AIL_HOME'])
|
|
||||||
##################################
|
|
||||||
# Import Project packages
|
|
||||||
##################################
|
|
||||||
from update.bin.old_ail_updater import AIL_Updater
|
|
||||||
|
|
||||||
class Updater(AIL_Updater):
    """Updater for v3.4: queues the crawled domains for the language background update."""

    def __init__(self, version):
        super().__init__(version)
        # self.config is not set here; presumably provided by AIL_Updater.
        self.r_serv_onion = self.config.get_redis_conn("ARDB_Onion")

    def update(self):
        """Build the ``domain_update_v3.4`` work set and register the background update.

        The work set is the union of ``full_onion_up`` and
        ``full_regular_up``; its size and a zeroed progress counter are
        stored for the background script.
        """
        queue_key = 'domain_update_v3.4'
        self.r_serv_onion.sunionstore(queue_key, 'full_onion_up', 'full_regular_up')
        nb_domains = self.r_serv_onion.scard(queue_key)
        self.r_serv.set('update:nb_elem_to_convert', nb_domains)
        self.r_serv.set('update:nb_elem_converted', 0)

        # Add background update
        self.r_serv.sadd('ail:to_update', self.version)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Run the v3.4 update through the AIL_Updater workflow.
    Updater('v3.4').run_update()
|
|
|
@ -1,54 +0,0 @@
|
||||||
#!/bin/bash
# AIL v3.4 update: refresh submodules, install pycld3 and run
# update/v3.4/Update.py.

# Abort early when a required AIL environment variable is missing.
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

# ANSI colour escapes, interpreted by `echo -e`.
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash "${AIL_BIN}/LAUNCH.sh" -ks
wait

# Database launch is disabled for this update.
# bash ${AIL_BIN}/LAUNCH.sh -ldbv &
# wait
# echo ""

# SUBMODULES #
git submodule update

# Disabled KVROCKS installation steps, kept for reference.
# echo ""
# echo -e $GREEN"installing KVORCKS ..."$DEFAULT
# cd ${AIL_HOME}
# test ! -d kvrocks/ && git clone https://github.com/bitleak/kvrocks.git
# pushd kvrocks/
# make -j4
# popd

# The banner previously announced html2text although pycld3 is installed.
echo -e $GREEN"Installing pycld3 ..."$DEFAULT
pip3 install pycld3

echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python "${AIL_HOME}/update/v3.4/Update.py"
wait
echo ""
echo ""


echo ""
echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash "${AIL_BIN}/LAUNCH.sh" -ks
wait

exit 0
|
|
|
@ -1,121 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
|
|
||||||
sys.path.append(os.environ['AIL_BIN'])
|
|
||||||
##################################
|
|
||||||
# Import Project packages
|
|
||||||
##################################
|
|
||||||
from lib import ConfigLoader
|
|
||||||
from lib.objects.Items import Item
|
|
||||||
|
|
||||||
def get_domain_type(domain_name):
    """Return ``'onion'`` when *domain_name* ends with ``.onion``, else ``'regular'``."""
    return 'onion' if str(domain_name).endswith('.onion') else 'regular'
|
|
||||||
|
|
||||||
def add_domain_language(domain_name, language):
    """Index *language* for *domain_name* in the domain language sets.

    Only the part of *language* before the first ``-`` is kept. The value
    is added to the global and per-domain-type language sets, and the
    domain/language pair is recorded in both directions.
    """
    language = language.split('-')[0]
    domain_type = get_domain_type(domain_name)
    entries = (
        ('all_domains_languages', language),
        (f'all_domains_languages:{domain_type}', language),
        (f'language:domains:{domain_type}:{language}', domain_name),
        (f'domain:language:{domain_name}', language),
    )
    for set_key, member in entries:
        r_serv_onion.sadd(set_key, member)
|
|
||||||
|
|
||||||
def add_domain_languages_by_item_id(domain_name, item_id):
    """Add every language reported by ``Item(item_id).get_languages()`` to *domain_name*."""
    detected_languages = Item(item_id).get_languages()
    for detected in detected_languages:
        add_domain_language(domain_name, detected.language)
|
|
||||||
|
|
||||||
def update_update_stats():
    """Print and store the completion percentage of the domain update.

    Reads the converted counter from ``update:nb_elem_converted`` and
    compares it with the module-level ``nb_elem_to_update`` total.
    """
    converted = int(r_serv_db.get('update:nb_elem_converted'))
    percent = int((converted * 100) / nb_elem_to_update)
    print(f'{converted}/{nb_elem_to_update} updated {percent}%')
    r_serv_db.set('ail:current_background_script_stat', percent)
|
|
||||||
|
|
||||||
def update_domain_language(domain_obj, item_id):
    """Record the languages of item *item_id* for a domain.

    :param domain_obj: either an object exposing ``get_domain_name()`` or
        the domain name itself (the ``__main__`` loop passes a ``str``)
    :param item_id: id of the crawled item whose languages are added
    """
    get_name = getattr(domain_obj, 'get_domain_name', None)
    # A plain string has no get_domain_name(); calling it unconditionally
    # raised AttributeError for every domain handed over by the main loop.
    domain_name = get_name() if callable(get_name) else domain_obj
    add_domain_languages_by_item_id(domain_name, item_id)
|
|
||||||
|
|
||||||
def get_domain_history(domain_type, domain_name):
    """Return all entries, with scores, of the domain's port-80 crawler history sorted set."""
    history_key = f'crawler_history_{domain_type}:{domain_name}:80'
    return r_serv_onion.zrange(history_key, 0, -1, withscores=True)
|
|
||||||
|
|
||||||
|
|
||||||
def get_item_children(item_id):
    """Return the members of the ``paste_children:<item_id>`` set."""
    children_key = f'paste_children:{item_id}'
    return r_serv_metadata.smembers(children_key)
|
|
||||||
|
|
||||||
def get_domain_items(domain_name, root_item_id):
    """Return the in-domain descendants of *root_item_id*, followed by the root itself."""
    return get_domain_item_children(domain_name, root_item_id) + [root_item_id]
|
|
||||||
|
|
||||||
def is_item_in_domain(domain_name, item_id):
    """Tell whether *item_id* embeds *domain_name* just before its last 36 characters.

    Ids that are not longer than ``len(domain_name) + 48`` are rejected.
    NOTE(review): the trailing 36 characters are presumably a UUID - confirm.
    """
    domain_length = len(domain_name)
    if len(item_id) <= domain_length + 48:
        return False
    return item_id[-36 - domain_length:-36] == domain_name
|
|
||||||
|
|
||||||
def get_domain_item_children(domain_name, root_item_id):
    """Collect, recursively, the descendants of *root_item_id* that belong to *domain_name*.

    A child outside the domain is skipped together with its own subtree.
    """
    descendants = []
    for child_id in get_item_children(root_item_id):
        if not is_item_in_domain(domain_name, child_id):
            continue
        descendants.append(child_id)
        descendants += get_domain_item_children(domain_name, child_id)
    return descendants
|
|
||||||
|
|
||||||
def get_domain_crawled_item_root(domain_name, domain_type, epoch):
    """Look up the root item recorded for a domain crawl at *epoch*.

    :param domain_name: crawled domain
    :param domain_type: domain type used in the history key
    :param epoch: crawl timestamp used as the sorted-set score
    :return: ``{"root_item": ..., "epoch": int(epoch)}``, or ``None`` when
        the history holds no entry with that score
    """
    score = int(epoch)
    res = r_serv_onion.zrevrangebyscore(f'crawler_history_{domain_type}:{domain_name}:80', score, score)
    if not res:
        # No entry for this score: res[0] used to raise IndexError. The
        # caller already tests the result for falsiness.
        return None
    return {"root_item": res[0], "epoch": score}
|
|
||||||
|
|
||||||
def get_domain_items_crawled(domain_name, domain_type, epoch):
    """Return the item ids of *domain_name* for the crawl at *epoch*.

    The list is empty when there is no root entry, when the root item is
    empty, or when the root item equals the epoch rendered as a string.
    """
    item_root = get_domain_crawled_item_root(domain_name, domain_type, epoch)
    if not item_root:
        return []
    root_item = item_root['root_item']
    if root_item == str(item_root['epoch']) or not root_item:
        return []
    return list(get_domain_items(domain_name, root_item))
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # v3.4 background update: compute the languages of every queued domain.
    start_deb = time.time()
    config_loader = ConfigLoader.ConfigLoader()
    r_serv_db = config_loader.get_redis_conn("ARDB_DB")
    r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
    r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
    config_loader = None

    r_serv_db.set('ail:current_background_script', 'domain languages update')

    # Total used for the percentage; falls back to 1 when the key is unset.
    stored_total = r_serv_db.get('update:nb_elem_to_convert')
    nb_elem_to_update = int(stored_total) if stored_total else 1

    # _delete_all_domains_languages()

    while True:
        domain = r_serv_onion.spop('domain_update_v3.4')
        if domain is None:
            # Work set exhausted: mark the script as complete and stop.
            r_serv_db.set('ail:current_background_script_stat', 100)
            sys.exit(0)

        print(domain)
        domain = str(domain)
        domain_t = get_domain_type(domain)
        for domain_history in get_domain_history(domain_t, domain):
            # domain_history[1] is the score returned by zrange(withscores=True).
            for id_item in get_domain_items_crawled(domain, domain_t, domain_history[1]):
                update_domain_language(domain, id_item)

        r_serv_db.incr('update:nb_elem_converted')
        update_update_stats()
|
|
|
@ -1,22 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
|
|
||||||
sys.path.append(os.environ['AIL_HOME'])
|
|
||||||
##################################
|
|
||||||
# Import Project packages
|
|
||||||
##################################
|
|
||||||
from update.bin.old_ail_updater import AIL_Updater
|
|
||||||
|
|
||||||
class Updater(AIL_Updater):
    """Updater for this release; defines no behaviour beyond AIL_Updater."""

    def __init__(self, version):
        # Nothing release-specific to set up: defer to the base class.
        super().__init__(version)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Run the v3.5 update through the AIL_Updater workflow.
    Updater('v3.5').run_update()
|
|
|
@ -1,35 +0,0 @@
|
||||||
#!/bin/bash
# AIL v3.5 update: stop AIL, refresh submodules and install/upgrade
# pyail, d4-pyclient and DomainClassifier.

# Abort early when a required AIL environment variable is missing.
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

# ANSI colour escapes, interpreted by `echo -e`.
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash "${AIL_BIN}/LAUNCH.sh" -ks
wait

# SUBMODULES #
git submodule update

echo -e $GREEN"Installing PyAIL ..."$DEFAULT
pip3 install -U pyail

echo -e $GREEN"Installing D4 CLIENT ..."$DEFAULT
pip3 install -U d4-pyclient

echo ""
echo -e $GREEN"Updating DomainClassifier ..."$DEFAULT
pip3 install -U DomainClassifier

exit 0
|
|
|
@ -1,22 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
|
|
||||||
sys.path.append(os.environ['AIL_HOME'])
|
|
||||||
##################################
|
|
||||||
# Import Project packages
|
|
||||||
##################################
|
|
||||||
from update.bin.old_ail_updater import AIL_Updater
|
|
||||||
|
|
||||||
class Updater(AIL_Updater):
    """Updater for this release; defines no behaviour beyond AIL_Updater."""

    def __init__(self, version):
        # Nothing release-specific to set up: defer to the base class.
        super().__init__(version)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Run the v3.6 update through the AIL_Updater workflow.
    Updater('v3.6').run_update()
|
|
|
@ -1,39 +0,0 @@
|
||||||
#!/bin/bash

# Abort early unless every AIL environment variable is set.
# The variable name is taken from the loop (bash indirect expansion), so the
# error message always names the variable that is actually missing; the
# original AIL_BIN check wrongly reported AIL_ARDB.
for required_var in AIL_HOME AIL_REDIS AIL_ARDB AIL_BIN AIL_FLASK; do
    if [ -z "${!required_var}" ]; then
        echo "Needs the env var ${required_var}. Run the script from the virtual environment."
        exit 1
    fi
done

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

# ANSI colour sequences, interpreted by `echo -e`
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash "${AIL_BIN}/LAUNCH.sh" -ks
wait

# SUBMODULES #
git submodule update

echo -e $GREEN"Updating D4 CLIENT ..."$DEFAULT
pip3 install -U d4-pyclient

echo ""
echo -e $GREEN"Installing nose ..."$DEFAULT
pip3 install -U nose

echo -e $GREEN"Installing coverage ..."$DEFAULT
pip3 install -U coverage

echo ""
echo -e $GREEN"Updating DomainClassifier ..."$DEFAULT
pip3 install -U DomainClassifier

exit 0
|
|
|
@ -1,40 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
|
|
||||||
sys.path.append(os.environ['AIL_HOME'])
|
|
||||||
##################################
|
|
||||||
# Import Project packages
|
|
||||||
##################################
|
|
||||||
from update.bin.old_ail_updater import AIL_Updater
|
|
||||||
from lib import Tracker
|
|
||||||
|
|
||||||
class Updater(AIL_Updater):
    """Updater for v3.7: queues every tracker for the background tracker update."""

    def __init__(self, version):
        super().__init__(version)

    def update(self):
        """
        Repair the tracker uuid list, then queue each tracker for the background update.
        """
        print('Fixing Tracker_uuid list ...')
        Tracker.fix_all_tracker_uuid_list()

        # Queue every tracker uuid; enumerate() leaves the running count in
        # `queued` (it stays 0 when there are no trackers).
        queued = 0
        for queued, tracker_uuid in enumerate(Tracker.get_trackers(), start=1):
            self.r_serv.sadd('trackers_update_v3.7', tracker_uuid)

        # Progress counters read by the background script.
        self.r_serv.set('update:nb_elem_to_convert', queued)
        self.r_serv.set('update:nb_elem_converted', 0)

        # Add background update
        self.r_serv.sadd('ail:to_update', self.version)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # run_update() is inherited from AIL_Updater; update() above supplies the v3.7 work.
    updater = Updater(version='v3.7')
    updater.run_update()
|
|
|
@ -1,44 +0,0 @@
|
||||||
#!/bin/bash

# Abort early unless every AIL environment variable is set.
# The variable name is taken from the loop (bash indirect expansion), so the
# error message always names the variable that is actually missing; the
# original AIL_BIN check wrongly reported AIL_ARDB.
for required_var in AIL_HOME AIL_REDIS AIL_ARDB AIL_BIN AIL_FLASK; do
    if [ -z "${!required_var}" ]; then
        echo "Needs the env var ${required_var}. Run the script from the virtual environment."
        exit 1
    fi
done

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

# ANSI colour sequences, interpreted by `echo -e`
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash "${AIL_BIN}/LAUNCH.sh" -ks
wait

# SUBMODULES #
git submodule update


echo -e $GREEN"Updating thirdparty ..."$DEFAULT
bash "${AIL_BIN}/LAUNCH.sh" -ut
wait

echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python "${AIL_HOME}/update/v3.7/Update.py"
wait
echo ""
echo ""


echo ""
echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash "${AIL_BIN}/LAUNCH.sh" -ks
wait

exit 0
|
|
|
@ -1,53 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
|
|
||||||
sys.path.append(os.environ['AIL_BIN'])
|
|
||||||
##################################
|
|
||||||
# Import Project packages
|
|
||||||
##################################
|
|
||||||
from lib import ConfigLoader
|
|
||||||
from lib import Tracker
|
|
||||||
|
|
||||||
def update_update_stats():
    """Print the conversion progress and store the percentage for the background script.

    Reads the module-level ``r_serv_db`` connection and ``nb_elem_to_update``
    total, both of which are set in the ``__main__`` section of this script.
    """
    converted = int(r_serv_db.get('update:nb_elem_converted'))
    # Truncated integer percentage of converted elements.
    percent = int(converted * 100 / nb_elem_to_update)
    print(f'{converted}/{nb_elem_to_update} updated {percent}%')
    r_serv_db.set('ail:current_background_script_stat', percent)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    start_deb = time.time()

    # Open both connections, then drop the loader reference.
    config_loader = ConfigLoader.ConfigLoader()
    r_serv_db = config_loader.get_redis_conn("ARDB_DB")
    r_serv_tracker = config_loader.get_db_conn("Kvrocks_DB")
    config_loader = None
    # Replace the Tracker module's connection with the one opened here.
    Tracker.r_serv_tracker = r_serv_tracker

    r_serv_db.set('ail:current_background_script', 'trackers update')

    # Fall back to 1 when the total is missing or empty, so the percentage
    # computed in update_update_stats() has a usable divisor.
    stored_total = r_serv_db.get('update:nb_elem_to_convert')
    nb_elem_to_update = int(stored_total) if stored_total else 1

    while True:
        tracker_uuid = r_serv_db.spop('trackers_update_v3.7')
        if tracker_uuid is None:
            # Queue drained: report completion and stop.
            r_serv_db.set('ail:current_background_script_stat', 100)
            sys.exit(0)

        print(tracker_uuid)
        # FIX STATS
        Tracker.fix_tracker_stats_per_day(tracker_uuid)
        # MAP TRACKER - ITEM_ID
        Tracker.fix_tracker_item_link(tracker_uuid)

        r_serv_db.incr('update:nb_elem_converted')
        update_update_stats()
|
|
|
@ -1,22 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
|
|
||||||
sys.path.append(os.environ['AIL_HOME'])
|
|
||||||
##################################
|
|
||||||
# Import Project packages
|
|
||||||
##################################
|
|
||||||
from update.bin.old_ail_updater import AIL_Updater
|
|
||||||
|
|
||||||
class Updater(AIL_Updater):
    """Updater for this release; adds nothing on top of the inherited AIL_Updater."""

    def __init__(self, version):
        # Zero-argument super() is the Python 3 spelling of super(Updater, self).
        super().__init__(version)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # run_update() is inherited from AIL_Updater; this script only supplies the version.
    updater = Updater(version='v4.0')
    updater.run_update()
|
|
|
@ -1,29 +0,0 @@
|
||||||
#!/bin/bash

# Abort early unless every AIL environment variable is set.
# The variable name is taken from the loop (bash indirect expansion), so the
# error message always names the variable that is actually missing; the
# original AIL_BIN check wrongly reported AIL_ARDB.
for required_var in AIL_HOME AIL_REDIS AIL_ARDB AIL_BIN AIL_FLASK; do
    if [ -z "${!required_var}" ]; then
        echo "Needs the env var ${required_var}. Run the script from the virtual environment."
        exit 1
    fi
done

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

# ANSI colour sequences, interpreted by `echo -e`
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash "${AIL_BIN}/LAUNCH.sh" -ks
wait

# SUBMODULES #
git submodule update

echo ""
# The banner now names the package that is installed (it used to say "nose").
echo -e $GREEN"Installing websockets ..."$DEFAULT
pip3 install -U websockets

exit 0
|
|
|
@ -1,31 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import redis
|
|
||||||
|
|
||||||
sys.path.append(os.environ['AIL_HOME'])
|
|
||||||
##################################
|
|
||||||
# Import Project packages
|
|
||||||
##################################
|
|
||||||
from update.bin.old_ail_updater import AIL_Updater
|
|
||||||
|
|
||||||
class Updater(AIL_Updater):
    """Updater for v4.1: empties one database on a locally reachable server."""

    def __init__(self, version):
        super().__init__(version)

    def update(self):
        """Flush database 2 of the server listening on localhost:6382."""
        connection_settings = {
            'host': 'localhost',
            'port': 6382,
            'db': 2,
            'decode_responses': True,
        }
        r_tracking = redis.StrictRedis(**connection_settings)
        # FLUSH OLD DB
        r_tracking.flushdb()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # run_update() is inherited from AIL_Updater; update() above supplies the v4.1 work.
    updater = Updater(version='v4.1')
    updater.run_update()
|
|
|
@ -1,22 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
|
|
||||||
sys.path.append(os.environ['AIL_HOME'])
|
|
||||||
##################################
|
|
||||||
# Import Project packages
|
|
||||||
##################################
|
|
||||||
from update.bin.old_ail_updater import AIL_Updater
|
|
||||||
|
|
||||||
class Updater(AIL_Updater):
    """Updater for this release; adds nothing on top of the inherited AIL_Updater."""

    def __init__(self, version):
        # Zero-argument super() is the Python 3 spelling of super(Updater, self).
        super().__init__(version)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # run_update() is inherited from AIL_Updater; this script only supplies the version.
    updater = Updater(version='v4.2.1')
    updater.run_update()
|
|
|
@ -1,29 +0,0 @@
|
||||||
#!/bin/bash

# Abort early unless every AIL environment variable is set.
# The variable name is taken from the loop (bash indirect expansion), so the
# error message always names the variable that is actually missing; the
# original AIL_BIN check wrongly reported AIL_ARDB.
for required_var in AIL_HOME AIL_REDIS AIL_ARDB AIL_BIN AIL_FLASK; do
    if [ -z "${!required_var}" ]; then
        echo "Needs the env var ${required_var}. Run the script from the virtual environment."
        exit 1
    fi
done

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

# ANSI colour sequences, interpreted by `echo -e`
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash "${AIL_BIN}/LAUNCH.sh" -ks
wait

# SUBMODULES #
git submodule update

echo ""
# Banner spelling corrected to match the package name installed below.
echo -e $GREEN"Updating pubsublogger ..."$DEFAULT
pip3 install -U pubsublogger

exit 0
|
|
|
@ -1,22 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
|
|
||||||
sys.path.append(os.environ['AIL_HOME'])
|
|
||||||
##################################
|
|
||||||
# Import Project packages
|
|
||||||
##################################
|
|
||||||
from update.bin.old_ail_updater import AIL_Updater
|
|
||||||
|
|
||||||
class Updater(AIL_Updater):
    """Updater for this release; adds nothing on top of the inherited AIL_Updater."""

    def __init__(self, version):
        # Zero-argument super() is the Python 3 spelling of super(Updater, self).
        super().__init__(version)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # run_update() is inherited from AIL_Updater; this script only supplies the version.
    updater = Updater(version='v4.2')
    updater.run_update()
|
|
|
@ -1,33 +0,0 @@
|
||||||
#!/bin/bash

# Abort early unless every AIL environment variable is set.
# The variable name is taken from the loop (bash indirect expansion), so the
# error message always names the variable that is actually missing; the
# original AIL_BIN check wrongly reported AIL_ARDB.
for required_var in AIL_HOME AIL_REDIS AIL_ARDB AIL_BIN AIL_FLASK; do
    if [ -z "${!required_var}" ]; then
        echo "Needs the env var ${required_var}. Run the script from the virtual environment."
        exit 1
    fi
done

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

# ANSI colour sequences, interpreted by `echo -e`
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash "${AIL_BIN}/LAUNCH.sh" -ks
wait

# SUBMODULES #
git submodule update

echo ""
echo -e $GREEN"Installing typo-squatting ..."$DEFAULT
pip3 install -U ail_typo_squatting

echo ""
echo -e $GREEN"Updating d4-client ..."$DEFAULT
pip3 install -U d4-pyclient

exit 0
|
|
|
@ -8,7 +8,8 @@ sys.path.append(os.environ['AIL_HOME'])
|
||||||
##################################
|
##################################
|
||||||
# Import Project packages
|
# Import Project packages
|
||||||
##################################
|
##################################
|
||||||
from update.bin.old_ail_updater import AIL_Updater
|
from update.bin.ail_updater import AIL_Updater
|
||||||
|
from lib import ail_updates
|
||||||
|
|
||||||
class Updater(AIL_Updater):
|
class Updater(AIL_Updater):
|
||||||
"""default Updater."""
|
"""default Updater."""
|
||||||
|
@ -18,5 +19,6 @@ class Updater(AIL_Updater):
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
updater = Updater('v3.0')
|
updater = Updater('v5.2')
|
||||||
updater.run_update()
|
updater.run_update()
|
||||||
|
ail_updates.add_background_update('v5.2')
|
|
@ -2,13 +2,11 @@
|
||||||
|
|
||||||
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
|
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
|
||||||
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
|
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
|
||||||
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
|
|
||||||
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
|
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
|
||||||
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
|
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
|
||||||
|
|
||||||
export PATH=$AIL_HOME:$PATH
|
export PATH=$AIL_HOME:$PATH
|
||||||
export PATH=$AIL_REDIS:$PATH
|
export PATH=$AIL_REDIS:$PATH
|
||||||
export PATH=$AIL_ARDB:$PATH
|
|
||||||
export PATH=$AIL_BIN:$PATH
|
export PATH=$AIL_BIN:$PATH
|
||||||
export PATH=$AIL_FLASK:$PATH
|
export PATH=$AIL_FLASK:$PATH
|
||||||
|
|
||||||
|
@ -22,4 +20,12 @@ wait
|
||||||
# SUBMODULES #
|
# SUBMODULES #
|
||||||
git submodule update
|
git submodule update
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
|
||||||
|
echo ""
|
||||||
|
python ${AIL_HOME}/update/v5.2/Update.py
|
||||||
|
wait
|
||||||
|
echo ""
|
||||||
|
echo ""
|
||||||
|
|
||||||
exit 0
|
exit 0
|
27
update/v5.2/compress_har.py
Executable file
27
update/v5.2/compress_har.py
Executable file
|
@ -0,0 +1,27 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
|
import gzip
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.append(os.environ['AIL_BIN'])
|
||||||
|
##################################
|
||||||
|
# Import Project packages
|
||||||
|
##################################
|
||||||
|
from lib import ail_updates
|
||||||
|
from lib import crawlers
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Background update v5.2: gzip every known HAR, reporting progress as it goes.
    update = ail_updates.AILBackgroundUpdate('v5.2')
    hars_ids = crawlers.get_all_har_ids()
    update.set_nb_to_update(len(hars_ids))

    # The original counter was initialised to 0 and never incremented, so the
    # "every 100 items" check was always true and progress was written on every
    # item. enumerate() supplies a real count.
    for n, har_id in enumerate(hars_ids):
        crawlers._gzip_har(har_id)
        update.inc_nb_updated()
        if n % 100 == 0:
            update.update_progress()
    # Publish the final count, which the throttled updates above can miss.
    update.update_progress()

    # NOTE(review): presumably a catch-all pass for HARs not returned by
    # get_all_har_ids() -- confirm against lib/crawlers.
    crawlers._gzip_all_hars()
|
|
@ -17,9 +17,6 @@ from flask_login import LoginManager, current_user, login_user, logout_user, log
|
||||||
import importlib
|
import importlib
|
||||||
from os.path import join
|
from os.path import join
|
||||||
|
|
||||||
# # TODO: put me in lib/Tag
|
|
||||||
from pytaxonomies import Taxonomies
|
|
||||||
|
|
||||||
sys.path.append('./modules/')
|
sys.path.append('./modules/')
|
||||||
|
|
||||||
sys.path.append(os.environ['AIL_BIN'])
|
sys.path.append(os.environ['AIL_BIN'])
|
||||||
|
@ -51,6 +48,9 @@ from blueprints.objects_decoded import objects_decoded
|
||||||
from blueprints.objects_subtypes import objects_subtypes
|
from blueprints.objects_subtypes import objects_subtypes
|
||||||
from blueprints.objects_title import objects_title
|
from blueprints.objects_title import objects_title
|
||||||
from blueprints.objects_cookie_name import objects_cookie_name
|
from blueprints.objects_cookie_name import objects_cookie_name
|
||||||
|
from blueprints.objects_etag import objects_etag
|
||||||
|
from blueprints.objects_hhhash import objects_hhhash
|
||||||
|
from blueprints.objects_chat import objects_chat
|
||||||
|
|
||||||
Flask_dir = os.environ['AIL_FLASK']
|
Flask_dir = os.environ['AIL_FLASK']
|
||||||
|
|
||||||
|
@ -106,6 +106,9 @@ app.register_blueprint(objects_decoded, url_prefix=baseUrl)
|
||||||
app.register_blueprint(objects_subtypes, url_prefix=baseUrl)
|
app.register_blueprint(objects_subtypes, url_prefix=baseUrl)
|
||||||
app.register_blueprint(objects_title, url_prefix=baseUrl)
|
app.register_blueprint(objects_title, url_prefix=baseUrl)
|
||||||
app.register_blueprint(objects_cookie_name, url_prefix=baseUrl)
|
app.register_blueprint(objects_cookie_name, url_prefix=baseUrl)
|
||||||
|
app.register_blueprint(objects_etag, url_prefix=baseUrl)
|
||||||
|
app.register_blueprint(objects_hhhash, url_prefix=baseUrl)
|
||||||
|
app.register_blueprint(objects_chat, url_prefix=baseUrl)
|
||||||
|
|
||||||
# ========= =========#
|
# ========= =========#
|
||||||
|
|
||||||
|
@ -250,16 +253,6 @@ default_taxonomies = ["infoleak", "gdpr", "fpf", "dark-web"]
|
||||||
for taxonomy in default_taxonomies:
|
for taxonomy in default_taxonomies:
|
||||||
Tag.enable_taxonomy_tags(taxonomy)
|
Tag.enable_taxonomy_tags(taxonomy)
|
||||||
|
|
||||||
# ========== INITIAL tags auto export ============
|
|
||||||
# taxonomies = Taxonomies()
|
|
||||||
#
|
|
||||||
# infoleak_tags = taxonomies.get('infoleak').machinetags()
|
|
||||||
# infoleak_automatic_tags = []
|
|
||||||
# for tag in taxonomies.get('infoleak').machinetags():
|
|
||||||
# if tag.split('=')[0][:] == 'infoleak:automatic-detection':
|
|
||||||
# r_serv_db.sadd('list_export_tags', tag)
|
|
||||||
#
|
|
||||||
# r_serv_db.sadd('list_export_tags', 'infoleak:submission="manual"')
|
|
||||||
# ============ MAIN ============
|
# ============ MAIN ============
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue