DB migration

This commit is contained in:
Terrtia 2022-08-19 16:53:31 +02:00
parent 2f8a5a333a
commit 9c1bfb7073
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
40 changed files with 2234 additions and 894 deletions

View file

@ -6,6 +6,7 @@
"""
import os
import sys
import time
import importlib.util
@ -15,13 +16,23 @@ sys.path.append(os.environ['AIL_BIN'])
##################################
from lib.ConfigLoader import ConfigLoader
from lib import Users
from lib.objects import Decodeds
from lib.objects import Domains
from lib.objects import Items
from lib.objects.CryptoCurrencies import CryptoCurrency
from lib.objects.Pgps import Pgp
from lib.objects.Screenshots import Screenshot, get_all_screenshots
from lib.objects.Usernames import Username
# # # # CONFIGS # # # #
config_loader = ConfigLoader()
r_kvrocks = config_loader.get_redis_conn("Kvrocks_DB")
r_kvrocks = config_loader.get_db_conn("Kvrocks_DB")
r_serv_db = config_loader.get_redis_conn("ARDB_DB")
r_serv_tracker = config_loader.get_redis_conn("ARDB_Tracker")
r_serv_tags = config_loader.get_redis_conn("ARDB_Tags")
r_crawler = config_loader.get_redis_conn("ARDB_Onion")
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
config_loader = None
# # - - CONFIGS - - # #
@ -32,6 +43,13 @@ spec.loader.exec_module(old_ail_2_ail)
old_ail_2_ail.r_serv_sync = r_serv_db
from packages import Tag
spec = importlib.util.find_spec('Tag')
old_Tag = importlib.util.module_from_spec(spec)
spec.loader.exec_module(old_Tag)
old_Tag.r_serv_tags = r_serv_tags
from lib import Tracker
spec = importlib.util.find_spec('Tracker')
old_Tracker = importlib.util.module_from_spec(spec)
@ -46,6 +64,12 @@ spec.loader.exec_module(old_Investigations)
old_Investigations.r_tracking = r_serv_tracker
from lib import crawlers
spec = importlib.util.find_spec('crawlers')
old_crawlers = importlib.util.module_from_spec(spec)
spec.loader.exec_module(old_crawlers)
old_crawlers.r_serv_onion = r_crawler
# # TODO: disable features - credentials - stats ? - sentiment analysis
@ -53,6 +77,13 @@ old_Investigations.r_tracking = r_serv_tracker
# /!\ ISSUE WITH FILE DUPLICATES => NEED TO BE REFACTORED
def get_item_date(item_id):
dirs = item_id.split('/')
return f'{dirs[-4]}{dirs[-3]}{dirs[-2]}'
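For orientation, the date is simply read back out of the item path; a quick sketch (the path below is an invented example, not a real item id):
item_id = 'archive/pastebin.com_pro/2020/03/15/example.gz'   # hypothetical item id
dirs = item_id.split('/')
print(f'{dirs[-4]}{dirs[-3]}{dirs[-2]}')   # -> '20200315'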
################################################################
def core_migration():
print('CORE MIGRATION...')
@ -81,6 +112,12 @@ def core_migration():
r_kvrocks.hset('d4:passivedns', 'enabled', bool(d4_enabled))
r_kvrocks.hset('d4:passivedns', 'update_time', d4_update_time)
# Crawler Manager
manager_url = old_crawlers.get_splash_manager_url()
manager_api_key = old_crawlers.get_splash_api_key()
crawlers.save_splash_manager_url_api(manager_url, manager_api_key)
crawlers.reload_splash_and_proxies_list()
# ail:misp
# ail:thehive
# hive:auto-alerts
@ -91,9 +128,6 @@ def core_migration():
# # TODO: TO CHECK
# config:all_global_section +
# config:global:crawler +
# mess_not_saved_export
# # # # # # # # # # # # # # # #
@ -215,26 +249,361 @@ def item_submit_migration():
pass
# /!\ KEY COLLISION
# # TODO: change db
# # TODO: change db -> olds modules + blueprints
# # TODO: HANDLE LOCAL TAGS
# # TODO: HANDLE LOCAL TAGS
# # TODO: HANDLE LOCAL TAGS
# # TODO: HANDLE LOCAL TAGS
# # TODO: HANDLE LOCAL TAGS
def tags_migration():
# HANDLE LOCAL TAGS
print(old_Tag.get_all_tags())
#
# /!\ OBJECTS TAGS ISSUE /!\
# -> only one layer
#
# issue with subtypes + between objects with same ID
#
#
#
#
pass
# # TODO: MIGRATE item_basic.add_map_obj_id_item_id ??????????????????????
###############################
# #
# ITEMS MIGRATION #
# #
###############################
def get_item_father(item_id):
return r_serv_metadata.hget(f'paste_metadata:{item_id}', 'father')
def items_migration():
pass
print('ITEMS MIGRATION...')
# MIGRATE IMPORTED URLEXTRACT Father
for item_id in Items.get_items_by_source('urlextract'):
father_id = get_item_father(item_id)
if father_id:
item = Items.Item(item_id)
item.set_father(father_id)
# TODO: migrate cookies
# TODO: migrate auto crawlers
###############################
# #
# CRAWLERS MIGRATION #
# #
###############################
# Return last crawled domains by type
# domain;epoch
def get_last_crawled_domains(domain_type):
return r_crawler.lrange(f'last_{domain_type}', 0 ,-1)
def crawler_migration():
print('CRAWLER MIGRATION...')
pass
# for domain_type in ['onion', 'regular']:
# for row in get_last_crawled_domains(domain_type):
# dom_row, epoch = row.rsplit(';', 1)
# domain, port = dom_row.rsplit(':', 1)
# print(domain, port, epoch)
# #crawlers.add_last_crawled_domain(domain_type, domain, port, epoch)
for cookiejar_uuid in old_crawlers.get_all_cookiejar():
meta = old_crawlers.get_cookiejar_metadata(cookiejar_uuid, level=True)
#print(meta)
#crawlers.create_cookiejar(meta['user_id'], level=meta['level'], description=meta['description'], cookiejar_uuid=cookiejar_uuid)
#_set_cookiejar_date(meta['date'])
for meta_cookie, cookie_uuid in old_crawlers.get_cookiejar_cookies_list(cookiejar_uuid, add_cookie_uuid=True):
print(cookie_uuid)
#crawlers.add_cookie_to_cookiejar(cookiejar_uuid, meta_cookie, cookie_uuid=cookie_uuid)
# TODO: auto crawler -> to Fix / change
# TODO: crawlers queues
###############################
# #
# DOMAINS MIGRATION #
# #
###############################
# # TODO: DOMAIN DOWN -> start onion_down:20190101
# Start -> 2019-01-01
# BY TYPE - FIRST DATE DOWN / UP
def get_item_link(item_id):
return r_serv_metadata.hget(f'paste_metadata:{item_id}', 'real_link')
def get_item_father(item_id):
return r_serv_metadata.hget(f'paste_metadata:{item_id}', 'father')
def get_item_children(item_id):
return r_serv_metadata.smembers(f'paste_children:{item_id}')
def get_domains_up_by_type(domain_type):
return r_crawler.smembers(f'full_{domain_type}_up')
def get_domain_first_seen(domain_type, domain):
return r_crawler.hget(f'{domain_type}_metadata:{domain}', 'first_seen')
def get_domain_last_check(domain_type, domain):
return r_crawler.hget(f'{domain_type}_metadata:{domain}', 'last_check')
def get_domain_last_origin(domain_type, domain):
return r_crawler.hget(f'{domain_type}_metadata:{domain}', 'paste_parent')
def get_domain_ports(domain_type, domain):
l_ports = r_crawler.hget(f'{domain_type}_metadata:{domain}', 'ports')
if l_ports:
return l_ports.split(";")
return []
def get_domain_languages(dom):
return r_crawler.smembers(f'domain:language:{dom}')
def is_crawled_item(domain, item_id):
domain_lenght = len(domain)
if len(item_id) > (domain_lenght+48):
if item_id[-36-domain_lenght:-36] == domain:
return True
return False
def get_crawled_items(domain, root_id):
crawled_items = get_crawled_items_children(domain, root_id)
crawled_items.append(root_id)
return crawled_items
def get_crawled_items_children(domain, root_id):
crawled_items = []
for item_id in get_item_children(root_id):
if is_crawled_item(domain, item_id):
crawled_items.append(item_id)
crawled_items.extend(get_crawled_items_children(domain, item_id))
return crawled_items
def get_domain_history_by_port(domain_type, domain, port):
history_tuple = r_crawler.zrange(f'crawler_history_{domain_type}:{domain}:{port}', 0, -1, withscores=True)
history = []
for root_id, epoch in history_tuple:
dict_history = {}
epoch = int(epoch) # force int
dict_history["epoch"] = epoch
try:
int(root_id)
dict_history['status'] = False
except ValueError:
dict_history['status'] = True
dict_history['root'] = root_id
history.append(dict_history)
return history
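The helper above yields one dict per crawl, roughly in this shape (values are invented for illustration):
# hypothetical result of get_domain_history_by_port('onion', 'example.onion', 80)
# [
#     {'epoch': 1546300800, 'status': False},                                                # root_id was an epoch -> domain DOWN
#     {'epoch': 1546387200, 'status': True, 'root': 'crawled/2019/01/02/example.onion/item'},  # root item id -> domain UP
# ]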
def domain_migration():
pass
print('Domains MIGRATION...')
# # TODO: refactor keys
def correlations_migration():
pass
for domain_type in ['onion', 'regular']:
for dom in get_domains_up_by_type(domain_type):
ports = get_domain_ports(domain_type, dom)
first_seen = get_domain_first_seen(domain_type, dom)
last_check = get_domain_last_check(domain_type, dom)
last_origin = get_domain_last_origin(domain_type, dom)
languages = get_domain_languages(dom)
domain = Domains.Domain(dom)
# domain.update_daterange(first_seen)
# domain.update_daterange(last_check)
# domain._set_ports(ports)
# if last_origin:
# domain.set_last_origin(last_origin)
for language in languages:
print(language)
# domain.add_language(language)
#print('------------------')
#print('------------------')
#print('------------------')
#print('------------------')
#print('------------------')
print(dom)
#print(first_seen)
#print(last_check)
#print(ports)
# # TODO: FIXME filter invalid hostname
# CREATE DOMAIN HISTORY
for port in ports:
for history in get_domain_history_by_port(domain_type, dom, port):
epoch = history['epoch']
# DOMAIN DOWN
if not history.get('status'): # domain DOWN
# domain.add_history(epoch, port)
print(f'DOWN {epoch}')
# DOMAIN UP
else:
root_id = history.get('root')
if root_id:
# domain.add_history(epoch, port, root_item=root_id)
#print(f'UP {root_id}')
crawled_items = get_crawled_items(dom, root_id)
for item_id in crawled_items:
url = get_item_link(item_id)
item_father = get_item_father(item_id)
if item_father and url:
#print(f'{url} {item_id}')
pass
# domain.add_crawled_item(url, port, item_id, item_father)
#print()
###############################
# #
# DECODEDS MIGRATION #
# #
###############################
def get_estimated_type(decoded_id):
return r_serv_metadata.hget(f'metadata_hash:{decoded_id}', 'estimated_type')
def get_decoded_items_list_by_decoder(decoder_type, decoded_id): ###################
#return r_serv_metadata.zrange('nb_seen_hash:{}'.format(sha1_string), 0, -1)
return r_serv_metadata.zrange(f'{decoder_type}_hash:{decoded_id}', 0, -1)
def decodeds_migration():
print('Decoded MIGRATION...')
decoder_names = ['base64', 'binary', 'hexadecimal']
Decodeds._delete_old_json_descriptor()
for decoded_id in Decodeds.get_all_decodeds():
mimetype = get_estimated_type(decoded_id)
# ignore invalid object
if mimetype is None:
continue
print()
print(decoded_id)
decoded = Decodeds.Decoded(decoded_id)
filepath = decoded.get_filepath(mimetype=mimetype)
decoded._save_meta(filepath, mimetype)
for decoder_type in decoder_names:
for item_id in get_decoded_items_list_by_decoder(decoder_type, decoded_id):
print(item_id, decoder_type)
date = get_item_date(item_id)
#for decoder_type in :
decoded.add(decoder_type, date, item_id, mimetype)
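For reference, the legacy ARDB keys this pass reads (the sha1 is a placeholder):
# metadata_hash:<sha1>       hash, field 'estimated_type'  -> mimetype of the decoded file
# base64_hash:<sha1>         zset of item ids              -> items where the hash was base64-decoded
# binary_hash:<sha1>         zset of item ids              -> same, binary decoder
# hexadecimal_hash:<sha1>    zset of item ids              -> same, hexadecimal decoder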
###############################
# #
# SCREENSHOTS MIGRATION #
# #
###############################
# old correlation
def get_screenshot_items_list(screenshot_id): ######################### # TODO: DELETE SOLO SCREENSHOTS
print(f'screenshot:{screenshot_id}')
return r_crawler.smembers(f'screenshot:{screenshot_id}')
# old correlation
def get_screenshot_domain(screenshot_id):
return r_crawler.smembers(f'screenshot_domain:{screenshot_id}')
# Tags + Correlations
# # TODO: save orphan screenshots ?????
def screenshots_migration():
print('SCREENSHOTS MIGRATION...')
screenshots = get_all_screenshots()
#screenshots = ['5fcc292ea8a699aa7a9ce93a704b78b8f493620ccdb2a5cebacb1069a4327211']
for screenshot_id in screenshots:
print(screenshot_id)
screenshot = Screenshot(screenshot_id)
tags = old_Tag.get_obj_tag(screenshot_id) ################## # TODO:
if tags:
print(screenshot_id)
print(tags)
# Correlations
for item_id in get_screenshot_items_list(screenshot_id):
print(item_id)
screenshot.add_correlation('item', '', item_id)
for domain_id in get_screenshot_domain(screenshot_id):
print(domain_id)
screenshot.add_correlation('domain', '', domain_id)
###############################
# #
# SUBTYPES MIGRATION #
# #
###############################
def get_item_correlation_obj(obj_type, subtype, obj_id):
return r_serv_metadata.smembers(f'set_{obj_type}_{subtype}:{obj_id}')
def get_obj_subtype_first_seen(obj_type, subtype, obj_id):
return r_serv_metadata.hget(f'{obj_type}_metadata_{subtype}:{obj_id}', 'first_seen')
def get_obj_subtype_last_seen(obj_type, subtype, obj_id):
return r_serv_metadata.hget(f'{obj_type}_metadata_{subtype}:{obj_id}', 'last_seen')
def get_all_subtype_id(obj_type, subtype):
print(f'{obj_type}_all:{subtype}')
print(r_serv_metadata.zrange(f'{obj_type}_all:{subtype}', 0, -1))
return r_serv_metadata.zrange(f'{obj_type}_all:{subtype}', 0, -1)
def get_subtype_object(obj_type, subtype, obj_id):
if obj_type == 'cryptocurrency':
return CryptoCurrency(obj_id, subtype)
elif obj_type == 'pgpdump':
return Pgp(obj_id, subtype)
elif obj_type == 'username':
return Username(obj_id, subtype)
def migrate_subtype_obj(Obj, obj_type, subtype, obj_id):
first_seen = get_obj_subtype_first_seen(obj_type, subtype, obj_id)
last_seen = get_obj_subtype_last_seen(obj_type, subtype, obj_id)
# dates
for item_id in get_item_correlation_obj(obj_type, subtype, obj_id):
date = get_item_date(item_id)
Obj.add(date, item_id)
dict_obj_subtypes = {'cryptocurrency': ['bitcoin', 'bitcoin-cash', 'dash', 'ethereum', 'litecoin', 'monero', 'zcash'],
'pgpdump': ['key', 'mail', 'name'],
'username': ['telegram', 'twitter', 'jabber']}
def subtypes_obj_migration():
print('SUBTYPE MIGRATION...')
for obj_type in dict_obj_subtypes:
print(f'{obj_type} MIGRATION...')
for subtype in dict_obj_subtypes[obj_type]:
for obj_id in get_all_subtype_id(obj_type, subtype):
Obj = get_subtype_object(obj_type, subtype, obj_id)
migrate_subtype_obj(Obj, obj_type, subtype, obj_id)
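For reference, the legacy ARDB keys consumed per subtype object (names below are invented):
# username_all:telegram                  zset of ids                    (get_all_subtype_id)
# username_metadata_telegram:<id>        hash: first_seen / last_seen   (get_obj_subtype_first_seen / last_seen)
# set_username_telegram:<id>             set of item ids                (get_item_correlation_obj)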
# # # # # # # # # # # # # # # #
# STATISTICS
@ -246,10 +615,16 @@ def statistics_migration():
if __name__ == '__main__':
core_migration()
user_migration()
#core_migration()
#user_migration()
#items_migration()
#crawler_migration()
#domain_migration()
#decodeds_migration()
#screenshots_migration()
#subtypes_obj_migration()
#ail_2_ail_migration()
trackers_migration()
#trackers_migration()
#investigations_migration()

View file

@ -295,7 +295,7 @@ function shutting_down_redis_servers {
redis_dir=${AIL_HOME}/redis/src
for port in "${array[@]}";
do
bash -c "${redis_dir}/redis-cli -p ${port} SHUTDOWN"
bash -c "${redis_dir}/redis-cli -p ${port} -a ail SHUTDOWN"
sleep 0.1
done
}
@ -324,7 +324,7 @@ function checking_redis_servers {
for port in "${array[@]}";
do
sleep 0.2
bash -c "${redis_dir}/redis-cli -p ${port} PING | grep "PONG" &> /dev/null"
bash -c "${redis_dir}/redis-cli -p ${port} -a ail PING | grep "PONG" &> /dev/null"
if [ ! $? == 0 ]; then
echo -e "${RED}\t${port} ${db_name} not ready${DEFAULT}"
flag_db=1
@ -512,6 +512,20 @@ function killall {
fi
}
function _set_kvrocks_namespace() {
bash -c "${redis_dir}/redis-cli -p ${port} -a ail namespace add $1 $2"
}
function set_kvrocks_namespaces() {
if checking_kvrocks; then
_set_kvrocks_namespace "cor" "ail_correls"
_set_kvrocks_namespace "obj" "ail_objs"
_set_kvrocks_namespace "tag" "ail_tags"
else
echo -e $RED"\t* Error: Please launch Kvrocks server"$DEFAULT
fi
}
function update() {
bin_dir=${AIL_HOME}/bin
@ -672,6 +686,8 @@ while [ "$1" != "" ]; do
-lkv | --launchKVORCKSVerify ) launch_kvrocks;
wait_until_kvrocks_is_ready;
;;
--set_kvrocks_namespaces ) set_kvrocks_namespaces;
;;
-k | --killAll ) killall;
;;
-ks | --killscript ) killscript;

View file

@ -26,8 +26,8 @@ import Tag
config_loader = ConfigLoader.ConfigLoader()
r_cache = config_loader.get_redis_conn("Redis_Cache")
r_serv_db = config_loader.get_redis_conn("Kvrocks_DB")
r_serv_sync = config_loader.get_redis_conn("Kvrocks_DB")
r_serv_db = config_loader.get_db_conn("Kvrocks_DB")
r_serv_sync = config_loader.get_db_conn("Kvrocks_DB")
config_loader = None
WEBSOCKETS_CLOSE_CODES = {

View file

@ -8,14 +8,15 @@ Receiver Jabber Json Items
"""
import os
import json
import sys
import time
import datetime
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
import item_basic
import Username
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib import item_basic
from lib.objects.Usernames import Username
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'import', 'ail_json_importer'))
from Default_json import Default_json
@ -45,6 +46,9 @@ class Ail_feeder_jabber(Default_json):
to = str(self.json_item['meta']['jabber:to'])
fr = str(self.json_item['meta']['jabber:from'])
item_date = item_basic.get_item_date(item_id)
Username.save_item_correlation('jabber', to, item_id, item_date)
Username.save_item_correlation('jabber', fr, item_id, item_date)
user_to = Username(to, 'jabber')
user_fr = Username(fr, 'jabber')
user_to.add(date, item_id)
user_fr.add(date, item_id)
return None

View file

@ -8,13 +8,15 @@ Receive Json Items (example: Twitter feeder)
"""
import os
import json
import sys
import datetime
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
import item_basic
import Username
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib import item_basic
from lib.objects.Usernames import Username
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'import', 'ail_json_importer'))
from Default_json import Default_json
@ -46,14 +48,14 @@ class Ail_feeder_telegram(Default_json):
telegram_id = f'{channel_id}_{message_id}'
item_basic.add_map_obj_id_item_id(telegram_id, item_id, 'telegram_id')
#print(self.json_item['meta'])
username = None
user = None
if self.json_item['meta'].get('user'):
username = str(self.json_item['meta']['user'])
user = str(self.json_item['meta']['user'])
else:
if self.json_item['meta'].get('channel'):
username = str(self.json_item['meta']['channel']['username'])
if username:
#print(username)
user = str(self.json_item['meta']['channel']['username'])
if user:
item_date = item_basic.get_item_date(item_id)
Username.save_item_correlation('telegram', username, item_id, item_date)
username = Username(user, 'telegram')
username.add(date, item_id)
return None

View file

@ -8,13 +8,15 @@ Receive Json Items (example: Twitter feeder)
"""
import os
import json
import sys
import datetime
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
import item_basic
import Username
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib import item_basic
from lib.objects.Usernames import Username
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'import', 'ail_json_importer'))
from Default_json import Default_json
@ -39,9 +41,12 @@ class Ail_feeder_twitter(Default_json):
'''
Process JSON meta field.
'''
twitter_id = str(self.json_item['meta']['twitter:tweet_id'])
item_basic.add_map_obj_id_item_id(twitter_id, item_id, 'twitter_id')
username = str(self.json_item['meta']['twitter:id'])
item_date = item_basic.get_item_date(item_id)
Username.save_item_correlation('twitter', username, item_id, item_date)
tweet_id = str(self.json_item['meta']['twitter:tweet_id'])
item_basic.add_map_obj_id_item_id(tweet_id, item_id, 'twitter_id')
date = item_basic.get_item_date(item_id)
user = str(self.json_item['meta']['twitter:id'])
username = Username(user, 'twitter')
username.add(date, item_id)
return None

View file

@ -8,7 +8,6 @@ Receive Json Items (example: Twitter feeder)
"""
import os
import json
import sys
import datetime
import uuid

View file

@ -41,6 +41,12 @@ class ConfigLoader(object):
db=self.cfg.getint(redis_name, "db"),
decode_responses=decode_responses )
def get_db_conn(self, db_name, decode_responses=True): ## TODO: verify redis name
return redis.StrictRedis( host=self.cfg.get(db_name, "host"),
port=self.cfg.getint(db_name, "port"),
password=self.cfg.get(db_name, "password"),
decode_responses=decode_responses )
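A minimal usage sketch of the new helper, mirroring how the migration script above opens its Kvrocks connection:
from lib.ConfigLoader import ConfigLoader

config_loader = ConfigLoader()
r_kvrocks = config_loader.get_db_conn("Kvrocks_DB")   # unlike get_redis_conn(), no 'db' index is read
config_loader = None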
def get_files_directory(self, key_name):
directory_path = self.cfg.get('Directories', key_name)
# full path

View file

@ -16,7 +16,7 @@ sys.path.append(os.environ['AIL_BIN'])
from lib.ConfigLoader import ConfigLoader
config_loader = ConfigLoader()
r_serv_db = config_loader.get_redis_conn("Kvrocks_DB")
r_serv_db = config_loader.get_db_conn("Kvrocks_DB")
MIN_ITEM_SIZE = float(config_loader.get_config_str('Modules_Duplicates', 'min_paste_size')) # # TODO: RENAME ME
config_loader = None

View file

@ -28,7 +28,7 @@ sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
import Tag
config_loader = ConfigLoader.ConfigLoader()
r_tracking = config_loader.get_redis_conn("Kvrocks_DB")
r_tracking = config_loader.get_db_conn("Kvrocks_DB")
config_loader = None

View file

@ -146,7 +146,7 @@ def save_domain_relationship(obj_id, domain):
def delete_domain_relationship(obj_id, domain):
r_serv_onion.srem('domain_screenshot:{}'.format(domain), obj_id)
r_serv_onion.sadd('screenshot_domain:{}'.format(obj_id), domain)
r_serv_onion.srem('screenshot_domain:{}'.format(obj_id), domain)
def save_obj_relationship(obj_id, obj2_type, obj2_id):
if obj2_type == 'domain':

View file

@ -25,8 +25,8 @@ import item_basic
config_loader = ConfigLoader.ConfigLoader()
r_cache = config_loader.get_redis_conn("Redis_Cache")
r_serv_db = config_loader.get_redis_conn("Kvrocks_DB")
r_serv_tracker = config_loader.get_redis_conn("Kvrocks_DB")
r_serv_db = config_loader.get_db_conn("Kvrocks_DB")
r_serv_tracker = config_loader.get_db_conn("Kvrocks_DB")
items_dir = config_loader.get_config_str("Directories", "pastes")
if items_dir[-1] == '/':

View file

@ -18,7 +18,7 @@ from lib.ConfigLoader import ConfigLoader
# Config
config_loader = ConfigLoader()
#r_serv_db = config_loader.get_redis_conn("ARDB_DB")
r_serv_db = config_loader.get_redis_conn("Kvrocks_DB")
r_serv_db = config_loader.get_db_conn("Kvrocks_DB")
config_loader = None
regex_password = r'^(?=(.*\d){2})(?=.*[a-z])(?=.*[A-Z]).{10,100}$'

723
bin/lib/correlations_engine.py Executable file
View file

@ -0,0 +1,723 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import redis
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.ConfigLoader import ConfigLoader
config_loader = ConfigLoader()
r_metadata = config_loader.get_db_conn("Kvrocks_Correlations")
config_loader = None
##################################
# CORRELATION MIGRATION
##################################
#
# MIGRATE TO KVROCKS + Rename correlation Keys
# => Add support for correlations between subtypes
# => Common correlation engine for each objects
#
# Objects Iterations: -screenshot
# -decoded
# -subtypes
# -domains
#
# /!\ Handle reinsertion /!\
#
#
# CORRELATION DB ????? => purge if needed
#
#
#
#
#
##################################
# CORRELATION MIGRATION
##################################
CORRELATION_TYPES_BY_OBJ = {
"cryptocurrency" : ["domain", "item"],
"decoded" : ["domain", "item"],
"domain": ["cryptocurrency", "decoded", "item", "pgp", "username", "screenshot"],
"item": ["cryptocurrency", "decoded", "domain", "pgp", "username", "screenshot"],
"pgp" : ["domain", "item"],
"username" : ["domain", "item"],
"screenshot" : ["domain", "item"],
}
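To make the new layout concrete: each correlation is written twice, once per direction, by add_obj_correlation() below. A hedged sketch with invented ids:
# add_obj_correlation('username', 'telegram', 'alice', 'item', None, 'submitted/2022/08/19/example')
# writes the following set members (one key per direction):
#   correlation:obj:username:telegram:item:alice                   member ':submitted/2022/08/19/example'
#   correlation:obj:item::username:submitted/2022/08/19/example    member 'telegram:alice'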
def get_obj_correl_types(obj_type):
return CORRELATION_TYPES_BY_OBJ.get(obj_type)
def sanityze_obj_correl_types(obj_type, correl_types):
obj_correl_types = get_obj_correl_types(obj_type)
if correl_types:
correl_types = set(correl_types).intersection(obj_correl_types)
if not correl_types:
correl_types = obj_correl_types
return correl_types
def get_nb_correlation_by_correl_type(obj_type, subtype, obj_id, correl_type):
return r_metadata.scard(f'correlation:obj:{obj_type}:{subtype}:{correl_type}:{obj_id}')
def get_nb_correlations(obj_type, subtype, obj_id, filter_types=[]):
if subtype is None:
subtype = ''
nb_correlations = 0
filter_types = sanityze_obj_correl_types(obj_type, filter_types)
for correl_type in filter_types:
nb_correlations += get_nb_correlation_by_correl_type(obj_type, subtype, obj_id, correl_type)
return nb_correlations
def get_correlation_by_correl_type(obj_type, subtype, obj_id, correl_type):
return r_metadata.smembers(f'correlation:obj:{obj_type}:{subtype}:{correl_type}:{obj_id}')
def get_correlations(obj_type, subtype, obj_id, filter_types=[]):
if subtype is None:
subtype = ''
obj_correlations = {}
filter_types = sanityze_obj_correl_types(obj_type, filter_types)
for correl_type in filter_types:
obj_correlations[correl_type] = get_correlation_by_correl_type(obj_type, subtype, obj_id, correl_type)
return obj_correlations
def exists_obj_correlation(obj_type, subtype, obj_id, obj2_type):
if subtype is None:
subtype = ''
return r_metadata.exists(f'correlation:obj:{obj_type}:{subtype}:{obj2_type}:{obj_id}')
def is_obj_correlated(obj_type, subtype, obj_id, obj2_type, subtype2, obj2_id):
if subtype is None:
subtype = ''
if subtype2 is None:
subtype2 = ''
return r_metadata.sismember(f'correlation:obj:{obj_type}:{subtype}:{obj2_type}:{obj_id}', f'{subtype2}:{obj2_id}')
def add_obj_correlation(obj1_type, subtype1, obj1_id, obj2_type, subtype2, obj2_id):
print(obj1_type, subtype1, obj1_id, obj2_type, subtype2, obj2_id)
if subtype1 is None:
subtype1 = ''
if subtype2 is None:
subtype2 = ''
r_metadata.sadd(f'correlation:obj:{obj1_type}:{subtype1}:{obj2_type}:{obj1_id}', f'{subtype2}:{obj2_id}')
r_metadata.sadd(f'correlation:obj:{obj2_type}:{subtype2}:{obj1_type}:{obj2_id}', f'{subtype1}:{obj1_id}')
def delete_obj_correlation(obj1_type, subtype1, obj1_id, obj2_type, subtype2, obj2_id):
if subtype1 is None:
subtype1 = ''
if subtype2 is None:
subtype2 = ''
r_metadata.srem(f'correlation:obj:{obj1_type}:{subtype1}:{obj2_type}:{obj1_id}', f'{subtype2}:{obj2_id}')
r_metadata.srem(f'correlation:obj:{obj2_type}:{subtype2}:{obj1_type}:{obj2_id}', f'{subtype1}:{obj1_id}')
# # TODO: CORRELATION GRAPH
def get_obj_str_id(obj_type, subtype, obj_id): ################ REPLACE BY : ?????????????????????????
if subtype is None:
subtype = ''
return f'{obj_type};{subtype};{obj_id}'
def get_correlations_graph_nodes_links(obj_type, subtype, obj_id, filter_types=[], max_nodes=300, level=1, flask_context=False):
links = set()
nodes = set()
obj_str_id = get_obj_str_id(obj_type, subtype, obj_id)
_get_correlations_graph_node(links, nodes, obj_type, subtype, obj_id, level, max_nodes, filter_types=filter_types, previous_str_obj='')
return obj_str_id, nodes, links
def _get_correlations_graph_node(links, nodes, obj_type, subtype, obj_id, level, max_nodes, filter_types=[], previous_str_obj=''):
obj_str_id = get_obj_str_id(obj_type, subtype, obj_id)
nodes.add(obj_str_id)
obj_correlations = get_correlations(obj_type, subtype, obj_id, filter_types=filter_types)
print(obj_correlations)
for correl_type in obj_correlations:
for str_obj in obj_correlations[correl_type]:
subtype2, obj2_id = str_obj.split(':', 1)
obj2_str_id = get_obj_str_id(correl_type, subtype2, obj2_id)
if obj2_str_id == previous_str_obj:
continue
if len(nodes) > max_nodes:
break
nodes.add(obj2_str_id)
links.add((obj_str_id, obj2_str_id))
if level > 0:
next_level = level - 1
_get_correlations_graph_node(links, nodes, correl_type, subtype2, obj2_id, next_level, max_nodes, filter_types=filter_types, previous_str_obj=obj_str_id)
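A sketch of how the graph helpers could be called (the domain is invented; this assumes a configured AIL instance):
obj_str_id, nodes, links = get_correlations_graph_nodes_links('domain', None, 'example.onion', max_nodes=50, level=2)
# obj_str_id -> 'domain;;example.onion'
# nodes      -> set of 'type;subtype;id' strings, root included
# links      -> set of (node_str_id, node_str_id) tuples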
##########################################################
##########################################################
##########################################################
##########################################################
##########################################################
##########################################################
# get_correlations_fcts = {
# "cryptocurrency" : ["domain", "item"],
# "decoded" : ["domain", "item"],
# "domain": ["cryptocurrency", "decoded", "item", "pgp", "username", "screenshot"],
# "item": ["cryptocurrency", "decoded", "domain", "pgp", "username", "screenshot"],
# "pgp" : ["domain", "item"],
# "username" : ["domain", "item"],
# "screenshot" :{
# "domain": get_correl_screenshot_domain,
# "item": get_correl_screenshot_item,
# },
# }
# }
#
# def build_lsets_obj_types(obj1_type, obj_types):
# return [set(obj1_type, x) for x in subtypes_obj]
#
# ##########################
# subtypes_obj = ['cryptocurrency', 'pgp', 'username']
# lsets_subtypes_obj_domain = build_lsets_obj_types('domain', subtypes_obj)
# lsets_subtypes_obj_item = build_lsets_obj_types('item', subtypes_obj)
# ##########################
# TODO HANDLE CRAWLED ITEMS
def add_correlation(obj1_type, obj1_subtype, obj1_id, obj2_type, obj2_subtype, obj2_id):
set_type = {obj1_type, obj2_type}
# domain - subtypes objs
if set_type in lsets_subtypes_obj_domain:
if obj1_type == 'domain':
domain = obj1_id
obj_type = obj2_type
obj_subtype = obj2_subtype
obj_id = obj2_id
else:
domain = obj2_id
obj_type = obj1_type
obj_subtype = obj1_subtype
obj_id = obj1_id
r_metadata.sadd(f'domain_{obj_type}_{obj_subtype}:{domain}', obj_id)
r_metadata.sadd(f'set_domain_{obj_type}_{obj_subtype}:{obj_id}', domain)
# TODO HANDLE CRAWLED ITEMS
# item - subtypes objs
elif set_type in lsets_subtypes_obj_item:
if obj1_type == 'item':
item_id = obj1_id
obj_type = obj2_type
obj_subtype = obj2_subtype
obj_id = obj2_id
else:
item_id = obj2_id
obj_type = obj1_type
obj_subtype = obj1_subtype
obj_id = obj1_id
r_metadata.sadd(f'set_{obj_type}_{obj_subtype}:{obj_id}', item_id)
r_metadata.sadd(f'item_{obj_type}_{obj_subtype}:{item_id}', obj_id)
# domain - decoded
elif set_type == {'domain', 'decoded'}:
if obj1_type == 'decoded':
decoded_id = obj1_id
domain = obj2_id
else:
decoded_id = obj2_id
domain = obj1_id
r_metadata.sadd(f'hash_domain:{domain}', decoded_id) # domain - hash map
r_metadata.sadd(f'domain_hash:{decoded_id}', domain) # hash - domain map
# item - decoded
elif set_type == {'item', 'decoded'}:
if obj1_type == 'decoded':
decoded_id = obj1_id
item_id = obj2_id
else:
decoded_id = obj2_id
item_id = obj1_id
############################################################
# domain - screenshot
elif set_type == {'domain', 'screenshot'}:
if obj1_type == 'screenshot':
screenshot_id = obj1_id
domain = obj2_id
else:
screenshot_id = obj2_id
domain = obj1_id
r_crawler.sadd(f'domain_screenshot:{domain}', screenshot_id)
r_crawler.sadd(f'screenshot_domain:{screenshot_id}', domain)
# item - screenshot
elif set_type == {'item', 'screenshot'}:
if obj1_type == 'screenshot':
screenshot_id = obj1_id
item_id = obj2_id
else:
screenshot_id = obj2_id
item_id = obj1_id
r_metadata.hset(f'paste_metadata:{item_id}', 'screenshot', screenshot_id)
r_crawler.sadd(f'screenshot:{screenshot_id}', item_id)
# domain - item
elif set_type == {'domain', 'item'}:
if obj1_type == 'item':
item_id = obj1_id
domain = obj2_id
else:
item_id = obj2_id
domain = obj1_id
############################################################
# TODO ADD COMPLETE DELETE
# TODO: Handle items crawled
def delete_correlation(obj1_type, obj1_subtype, obj1_id, obj2_type, obj2_subtype, obj2_id):
set_type = {obj1_type, obj2_type}
# domain - subtypes objs
if set_type in lsets_subtypes_obj_domain:
if obj1_type == 'domain':
domain = obj1_id
obj_type = obj2_type
obj_subtype = obj2_subtype
obj_id = obj2_id
else:
domain = obj2_id
obj_type = obj1_type
obj_subtype = obj1_subtype
obj_id = obj1_id
r_metadata.srem(f'domain_{obj_type}_{obj_subtype}:{domain}', obj_id)
r_metadata.srem(f'set_domain_{obj_type}_{obj_subtype}:{obj_id}', domain)
# TODO ADD COMPLETE DELETE
# item - subtypes objs
elif set_type in lsets_subtypes_obj_item:
if obj1_type == 'item':
item_id = obj1_id
obj_type = obj2_type
obj_subtype = obj2_subtype
obj_id = obj2_id
else:
item_id = obj2_id
obj_type = obj1_type
obj_subtype = obj1_subtype
obj_id = obj1_id
# TODO ADD COMPLETE DELETE
r_metadata.srem(f'set_{obj_type}_{obj_subtype}:{obj_id}', item_id)
r_metadata.srem(f'item_{obj_type}_{obj_subtype}:{item_id}', obj_id)
# TODO ADD COMPLETE DELETE
# domain - decoded
elif set_type == {'domain', 'decoded'}:
if obj1_type == 'decoded':
decoded_id = obj1_id
domain = obj2_id
else:
decoded_id = obj2_id
domain = obj1_id
r_metadata.srem(f'hash_domain:{domain}', decoded_id)
r_metadata.srem(f'domain_hash:{decoded_id}', domain)
# item - decoded
elif set_type == {'item', 'decoded'}:
if obj1_type == 'decoded':
decoded_id = obj1_id
item_id = obj2_id
else:
decoded_id = obj2_id
item_id = obj1_id
####################################################################
# domain - screenshot
elif set_type == {'domain', 'screenshot'}:
if obj1_type == 'screenshot':
screenshot_id = obj1_id
domain = obj2_id
else:
screenshot_id = obj2_id
domain = obj1_id
r_crawler.srem(f'domain_screenshot:{domain}', screenshot_id)
r_crawler.srem(f'screenshot_domain:{screenshot_id}', domain)
# item - screenshot
elif set_type == {'item', 'screenshot'}:
if obj1_type == 'screenshot':
screenshot_id = obj1_id
item_id = obj2_id
else:
screenshot_id = obj2_id
item_id = obj1_id
r_metadata.hdel(f'paste_metadata:{item_id}', 'screenshot', screenshot_id)
r_crawler.srem(f'screenshot:{screenshot_id}', item_id)
# domain - item
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
## Subtypes - Cryptocurrency Pgp Username ##
def get_correl_subtypes_obj_domain(obj_type, obj_subtype, obj_id):
return r_serv_metadata.smembers(f'set_domain_{obj_type}_{obj_subtype}:{obj_id}')
def get_correl_subtypes_obj_item():
pass
def delete_subtype_domain_correlation(domain, obj_type, subtype, obj_id):
r_metadata.srem(f'domain_{obj_type}_{subtype}:{domain}', obj_id)
r_metadata.srem(f'set_domain_{obj_type}_{subtype}:{obj_id}', domain)
# TODO ADD COMPLETE DELETE
def delete_subtype_item_correlation(obj_type, subtype, obj_id, item_id, item_date):
#self.update_correlation_daterange(subtype, obj_id, item_date) update daterange ! # # TODO:
r_metadata.srem(f'set_{obj_type}_{subtype}:{obj_id}', item_id)
r_metadata.srem(f'item_{obj_type}_{subtype}:{item_id}', obj_id)
# # TODO: FIXME HANDLE SUB Objects Metadata # WARNING:
# res = r_serv_metadata.hincrby('{}:{}:{}'.format(self.correlation_name, subtype, item_date), obj_id, -1)
# if int(res) < 0: # remove last
# r_serv_metadata.hdel('{}:{}:{}'.format(self.correlation_name, subtype, item_date), obj_id)
#
# res = r_serv_metadata.zscore('{}_all:{}'.format(self.correlation_name, subtype), obj_id)
# if int(res) > 0:
# r_serv_metadata.zincrby('{}_all:{}'.format(self.correlation_name, subtype), obj_id, -1)
## Screenshot ##
##-- Screenshot - Domain --##
def add_correl_screenshot_domain(screenshot_id, domain):
r_crawler.sadd(f'domain_screenshot:{domain}', screenshot_id)
r_crawler.sadd(f'screenshot_domain:{screenshot_id}', domain)
def get_correl_screenshot_domain(screenshot_id):
return r_crawler.smembers(f'screenshot_domain:{screenshot_id}')
# def delete_correl_screenshot_domain(screenshot_id, domain):
# r_crawler.srem(f'domain_screenshot:{domain}', screenshot_id)
# r_crawler.srem(f'screenshot_domain:{screenshot_id}', domain)
##-- Screenshot - Item --##
def add_correl_screenshot_item(screenshot_id, item_id):
r_metadata.hset(f'paste_metadata:{item_id}', 'screenshot', screenshot_id)
r_crawler.sadd(f'screenshot:{screenshot_id}', item_id)
def get_correl_screenshot_item(screenshot_id):
return r_crawler.smembers(f'screenshot:{screenshot_id}')
# def delete_correl_screenshot_item(screenshot_id, item_id):
# r_metadata.hdel(f'paste_metadata:{item_id}', 'screenshot', screenshot_id)
# r_crawler.srem(f'screenshot:{screenshot_id}', item_id)
## -- ##
def get_correl_item_screenshot(item_id):
res = r_metadata.hget(f'paste_metadata:{item_id}', 'screenshot')
if res:
return {res}
else:
return set()
## Domain ##
def get_correl_domain_subtypes_obj(domain_id, obj_type, obj_subtype):
return r_serv_metadata.smembers(f'domain_{obj_type}_{obj_subtype}:{domain_id}')
## -- ##
## Item ##
def get_correl_item_subtypes_obj():
pass
## -- ## war game stinger - stranger thing
def _get_object_correlations(obj_type, obj_subtype, obj_id, filter_types=[]): # # TODO: , filter_subtypes=[]
obj_relationships = get_obj_relationships(obj_type)
correlations = []
for correlation_fct in obj_relationship_fcts[obj_type]:
correlations
def get_object_correlations(filter_types, filter_subtypes, lvl=0):
pass
####################################################################
####################################################################
####################################################################
####################################################################
####################################################################
####################################################################
def get_object_correlation(object_type, value, correlation_names=None, correlation_objects=None, requested_correl_type=None):
if object_type == 'domain':
return Domain.get_domain_all_correlation(value, correlation_names=correlation_names)
elif object_type == 'paste' or object_type == 'item':
return Item.get_item_all_correlation(value, correlation_names=correlation_names)
elif object_type == 'decoded':
return Decoded.get_decoded_correlated_object(value, correlation_objects=correlation_objects)
elif object_type == 'pgp':
return Pgp.pgp.get_correlation_all_object(requested_correl_type, value, correlation_objects=correlation_objects)
elif object_type == 'cryptocurrency':
return Cryptocurrency.cryptocurrency.get_correlation_all_object(requested_correl_type, value, correlation_objects=correlation_objects)
elif object_type == 'username':
return Username.correlation.get_correlation_all_object(requested_correl_type, value, correlation_objects=correlation_objects)
elif object_type == 'screenshot' or object_type == 'image':
return Screenshot.get_screenshot_correlated_object(value, correlation_objects=correlation_objects)
return {}
def get_obj_tag_table_keys(object_type):
'''
Warning: use only in flask (dynamic templates)
'''
if object_type=="domain":
return ['id', 'first_seen', 'last_check', 'status'] # # TODO: add root screenshot
def create_obj_relationship(obj1_type, obj1_id, obj2_type, obj2_id, obj1_subtype=None, obj2_subtype=None):
if obj1_type == 'domain':
pass
elif obj1_type == 'item':
pass # son/father + duplicate + domain
elif obj1_type == 'pgp':
Pgp.pgp.save_obj_relationship(obj1_subtype, obj1_id, obj2_type, obj2_id)
elif obj1_type == 'cryptocurrency':
Cryptocurrency.cryptocurrency.save_obj_relationship(obj1_subtype, obj1_id, obj2_type, obj2_id)
elif obj1_type == 'decoded':
Decoded.save_obj_relationship(obj1_id, obj2_type, obj2_id)
elif obj1_type == 'image':
Screenshot.save_obj_relationship(obj1_id, obj2_type, obj2_id)
def delete_obj_relationship(obj1_type, obj1_id, obj2_type, obj2_id, obj1_subtype=None, obj2_subtype=None):
if obj1_type == 'domain':
pass
elif obj1_type == 'item':
pass # son/father + duplicate + domain
elif obj1_type == 'pgp':
Pgp.pgp.delete_obj_relationship(obj1_subtype, obj1_id, obj2_type, obj2_id)
elif obj1_type == 'cryptocurrency':
Cryptocurrency.cryptocurrency.delete_obj_relationship(obj1_subtype, obj1_id, obj2_type, obj2_id)
elif obj1_type == 'decoded':
Decoded.delete_obj_relationship(obj1_id, obj2_type, obj2_id)
elif obj1_type == 'image':
Screenshot.delete_obj_relationship(obj1_id, obj2_type, obj2_id)
def create_graph_links(links_set):
graph_links_list = []
for link in links_set:
graph_links_list.append({"source": link[0], "target": link[1]})
return graph_links_list
def create_graph_nodes(nodes_set, root_node_id, flask_context=True):
graph_nodes_list = []
for node_id in nodes_set:
correlation_name, correlation_type, value = node_id.split(';', 3)
dict_node = {"id": node_id}
dict_node['style'] = get_correlation_node_icon(correlation_name, correlation_type, value)
dict_node['text'] = value
if node_id == root_node_id:
dict_node["style"]["node_color"] = 'orange'
dict_node["style"]["node_radius"] = 7
dict_node['url'] = get_item_url(correlation_name, value, correlation_type, flask_context=flask_context)
graph_nodes_list.append(dict_node)
return graph_nodes_list
def create_node_id(correlation_name, value, correlation_type=''):
if correlation_type is None:
correlation_type = ''
return '{};{};{}'.format(correlation_name, correlation_type, value)
# # TODO: filter by correlation type => bitcoin, mail, ...
def get_graph_node_object_correlation(object_type, root_value, mode, correlation_names, correlation_objects, max_nodes=300, requested_correl_type=None, flask_context=True):
links = set()
nodes = set()
root_node_id = create_node_id(object_type, root_value, requested_correl_type)
nodes.add(root_node_id)
root_correlation = get_object_correlation(object_type, root_value, correlation_names, correlation_objects, requested_correl_type=requested_correl_type)
for correl in root_correlation:
if correl in ('pgp', 'cryptocurrency', 'username'):
for correl_type in root_correlation[correl]:
for correl_val in root_correlation[correl][correl_type]:
# add correlation
correl_node_id = create_node_id(correl, correl_val, correl_type)
if mode=="union":
if len(nodes) > max_nodes:
break
nodes.add(correl_node_id)
links.add((root_node_id, correl_node_id))
# get second correlation
res = get_object_correlation(correl, correl_val, correlation_names, correlation_objects, requested_correl_type=correl_type)
if res:
for corr_obj in res:
for correl_key_val in res[corr_obj]:
#filter root value
if correl_key_val == root_value:
continue
if len(nodes) > max_nodes:
break
new_corel_1 = create_node_id(corr_obj, correl_key_val)
new_corel_2 = create_node_id(correl, correl_val, correl_type)
nodes.add(new_corel_1)
nodes.add(new_corel_2)
links.add((new_corel_1, new_corel_2))
if mode=="inter":
nodes.add(correl_node_id)
links.add((root_node_id, correl_node_id))
if correl in ('decoded', 'screenshot', 'domain', 'paste'):
for correl_val in root_correlation[correl]:
correl_node_id = create_node_id(correl, correl_val)
if mode=="union":
if len(nodes) > max_nodes:
break
nodes.add(correl_node_id)
links.add((root_node_id, correl_node_id))
res = get_object_correlation(correl, correl_val, correlation_names, correlation_objects)
if res:
for corr_obj in res:
if corr_obj in ('decoded', 'domain', 'paste', 'screenshot'):
for correl_key_val in res[corr_obj]:
#filter root value
if correl_key_val == root_value:
continue
if len(nodes) > max_nodes:
break
new_corel_1 = create_node_id(corr_obj, correl_key_val)
new_corel_2 = create_node_id(correl, correl_val)
nodes.add(new_corel_1)
nodes.add(new_corel_2)
links.add((new_corel_1, new_corel_2))
if mode=="inter":
nodes.add(correl_node_id)
links.add((root_node_id, correl_node_id))
if corr_obj in ('pgp', 'cryptocurrency', 'username'):
for correl_key_type in res[corr_obj]:
for correl_key_val in res[corr_obj][correl_key_type]:
#filter root value
if correl_key_val == root_value:
continue
if len(nodes) > max_nodes:
break
new_corel_1 = create_node_id(corr_obj, correl_key_val, correl_key_type)
new_corel_2 = create_node_id(correl, correl_val)
nodes.add(new_corel_1)
nodes.add(new_corel_2)
links.add((new_corel_1, new_corel_2))
if mode=="inter":
nodes.add(correl_node_id)
links.add((root_node_id, correl_node_id))
return {"nodes": create_graph_nodes(nodes, root_node_id, flask_context=flask_context), "links": create_graph_links(links)}
#######################################################################################3

View file

@ -168,7 +168,7 @@ def load_crawler_cookies(cookiejar_uuid, domain, crawler_type='regular'):
################################################################################
def get_all_cookiejar():
r_serv_onion.smembers('cookiejar:all')
return r_serv_onion.smembers('cookiejar:all')
def get_global_cookiejar():
res = r_serv_onion.smembers('cookiejar:global')
@ -185,19 +185,24 @@ def get_user_cookiejar(user_id):
def exist_cookiejar(cookiejar_uuid):
return r_serv_onion.exists('cookiejar_metadata:{}'.format(cookiejar_uuid))
def create_cookiejar(user_id, level=1, description=None):
cookiejar_uuid = str(uuid.uuid4())
def _set_cookiejar_date(cookiejar_uuid, date):
r_serv_onion.hset(f'cookiejar_metadata:{cookiejar_uuid}', 'date', date)
# # TODO: sanitize cookie_uuid
def create_cookiejar(user_id, level=1, description=None, cookiejar_uuid=None):
if not cookiejar_uuid:
cookiejar_uuid = str(uuid.uuid4())
r_serv_onion.sadd('cookiejar:all', cookiejar_uuid)
if level==0:
r_serv_onion.sadd('cookiejar:user:{}'.format(user_id), cookiejar_uuid)
r_serv_onion.sadd(f'cookiejar:user:{user_id}', cookiejar_uuid)
else:
r_serv_onion.sadd('cookiejar:global', cookiejar_uuid)
# metadata
r_serv_onion.hset('cookiejar_metadata:{}'.format(cookiejar_uuid), 'user_id', user_id)
r_serv_onion.hset('cookiejar_metadata:{}'.format(cookiejar_uuid), 'level', level)
r_serv_onion.hset('cookiejar_metadata:{}'.format(cookiejar_uuid), 'description', description)
r_serv_onion.hset('cookiejar_metadata:{}'.format(cookiejar_uuid), 'date', datetime.now().strftime("%Y%m%d"))
r_serv_onion.hset(f'cookiejar_metadata:{cookiejar_uuid}', 'user_id', user_id)
r_serv_onion.hset(f'cookiejar_metadata:{cookiejar_uuid}', 'level', level)
r_serv_onion.hset(f'cookiejar_metadata:{cookiejar_uuid}', 'description', description)
_set_cookiejar_date(cookiejar_uuid, datetime.now().strftime("%Y%m%d"))
# if json_cookies:
# json_cookies = json.loads(json_cookies) # # TODO: catch Exception
@ -259,7 +264,7 @@ def get_cookiejar_metadata(cookiejar_uuid, level=False):
dict_cookiejar['date'] = get_cookiejar_date(cookiejar_uuid)
dict_cookiejar['user_id'] = get_cookiejar_owner(cookiejar_uuid)
if level:
dict_cookiejar['level'] = get_cookies_level(cookiejar_uuid)
dict_cookiejar['level'] = get_cookiejar_level(cookiejar_uuid)
return dict_cookiejar
def get_cookiejar_metadata_by_iterator(iter_cookiejar_uuid):
@ -311,10 +316,12 @@ def get_cookie_dict(cookie_uuid):
return cookie_dict
# name, value, path=None, httpOnly=None, secure=None, domain=None, text=None
def add_cookie_to_cookiejar(cookiejar_uuid, cookie_dict):
cookie_uuid = generate_uuid()
r_serv_onion.sadd('cookiejar:{}:cookies:uuid'.format(cookiejar_uuid), cookie_uuid)
r_serv_onion.sadd('cookies:map:cookiejar:{}'.format(cookie_uuid), cookiejar_uuid)
def add_cookie_to_cookiejar(cookiejar_uuid, cookie_dict, cookie_uuid=None):
# # TODO: sanitize cookie_uuid
if not cookie_uuid:
cookie_uuid = generate_uuid()
r_serv_onion.sadd(f'cookiejar:{cookiejar_uuid}:cookies:uuid', cookie_uuid)
r_serv_onion.sadd(f'cookies:map:cookiejar:{cookie_uuid}', cookiejar_uuid)
set_cookie_value(cookie_uuid, 'name', cookie_dict['name'])
set_cookie_value(cookie_uuid, 'value', cookie_dict['value'])
@ -631,7 +638,6 @@ def add_auto_crawler_in_queue(domain, domain_type, port, epoch, delta, message):
def update_auto_crawler_queue():
current_epoch = int(time.time())
current_epoch = 1631096842
# check if current_epoch > domain_next_epoch
l_queue = r_serv_onion.zrangebyscore('crawler_auto_queue', 0, current_epoch)
for elem in l_queue:
@ -715,6 +721,73 @@ def send_url_to_crawl_in_queue(crawler_mode, crawler_type, url):
#### ####
#### CRAWLER TASK API ####
def api_add_crawler_task(json_dict):
user_id = None ###############################################
user_agent = json_dict.get('user_agent', None)
url = json_dict.get('url', '')
if not is_valid_uuid_v4(investigation_uuid):
return {"status": "error", "reason": f"Invalid Investigation uuid: {investigation_uuid}"}, 400
screenshot = json_dict.get('screenshot', True) ####
screenshot = screenshot == True
har = json_dict.get('har', True) ####
har = har == True
depth_limit = json_dict.get('depth_limit', 1)
try:
depth_limit = int(depth_limit)
if depth_limit < 0:
depth_limit = 0
except ValueError:
return ({'error':'invalid depth limit'}, 400)
max_pages = json_dict.get('max_pages', 100)
if max_pages:
try:
max_pages = int(max_pages)
if max_pages < 1:
max_pages = 1
except ValueError:
return ({'error':'invalid max_pages limit'}, 400)
auto_crawler = json_dict.get('auto_crawler', False)
auto_crawler = auto_crawler == True
crawler_delta = json_dict.get('crawler_delta', 3600)
if auto_crawler:
try:
crawler_delta = int(crawler_delta)
if crawler_delta < 0:
return ({'error':'invalid delta between two pass of the crawler'}, 400)
except ValueError:
return ({'error':'invalid delta between two pass of the crawler'}, 400)
crawler_type = json_dict.get('crawler_type', None)
cookiejar_uuid = json_dict.get('cookiejar_uuid', None)
if cookiejar_uuid:
if not exist_cookiejar(cookiejar_uuid):
return ({'error': 'unknown cookiejar uuid', 'cookiejar_uuid': cookiejar_uuid}, 404)
level = get_cookiejar_level(cookiejar_uuid)
if level == 0: # # TODO: check if user is admin ######################################################
cookie_owner = get_cookiejar_owner(cookiejar_uuid)
if cookie_owner != user_id:
return ({'error': 'The access to this cookiejar is restricted'}, 403)
create_crawler_task(url, screenshot=screenshot, har=har, depth_limit=depth_limit,
max_pages=max_pages, crawler_type=crawler_type,
auto_crawler=auto_crawler, crawler_delta=crawler_delta,
cookiejar_uuid=cookiejar_uuid, user_agent=user_agent)
# # TODO: # FIXME: REPLACE ME
def api_create_crawler_task(user_id, url, screenshot=True, har=True, depth_limit=1, max_pages=100, auto_crawler=False, crawler_delta=3600, crawler_type=None, cookiejar_uuid=None, user_agent=None):
# validate url
if url is None or url=='' or url=='\n':
@ -802,10 +875,16 @@ def create_domain_metadata(domain_type, domain, current_port, date, date_month):
all_domain_ports.append(current_port)
r_serv_onion.hset('{}_metadata:{}'.format(domain_type, domain), 'ports', ';'.join(all_domain_ports))
def add_last_crawled_domain(domain_type, domain, port, epoch):
# update list, last crawled domains
redis_crawler.lpush(f'last_{domain_type}', f'{domain}:{port};{epoch}')
redis_crawler.ltrim(f'last_{domain_type}', 0, 15)
# add root_item to history
# if down -> root_item = epoch_date
def add_domain_root_item(root_item, domain_type, domain, epoch_date, port):
# Create/Update crawler history
r_serv_onion.zadd('crawler_history_{}:{}:{}'.format(domain_type, domain, port), epoch_date, root_item)
r_serv_onion.zadd(f'crawler_history_{domain_type}:{domain}:{port}', epoch_date, root_item)
def create_item_metadata(item_id, domain, url, port, item_father):
r_serv_metadata.hset('paste_metadata:{}'.format(item_id), 'father', item_father)
@ -1498,5 +1577,5 @@ if __name__ == '__main__':
#print(get_all_queues_stats())
#res = get_auto_crawler_all_domain()
res = update_auto_crawler_queue()
res = get_all_cookiejar()
print(res)

View file

@ -11,7 +11,7 @@ sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
import ConfigLoader
config_loader = ConfigLoader.ConfigLoader()
r_serv_db = config_loader.get_redis_conn("Kvrocks_DB")
r_serv_db = config_loader.get_db_conn("Kvrocks_DB")
r_cache = config_loader.get_redis_conn("Redis_Cache")
config_loader = None

View file

@ -185,6 +185,7 @@ def get_obj_id_item_id(parent_type, parent_id):
else:
return None
# # TODO: # FIXME: TO MIGRATE ??????
def add_map_obj_id_item_id(obj_id, item_id, obj_type):
if obj_type == 'twitter_id':
r_serv_metadata.hset('map:twitter_id:item_id', obj_id, item_id)

View file

@ -9,13 +9,13 @@ from flask import url_for
sys.path.append(os.environ['AIL_BIN'])
from lib.ConfigLoader import ConfigLoader
from lib.objects import abstract_object
from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id
config_loader = ConfigLoader()
config_loader = None
class CryptoCurrency(abstract_object.AbstractObject):
class CryptoCurrency(AbstractSubtypeObject):
"""
AIL CryptoCurrency Object. (strings)
"""
@ -88,29 +88,45 @@ class CryptoCurrency(abstract_object.AbstractObject):
obj_attr.add_tag(tag)
return obj
############################################################################
############################################################################
############################################################################
def get_meta(self, options=set()):
return self._get_meta()
def exist_correlation(self):
pass
############################################################################
############################################################################
def build_crypto_regex(subtype, search_id):
pass
def get_all_subtypes():
return ['bitcoin', 'bitcoin-cash', 'dash', 'ethereum', 'litecoin', 'monero', 'zcash']
def search_by_name(subtype, search_id):
# # TODO: BUILD regex
obj = CryptoCurrency(subtype, search_id)
if obj.exists():
return search_id
else:
regex = build_crypto_regex(subtype, search_id)
return abstract_object.search_subtype_obj_by_id('cryptocurrency', subtype, regex)
# def build_crypto_regex(subtype, search_id):
# pass
#
# def search_by_name(subtype, search_id): ##################################################
#
# # # TODO: BUILD regex
# obj = CryptoCurrency(subtype, search_id)
# if obj.exists():
# return search_id
# else:
# regex = build_crypto_regex(subtype, search_id)
# return abstract_object.search_subtype_obj_by_id('cryptocurrency', subtype, regex)
#if __name__ == '__main__':
# by days -> need first/last entry USEFUL FOR DATA RETENTION UI
def get_all_cryptocurrencies():
cryptos = {}
for subtype in get_all_subtypes():
cryptos[subtype] = get_all_cryptocurrencies_by_subtype(subtype)
return cryptos
def get_all_cryptocurrencies_by_subtype(subtype):
return get_all_id('cryptocurrency', subtype)
if __name__ == '__main__':
res = get_all_cryptocurrencies()
print(res)

View file

@ -3,6 +3,8 @@
import os
import sys
import requests
import zipfile
from flask import url_for
from io import BytesIO
@ -10,11 +12,17 @@ from io import BytesIO
sys.path.append(os.environ['AIL_BIN'])
from lib.ConfigLoader import ConfigLoader
from lib.objects.abstract_object import AbstractObject
from lib.item_basic import is_crawled, get_item_domain
from packages import Date
config_loader = ConfigLoader()
r_metadata = config_loader.get_db_conn("Kvrocks_Objects")
r_metadata = config_loader.get_redis_conn("ARDB_Metadata")
HASH_DIR = config_loader.get_config_str('Directories', 'hash')
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
VT_TOKEN = 'f1a6281c8a533172a45d901435452f67f5e61fd06a83dcc058f3f7b4aab66f5b'
config_loader = None
@ -32,17 +40,14 @@ class Decoded(AbstractObject):
def __init__(self, id):
super(Decoded, self).__init__('decoded', id)
def exists(self):
return r_metadata.exists(f'metadata_hash:{self.id}')
# def get_ail_2_ail_payload(self):
# payload = {'raw': self.get_gzip_content(b64=True),
# 'compress': 'gzip'}
# return payload
def get_sha1(self):
return self.id.split('/')[0]
def get_file_type(self):
return r_metadata.hget(f'metadata_hash:{self.get_sha1()}', 'estimated_type')
# # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
def delete(self):
# # TODO:
@ -50,13 +55,14 @@ class Decoded(AbstractObject):
def get_link(self, flask_context=False):
if flask_context:
url = url_for('correlation.show_correlation', object_type="decoded", correlation_id=value)
url = url_for('correlation.show_correlation', object_type="decoded", correlation_id=self.id)
else:
url = f'{baseurl}/correlation/show_correlation?object_type={self.type}&correlation_id={self.id}'
return url
def get_svg_icon(self):
file_type = self.get_file_type()
file_type = self.get_estimated_type()
file_type = file_type.split('/')[0]
if file_type == 'application':
icon = '\uf15b'
elif file_type == 'audio':
@ -85,12 +91,23 @@ class Decoded(AbstractObject):
def get_filepath(self, mimetype=None):
return os.path.join(os.environ['AIL_HOME'], self.get_rel_path(mimetype=mimetype))
def get_file_content(self, mimetype=None):
def get_content(self, mimetype=None):
filepath = self.get_filepath(mimetype=mimetype)
with open(filepath, 'rb') as f:
file_content = BytesIO(f.read())
return file_content
def get_zip_content(self):
mimetype = self.get_estimated_type()
zip_content = BytesIO()
with zipfile.ZipFile(zip_content, "w") as zf:
# TODO: Fix password
#zf.setpassword(b"infected")
zf.writestr( self.id, self.get_content().getvalue())
zip_content.seek(0)
return zip_content
def get_misp_object(self):
obj_attrs = []
obj = MISPObject('file')
@ -99,7 +116,7 @@ class Decoded(AbstractObject):
obj_attrs.append( obj.add_attribute('sha1', value=self.id) )
obj_attrs.append( obj.add_attribute('mimetype', value=self.get_estimated_type()) )
obj_attrs.append( obj.add_attribute('malware-sample', value=self.id, data=self.get_file_content()) )
obj_attrs.append( obj.add_attribute('malware-sample', value=self.id, data=self.get_content()) )
for obj_attr in obj_attrs:
for tag in self.get_tags():
obj_attr.add_tag(tag)
@ -108,24 +125,312 @@ class Decoded(AbstractObject):
############################################################################
############################################################################
############################################################################
def get_decoders(self):
return ['base64', 'binary', 'hexadecimal']
def exist_correlation(self):
def get_first_seen(self):
res = r_metadata.hget(f'metadata_hash:{self.id}', 'first_seen')
if res:
return int(res)
else:
return 99999999
def get_last_seen(self):
res = r_metadata.hget(f'metadata_hash:{self.id}', 'last_seen')
if res:
return int(res)
else:
return 0
def set_first_seen(self, date):
r_metadata.hset(f'metadata_hash:{self.id}', 'first_seen', date)
def set_last_seen(self, date):
r_metadata.hset(f'metadata_hash:{self.id}', 'last_seen', date)
def update_daterange(self, date):
first_seen = self.get_first_seen()
last_seen = self.get_last_seen()
if date < first_seen:
self.set_first_seen(date)
if date > last_seen:
self.set_last_seen(date)
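The sentinel defaults above (99999999 / 0) are what let update_daterange() work on a freshly created hash; a hedged illustration with an invented sha1 (assumes a configured AIL DB):
decoded = Decoded('0' * 40)           # hypothetical sha1
decoded.update_daterange(20220819)    # first call sets both first_seen and last_seen
decoded.update_daterange(20220101)    # an earlier date only moves first_seen back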
def get_meta(self):
pass
def create(self, content, date):
def get_meta_vt(self):
meta = {}
meta['link'] = r_metadata.hget(f'metadata_hash:{self.id}', 'vt_link')
meta['report'] = r_metadata.hget(f'metadata_hash:{self.id}', 'vt_report')
return meta
def guess_mimetype(self, bytes_content):
return magic.from_buffer(bytes_content, mime=True)
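For reference, a quick sketch of what python-magic returns from from_buffer with mime=True; the exact labels depend on the installed libmagic version.
import magic
magic.from_buffer(b'%PDF-1.7 ...', mime=True)    # typically 'application/pdf'
magic.from_buffer(b'MZ\x90\x00\x03', mime=True)  # typically 'application/x-dosexec'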
def _save_meta(self, filepath, mimetype):
# create hash metadata
r_metadata.hset(f'metadata_hash:{self.id}', 'size', os.path.getsize(filepath))
r_metadata.hset(f'metadata_hash:{self.id}', 'estimated_type', mimetype)
r_metadata.sadd('hash_all_type', mimetype) #################################################### rename ????
def save_file(self, content, mimetype): #####################################################
filepath = self.get_filepath(mimetype=mimetype)
if os.path.isfile(filepath):
#print('File already exist')
return False
# create dir
dirname = os.path.dirname(filepath)
if not os.path.exists(dirname):
os.makedirs(dirname)
with open(filepath, 'wb') as f:
f.write(content)
# create hash metadata
self._save_meta(filepath, mimetype)
return True
# avoid counting the same hash multiple times in the same item,
# except if the encoding differs
def is_seen_this_day(self, date):
for decoder in get_decoders_names():
if r_metadata.zscore(f'{decoder}_date:{date}', self.id):
return True
return False
def add(self, decoder_name, date, obj_id, mimetype):
if not self.is_seen_this_day(date):
# mimetype
r_metadata.zincrby(f'decoded:mimetype:{date}', mimetype, 1)
r_metadata.sadd(f'decoded:mimetypes', mimetype)
# filter hash encoded in the same object
if not self.is_correlated('item', None, obj_id):
r_metadata.hincrby(f'metadata_hash:{self.id}', f'{decoder_name}_decoder', 1)
r_metadata.zincrby(f'{decoder_name}_type:{mimetype}', date, 1)
r_metadata.incrby(f'{decoder_name}_decoded:{date}', 1)
r_metadata.zincrby(f'{decoder_name}_date:{date}', self.id, 1)
self.update_daterange(date)
# First Hash for this decoder this day
# legacy API calls kept for reference only (sha1_string / decoded_file / item_date are not defined here;
# file saving and correlation are handled by create() and the calls below):
# Decoded.save_decoded_file_content(sha1_string, decoded_file, item_date, mimetype=mimetype)
# Decoded.save_item_relationship(sha1_string, item_id)
# Decoded.create_decoder_matadata(sha1_string, item_id, decoder_name)
# Correlations
self.add_correlation('item', '', obj_id)
# domain
if is_crawled(obj_id):
domain = get_item_domain(obj_id)
self.add_correlation('domain', '', domain)
# Filter duplicates ######################################################################
# Filter on item + hash for this day
# filter Obj Duplicate
# first time we see this day
# iterate on all decoder
######################################################################
# first time we see this hash today
# mimetype # # # # # # # #
r_metadata.zincrby(f'decoded:mimetype:{date}', mimetype, 1)
# create hash metadata
r_metadata.sadd(f'decoded:mimetypes', mimetype)
# # TODO: DUPLICATES + check fields
def add(self, decoder_name, date, obj_id, mimetype):
self.update_daterange(date)
r_metadata.incrby(f'{decoder_name}_decoded:{date}', 1)
r_metadata.zincrby(f'{decoder_name}_date:{date}', self.id, 1)
r_metadata.hincrby(f'metadata_hash:{self.id}', f'{decoder_name}_decoder', 1)
r_metadata.zincrby(f'{decoder_name}_type:{mimetype}', date, 1) # # TODO: # DUP1
################################################################ # TODO: REMOVE ?????????????????????????????????
r_metadata.zincrby(f'{decoder_name}_hash:{self.id}', obj_id, 1) # number of occurrences of this decoding in this item
# first time we see this hash encoding on this item
if not r_metadata.zscore(f'{decoder_name}_hash:{self.id}', obj_id):
# create hash metadata
r_metadata.sadd(f'hash_{decoder_name}_all_type', mimetype)
# first time we see this hash encoding today
if not r_metadata.zscore(f'{decoder_name}_date:{date}', self.id):
r_metadata.zincrby(f'{decoder_name}_type:{mimetype}', date, 1) # # TODO: # DUP1
# Correlations
self.add_correlation('item', '', obj_id)
# domain
if is_crawled(obj_id):
domain = get_item_domain(obj_id)
self.add_correlation('domain', '', domain)
# NB of MIMETYPE / DAY -> ALL HASH OR UNIQ HASH ??????
# # TODO: ADD items
def create(self, content, date, mimetype=None):
if not mimetype:
mimetype = self.guess_mimetype(content)
self.save_file(content, mimetype)
self.update_daterange(date)
#######################################################################################
#######################################################################################
#######################################################################################
#######################################################################################
def set_vt_report(self, report):
r_metadata.hset(f'metadata_hash:{self.id}', 'vt_report', report)
def set_meta_vt(self, link, report):
r_metadata.hset(f'metadata_hash:{self.id}', 'vt_link', link)
self.set_vt_report(report)
def refresh_vt_report(self):
params = {'apikey': VT_TOKEN, 'resource': self.id}
response = requests.get('https://www.virustotal.com/vtapi/v2/file/report', params=params)
if response.status_code == 200:
json_response = response.json()
response_code = json_response['response_code']
# report exist
if response_code == 1:
total = json_response['total']
detection = json_response['positives']
report = f'Detection {detection}/{total}'
# no report found
elif response_code == 0:
report = 'No report found'
# file in queue
elif response_code == -2:
report = 'In Queue - Refresh'
self.set_vt_report(report)
print(json_response)
print(response_code)
print(report)
return report
elif response.status_code == 403:
Flask_config.vt_enabled = False
return 'Virustotal key is incorrect (e.g. for public API not for virustotal intelligence), authentication failed'
elif response.status_code == 204:
return 'Rate Limited'
def send_to_vt(self):
files = {'file': (self.id, self.get_content())}
response = requests.post('https://www.virustotal.com/vtapi/v2/file/scan', files=files, params= {'apikey': VT_TOKEN})
json_response = response.json()
link = json_response['permalink'].split('analysis')[0] + 'analysis/'
self.set_meta_vt(link, 'Please Refresh')
############################################################################
############################################################################
def get_decoders_names():
return ['base64', 'binary', 'hexadecimal']
def get_all_mimetypes():
return r_metadata.smembers(f'decoded:mimetypes')
#if __name__ == '__main__':
def sanityze_decoder_names(decoder_name):
if decoder_name not in get_decoders_names():
return None
else:
return decoder_name
def sanityze_mimetype(mimetype):
if mimetype == 'All types':
return None
elif not r_metadata.sismember('hash_all_type', mimetype):
return None
else:
return mimetype
def pie_chart_mimetype_json(date_from, date_to, mimetype, decoder_name):
if mimetype:
all_mimetypes = [mimetype]
else:
all_mimetypes = get_all_mimetypes()
date_range = Date.substract_date(date_from, date_to)
for date in date_range:
for mimet in all_mimetypes:
pass
def pie_chart_decoder_json(date_from, date_to, mimetype):
all_decoder = get_decoders_names()
date_range = Date.substract_date(date_from, date_to)
if not date_range:
date_range.append(Date.get_today_date_str())
nb_decoder = {}
for date in date_range:
for decoder_name in all_decoder:
if not mimetype:
nb = r_metadata.get(f'{decoder_name}_decoded:{date}')
if nb is None:
nb = 0
else:
nb = int(nb)
else:
nb = r_metadata.zscore(f'{decoder_name}_type:{mimetype}', date)
if nb is None:
nb = 0
else:
nb = int(nb)
nb_decoder[decoder_name] = nb_decoder.get(decoder_name, 0) + nb
pie_chart = []
for decoder_name in all_decoder:
pie_chart.append({'name': decoder_name, 'value': nb_decoder[decoder_name]})
return pie_chart
def api_pie_chart_decoder_json(date_from, date_to, mimetype):
mimetype = sanityze_mimetype(mimetype)
date = Date.sanitise_date_range(date_from, date_to)
return pie_chart_decoder_json(date['date_from'], date['date_to'], mimetype)
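A hedged usage sketch of the API helper above; the counts in the comment are made up, only the shape of the result matters.
# Dates may be passed with or without separators, they are sanitised first.
chart = api_pie_chart_decoder_json('2022-08-01', '2022-08-19', 'All types')
# chart is a list with one entry per decoder, e.g.:
# [{'name': 'base64', 'value': 42}, {'name': 'binary', 'value': 0}, {'name': 'hexadecimal', 'value': 3}]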
def _delete_old_json_descriptor():
decodeds = []
hash_dir = os.path.join(os.environ['AIL_HOME'], HASH_DIR)
for root, dirs, files in os.walk(hash_dir):
for file in files:
if file.endswith('.json'):
decoded_path = f'{root}/{file}'
os.remove(decoded_path)
return decodeds
def get_all_decodeds():
decodeds = []
hash_dir = os.path.join(os.environ['AIL_HOME'], HASH_DIR)
if not hash_dir.endswith("/"):
hash_dir = f"{hash_dir}/"
for root, dirs, files in os.walk(hash_dir):
for file in files:
decoded_path = f'{root}{file}'
decodeds.append(file)
return decodeds
#if __name__ == '__main__':

View file

@ -43,7 +43,7 @@ class Domain(AbstractObject):
else:
return 'regular'
def get_first_seen(selfr_int=False, separator=True):
def get_first_seen(self, r_int=False, separator=True):
first_seen = r_onion.hget(f'{self.domain_type}_metadata:{self.id}', 'first_seen')
if first_seen:
if separator:
@ -62,12 +62,59 @@ class Domain(AbstractObject):
last_check = int(last_check)
return last_check
def get_ports(self):
def _set_first_seen(self, date):
r_onion.hset(f'{self.domain_type}_metadata:{self.id}', 'first_seen', date)
def _set_last_check(self, date):
r_onion.hset(f'{self.domain_type}_metadata:{self.id}', 'last_check', date)
def update_daterange(self, date):
first_seen = self.get_first_seen(r_int=True)
last_check = self.get_last_check(r_int=True)
if not first_seen:
self._set_first_seen(date)
self._set_last_check(date)
elif int(first_seen) > date:
self._set_first_seen(date)
elif int(last_check) < date:
self._set_last_check(date)
def get_last_origin(self):
return r_onion.hget(f'{self.domain_type}_metadata:{self.id}', 'paste_parent')
def set_last_origin(self, origin_id):
r_onion.hset(f'{self.domain_type}_metadata:{self.id}', 'paste_parent', origin_id)
def is_up(self, ports=[]):
if not ports:
ports = self.get_ports()
for port in ports:
res = r_onion.zrevrange(f'crawler_history_{self.domain_type}:{self.id}:{port}', 0, 0, withscores=True)
if res:
item_core, epoch = res[0]
if item_core != str(int(epoch)):
return True
return False
def get_ports(self, r_set=False):
l_ports = r_onion.hget(f'{self.domain_type}_metadata:{self.id}', 'ports')
if l_ports:
return l_ports.split(";")
l_ports = l_ports.split(";")
if r_set:
return set(l_ports)
else:
return l_ports
if r_set:
return set()
return []
def _set_ports(self, ports):
ports = ';'.join(ports)
r_onion.hset(f'{self.domain_type}_metadata:{self.id}', 'ports', ports)
def add_ports(self, port):
ports = self.get_ports(r_set=True)
ports.add(port)
self._set_ports(ports)
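Ports are persisted as a single ';'-joined hash field; a small sketch of the round trip (the domain id is illustrative):
domain = Domain('example.onion')   # hypothetical domain id
domain.add_ports('80')             # stored as ports = '80'
domain.add_ports('443')            # read back as {'80'}, stored as '80;443' (set order not guaranteed)
domain.get_ports()                 # -> ['80', '443'] (split on ';')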
def get_history_by_port(self, port, status=False, root=False):
'''
Return the crawl history of this domain for the given port (optionally with status and root item).
@ -94,6 +141,23 @@ class Domain(AbstractObject):
history.append(dict_history)
return history
def get_languages(self):
return r_onion.smembers(f'domain:language:{self.id}')
def get_meta(self):
meta = {}
meta['type'] = self.domain_type
meta['first_seen'] = self.get_first_seen()
meta['last_check'] = self.get_last_check()
meta['last_origin'] = self.get_last_origin()
meta['ports'] = self.get_ports()
meta['status'] = self.is_up(ports=meta['ports'])
meta['tags'] = self.get_tags()
#meta['is_tags_safe'] =
meta['languages'] = self.get_languages()
#meta['screenshot'] =
return meta
# # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
def delete(self):
# # TODO:
@ -179,16 +243,74 @@ class Domain(AbstractObject):
obj_attr.add_tag(tag)
return obj
############################################################################
############################################################################
############################################################################
def add_language(self, language):
r_onion.sadd('all_domains_languages', language)
r_onion.sadd(f'all_domains_languages:{self.domain_type}', language)
r_onion.sadd(f'language:domains:{self.domain_type}:{language}', self.id)
r_onion.sadd(f'domain:language:{self.id}', language)
def exist_correlation(self):
pass
############################################################################
############################################################################
def create(self, first_seen, last_check, ports, status, tags, languages):
r_onion.hset(f'{self.domain_type}_metadata:{self.id}', 'first_seen', first_seen)
r_onion.hset(f'{self.domain_type}_metadata:{self.id}', 'last_check', last_check)
for language in languages:
self.add_language(language)
#### CRAWLER ####
# add root_item to history
# if domain down -> root_item = epoch
def _add_history_root_item(self, root_item, epoch, port):
# Create/Update crawler history
r_onion.zadd(f'crawler_history_{self.domain_type}:{self.id}:{port}', epoch, root_item)
# if domain down -> root_item = epoch
def add_history(self, epoch, port, root_item=None):
date = time.strftime('%Y%m%d', time.gmtime(epoch))
try:
int(root_item)
root_item = None  # an int-convertible root_item is an epoch -> the domain was down
except (ValueError, TypeError):
pass
# UP
if root_item:
r_onion.sadd(f'full_{self.domain_type}_up', self.id)
r_onion.sadd(f'{self.domain_type}_up:{date}', self.id) # # TODO: -> store first day
r_onion.sadd(f'month_{self.domain_type}_up:{date[0:6]}', self.id) # # TODO: -> store first month
self._add_history_root_item(root_item, epoch, port)
else:
r_onion.sadd(f'{self.domain_type}_down:{date}', self.id) # # TODO: -> store first month
self._add_history_root_item(epoch, epoch, port)
def add_crawled_item(self, url, port, item_id, item_father):
r_metadata.hset(f'paste_metadata:{item_id}', 'father', item_father)
r_metadata.hset(f'paste_metadata:{item_id}', 'domain', f'{self.id}:{port}')
r_metadata.hset(f'paste_metadata:{item_id}', 'real_link', url)
# add this item_id to his father
r_metadata.sadd(f'paste_children:{item_father}', item_id)
##-- CRAWLER --##
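A hedged sketch of how a crawler run could use the helpers above once a domain has been fetched; the domain, URL and item ids are invented for illustration and time is assumed to be imported as in this module.
epoch = int(time.time())
date = int(time.strftime('%Y%m%d', time.gmtime(epoch)))
domain = Domain('example.onion')                         # hypothetical
root_item = 'crawled/2022/08/19/example.onion_root.gz'   # hypothetical root item id
domain.update_daterange(date)
domain.add_history(epoch, 80, root_item=root_item)       # domain is UP, keep the root item in history
domain.add_crawled_item('http://example.onion/login', 80,
                        'crawled/2022/08/19/example.onion_login.gz',  # hypothetical child item
                        root_item)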
############################################################################
############################################################################
def get_all_domains_types():
return ['onion', 'regular'] # i2p
def get_all_domains_languages():
return r_onion.smembers('all_domains_languages')
def get_domains_up_by_type(domain_type):
return r_onion.smembers(f'full_{domain_type}_up')
################################################################################
################################################################################
#if __name__ == '__main__':

View file

@ -22,19 +22,9 @@ from export.Export import get_ail_uuid # # TODO: REPLACE
from lib.objects.abstract_object import AbstractObject
from lib.ConfigLoader import ConfigLoader
from lib import item_basic
from lib import domain_basic
from packages import Tag
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
import Cryptocurrency
import Pgp
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import Correlate_object
import Decoded
import Screenshot
import Username
from flask import url_for
@ -310,6 +300,51 @@ class Item(AbstractObject):
############################################################################
############################################################################
def _get_dir_source_name(dir, source_name=None, l_sources_name=set(), filter_dir=False):
if not l_sources_name:
l_sources_name = set()
if source_name:
l_dir = os.listdir(os.path.join(dir, source_name))
else:
l_dir = os.listdir(dir)
# empty directory
if not l_dir:
l_sources_name.add(source_name)
return l_sources_name
else:
for src_name in l_dir:
if len(src_name) == 4:
try:
int(src_name)
to_add = os.path.join(source_name)
# filter sources, remove first directory
if filter_dir:
to_add = to_add.replace('archive/', '').replace('alerts/', '')
l_sources_name.add(to_add)
return l_sources_name
except (ValueError, TypeError):
pass
if source_name:
src_name = os.path.join(source_name, src_name)
l_sources_name = _get_dir_source_name(dir, source_name=src_name, l_sources_name=l_sources_name, filter_dir=filter_dir)
return l_sources_name
def get_items_sources(filter_dir=False, r_list=False):
res = _get_dir_source_name(ITEMS_FOLDER, filter_dir=filter_dir)
if res:
if r_list:
res = list(res)
return res
else:
return []
def get_items_by_source(source):
l_items = []
dir_item = os.path.join(os.environ['AIL_HOME'], ITEMS_FOLDER, source)
for root, dirs, files in os.walk(dir_item):
for file in files:
item_id = os.path.join(root, file).replace(ITEMS_FOLDER, '', 1)
l_items.append(item_id)
return l_items
################################################################################
################################################################################
@ -526,117 +561,6 @@ def api_get_items_sources():
# return {'status': 'error', 'reason': 'Invalid source', 'provide': source}, 400
# return {'status': 'success', 'reason': 'Valid source', 'provide': source}, 200
###
### correlation
###
def get_item_cryptocurrency(item_id, currencies_type=None, get_nb=False):
'''
Return all cryptocurrencies of a given item.
:param item_id: item id
:param currencies_type: list of cryptocurrencies type
:type currencies_type: list, optional
'''
return Cryptocurrency.cryptocurrency.get_item_correlation_dict(item_id, correlation_type=currencies_type, get_nb=get_nb)
def get_item_pgp(item_id, currencies_type=None, get_nb=False):
'''
Return all pgp of a given item.
:param item_id: item id
:param currencies_type: list of cryptocurrencies type
:type currencies_type: list, optional
'''
return Pgp.pgp.get_item_correlation_dict(item_id, correlation_type=currencies_type, get_nb=get_nb)
def get_item_username(item_id, sub_type=None, get_nb=False):
'''
Return all pgp of a given item.
:param item_id: item id
:param sub_type: list of username type
:type sub_type: list, optional
'''
return Username.correlation.get_item_correlation_dict(item_id, correlation_type=sub_type, get_nb=get_nb)
def get_item_decoded(item_id):
'''
Return all pgp of a given item.
:param item_id: item id
:param currencies_type: list of cryptocurrencies type
:type currencies_type: list, optional
'''
return Decoded.get_item_decoded(item_id)
def get_item_all_screenshot(item_id):
'''
Return all screenshot of a given item.
:param item_id: item id
'''
return Screenshot.get_item_screenshot_list(item_id)
def get_item_all_correlation(item_id, correlation_names=[], get_nb=False):
'''
Retun all correlation of a given item id.
:param item_id: item id
:type domain: str
:return: a dict of all correlation for a item id
:rtype: dict
'''
if not correlation_names:
correlation_names = Correlate_object.get_all_correlation_names()
item_correl = {}
for correlation_name in correlation_names:
if correlation_name=='cryptocurrency':
res = get_item_cryptocurrency(item_id, get_nb=get_nb)
elif correlation_name=='pgp':
res = get_item_pgp(item_id, get_nb=get_nb)
elif correlation_name=='username':
res = get_item_username(item_id, get_nb=get_nb)
elif correlation_name=='decoded':
res = get_item_decoded(item_id)
elif correlation_name=='screenshot':
res = get_item_all_screenshot(item_id)
else:
res = None
# add correllation to dict
if res:
item_correl[correlation_name] = res
return item_correl
## TODO: REFRACTOR
def _get_item_correlation(correlation_name, correlation_type, item_id):
res = r_serv_metadata.smembers('item_{}_{}:{}'.format(correlation_name, correlation_type, item_id))
if res:
return list(res)
else:
return []
## TODO: REFRACTOR
def get_item_bitcoin(item_id):
return _get_item_correlation('cryptocurrency', 'bitcoin', item_id)
## TODO: REFRACTOR
def get_item_pgp_key(item_id):
return _get_item_correlation('pgpdump', 'key', item_id)
## TODO: REFRACTOR
def get_item_pgp_name(item_id):
return _get_item_correlation('pgpdump', 'name', item_id)
## TODO: REFRACTOR
def get_item_pgp_mail(item_id):
return _get_item_correlation('pgpdump', 'mail', item_id)
## TODO: REFRACTOR
def get_item_pgp_correlation(item_id):
pass
###
### GET Internal Module DESC
@ -804,60 +728,63 @@ def create_item(obj_id, obj_metadata, io_content):
# Item not created
return False
# # TODO:
def delete_item(obj_id):
# check if item exists
if not exist_item(obj_id):
return False
else:
delete_item_duplicate(obj_id)
# delete MISP event
r_serv_metadata.delete('misp_events:{}'.format(obj_id))
r_serv_metadata.delete('hive_cases:{}'.format(obj_id))
pass
os.remove(get_item_filename(obj_id))
# get all correlation
obj_correlations = get_item_all_correlation(obj_id)
for correlation in obj_correlations:
if correlation=='cryptocurrency' or correlation=='pgp':
for obj2_subtype in obj_correlations[correlation]:
for obj2_id in obj_correlations[correlation][obj2_subtype]:
Correlate_object.delete_obj_relationship(correlation, obj2_id, 'item', obj_id,
obj1_subtype=obj2_subtype)
else:
for obj2_id in obj_correlations[correlation]:
Correlate_object.delete_obj_relationship(correlation, obj2_id, 'item', obj_id)
# delete father/child
delete_node(obj_id)
# delete item metadata
r_serv_metadata.delete('paste_metadata:{}'.format(obj_id))
return True
### TODO in import V2
# delete from tracked items
# # # TODO: # FIXME: LATER
# delete from queue
###
return False
# # check if item exists
# if not exist_item(obj_id):
# return False
# else:
# delete_item_duplicate(obj_id)
# # delete MISP event
# r_serv_metadata.delete('misp_events:{}'.format(obj_id))
# r_serv_metadata.delete('hive_cases:{}'.format(obj_id))
#
# os.remove(get_item_filename(obj_id))
#
# # get all correlation
# obj_correlations = get_item_all_correlation(obj_id)
# for correlation in obj_correlations:
# if correlation=='cryptocurrency' or correlation=='pgp':
# for obj2_subtype in obj_correlations[correlation]:
# for obj2_id in obj_correlations[correlation][obj2_subtype]:
# Correlate_object.delete_obj_relationship(correlation, obj2_id, 'item', obj_id,
# obj1_subtype=obj2_subtype)
# else:
# for obj2_id in obj_correlations[correlation]:
# Correlate_object.delete_obj_relationship(correlation, obj2_id, 'item', obj_id)
#
# # delete father/child
# delete_node(obj_id)
#
# # delete item metadata
# r_serv_metadata.delete('paste_metadata:{}'.format(obj_id))
#
# return True
#
# ### TODO in import V2
# # delete from tracked items
#
# # # # TODO: # FIXME: LATER
# # delete from queue
# ###
# return False
#### ####
def delete_node(item_id):
if is_node(item_id):
if is_crawled(item_id):
delete_domain_node(item_id)
item_basic._delete_node(item_id)
def delete_domain_node(item_id):
if is_domain_root(item_id):
# remove from domain history
domain, port = get_item_domain_with_port(item_id).split(':')
domain_basic.delete_domain_item_core(item_id, domain, port)
for child_id in get_all_domain_node_by_item_id(item_id):
delete_item(child_id)
# def delete_node(item_id):
# if is_node(item_id):
# if is_crawled(item_id):
# delete_domain_node(item_id)
# item_basic._delete_node(item_id)
#
# def delete_domain_node(item_id):
# if is_domain_root(item_id):
# # remove from domain history
# domain, port = get_item_domain_with_port(item_id).split(':')
# domain_basic.delete_domain_item_core(item_id, domain, port)
# for child_id in get_all_domain_node_by_item_id(item_id):
# delete_item(child_id)
if __name__ == '__main__':

View file

@ -10,7 +10,7 @@ import redis
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
from abstract_object import AbstractObject
from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id
from flask import url_for
config_loader = ConfigLoader.ConfigLoader()
@ -22,7 +22,7 @@ config_loader = None
################################################################################
################################################################################
class Pgp(AbstractObject):
class Pgp(AbstractSubtypeObject):
"""
AIL Pgp Object. (strings)
"""
@ -78,13 +78,18 @@ class Pgp(AbstractObject):
############################################################################
############################################################################
############################################################################
def exist_correlation(self):
pass
def get_all_subtypes():
return ['key', 'mail', 'name']
############################################################################
############################################################################
def get_all_pgps():
pgps = {}
for subtype in get_all_subtypes():
pgps[subtype] = get_all_pgps_by_subtype(subtype)
return pgps
def get_all_pgps_by_subtype(subtype):
return get_all_id('pgp', subtype)

View file

@ -5,6 +5,7 @@ import os
import sys
from io import BytesIO
from flask import url_for
sys.path.append(os.environ['AIL_BIN'])
from lib.ConfigLoader import ConfigLoader
@ -71,6 +72,15 @@ class Screenshot(AbstractObject):
obj_attr.add_tag(tag)
return obj
def get_meta(self, options=set()):
meta = {}
meta['id'] = self.id
meta['img'] = get_screenshot_rel_path(self.id) ######### # TODO: Rename ME ??????
meta['tags'] = self.get_tags()
# TODO: ADD IN ABSTRACT CLASS
#meta['is_tags_safe'] = Tag.is_tags_safe(meta['tags']) ################## # TODO: ADD IN ABSTRACT CLASS
return meta
############################################################################
############################################################################
############################################################################
@ -81,6 +91,26 @@ class Screenshot(AbstractObject):
############################################################################
############################################################################
def get_screenshot_dir():
return SCREENSHOT_FOLDER
# get screenshot relative path
def get_screenshot_rel_path(sha256_str, add_extension=False):
screenshot_path = os.path.join(sha256_str[0:2], sha256_str[2:4], sha256_str[4:6], sha256_str[6:8], sha256_str[8:10], sha256_str[10:12], sha256_str[12:])
if add_extension:
screenshot_path = f'{screenshot_path}.png'
return screenshot_path
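Worked example of the path layout (the sha256 below is just a sample value): the first six byte pairs become nested directories, the remainder plus '.png' is the filename.
sha256_str = 'a7ffc6f8bf1ed76651c14756a061d662f580ff4de43b49fa82d80a4b80f8434a'
get_screenshot_rel_path(sha256_str, add_extension=True)
# -> 'a7/ff/c6/f8/bf/1e/d76651c14756a061d662f580ff4de43b49fa82d80a4b80f8434a.png'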
def get_all_screenshots():
screenshots = []
screenshot_dir = os.path.join(os.environ['AIL_HOME'], SCREENSHOT_FOLDER)
for root, dirs, files in os.walk(screenshot_dir):
for file in files:
screenshot_path = f'{root}{file}'
screenshot_id = screenshot_path.replace(SCREENSHOT_FOLDER, '').replace('/', '')[:-4]
screenshots.append(screenshot_id)
return screenshots
#if __name__ == '__main__':

View file

@ -13,7 +13,7 @@ from pymisp import MISPObject
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
from abstract_subtype_object import AbstractSubtypeObject
from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id
config_loader = ConfigLoader.ConfigLoader()
@ -82,16 +82,20 @@ class Username(AbstractSubtypeObject):
obj_attr.add_tag(tag)
return obj
############################################################################
############################################################################
############################################################################
def exist_correlation(self):
pass
def get_all_subtypes():
return ['telegram', 'twitter', 'jabber']
############################################################################
############################################################################
def get_all_usernames():
users = {}
for subtype in get_all_subtypes():
users[subtype] = get_all_usernames_by_subtype(subtype)
return users
def get_all_usernames_by_subtype(subtype):
return get_all_id('username', subtype)

View file

@ -18,6 +18,7 @@ sys.path.append(os.environ['AIL_BIN'])
##################################
from packages import Tag
from lib import Duplicate
from lib.correlations_engine import get_correlations, add_obj_correlation, delete_obj_correlation, exists_obj_correlation, is_obj_correlated
from lib.Investigations import is_object_investigated, get_obj_investigations, delete_obj_investigations
from lib.Tracker import is_obj_tracked, get_obj_all_trackers, delete_obj_trackers
@ -156,13 +157,35 @@ class AbstractObject(ABC):
def get_misp_object(self):
pass
# # TODO:
# @abstractmethod
# def get_correlations(self, message):
# """
# Get object correlations
# """
# pass
def get_correlations(self):
"""
Get object correlations
"""
return get_correlations(self.type, self.subtype, self.id)
def add_correlation(self, type2, subtype2, id2):
"""
Add object correlation
"""
add_obj_correlation(self.type, self.subtype, self.id, type2, subtype2, id2)
def exists_correlation(self, type2):
"""
Check if an object is correlated
"""
return exists_obj_correlation(self.type, self.subtype, self.id, type2)
def is_correlated(self, type2, subtype2, id2):
"""
Check if an object is correlated by another object
"""
return is_obj_correlated(self.type, self.subtype, self.id, type2, subtype2, id2)
def delete_correlation(self, type2, subtype2, id2):
"""
Get object correlations
"""
delete_obj_correlation(self.type, self.subtype, self.id, type2, subtype2, id2)
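A short usage sketch, assuming a concrete object instance (item_id and domain are placeholders); the exact shape of get_correlations() depends on lib.correlations_engine.
# Link a decoded file to the item it came from, then to the item's domain.
decoded.add_correlation('item', '', item_id)
if not decoded.is_correlated('domain', '', domain):
    decoded.add_correlation('domain', '', domain)
correlations = decoded.get_correlations()   # per-type view of everything correlated with this object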
# # TODO: get favicon

View file

@ -18,6 +18,7 @@ sys.path.append(os.environ['AIL_BIN'])
##################################
from lib.objects.abstract_object import AbstractObject
from lib.ConfigLoader import ConfigLoader
from lib.item_basic import is_crawled, get_item_domain
# LOAD CONFIG
config_loader = ConfigLoader()
@ -43,7 +44,6 @@ class AbstractSubtypeObject(AbstractObject):
self.type = obj_type
self.subtype = subtype
# # TODO: # FIXME: REMOVE R_INT ????????????????????????????????????????????????????????????????????
def get_first_seen(self, r_int=False):
first_seen = r_metadata.hget(f'{self.type}_metadata_{self.subtype}:{self.id}', 'first_seen')
if r_int:
@ -54,7 +54,6 @@ class AbstractSubtypeObject(AbstractObject):
else:
return first_seen
# # TODO: # FIXME: REMOVE R_INT ????????????????????????????????????????????????????????????????????
def get_last_seen(self, r_int=False):
last_seen = r_metadata.hget(f'{self.type}_metadata_{self.subtype}:{self.id}', 'last_seen')
if r_int:
@ -113,49 +112,38 @@ class AbstractSubtypeObject(AbstractObject):
if date > last_seen:
self.set_last_seen(date)
#
# HANDLE Others objects ????
#
# NEW field => first record(last record)
# by subtype ??????
# => data Retention + efficicent search
#
#
def add(self, date, item_id):
self.update_correlation_daterange()
self.update_daterange(date)
# daily
r_metadata.hincrby(f'{self.type}:{self.subtype}:{date}', self.id, 1)
# all type
# all subtypes
r_metadata.zincrby(f'{self.type}_all:{self.subtype}', self.id, 1)
#######################################################################
#######################################################################
# REPLACE WITH CORRELATION ?????
# global set
r_serv_metadata.sadd(f'set_{self.type}_{self.subtype}:{self.id}', item_id)
## object_metadata
# item
r_serv_metadata.sadd(f'item_{self.type}_{self.subtype}:{item_id}', self.id)
# new correlation
#
# How to filter by correlation type ????
#
# f'correlation:obj:{self.type}:{self.subtype}:{self.id}' -> '{obj_type}:{obj_subtype}:{obj_id}'
# f'correlation:obj:{self.type}:{self.subtype}:{obj_type}:{self.id}' -> '{obj_subtype}:{obj_id}'
#
#
#
#
#
#
#
#
# Correlations
self.add_correlation('item', '', item_id)
# domain
if is_crawled(item_id):
domain = get_item_domain(item_id)
self.add_correlation('domain', '', domain)
# # domain
# if item_basic.is_crawled(item_id):
# domain = item_basic.get_item_domain(item_id)
# self.save_domain_correlation(domain, subtype, obj_id)
# TODO:ADD objects + Stats
def create(self, first_seen, last_seen):
pass
self.set_first_seen(first_seen)
self.set_last_seen(last_seen)
def _delete(self):
@ -168,16 +156,7 @@ class AbstractSubtypeObject(AbstractObject):
# get_metadata
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
def get_all_id(obj_type, subtype):
return r_metadata.zrange(f'{obj_type}_all:{subtype}', 0, -1)
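Since every add() call zincrby's the f'{obj_type}_all:{subtype}' sorted set, listing all ids of a subtype is a plain zrange; a small sketch:
# Sketch: enumerate every telegram username recorded so far (may be empty on a fresh instance).
for username_id in get_all_id('username', 'telegram'):
    print(username_id)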

View file

@ -73,7 +73,7 @@ class Credential(AbstractModule):
# Database
config_loader = ConfigLoader.ConfigLoader()
self.server_cred = config_loader.get_redis_conn("ARDB_TermCred")
#self.server_cred = config_loader.get_redis_conn("ARDB_TermCred")
self.server_statistics = config_loader.get_redis_conn("ARDB_Statistics")
# Config values
@ -172,32 +172,33 @@ class Credential(AbstractModule):
self.redis_logger.info(to_print)
print(f'found {nb_cred} credentials')
# For searching credential in termFreq
for cred in all_credentials:
cred = cred.split('@')[0] #Split to ignore mail address
# unique number attached to unique path
uniq_num_path = self.server_cred.incr(Credential.REDIS_KEY_NUM_PATH)
self.server_cred.hmset(Credential.REDIS_KEY_ALL_PATH_SET, {item.get_id(): uniq_num_path})
self.server_cred.hmset(Credential.REDIS_KEY_ALL_PATH_SET_REV, {uniq_num_path: item.get_id()})
# unique number attached to unique username
uniq_num_cred = self.server_cred.hget(Credential.REDIS_KEY_ALL_CRED_SET, cred)
if uniq_num_cred is None:
# cred do not exist, create new entries
uniq_num_cred = self.server_cred.incr(Credential.REDIS_KEY_NUM_USERNAME)
self.server_cred.hmset(Credential.REDIS_KEY_ALL_CRED_SET, {cred: uniq_num_cred})
self.server_cred.hmset(Credential.REDIS_KEY_ALL_CRED_SET_REV, {uniq_num_cred: cred})
# Add the mapping between the credential and the path
self.server_cred.sadd(Credential.REDIS_KEY_MAP_CRED_TO_PATH+'_'+str(uniq_num_cred), uniq_num_path)
# Split credentials on capital letters, numbers, dots and so on
# Add the split to redis, each split point towards its initial credential unique number
splitedCred = re.findall(Credential.REGEX_CRED, cred)
for partCred in splitedCred:
if len(partCred) > self.minimumLengthThreshold:
self.server_cred.sadd(partCred, uniq_num_cred)
# # TODO: # FIXME: TEMP DESABLE
# # For searching credential in termFreq
# for cred in all_credentials:
# cred = cred.split('@')[0] #Split to ignore mail address
#
# # unique number attached to unique path
# uniq_num_path = self.server_cred.incr(Credential.REDIS_KEY_NUM_PATH)
# self.server_cred.hmset(Credential.REDIS_KEY_ALL_PATH_SET, {item.get_id(): uniq_num_path})
# self.server_cred.hmset(Credential.REDIS_KEY_ALL_PATH_SET_REV, {uniq_num_path: item.get_id()})
#
# # unique number attached to unique username
# uniq_num_cred = self.server_cred.hget(Credential.REDIS_KEY_ALL_CRED_SET, cred)
# if uniq_num_cred is None:
# # cred do not exist, create new entries
# uniq_num_cred = self.server_cred.incr(Credential.REDIS_KEY_NUM_USERNAME)
# self.server_cred.hmset(Credential.REDIS_KEY_ALL_CRED_SET, {cred: uniq_num_cred})
# self.server_cred.hmset(Credential.REDIS_KEY_ALL_CRED_SET_REV, {uniq_num_cred: cred})
#
# # Add the mapping between the credential and the path
# self.server_cred.sadd(Credential.REDIS_KEY_MAP_CRED_TO_PATH+'_'+str(uniq_num_cred), uniq_num_path)
#
# # Split credentials on capital letters, numbers, dots and so on
# # Add the split to redis, each split point towards its initial credential unique number
# splitedCred = re.findall(Credential.REGEX_CRED, cred)
# for partCred in splitedCred:
# if len(partCred) > self.minimumLengthThreshold:
# self.server_cred.sadd(partCred, uniq_num_cred)
if __name__ == '__main__':

View file

@ -11,7 +11,6 @@
##################################
import time
import os
import redis
import base64
from hashlib import sha1
import magic
@ -26,10 +25,19 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages
##################################
from modules.abstract_module import AbstractModule
from Helper import Process
from packages import Item
from lib import ConfigLoader
from lib import Decoded
from lib.ConfigLoader import ConfigLoader
from lib.objects.Items import Item
from lib.objects import Decodeds
config_loader = ConfigLoader()
serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
hex_max_execution_time = config_loader.get_config_int("Hex", "max_execution_time")
binary_max_execution_time = config_loader.get_config_int("Binary", "max_execution_time")
base64_max_execution_time = config_loader.get_config_int("Base64", "max_execution_time")
config_loader = None
#####################################################
#####################################################
# # TODO: use regex_helper
class TimeoutException(Exception):
@ -38,9 +46,15 @@ class TimeoutException(Exception):
def timeout_handler(signum, frame):
raise TimeoutException
# # TODO: # FIXME: Remove signal -> replace with regex_helper
signal.signal(signal.SIGALRM, timeout_handler)
#####################################################
####################################################
class Decoder(AbstractModule):
"""
Decoder module for AIL framework
@ -65,8 +79,6 @@ class Decoder(AbstractModule):
def __init__(self):
super(Decoder, self).__init__()
serv_metadata = ConfigLoader.ConfigLoader().get_redis_conn("ARDB_Metadata")
regex_binary = '[0-1]{40,}'
#regex_hex = '(0[xX])?[A-Fa-f0-9]{40,}'
regex_hex = '[A-Fa-f0-9]{40,}'
@ -79,10 +91,6 @@ class Decoder(AbstractModule):
# map decoder function
self.decoder_function = {'binary':self.binary_decoder,'hexadecimal':self.hex_decoder, 'base64':self.base64_decoder}
hex_max_execution_time = self.process.config.getint("Hex", "max_execution_time")
binary_max_execution_time = self.process.config.getint("Binary", "max_execution_time")
base64_max_execution_time = self.process.config.getint("Base64", "max_execution_time")
# list all decoder with regex,
decoder_binary = {'name': 'binary', 'regex': cmp_regex_binary, 'encoded_min_size': 300, 'max_execution_time': binary_max_execution_time}
decoder_hexadecimal = {'name': 'hexadecimal', 'regex': cmp_regex_hex, 'encoded_min_size': 300, 'max_execution_time': hex_max_execution_time}
@ -102,11 +110,9 @@ class Decoder(AbstractModule):
def compute(self, message):
obj_id = Item.get_item_id(message)
# Extract info from message
content = Item.get_item_content(obj_id)
date = Item.get_item_date(obj_id)
item = Item(message)
content = item.get_content()
date = item.get_date()
for decoder in self.decoder_order: # add threshold and size limit
# max execution time on regex
@ -117,16 +123,16 @@ class Decoder(AbstractModule):
except TimeoutException:
encoded_list = []
self.process.incr_module_timeout_statistic() # add encoder type
self.redis_logger.debug(f"{obj_id} processing timeout")
self.redis_logger.debug(f"{item.id} processing timeout")
continue
else:
signal.alarm(0)
if(len(encoded_list) > 0):
content = self.decode_string(content, message, date, encoded_list, decoder['name'], decoder['encoded_min_size'])
content = self.decode_string(content, item.id, date, encoded_list, decoder['name'], decoder['encoded_min_size'])
def decode_string(self, content, item_id, item_date, encoded_list, decoder_name, encoded_min_size):
def decode_string(self, content, item_id, date, encoded_list, decoder_name, encoded_min_size):
find = False
for encoded in encoded_list:
if len(encoded) >= encoded_min_size:
@ -134,16 +140,18 @@ class Decoder(AbstractModule):
find = True
sha1_string = sha1(decoded_file).hexdigest()
mimetype = Decoded.get_file_mimetype(decoded_file)
decoded = Decoded(sha1_string)
mimetype = decoded.guess_mimetype(decoded_file)
if not mimetype:
self.redis_logger.debug(item_id)
self.redis_logger.debug(sha1_string)
print(item_id)
print(sha1_string)
raise Exception('Invalid mimetype')
Decoded.save_decoded_file_content(sha1_string, decoded_file, item_date, mimetype=mimetype)
Decoded.save_item_relationship(sha1_string, item_id)
Decoded.create_decoder_matadata(sha1_string, item_id, decoder_name)
raise Exception(f'Invalid mimetype: {sha1_string} {item_id}')
decoded.create(decoded_file, date, mimetype=mimetype)
decoded.add(decoder_name, date, item_id, mimetype)
# save_item_relationship(sha1_string, item_id)  # legacy call, correlation is now handled by decoded.add()
#remove encoded from item content
content = content.replace(encoded, '', 1)
@ -151,24 +159,18 @@ class Decoder(AbstractModule):
self.redis_logger.debug(f'{item_id} : {decoder_name} - {mimetype}')
print(f'{item_id} : {decoder_name} - {mimetype}')
if(find):
self.set_out_item(decoder_name, item_id)
self.redis_logger.info(f'{decoder_name} decoded')
print(f'{decoder_name} decoded')
# Send to Tags
msg = f'infoleak:automatic-detection="{decoder_name}";{item_id}'
self.send_message_to_queue(msg, 'Tags')
# perf: remove encoded from item content
return content
def set_out_item(self, decoder_name, item_id):
self.redis_logger.info(f'{decoder_name} decoded')
print(f'{decoder_name} decoded')
# Send to duplicate
self.send_message_to_queue(item_id, 'Duplicate')
# Send to Tags
msg = f'infoleak:automatic-detection="{decoder_name}";{item_id}'
self.send_message_to_queue(msg, 'Tags')
if __name__ == '__main__':
# # TODO: TEST ME
module = Decoder()
module.run()

View file

@ -50,6 +50,8 @@ class Telegram(AbstractModule):
# extract telegram links
telegram_links = self.regex_findall(self.re_telegram_link, item.get_id(), item_content)
for telegram_link_tuple in telegram_links:
print(telegram_link_tuple)
print(telegram_link_tuple[2:-2].split("', '", 1))
base_url, url_path = telegram_link_tuple[2:-2].split("', '", 1)
dict_url = telegram.get_data_from_telegram_url(base_url, url_path)
if dict_url.get('username'):
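The two debug prints make the tuple handling visible: regex_findall appears to hand back each two-group match as a stringified tuple, hence the slicing and single split. A minimal illustration with a made-up match:
telegram_link_tuple = "('https://t.me', '/joinchat/AAAAAAAA')"   # hypothetical match
base_url, url_path = telegram_link_tuple[2:-2].split("', '", 1)
# base_url -> 'https://t.me', url_path -> '/joinchat/AAAAAAAA'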

View file

@ -153,6 +153,12 @@ def sanitise_date_range(date_from, date_to, separator='', date_type='str'):
date_to = date_from
if date_type=='str':
# remove separators
if len(date_from) == 10:
date_from = date_from[0:4] + date_from[5:7] + date_from[8:10]
if len(date_to) == 10:
date_to = date_to[0:4] + date_to[5:7] + date_to[8:10]
if not validate_str_date(date_from, separator=separator):
date_from = datetime.date.today().strftime("%Y%m%d")
if not validate_str_date(date_to, separator=separator):
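The new length check quietly accepts ISO-style dates before validation; a quick sketch of the normalisation:
date_from = '2022-08-19'
if len(date_from) == 10:
    date_from = date_from[0:4] + date_from[5:7] + date_from[8:10]
assert date_from == '20220819'   # an already-compact '20220819' would be left untouched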

View file

@ -18,6 +18,7 @@ from pymispgalaxies import Galaxies, Clusters
config_loader = ConfigLoader.ConfigLoader()
r_serv_tags = config_loader.get_redis_conn("ARDB_Tags")
#r_serv_tags = config_loader.get_db_conn("Kvrocks_Tags")
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
config_loader = None
@ -89,7 +90,7 @@ def get_all_taxonomies_tags(): # # TODO: add + REMOVE + Update
def get_all_galaxies_tags(): # # TODO: add + REMOVE + Update
return r_serv_tags.smembers('active_galaxies_tags')
def get_all_custom_tags():
def get_all_custom_tags(): # # TODO: add + REMOVE + Update
return r_serv_tags.smembers('tags:custom')
def get_taxonomies_enabled_tags(r_list=False):

View file

@ -25,7 +25,7 @@ import Date
import Item
config_loader = ConfigLoader.ConfigLoader()
r_serv_term = config_loader.get_redis_conn("Kvrocks_DB")
r_serv_term = config_loader.get_db_conn("Kvrocks_DB")
config_loader = None
email_regex = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}'

View file

@ -52,6 +52,8 @@ from blueprints.hunters import hunters
from blueprints.old_endpoints import old_endpoints
from blueprints.ail_2_ail_sync import ail_2_ail_sync
from blueprints.settings_b import settings_b
from blueprints.objects_decoded import objects_decoded
from blueprints.objects_range import objects_range
Flask_dir = os.environ['AIL_FLASK']
@ -113,6 +115,8 @@ app.register_blueprint(hunters, url_prefix=baseUrl)
app.register_blueprint(old_endpoints, url_prefix=baseUrl)
app.register_blueprint(ail_2_ail_sync, url_prefix=baseUrl)
app.register_blueprint(settings_b, url_prefix=baseUrl)
app.register_blueprint(objects_decoded, url_prefix=baseUrl)
app.register_blueprint(objects_range, url_prefix=baseUrl)
# ========= =========#
# ========= Cookie name ========

View file

@ -19,6 +19,17 @@ import Flask_config
# Import Role_Manager
from Role_Manager import login_admin, login_analyst, login_read_only
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.objects import ail_objects
################################################################################
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
import Correlate_object
import Domain
@ -244,22 +255,22 @@ def get_description():
@login_required
@login_read_only
def graph_node_json():
correlation_id = request.args.get('correlation_id')
type_id = request.args.get('type_id')
object_type = request.args.get('object_type')
obj_id = request.args.get('correlation_id') #######################3
subtype = request.args.get('type_id') #######################
obj_type = request.args.get('object_type') #######################
max_nodes = sanitise_nb_max_nodes(request.args.get('max_nodes'))
correlation_names = sanitise_correlation_names(request.args.get('correlation_names'))
correlation_objects = sanitise_correlation_objects(request.args.get('correlation_objects'))
# # TODO: remove me, rename screenshot to image
if object_type == 'image':
object_type == 'screenshot'
# # TODO: remove me, rename screenshot
if obj_type == 'image':
obj_type = 'screenshot'
mode = sanitise_graph_mode(request.args.get('mode'))
res = Correlate_object.get_graph_node_object_correlation(object_type, correlation_id, mode, correlation_names, correlation_objects, requested_correl_type=type_id, max_nodes=max_nodes)
return jsonify(res)
filter_types = correlation_names + correlation_objects
json_graph = ail_objects.get_correlations_graph_node(obj_type, subtype, obj_id, filter_types=filter_types, max_nodes=max_nodes, level=2, flask_context=True)
#json_graph = Correlate_object.get_graph_node_object_correlation(obj_type, obj_id, 'union', correlation_names, correlation_objects, requested_correl_type=subtype, max_nodes=max_nodes)
return jsonify(json_graph)
@correlation.route('/correlation/subtype_search', methods=['POST'])
@login_required

View file

@ -0,0 +1,98 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
'''
Blueprint Flask: crawler splash endpoints: dashboard, onion crawler ...
'''
import os
import sys
import json
from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response, abort, send_file
from flask_login import login_required, current_user
# Import Role_Manager
from Role_Manager import login_admin, login_analyst, login_read_only
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.objects import Decodeds
# ============ BLUEPRINT ============
objects_decoded = Blueprint('objects_decoded', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/objects/decoded'))
# ============ VARIABLES ============
# ============ FUNCTIONS ============
# ============= ROUTES ==============
# # TODO: # FIXME: CHECK IF OBJ EXIST
@objects_decoded.route("/object/decoded/download") #completely shows the paste in a new tab
@login_required
@login_read_only
def decoded_download():
obj_id = request.args.get('id')
# # TODO: sanitize hash
obj_id = obj_id.split('/')[0]
decoded = Decodeds.Decoded(obj_id)
if decoded.exists():
filename = f'{decoded.id}.zip'
zip_content = decoded.get_zip_content()
return send_file(zip_content, attachment_filename=filename, as_attachment=True)
else:
abort(404)
@objects_decoded.route("/object/decoded/send_to_vt") #completely shows the paste in a new tab
@login_required
@login_read_only
def send_to_vt():
obj_id = request.args.get('id')
# # TODO: sanitize hash
obj_id = obj_id.split('/')[0]
decoded = Decodeds.Decoded(obj_id)
if decoded.exists():
decoded.send_to_vt()
return jsonify(decoded.get_meta_vt())
else:
abort(404)
@objects_decoded.route("/object/decoded/refresh_vt_report") #completely shows the paste in a new tab
@login_required
@login_read_only
def refresh_vt_report():
obj_id = request.args.get('id')
# # TODO: sanitize hash
obj_id = obj_id.split('/')[0]
decoded = Decodeds.Decoded(obj_id)
if decoded.exists():
report = decoded.refresh_vt_report()
return jsonify(hash=decoded.id, report=report)
else:
abort(404)
@objects_decoded.route("/object/decoded/decoder_pie_chart_json", methods=['GET'])
@login_required
@login_read_only
def decoder_pie_chart_json():
date_from = request.args.get('date_from')
date_to = request.args.get('date_to')
mimetype = request.args.get('type')
return jsonify(Decodeds.api_pie_chart_decoder_json(date_from, date_to, mimetype))
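A hedged client-side sketch of calling this endpoint; the host, port and empty baseUrl prefix are assumptions about a default local AIL instance, and the session must already be authenticated.
import requests
resp = requests.get('https://127.0.0.1:7000/object/decoded/decoder_pie_chart_json',
                    params={'date_from': '20220801', 'date_to': '20220819', 'type': 'All types'},
                    verify=False)   # self-signed certificate on a default install
print(resp.json())                  # [{'name': 'base64', 'value': ...}, ...]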
#####################################################

View file

@ -12,7 +12,6 @@ import json
from Date import Date
from io import BytesIO
import zipfile
from hashlib import sha256
@ -208,23 +207,6 @@ def get_all_keys_id_from_item(correlation_type, item_path):
all_keys_id_dump.add( (key_id, type_id) )
return all_keys_id_dump
def one():
return 1
'''
def decode_base58(bc, length):
n = 0
for char in bc:
n = n * 58 + digits58.index(char)
return n.to_bytes(length, 'big')
def check_bc(bc):
try:
bcbytes = decode_base58(bc, 25)
return bcbytes[-4:] == sha256(sha256(bcbytes[:-4]).digest()).digest()[:4]
except Exception:
return False
'''
def get_correlation_type_search_endpoint(correlation_type):
if correlation_type == 'pgpdump':
@ -262,37 +244,6 @@ def get_range_type_json_endpoint(correlation_type):
endpoint = 'hashDecoded.hashDecoded_page'
return endpoint
def get_graph_node_json_endpoint(correlation_type):
if correlation_type == 'pgpdump':
endpoint = 'hashDecoded.pgpdump_graph_node_json'
elif correlation_type == 'cryptocurrency':
endpoint = 'hashDecoded.cryptocurrency_graph_node_json'
elif correlation_type == 'username':
endpoint = 'hashDecoded.username_graph_node_json'
else:
endpoint = 'hashDecoded.hashDecoded_page'
return endpoint
def get_graph_line_json_endpoint(correlation_type):
if correlation_type == 'pgpdump':
endpoint = 'hashDecoded.pgpdump_graph_line_json'
elif correlation_type == 'cryptocurrency':
endpoint = 'hashDecoded.cryptocurrency_graph_line_json'
elif correlation_type == 'username':
endpoint = 'hashDecoded.username_graph_line_json'
else:
endpoint = 'hashDecoded.hashDecoded_page'
return endpoint
def get_font_family(correlation_type):
if correlation_type == 'pgpdump':
font = 'fa'
elif correlation_type == 'cryptocurrency':
font = 'fab'
else:
font = 'fa'
return font
############ CORE CORRELATION ############
def main_correlation_page(correlation_type, type_id, date_from, date_to, show_decoded_files):
@ -392,27 +343,7 @@ def main_correlation_page(correlation_type, type_id, date_from, date_to, show_de
date_from=date_from, date_to=date_to,
show_decoded_files=show_decoded_files)
# def show_correlation(correlation_type, type_id, key_id):
# if is_valid_type_id(correlation_type, type_id):
# key_id_metadata = get_key_id_metadata(correlation_type, type_id, key_id)
# if key_id_metadata:
#
# num_day_sparkline = 6
# date_range_sparkline = get_date_range(num_day_sparkline)
#
# sparkline_values = list_sparkline_type_id_values(date_range_sparkline, correlation_type, type_id, key_id)
# return render_template('showCorrelation.html', key_id=key_id, type_id=type_id,
# correlation_type=correlation_type,
# graph_node_endpoint=get_graph_node_json_endpoint(correlation_type),
# graph_line_endpoint=get_graph_line_json_endpoint(correlation_type),
# font_family=get_font_family(correlation_type),
# key_id_metadata=key_id_metadata,
# type_icon=get_icon(correlation_type, type_id),
# sparkline_values=sparkline_values)
# else:
# return '404'
# else:
# return 'error'
def correlation_type_range_type_json(correlation_type, date_from, date_to):
date_range = []
@ -463,46 +394,6 @@ def correlation_type_range_type_json(correlation_type, date_from, date_to):
return jsonify(range_type)
def correlation_graph_node_json(correlation_type, type_id, key_id):
if key_id is not None and is_valid_type_id(correlation_type, type_id):
nodes_set_dump = set()
nodes_set_paste = set()
links_set = set()
key_id_metadata = get_key_id_metadata(correlation_type, type_id, key_id)
nodes_set_dump.add((key_id, 1, type_id, key_id_metadata['first_seen'], key_id_metadata['last_seen'], key_id_metadata['nb_seen']))
#get related paste
l_pastes = r_serv_metadata.smembers('set_{}_{}:{}'.format(correlation_type, type_id, key_id))
for paste in l_pastes:
nodes_set_paste.add((paste, 2))
links_set.add((key_id, paste))
for key_id_with_type_id in get_all_keys_id_from_item(correlation_type, paste):
new_key_id, typ_id = key_id_with_type_id
if new_key_id != key_id:
key_id_metadata = get_key_id_metadata(correlation_type, typ_id, new_key_id)
nodes_set_dump.add((new_key_id, 3, typ_id, key_id_metadata['first_seen'], key_id_metadata['last_seen'], key_id_metadata['nb_seen']))
links_set.add((new_key_id, paste))
nodes = []
for node in nodes_set_dump:
nodes.append({"id": node[0], "group": node[1], "first_seen": node[3], "last_seen": node[4], "nb_seen_in_paste": node[5], 'icon': get_icon_text(correlation_type, node[2]),"url": url_for(get_show_key_id_endpoint(correlation_type), type_id=node[2], key_id=node[0]), 'hash': True})
for node in nodes_set_paste:
nodes.append({"id": node[0], "group": node[1],"url": url_for('objects_item.showItem', id=node[0]), 'hash': False})
links = []
for link in links_set:
links.append({"source": link[0], "target": link[1]})
json = {"nodes": nodes, "links": links}
return jsonify(json)
else:
return jsonify({})
# ============= ROUTES ==============
@hashDecoded.route("/hashDecoded/all_hash_search", methods=['POST'])
@login_required
@ -634,113 +525,8 @@ def hashDecoded_page():
encoding=encoding, all_encoding=all_encoding, date_from=date_from, date_to=date_to, show_decoded_files=show_decoded_files)
@hashDecoded.route('/hashDecoded/hash_by_type')
@login_required
@login_read_only
def hash_by_type():
type = request.args.get('type')
type = 'text/plain'
return render_template('hash_type.html',type = type)
@hashDecoded.route('/hashDecoded/hash_hash')
@login_required
@login_read_only
def hash_hash():
hash = request.args.get('hash')
return render_template('hash_hash.html')
#
# @hashDecoded.route('/hashDecoded/showHash')
# @login_required
# @login_analyst
# def showHash():
# hash = request.args.get('hash')
# #hash = 'e02055d3efaad5d656345f6a8b1b6be4fe8cb5ea'
#
# # TODO FIXME show error
# if hash is None:
# return hashDecoded_page()
#
# estimated_type = r_serv_metadata.hget('metadata_hash:'+hash, 'estimated_type')
# # hash not found
# # TODO FIXME show error
# if estimated_type is None:
# return hashDecoded_page()
#
# else:
# file_icon = get_file_icon(estimated_type)
# size = r_serv_metadata.hget('metadata_hash:'+hash, 'size')
# first_seen = r_serv_metadata.hget('metadata_hash:'+hash, 'first_seen')
# last_seen = r_serv_metadata.hget('metadata_hash:'+hash, 'last_seen')
# nb_seen_in_all_pastes = r_serv_metadata.hget('metadata_hash:'+hash, 'nb_seen_in_all_pastes')
#
# # get all encoding for this hash
# list_hash_decoder = []
# list_decoder = r_serv_metadata.smembers('all_decoder')
# for decoder in list_decoder:
# encoding = r_serv_metadata.hget('metadata_hash:'+hash, decoder+'_decoder')
# if encoding is not None:
# list_hash_decoder.append({'encoding': decoder, 'nb_seen': encoding})
#
# num_day_type = 6
# date_range_sparkline = get_date_range(num_day_type)
# sparkline_values = list_sparkline_values(date_range_sparkline, hash)
#
# if r_serv_metadata.hexists('metadata_hash:'+hash, 'vt_link'):
# b64_vt = True
# b64_vt_link = r_serv_metadata.hget('metadata_hash:'+hash, 'vt_link')
# b64_vt_report = r_serv_metadata.hget('metadata_hash:'+hash, 'vt_report')
# else:
# b64_vt = False
# b64_vt_link = ''
# b64_vt_report = r_serv_metadata.hget('metadata_hash:'+hash, 'vt_report')
# # hash never refreshed
# if b64_vt_report is None:
# b64_vt_report = ''
#
# return render_template('showHash.html', hash=hash, vt_enabled=vt_enabled, b64_vt=b64_vt, b64_vt_link=b64_vt_link,
# b64_vt_report=b64_vt_report,
# size=size, estimated_type=estimated_type, file_icon=file_icon,
# first_seen=first_seen, list_hash_decoder=list_hash_decoder,
# last_seen=last_seen, nb_seen_in_all_pastes=nb_seen_in_all_pastes, sparkline_values=sparkline_values)
@hashDecoded.route('/hashDecoded/downloadHash')
@login_required
@login_read_only
def downloadHash():
hash = request.args.get('hash')
# sanitize hash
hash = hash.split('/')[0]
# hash exist
if r_serv_metadata.hget('metadata_hash:'+hash, 'estimated_type') is not None:
b64_full_path = Decoded.get_decoded_filepath(hash)
hash_content = ''
try:
with open(b64_full_path, 'rb') as f:
hash_content = f.read()
# zip buffer
result = BytesIO()
temp = BytesIO()
temp.write(hash_content)
with zipfile.ZipFile(result, "w") as zf:
#zf.setpassword(b"infected")
zf.writestr( hash, temp.getvalue())
filename = hash + '.zip'
result.seek(0)
return send_file(result, attachment_filename=filename, as_attachment=True)
except Exception as e:
print(e)
return 'Server Error'
else:
return 'hash: ' + hash + " don't exist"
@hashDecoded.route('/hashDecoded/hash_by_type_json')
@ -778,62 +564,6 @@ def hash_by_type_json():
return jsonify()
@hashDecoded.route('/hashDecoded/decoder_type_json')
@login_required
@login_read_only
def decoder_type_json():
date_from = request.args.get('date_from')
date_to = request.args.get('date_to')
typ = request.args.get('type')
if typ == 'All types':
typ = None
# verify file type input
if typ is not None:
#retrieve + char
typ = typ.replace(' ', '+')
if typ not in r_serv_metadata.smembers('hash_all_type'):
typ = None
all_decoder = r_serv_metadata.smembers('all_decoder')
# sort DESC decoder for color
all_decoder = sorted(all_decoder)
date_range = []
if date_from is not None and date_to is not None:
#change format
try:
if len(date_from) != 8:
date_from = date_from[0:4] + date_from[5:7] + date_from[8:10]
date_to = date_to[0:4] + date_to[5:7] + date_to[8:10]
date_range = substract_date(date_from, date_to)
except:
pass
if not date_range:
date_range.append(datetime.date.today().strftime("%Y%m%d"))
nb_decoded = {}
for decoder in all_decoder:
nb_decoded[decoder] = 0
for date in date_range:
for decoder in all_decoder:
if typ is None:
nb_decod = r_serv_metadata.get(decoder+'_decoded:'+date)
else:
nb_decod = r_serv_metadata.zscore(decoder+'_type:'+typ, date)
if nb_decod is not None:
nb_decoded[decoder] = nb_decoded[decoder] + int(nb_decod)
to_json = []
for decoder in all_decoder:
to_json.append({'name': decoder, 'value': nb_decoded[decoder]})
return jsonify(to_json)
@hashDecoded.route('/hashDecoded/top5_type_json')
@login_required
@ -881,7 +611,7 @@ def top5_type_json():
for date in date_range:
for typ in all_type:
for decoder in all_decoder:
nb_decoded = r_serv_metadata.zscore('{}_type:{}'.format(decoder, typ), date)
nb_decoded = r_serv_metadata.zscore(f'{decoder}_type:{typ}', date) # daily_type key:date mimetype 3
if nb_decoded is not None:
if typ in nb_types_decoded:
nb_types_decoded[typ] = nb_types_decoded[typ] + int(nb_decoded)
@ -1005,145 +735,6 @@ def hash_graph_line_json():
else:
return jsonify()
@hashDecoded.route('/hashDecoded/hash_graph_node_json')
@login_required
@login_read_only
def hash_graph_node_json():
    hash = request.args.get('hash')
    # avoid building the metadata key with a missing hash parameter
    estimated_type = r_serv_metadata.hget('metadata_hash:'+hash, 'estimated_type') if hash is not None else None
    if hash is not None and estimated_type is not None:
nodes_set_hash = set()
nodes_set_paste = set()
links_set = set()
url = hash
first_seen = r_serv_metadata.hget('metadata_hash:'+hash, 'first_seen')
last_seen = r_serv_metadata.hget('metadata_hash:'+hash, 'last_seen')
nb_seen_in_paste = r_serv_metadata.hget('metadata_hash:'+hash, 'nb_seen_in_all_pastes')
size = r_serv_metadata.hget('metadata_hash:'+hash, 'size')
nodes_set_hash.add((hash, 1, first_seen, last_seen, estimated_type, nb_seen_in_paste, size, url))
#get related paste
l_pastes = r_serv_metadata.zrange('nb_seen_hash:'+hash, 0, -1)
for paste in l_pastes:
# dynamic update
if PASTES_FOLDER in paste:
score = r_serv_metadata.zscore('nb_seen_hash:{}'.format(hash), paste)
r_serv_metadata.zrem('nb_seen_hash:{}'.format(hash), paste)
paste = paste.replace(PASTES_FOLDER, '', 1)
r_serv_metadata.zadd('nb_seen_hash:{}'.format(hash), score, paste)
url = paste
#nb_seen_in_this_paste = nb_in_file = int(r_serv_metadata.zscore('nb_seen_hash:'+hash, paste))
nb_hash_in_paste = r_serv_metadata.scard('hash_paste:'+paste)
nodes_set_paste.add((paste, 2,nb_hash_in_paste,url))
links_set.add((hash, paste))
l_hash = r_serv_metadata.smembers('hash_paste:'+paste)
for child_hash in l_hash:
if child_hash != hash:
url = child_hash
first_seen = r_serv_metadata.hget('metadata_hash:'+child_hash, 'first_seen')
last_seen = r_serv_metadata.hget('metadata_hash:'+child_hash, 'last_seen')
nb_seen_in_paste = r_serv_metadata.hget('metadata_hash:'+child_hash, 'nb_seen_in_all_pastes')
size = r_serv_metadata.hget('metadata_hash:'+child_hash, 'size')
estimated_type = r_serv_metadata.hget('metadata_hash:'+child_hash, 'estimated_type')
nodes_set_hash.add((child_hash, 3, first_seen, last_seen, estimated_type, nb_seen_in_paste, size, url))
links_set.add((child_hash, paste))
#l_pastes_child = r_serv_metadata.zrange('nb_seen_hash:'+child_hash, 0, -1)
#for child_paste in l_pastes_child:
nodes = []
for node in nodes_set_hash:
nodes.append({"id": node[0], "group": node[1], "first_seen": node[2], "last_seen": node[3], 'estimated_type': node[4], "nb_seen_in_paste": node[5], "size": node[6], 'icon': get_file_icon_text(node[4]),"url": url_for('hashDecoded.showHash', hash=node[7]), 'hash': True})
for node in nodes_set_paste:
nodes.append({"id": node[0], "group": node[1], "nb_seen_in_paste": node[2],"url": url_for('objects_item.showItem', id=node[3]), 'hash': False})
links = []
for link in links_set:
links.append({"source": link[0], "target": link[1]})
json = {"nodes": nodes, "links": links}
return jsonify(json)
else:
return jsonify({})
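
The JSON built above feeds the D3 force-directed graph; a rough sketch of its shape follows, with placeholder field values that are not taken from this commit.

# hypothetical hash_graph_node_json output
example_graph = {
    'nodes': [
        {'id': '<hash>', 'group': 1, 'first_seen': '20220801', 'last_seen': '20220819',
         'estimated_type': 'application/zip', 'nb_seen_in_paste': '3', 'size': '1024',
         'icon': '...', 'url': '/hashDecoded/showHash?hash=<hash>', 'hash': True},
        {'id': '<item_id>', 'group': 2, 'nb_seen_in_paste': 2,
         'url': '/object/item?id=<item_id>', 'hash': False},
    ],
    'links': [
        {'source': '<hash>', 'target': '<item_id>'},
    ],
}
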
@hashDecoded.route('/hashDecoded/hash_types')
@login_required
@login_read_only
def hash_types():
date_from = 20180701
date_to = 20180706
return render_template('hash_types.html', date_from=date_from, date_to=date_to)
@hashDecoded.route('/hashDecoded/send_file_to_vt_js')
@login_required
@login_analyst
def send_file_to_vt_js():
hash = request.args.get('hash')
b64_full_path = Decoded.get_decoded_filepath(hash)
b64_content = ''
with open(b64_full_path, 'rb') as f:
b64_content = f.read()
files = {'file': (hash, b64_content)}
    response = requests.post('https://www.virustotal.com/vtapi/v2/file/scan', files=files, params={'apikey': vt_auth})
json_response = response.json()
#print(json_response)
vt_link = json_response['permalink'].split('analysis')[0] + 'analysis/'
r_serv_metadata.hset('metadata_hash:'+hash, 'vt_link', vt_link)
vt_report = 'Please Refresh'
r_serv_metadata.hset('metadata_hash:'+hash, 'vt_report', vt_report)
return jsonify({'vt_link': vt_link, 'vt_report': vt_report})
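
The permalink handling above splits the VirusTotal v2 scan response on 'analysis'; a trimmed, hypothetical response shape is sketched below (values illustrative).

# trimmed, hypothetical VT v2 /file/scan response
example_scan_response = {
    'response_code': 1,
    'resource': '<hash>',
    'permalink': 'https://www.virustotal.com/file/<hash>/analysis/1660000000/',
    'verbose_msg': 'Scan request successfully queued',
}
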
@hashDecoded.route('/hashDecoded/update_vt_result')
@login_required
@login_analyst
def update_vt_result():
hash = request.args.get('hash')
params = {'apikey': vt_auth, 'resource': hash}
response = requests.get('https://www.virustotal.com/vtapi/v2/file/report', params=params)
if response.status_code == 200:
json_response = response.json()
response_code = json_response['response_code']
# report exist
if response_code == 1:
total = json_response['total']
positive = json_response['positives']
b64_vt_report = 'Detection {}/{}'.format(positive, total)
        # no report found
        elif response_code == 0:
            b64_vt_report = 'No report found'
        # file in queue
        elif response_code == -2:
            b64_vt_report = 'File in queue'
        # unexpected response code: avoid referencing an undefined report
        else:
            b64_vt_report = 'Unknown VT response'
        r_serv_metadata.hset('metadata_hash:'+hash, 'vt_report', b64_vt_report)
return jsonify(hash=hash, report_vt=b64_vt_report)
elif response.status_code == 403:
Flask_config.vt_enabled = False
print('Virustotal key is incorrect (e.g. for public API not for virustotal intelligence), authentication failed or reaching limits.')
return jsonify()
else:
# TODO FIXME make json response
return jsonify()
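
For context, the fields read from the VirusTotal v2 file report look roughly like the following (trimmed; values illustrative).

# trimmed, hypothetical VT v2 /file/report response
example_report = {
    'response_code': 1,            # 1: report exists, 0: not found, -2: queued
    'resource': '<hash>',
    'positives': 12,
    'total': 70,
    'permalink': 'https://www.virustotal.com/file/<hash>/analysis/1660000000/',
}
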
############################ PGPDump ############################
@hashDecoded.route('/decoded/pgp_by_type_json') ## TODO: REFACTOR
@ -1191,7 +782,7 @@ def pgp_by_type_json():
else:
return jsonify()
############################ Correlation ############################
############################ DateRange ############################
@hashDecoded.route("/correlation/pgpdump", methods=['GET'])
@login_required
@login_read_only
@ -1258,22 +849,10 @@ def all_username_search():
show_decoded_files = request.form.get('show_decoded_files')
return redirect(url_for('hashDecoded.username_page', date_from=date_from, date_to=date_to, type_id=type_id, show_decoded_files=show_decoded_files))
# @hashDecoded.route('/correlation/show_pgpdump')
# @login_required
# @login_analyst
# def show_pgpdump():
# type_id = request.args.get('type_id')
# key_id = request.args.get('key_id')
# return show_correlation('pgpdump', type_id, key_id)
#
#
# @hashDecoded.route('/correlation/show_cryptocurrency')
# @login_required
# @login_analyst
# def show_cryptocurrency():
# type_id = request.args.get('type_id')
# key_id = request.args.get('key_id')
# return show_correlation('cryptocurrency', type_id, key_id)
@hashDecoded.route('/correlation/cryptocurrency_range_type_json')
@login_required
@ -1299,30 +878,16 @@ def username_range_type_json():
date_to = request.args.get('date_to')
return correlation_type_range_type_json('username', date_from, date_to)
@hashDecoded.route('/correlation/pgpdump_graph_node_json')
@login_required
@login_read_only
def pgpdump_graph_node_json():
type_id = request.args.get('type_id')
key_id = request.args.get('key_id')
return correlation_graph_node_json('pgpdump', type_id, key_id)
##########################################################################################
# # TODO: REFACTOR
@hashDecoded.route('/correlation/cryptocurrency_graph_node_json')
@login_required
@login_read_only
def cryptocurrency_graph_node_json():
type_id = request.args.get('type_id')
key_id = request.args.get('key_id')
return correlation_graph_node_json('cryptocurrency', type_id, key_id)
@hashDecoded.route('/correlation/username_graph_node_json')
@login_required
@login_read_only
def username_graph_node_json():
type_id = request.args.get('type_id')
key_id = request.args.get('key_id')
return correlation_graph_node_json('username', type_id, key_id)
# # TODO: REFACTOR
@hashDecoded.route('/correlation/pgpdump_graph_line_json')

View file

@ -253,7 +253,7 @@
chart.stackBarChart = barchart_type_stack("{{ url_for('hashDecoded.range_type_json') }}?date_from={{date_from}}&date_to={{date_to}}", 'id');
{% endif %}
draw_pie_chart("pie_chart_encoded" ,"{{ url_for('hashDecoded.decoder_type_json') }}?date_from={{date_from}}&date_to={{date_to}}&type={{type}}", "{{ url_for('hashDecoded.hashDecoded_page') }}?date_from={{date_from}}&date_to={{date_to}}&type={{type}}&encoding=");
draw_pie_chart("pie_chart_encoded" ,"{{ url_for('objects_decoded.decoder_pie_chart_json') }}?date_from={{date_from}}&date_to={{date_to}}&type={{type}}", "{{ url_for('hashDecoded.hashDecoded_page') }}?date_from={{date_from}}&date_to={{date_to}}&type={{type}}&encoding=");
draw_pie_chart("pie_chart_top5_types" ,"{{ url_for('hashDecoded.top5_type_json') }}?date_from={{date_from}}&date_to={{date_to}}&type={{type}}", "{{ url_for('hashDecoded.hashDecoded_page') }}?date_from={{date_from}}&date_to={{date_to}}&type=");
chart.onResize();
@ -280,18 +280,18 @@ function toggle_sidebar(){
<script>
function updateVTReport(hash) {
//updateReport
$.getJSON("{{ url_for('hashDecoded.update_vt_result') }}?hash="+hash,
$.getJSON("{{ url_for('objects_decoded.refresh_vt_report') }}?id="+hash,
function(data) {
content = '<i class="fas fa-sync-alt"></i> ' +data['report_vt']
content = '<i class="fas fa-sync-alt"></i> ' +data['report']
$( "#report_vt_"+hash ).html(content);
});
}
function sendFileToVT(hash) {
//send file to vt
$.getJSON("{{ url_for('hashDecoded.send_file_to_vt_js') }}?hash="+hash,
$.getJSON("{{ url_for('objects_decoded.send_to_vt') }}?id="+hash,
function(data) {
var content = '<a id="submit_vt_'+hash+'" class="btn btn-primary" target="_blank" href="'+ data['vt_link'] +'"><i class="fa fa-link"> '+ ' VT Report' +'</i></a>';
var content = '<a id="submit_vt_'+hash+'" class="btn btn-primary" target="_blank" href="'+ data['link'] +'"><i class="fa fa-link"> '+ ' VT Report' +'</i></a>';
$('#submit_vt_'+hash).remove();
$('darkbutton_'+hash).append(content);
});

View file

@ -521,7 +521,31 @@ def get_item_cryptocurrency_bitcoin():
'''
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # DOMAIN # # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # CRAWLER # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
@restApi.route("api/v1/crawl/domain", methods=['POST'])
@token_required('analyst')
def crawl_domain():
data = request.get_json()
url = data.get('url', None)
screenshot = data.get('screenshot', None)
har = data.get('har', None)
depth_limit = data.get('depth_limit', None)
max_pages = data.get('max_pages', None)
auto_crawler = data.get('auto_crawler', None)
crawler_delta = data.get('crawler_delta', None)
    crawler_type = data.get('crawler_type', None)
    cookiejar_uuid = data.get('cookiejar_uuid', None)
    user_agent = data.get('user_agent', None)
    # forward the full JSON payload to the crawler task API
    res = crawlers.api_create_crawler_task(data)
    res[0]['domain'] = url
return create_json_response(res[0], res[1])
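
A sketch of a request to this new endpoint; the field names mirror the keys read above, while the URL, token and values are placeholders, not part of this commit.

import requests

AIL_URL = 'https://127.0.0.1:7000'
headers = {'Authorization': '<api_token>'}   # placeholder analyst token
payload = {
    'url': 'http://example.onion',
    'screenshot': True,
    'har': True,
    'depth_limit': 1,
    'max_pages': 10,
    'auto_crawler': False,
    'crawler_delta': 3600,
    'crawler_type': 'onion',
    'cookiejar_uuid': None,
    'user_agent': None,
}
r = requests.post(f'{AIL_URL}/api/v1/crawl/domain',
                  json=payload, headers=headers, verify=False)
print(r.status_code, r.json())
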
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # DOMAIN # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
@restApi.route("api/v1/get/domain/status/minimal", methods=['POST'])
@token_required('analyst')
@ -654,6 +678,11 @@ def import_json_item():
res = importer.api_import_json_item(data_json)
return Response(json.dumps(res[0]), mimetype='application/json'), res[1]
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # CORE # # # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
#
#

View file

@ -84,7 +84,7 @@
Virus Total submission is disabled
{% endif %}
<a href="{{ url_for('hashDecoded.downloadHash') }}?hash={{ dict_object["correlation_id"] }}" target="blank" class="float-right" style="font-size: 15px">
<a href="{{ url_for('objects_decoded.decoded_download') }}?id={{ dict_object["correlation_id"] }}" target="blank" class="float-right" style="font-size: 15px">
<button class='btn btn-info'><i class="fas fa-download"></i> Download Decoded file
</button>
</a>
@ -107,9 +107,9 @@
<script>
function sendFileToVT(hash) {
//send file to vt
$.getJSON("{{ url_for('hashDecoded.send_file_to_vt_js') }}?hash="+hash,
$.getJSON("{{ url_for('objects_decoded.send_to_vt') }}?id="+hash,
function(data) {
var content = '<a id="submit_vt_b" class="btn btn-primary" target="_blank" href="'+ data['vt_link'] +'"><i class="fas fa-link"> '+ ' VT Report' +'</i></a>';
var content = '<a id="submit_vt_b" class="btn btn-primary" target="_blank" href="'+ data['link'] +'"><i class="fas fa-link"> '+ ' VT Report' +'</i></a>';
$('#submit_vt_b').remove();
$('darkbutton').append(content);
});
@ -117,9 +117,9 @@
function updateVTReport(hash) {
//updateReport
$.getJSON("{{ url_for('hashDecoded.update_vt_result') }}?hash="+hash,
$.getJSON("{{ url_for('objects_decoded.refresh_vt_report') }}?id="+hash,
function(data) {
var content = '<i class="fas fa-sync-alt"></i> ' +data['report_vt'];
var content = '<i class="fas fa-sync-alt"></i> ' +data['report'];
$( "#report_vt_b" ).html(content);
});
}