chg: [crawler + core + cve] migrate crawler to lacus + add new CVE object and correlation + migrate core

This commit is contained in:
Terrtia 2022-10-25 16:25:19 +02:00
parent eeff786ea5
commit 104eaae793
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
109 changed files with 4310 additions and 4551 deletions

View file

@ -1,457 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import re
import uuid
import json
import redis
import datetime
import time
import subprocess
import requests
from collections import deque
from pyfaup.faup import Faup
sys.path.append(os.environ['AIL_BIN'])
from Helper import Process
from pubsublogger import publisher
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
import crawlers
# ======== FUNCTIONS ========
def load_blacklist(service_type):
    """Reload the hardcoded domain blacklist of *service_type* into Redis.

    Reads ``$AIL_BIN/torcrawler/blacklist_<service_type>.txt`` (one entry per
    line) and replaces the content of the ``blacklist_<service_type>`` set.

    Loading stays best effort: any failure (missing file, missing ``AIL_BIN``,
    Redis error) is reported on stdout instead of being silently dropped, and
    the caller keeps running.
    """
    blacklist_key = 'blacklist_{}'.format(service_type)
    try:
        blacklist_file = os.environ['AIL_BIN'] + '/torcrawler/blacklist_{}.txt'.format(service_type)
        with open(blacklist_file, 'r') as f:
            lines = f.read().splitlines()
        # Only drop the previous set once the file has been read successfully,
        # so a read error does not leave an empty blacklist behind.
        redis_crawler.delete(blacklist_key)
        for line in lines:
            redis_crawler.sadd(blacklist_key, line)
    except Exception as e:
        print('Warning: blacklist_{} not loaded: {}'.format(service_type, e))
def update_auto_crawler():
    """Move every due entry of ``crawler_auto_queue`` into its priority queue.

    Entries are stored as ``<message>;<queue type>`` and scored by epoch; all
    entries with a score up to the current time are pushed to
    ``<queue type>_crawler_priority_queue`` and removed from the schedule.
    """
    now = int(time.time())
    for scheduled in redis_crawler.zrangebyscore('crawler_auto_queue', '-inf', now):
        message, queue_type = scheduled.rsplit(';', 1)
        redis_crawler.sadd('{}_crawler_priority_queue'.format(queue_type), message)
        redis_crawler.zrem('crawler_auto_queue', scheduled)
# Extract info from url (url, domain, domain url, ...)
def _faup_to_str(value):
    """Return *value* decoded to ``str`` when possible, otherwise unchanged.

    # FIXME: # TODO: remove me -- only needed while faup may return bytes.
    """
    try:
        return value.decode()
    except (AttributeError, UnicodeDecodeError):
        return value


def unpack_url(url):
    """Split *url* with faup and normalise it for the crawler.

    Returns a dict with the keys ``domain``, ``scheme``, ``port``, ``url``,
    ``domain_url`` and ``tld``. The host part is forced to lower case
    (rfc4343). A scheme missing from ``default_proto_map`` is recorded in the
    ``new_proto`` Redis set and replaced by ``http``.
    """
    to_crawl = {}
    faup.decode(url)
    url_unpack = faup.get()

    to_crawl['domain'] = _faup_to_str(url_unpack['domain'])
    to_crawl['domain'] = to_crawl['domain'].lower()

    # force lower case domain/subdomain (rfc4343)
    url_host = _faup_to_str(url_unpack['host'])
    new_url_host = url_host.lower()
    url_lower_case = url.replace(url_host, new_url_host, 1)

    if url_unpack['scheme'] is None:
        to_crawl['scheme'] = 'http'
        url = 'http://{}'.format(url_lower_case)
    else:
        scheme = _faup_to_str(url_unpack['scheme'])
        if scheme in default_proto_map:
            to_crawl['scheme'] = scheme
            url = url_lower_case
        else:
            redis_crawler.sadd('new_proto', '{} {}'.format(scheme, url_lower_case))
            to_crawl['scheme'] = 'http'
            # Strip the whole '<scheme>://' prefix: removing only the scheme
            # name left '://' behind and produced 'http://://host/...'.
            url = 'http://{}'.format(url_lower_case.replace('{}://'.format(scheme), '', 1))

    if url_unpack['port'] is None:
        to_crawl['port'] = default_proto_map[to_crawl['scheme']]
    else:
        port = _faup_to_str(url_unpack['port'])
        # Verify port number #################### make function to verify/correct port number
        try:
            int(port)
        except (TypeError, ValueError):
            # Invalid port number: fall back to the scheme default
            port = default_proto_map[to_crawl['scheme']]
        to_crawl['port'] = port

    to_crawl['url'] = url
    if to_crawl['port'] == 80:
        to_crawl['domain_url'] = '{}://{}'.format(to_crawl['scheme'], new_url_host)
    else:
        to_crawl['domain_url'] = '{}://{}:{}'.format(to_crawl['scheme'], new_url_host, to_crawl['port'])

    to_crawl['tld'] = _faup_to_str(url_unpack['tld'])

    return to_crawl
def get_crawler_config(redis_server, mode, service_type, domain, url=None):
    """Build the crawler options for one domain from its stored configuration.

    The JSON stored under ``crawler_config:<mode>:<service_type>:<domain>``
    (with ``:<url>`` appended in ``auto`` mode) is overlaid on
    ``default_crawler_config``; only option names present in the defaults are
    kept. In ``auto`` mode the stored ``time`` value is added as an int; in
    ``manual`` mode the stored configuration is deleted once read.
    """
    if mode == 'auto':
        config_key = 'crawler_config:{}:{}:{}:{}'.format(mode, service_type, domain, url)
    else:
        config_key = 'crawler_config:{}:{}:{}'.format(mode, service_type, domain)

    raw_config = redis_server.get(config_key)
    config = {} if raw_config is None else json.loads(raw_config)

    crawler_options = {}
    for option, default_value in default_crawler_config.items():
        crawler_options[option] = config[option] if option in config else default_value

    if mode == 'auto':
        crawler_options['time'] = int(config['time'])
    elif mode == 'manual':
        # manual requests are one-shot
        redis_server.delete('crawler_config:{}:{}:{}'.format(mode, service_type, domain))
    return crawler_options
def load_crawler_config(queue_type, service_type, domain, paste, url, date):
    """Assemble the configuration dict describing one crawl.

    *paste* selects the crawl kind: ``'auto'`` and ``'manual'`` are requested
    crawls (``requested`` is True) whose options come from their stored
    configuration; any other value is a default crawl. When *queue_type* is
    set and is not ``'tor'`` it replaces *service_type* for the options
    lookup only; the ``service_type`` entry keeps the original value.
    """
    crawler_config = {
        'splash_url': f'http://{splash_url}',
        'item': paste,
        'service_type': service_type,
        'domain': domain,
        'date': date,
    }

    if queue_type and queue_type != 'tor':
        service_type = queue_type

    # Auto ################################################# create new entry, next crawling => here or when ended ?
    if paste == 'auto':
        options = get_crawler_config(redis_crawler, 'auto', service_type, domain, url=url)
        requested = True
    elif paste == 'manual':
        options = get_crawler_config(r_cache, 'manual', service_type, domain)
        requested = True
    else:
        # default crawler
        options = get_crawler_config(redis_crawler, 'default', service_type, domain)
        requested = False

    crawler_config['crawler_options'] = options
    crawler_config['requested'] = requested
    return crawler_config
def is_domain_up_day(domain, type_service, date_day):
    """Return True when *domain* is in the ``<type_service>_up:<date_day>`` set."""
    up_key = '{}_up:{}'.format(type_service, date_day)
    return bool(redis_crawler.sismember(up_key, domain))
# Record crawl metadata in the '<type_service>_metadata:<domain>' hash:
# 'first_seen' (written only when the field does not exist yet),
# 'paste_parent' (father_item) and 'last_check' (date['date_day']).
# NOTE(review): indentation is lost in this view, so it cannot be told whether
# the 'paste_parent' write sits inside the first-seen check -- confirm against
# the original file.
def set_crawled_domain_metadata(type_service, date, domain, father_item):
# first seen
if not redis_crawler.hexists('{}_metadata:{}'.format(type_service, domain), 'first_seen'):
redis_crawler.hset('{}_metadata:{}'.format(type_service, domain), 'first_seen', date['date_day'])
redis_crawler.hset('{}_metadata:{}'.format(type_service, domain), 'paste_parent', father_item)
# last check
redis_crawler.hset('{}_metadata:{}'.format(type_service, domain), 'last_check', date['date_day'])
# Put message back on queue
# Re-queue a crawl after a failure: the domain goes back into
# '<type_service>_domain_crawler_queue' when it is not already a member, and
# the original message is added to '<type_service>_crawler_priority_queue'.
# NOTE(review): indentation is lost in this view, so it cannot be told whether
# the priority-queue sadd is also guarded by the membership check -- confirm.
def on_error_send_message_back_in_queue(type_service, domain, message):
if not redis_crawler.sismember('{}_domain_crawler_queue'.format(type_service), domain):
redis_crawler.sadd('{}_domain_crawler_queue'.format(type_service), domain)
redis_crawler.sadd('{}_crawler_priority_queue'.format(type_service), message)
def crawl_onion(url, domain, port, type_service, message, crawler_config):
    """Crawl *url* by running ``tor_crawler.py`` once Splash is reachable.

    The function first blocks until ``http://<splash_url>`` answers: after 2
    consecutive failures the Splash docker is restarted, after 6 the message
    is put back in the queue, the status is set to ``SPLASH DOWN`` and the
    retry counter starts over.

    *crawler_config* is completed with ``url`` and ``port``, stored in the
    cache under ``crawler_request:<uuid>`` and the uuid is handed to the
    crawler subprocess.

    The process exits with -2 when the crawler reports a refused proxy
    connection, -1 when the crawler subprocess fails, and 1 when Splash
    answers with a non-200 status. In the first and last case *message* is
    put back in the queue before exiting.
    """
    crawler_config['url'] = url
    crawler_config['port'] = port
    print('Launching Crawler: {}'.format(url))

    metadata_key = 'metadata_crawler:{}'.format(splash_url)
    r_cache.hset(metadata_key, 'crawling_domain', domain)
    r_cache.hset(metadata_key, 'started_time', datetime.datetime.now().strftime("%Y/%m/%d - %H:%M.%S"))

    # Wait until Splash answers
    retry = True
    nb_retry = 0
    while retry:
        try:
            r = requests.get(f'http://{splash_url}', timeout=30.0)
            retry = False
        except Exception:
            # TODO: relaunch docker or send error message
            nb_retry += 1

            if nb_retry == 2:
                crawlers.restart_splash_docker(splash_url, splash_name)
                time.sleep(20)

            if nb_retry == 6:
                on_error_send_message_back_in_queue(type_service, domain, message)
                publisher.error('{} SPASH DOWN'.format(splash_url))
                print('--------------------------------------')
                print(' \033[91m DOCKER SPLASH DOWN\033[0m')
                print(' {} DOWN'.format(splash_url))
                r_cache.hset(metadata_key, 'status', 'SPLASH DOWN')
                # Start a new retry cycle. This used to be the no-op
                # comparison `nb_retry == 0`, so the docker restart and the
                # error report could only ever happen once.
                nb_retry = 0

            print(' \033[91m DOCKER SPLASH NOT AVAILABLE\033[0m')
            print(' Retry({}) in 10 seconds'.format(nb_retry))
            time.sleep(10)

    if r.status_code == 200:
        r_cache.hset(metadata_key, 'status', 'Crawling')
        # save config in cache
        UUID = str(uuid.uuid4())
        r_cache.set('crawler_request:{}'.format(UUID), json.dumps(crawler_config))

        process = subprocess.Popen(["python", './torcrawler/tor_crawler.py', UUID],
                                   stdout=subprocess.PIPE)
        # communicate() drains stdout while waiting; polling with an unread
        # PIPE can block forever once the child fills the pipe buffer.
        raw_output, _ = process.communicate()

        if process.returncode == 0:
            output = raw_output.decode()
            print(output)
            # error: splash:Connection to proxy refused
            if 'Connection to proxy refused' in output:
                on_error_send_message_back_in_queue(type_service, domain, message)
                publisher.error('{} SPASH, PROXY DOWN OR BAD CONFIGURATION'.format(splash_url))
                print('------------------------------------------------------------------------')
                print(' \033[91m SPLASH: Connection to proxy refused')
                print('')
                print(' PROXY DOWN OR BAD CONFIGURATION\033[0m')
                print('------------------------------------------------------------------------')
                r_cache.hset(metadata_key, 'status', 'Error')
                exit(-2)
            else:
                crawlers.update_splash_manager_connection_status(True)
        else:
            print(raw_output)
            exit(-1)
    else:
        on_error_send_message_back_in_queue(type_service, domain, message)
        print('--------------------------------------')
        print(' \033[91m DOCKER SPLASH DOWN\033[0m')
        print(' {} DOWN'.format(splash_url))
        # NOTE(review): status is set to 'Crawling' although Splash is
        # reported down -- kept as is, confirm whether this is intended.
        r_cache.hset(metadata_key, 'status', 'Crawling')
        exit(1)
# check external links (full_crawl)
def search_potential_source_domain(type_service, domain):
    """Flag *domain* as a potential source when it links to many other domains.

    Reads the links stored in ``domain_<type_service>_external_links:<domain>``
    and collects the distinct ``onion``/``i2p`` domains that differ from
    *domain*. With 20 or more of them, *domain* is added to
    ``<type_service>_potential_source``. The stored links are deleted
    afterwards.
    """
    links_key = 'domain_{}_external_links:{}'.format(type_service, domain)
    external_domains = set()
    for link in redis_crawler.smembers(links_key):
        # unpack url
        url_data = unpack_url(link)
        if url_data['domain'] != domain and url_data['tld'] in ('onion', 'i2p'):
            external_domains.add(url_data['domain'])

    # # TODO: add special tag ?
    if len(external_domains) >= 20:
        redis_crawler.sadd('{}_potential_source'.format(type_service), domain)
        # print the domain itself (the message used to end with the literal word 'domain')
        print('New potential source found: {}'.format(domain))
    redis_crawler.delete(links_key)
# Script entry point: one crawler process bound to the Splash instance whose
# url is given as the single command-line argument. It sets up the globals
# used by the functions of this file (splash_url, splash_name,
# default_proto_map, r_cache, redis_crawler, faup, default_crawler_config),
# then loops forever taking elements to crawl from the crawler queues.
if __name__ == '__main__':
if len(sys.argv) != 2:
print('usage:', 'Crawler.py', 'splash_url')
exit(1)
##################################################
splash_url = sys.argv[1]
splash_name = crawlers.get_splash_name_by_url(splash_url)
proxy_name = crawlers.get_splash_proxy(splash_name)
crawler_type = crawlers.get_splash_crawler_type(splash_name)
print(f'SPLASH Name: {splash_name}')
print(f'Proxy Name: {proxy_name}')
print(f'Crawler Type: {crawler_type}')
#time.sleep(10)
#sys.exit(0)
#rotation_mode = deque(['onion', 'regular'])
# Queue types served by this splash; the deque is rotated on every loop turn
all_crawler_queues = crawlers.get_crawler_queue_types_by_splash_name(splash_name)
rotation_mode = deque(all_crawler_queues)
print(rotation_mode)
# Default port per supported scheme (read by unpack_url)
default_proto_map = {'http': 80, 'https': 443}
######################################################## add ftp ???
publisher.port = 6380
publisher.channel = "Script"
publisher.info("Script Crawler started")
config_section = 'Crawler'
# Setup the I/O queues
p = Process(config_section)
print('splash url: {}'.format(splash_url))
# Cache connection: crawler status/metadata and crawl requests
r_cache = redis.StrictRedis(
host=p.config.get("Redis_Cache", "host"),
port=p.config.getint("Redis_Cache", "port"),
db=p.config.getint("Redis_Cache", "db"),
decode_responses=True)
# Crawler database: queues, blacklists, domain metadata and history
redis_crawler = redis.StrictRedis(
host=p.config.get("ARDB_Onion", "host"),
port=p.config.getint("ARDB_Onion", "port"),
db=p.config.getint("ARDB_Onion", "db"),
decode_responses=True)
faup = crawlers.get_faup()
# get HAR files
# NOTE(review): the if/else below re-assigns the boolean already returned by getboolean
default_crawler_har = p.config.getboolean("Crawler", "default_crawler_har")
if default_crawler_har:
default_crawler_har = True
else:
default_crawler_har = False
# get PNG files
default_crawler_png = p.config.getboolean("Crawler", "default_crawler_png")
if default_crawler_png:
default_crawler_png = True
else:
default_crawler_png = False
# Default crawler options
default_crawler_config = {'html': True,
'har': default_crawler_har,
'png': default_crawler_png,
'depth_limit': p.config.getint("Crawler", "crawler_depth_limit"),
'closespider_pagecount': p.config.getint("Crawler", "default_crawler_closespider_pagecount"),
'cookiejar_uuid': None,
'user_agent': p.config.get("Crawler", "default_crawler_user_agent")}
# Track launched crawler
r_cache.sadd('all_splash_crawlers', splash_url)
r_cache.hset('metadata_crawler:{}'.format(splash_url), 'status', 'Waiting')
r_cache.hset('metadata_crawler:{}'.format(splash_url), 'started_time', datetime.datetime.now().strftime("%Y/%m/%d - %H:%M.%S"))
# update hardcoded blacklist
load_blacklist('onion')
load_blacklist('regular')
# Main loop: move due auto-crawls to their priority queue, rotate the queue
# types, then take one element to crawl (or sleep one second when none)
while True:
update_auto_crawler()
rotation_mode.rotate()
to_crawl = crawlers.get_elem_to_crawl_by_queue_type(rotation_mode)
if to_crawl:
url_data = unpack_url(to_crawl['url'])
# remove domain from queue
redis_crawler.srem('{}_domain_crawler_queue'.format(to_crawl['type_service']), url_data['domain'])
print()
print()
print('\033[92m------------------START CRAWLER------------------\033[0m')
print('crawler type: {}'.format(to_crawl['type_service']))
print('\033[92m-------------------------------------------------\033[0m')
print('url: {}'.format(url_data['url']))
print('domain: {}'.format(url_data['domain']))
print('domain_url: {}'.format(url_data['domain_url']))
print()
# Check blacklist
if not redis_crawler.sismember('blacklist_{}'.format(to_crawl['type_service']), url_data['domain']):
date = {'date_day': datetime.datetime.now().strftime("%Y%m%d"),
'date_month': datetime.datetime.now().strftime("%Y%m"),
'epoch': int(time.time())}
# Update crawler status type
r_cache.hset('metadata_crawler:{}'.format(splash_url), 'type', to_crawl['type_service'])
crawler_config = load_crawler_config(to_crawl['queue_type'], to_crawl['type_service'], url_data['domain'], to_crawl['paste'], to_crawl['url'], date)
# check if default crawler
if not crawler_config['requested']:
# Auto crawl only if service not up this month
if redis_crawler.sismember('month_{}_up:{}'.format(to_crawl['type_service'], date['date_month']), url_data['domain']):
continue
set_crawled_domain_metadata(to_crawl['type_service'], date, url_data['domain'], to_crawl['paste'])
#### CRAWLER ####
# Manual and Auto Crawler
if crawler_config['requested']:
######################################################crawler strategy
# CRAWL domain
# requested crawls use the full url
crawl_onion(url_data['url'], url_data['domain'], url_data['port'], to_crawl['type_service'], to_crawl['original_message'], crawler_config)
# Default Crawler
else:
# CRAWL domain
# default crawls use the domain url
crawl_onion(url_data['domain_url'], url_data['domain'], url_data['port'], to_crawl['type_service'], to_crawl['original_message'], crawler_config)
#if url != domain_url and not is_domain_up_day(url_data['domain'], to_crawl['type_service'], date['date_day']):
# crawl_onion(url_data['url'], url_data['domain'], to_crawl['original_message'])
# Save last_status day (DOWN)
if not is_domain_up_day(url_data['domain'], to_crawl['type_service'], date['date_day']):
redis_crawler.sadd('{}_down:{}'.format(to_crawl['type_service'], date['date_day']), url_data['domain'])
# if domain was UP at least one time
if redis_crawler.exists('crawler_history_{}:{}:{}'.format(to_crawl['type_service'], url_data['domain'], url_data['port'])):
# add crawler history (if domain is down)
if not redis_crawler.zrangebyscore('crawler_history_{}:{}:{}'.format(to_crawl['type_service'], url_data['domain'], url_data['port']), date['epoch'], date['epoch']):
# Domain is down
redis_crawler.zadd('crawler_history_{}:{}:{}'.format(to_crawl['type_service'], url_data['domain'], url_data['port']), int(date['epoch']), int(date['epoch']))
############################
# extract page content
############################
# update list, last crawled domains
redis_crawler.lpush('last_{}'.format(to_crawl['type_service']), '{}:{};{}'.format(url_data['domain'], url_data['port'], date['epoch']))
redis_crawler.ltrim('last_{}'.format(to_crawl['type_service']), 0, 15)
#update crawler status
r_cache.hset('metadata_crawler:{}'.format(splash_url), 'status', 'Waiting')
r_cache.hdel('metadata_crawler:{}'.format(splash_url), 'crawling_domain')
# Update crawler status type
# NOTE(review): hdel is also given the type_service value as a second field name -- confirm only 'type' is meant
r_cache.hdel('metadata_crawler:{}'.format(splash_url), 'type', to_crawl['type_service'])
# add next auto Crawling in queue:
if to_crawl['paste'] == 'auto':
redis_crawler.zadd('crawler_auto_queue', int(time.time()+crawler_config['crawler_options']['time']) , '{};{}'.format(to_crawl['original_message'], to_crawl['type_service']))
# update list, last auto crawled domains
redis_crawler.lpush('last_auto_crawled', '{}:{};{}'.format(url_data['domain'], url_data['port'], date['epoch']))
redis_crawler.ltrim('last_auto_crawled', 0, 9)
else:
print(' Blacklisted Domain')
print()
print()
else:
time.sleep(1)

View file

@ -119,11 +119,11 @@ def core_migration():
# Auto Export Migration
ail_misp = r_serv_db.get('ail:misp')
if ail_misp != 'True':
ail_misp == 'False'
ail_misp = 'False'
r_kvrocks.set('ail:misp', ail_misp)
ail_thehive = r_serv_db.get('ail:thehive')
if ail_thehive != 'True':
ail_thehive == 'False'
ail_thehive = 'False'
r_kvrocks.set('ail:thehive', ail_thehive)
@ -494,7 +494,7 @@ def domain_migration():
domain = Domains.Domain(dom)
domain.update_daterange(first_seen)
domain.update_daterange(last_check)
domain._set_ports(ports)
domain._set_ports(ports) # TODO ############################################################################
if last_origin:
domain.set_last_origin(last_origin)
for language in languages:
@ -520,13 +520,13 @@ def domain_migration():
epoch = history['epoch']
# DOMAIN DOWN
if not history.get('status'): # domain DOWN
domain.add_history(epoch, port)
domain.add_history(epoch)
print(f'DOWN {epoch}')
# DOMAIN UP
else:
root_id = history.get('root')
if root_id:
domain.add_history(epoch, port, root_item=root_id)
domain.add_history(epoch, root_item=root_id)
print(f'UP {root_id}')
crawled_items = get_crawled_items(dom, root_id)
for item_id in crawled_items:
@ -534,7 +534,7 @@ def domain_migration():
item_father = get_item_father(item_id)
if item_father and url:
print(f'{url} {item_id}')
domain.add_crawled_item(url, port, item_id, item_father)
domain.add_crawled_item(url, item_id, item_father)
#print()

View file

@ -18,17 +18,18 @@ import time
import re
import sys
from pubsublogger import publisher
from packages import Paste
from lib.objects.Items import Item
from Helper import Process
from ipaddress import IPv4Network, IPv4Address
# TODO REWRITE ME -> IMPROVE + MIGRATE TO MODULE
def search_ip(message):
paste = Paste.Paste(message)
content = paste.get_p_content()
item = Item(message)
content = item.get_content()
# regex to find IPs
reg_ip = re.compile(r'^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)', flags=re.MULTILINE)
# list of the regex results in the Paste, may be null
# list of the regex results in the Item, may be null
results = reg_ip.findall(content)
matching_ips = []
@ -40,14 +41,13 @@ def search_ip(message):
matching_ips.append(address)
if len(matching_ips) > 0:
print('{} contains {} IPs'.format(paste.p_name, len(matching_ips)))
publisher.warning('{} contains {} IPs'.format(paste.p_name, len(matching_ips)))
print(f'{item.get_id()} contains {len(matching_ips)} IPs')
publisher.warning(f'{item.get_id()} contains {item.get_id()} IPs')
#Tag message with IP
msg = 'infoleak:automatic-detection="ip";{}'.format(message)
# Tag message with IP
msg = f'infoleak:automatic-detection="ip";{item.get_id()}'
p.populate_set_out(msg, 'Tags')
#Send to duplicate
p.populate_set_out(message, 'Duplicate')
if __name__ == '__main__':
# If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)

View file

@ -40,7 +40,6 @@ is_ail_core=`screen -ls | egrep '[0-9]+.Core_AIL' | cut -d. -f1`
is_ail_2_ail=`screen -ls | egrep '[0-9]+.AIL_2_AIL' | cut -d. -f1`
isscripted=`screen -ls | egrep '[0-9]+.Script_AIL' | cut -d. -f1`
isflasked=`screen -ls | egrep '[0-9]+.Flask_AIL' | cut -d. -f1`
iscrawler=`screen -ls | egrep '[0-9]+.Crawler_AIL' | cut -d. -f1`
isfeeded=`screen -ls | egrep '[0-9]+.Feeder_Pystemon' | cut -d. -f1`
function helptext {
@ -126,6 +125,8 @@ function launching_logs {
screen -S "Logging_AIL" -X screen -t "LogScript" bash -c "cd ${AIL_BIN}; ${AIL_VENV}/bin/log_subscriber -p 6380 -c Script -l ../logs/; read x"
sleep 0.1
screen -S "Logging_AIL" -X screen -t "LogScript" bash -c "cd ${AIL_BIN}; ${AIL_VENV}/bin/log_subscriber -p 6380 -c Sync -l ../logs/; read x"
sleep 0.1
screen -S "Logging_AIL" -X screen -t "LogScript" bash -c "cd ${AIL_BIN}; ${AIL_VENV}/bin/log_subscriber -p 6380 -c Crawler -l ../logs/; read x"
}
function launching_queues {
@ -174,8 +175,6 @@ function launching_scripts {
screen -S "Script_AIL" -X screen -t "JSON_importer" bash -c "cd ${AIL_BIN}/import; ${ENV_PY} ./JSON_importer.py; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "Crawler_manager" bash -c "cd ${AIL_BIN}/core; ${ENV_PY} ./Crawler_manager.py; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "D4_client" bash -c "cd ${AIL_BIN}/core; ${ENV_PY} ./D4_client.py; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "DbCleaner" bash -c "cd ${AIL_BIN}/core; ${ENV_PY} ./DbCleaner.py; read x"
@ -202,6 +201,9 @@ function launching_scripts {
screen -S "Script_AIL" -X screen -t "SubmitPaste" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./submit_paste.py; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "Crawler" bash -c "cd ${AIL_BIN}/crawlers; ${ENV_PY} ./Crawler.py; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "Sync_module" bash -c "cd ${AIL_BIN}/core; ${ENV_PY} ./Sync_module.py; read x"
sleep 0.1
@ -225,8 +227,6 @@ function launching_scripts {
sleep 0.1
screen -S "Script_AIL" -X screen -t "Mail" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Mail.py; read x"
sleep 0.1
# screen -S "Script_AIL" -X screen -t "SentimentAnalysis" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./SentimentAnalysis.py; read x"
# sleep 0.1
screen -S "Script_AIL" -X screen -t "ModuleStats" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./ModuleStats.py; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "Onion" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Onion.py; read x"
@ -265,8 +265,12 @@ function launching_scripts {
##################################
# DISABLED MODULES #
##################################
#screen -S "Script_AIL" -X screen -t "Phone" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Phone.py; read x"
#sleep 0.1
# screen -S "Script_AIL" -X screen -t "Phone" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Phone.py; read x"
# sleep 0.1
# screen -S "Script_AIL" -X screen -t "SentimentAnalysis" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./SentimentAnalysis.py; read x"
# sleep 0.1
# screen -S "Script_AIL" -X screen -t "Release" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Release.py; read x"
# sleep 0.1
##################################
# #
@ -285,8 +289,6 @@ function launching_scripts {
sleep 0.1
screen -S "Script_AIL" -X screen -t "IPAddress" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./IPAddress.py; read x"
#screen -S "Script_AIL" -X screen -t "Release" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Release.py; read x"
#sleep 0.1
}
@ -476,19 +478,19 @@ function launch_feeder {
}
function killscript {
if [[ $islogged || $isqueued || $is_ail_core || $isscripted || $isflasked || $isfeeded || $iscrawler || $is_ail_2_ail ]]; then
if [[ $islogged || $isqueued || $is_ail_core || $isscripted || $isflasked || $isfeeded || $is_ail_2_ail ]]; then
echo -e $GREEN"Killing Script"$DEFAULT
kill $islogged $isqueued $is_ail_core $isscripted $isflasked $isfeeded $iscrawler $is_ail_2_ail
kill $islogged $isqueued $is_ail_core $isscripted $isflasked $isfeeded $is_ail_2_ail
sleep 0.2
echo -e $ROSE`screen -ls`$DEFAULT
echo -e $GREEN"\t* $islogged $isqueued $is_ail_core $isscripted $isflasked $isfeeded $iscrawler $is_ail_2_ail killed."$DEFAULT
echo -e $GREEN"\t* $islogged $isqueued $is_ail_core $isscripted $isflasked $isfeeded $is_ail_2_ail killed."$DEFAULT
else
echo -e $RED"\t* No script to kill"$DEFAULT
fi
}
function killall {
if [[ $isredis || $isardb || $iskvrocks || $islogged || $isqueued || $is_ail_2_ail || $isscripted || $isflasked || $isfeeded || $iscrawler || $is_ail_core || $is_ail_2_ail ]]; then
if [[ $isredis || $isardb || $iskvrocks || $islogged || $isqueued || $is_ail_2_ail || $isscripted || $isflasked || $isfeeded || $is_ail_core || $is_ail_2_ail ]]; then
if [[ $isredis ]]; then
echo -e $GREEN"Gracefully closing redis servers"$DEFAULT
shutting_down_redis;
@ -503,10 +505,10 @@ function killall {
shutting_down_kvrocks;
fi
echo -e $GREEN"Killing all"$DEFAULT
kill $isredis $isardb $iskvrocks $islogged $isqueued $is_ail_core $isscripted $isflasked $isfeeded $iscrawler $is_ail_2_ail
kill $isredis $isardb $iskvrocks $islogged $isqueued $is_ail_core $isscripted $isflasked $isfeeded $is_ail_2_ail
sleep 0.2
echo -e $ROSE`screen -ls`$DEFAULT
echo -e $GREEN"\t* $isredis $isardb $iskvrocks $islogged $isqueued $isscripted $is_ail_2_ail $isflasked $isfeeded $iscrawler $is_ail_core killed."$DEFAULT
echo -e $GREEN"\t* $isredis $isardb $iskvrocks $islogged $isqueued $isscripted $is_ail_2_ail $isflasked $isfeeded $is_ail_core killed."$DEFAULT
else
echo -e $RED"\t* No screen to kill"$DEFAULT
fi

View file

@ -15,7 +15,7 @@ import json
import redis
import psutil
from subprocess import PIPE, Popen
from packages import Paste
from lib.objects.Items import Item
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
@ -51,7 +51,7 @@ QUEUE_STATUS = {}
CPU_TABLE = {}
CPU_OBJECT_TABLE = {}
# Path of the current paste for a pid
# Path of the current item for a pid
COMPLETE_PASTE_PATH_PER_PID = {}
'''
@ -443,10 +443,10 @@ class Show_paste(Frame):
self.label_list[i]._text = ""
return
paste = Paste.Paste(COMPLETE_PASTE_PATH_PER_PID[current_selected_value])
old_content = paste.get_p_content()[0:4000] # Limit number of char to be displayed
item = Item(COMPLETE_PASTE_PATH_PER_PID[current_selected_value])
old_content = item.get_content()[0:4000] # Limit number of char to be displayed
#Replace unprintable char by ?
# Replace unprintable char by ?
content = ""
for i, c in enumerate(old_content):
if ord(c) > 127: # Used to avoid printing unprintable char
@ -456,7 +456,7 @@ class Show_paste(Frame):
else:
content += c
#Print in the correct label, END or more
# Print in the correct label, END or more
to_print = ""
i = 0
for line in content.split("\n"):
@ -472,7 +472,7 @@ class Show_paste(Frame):
self.label_list[i]._text = "- END of PASTE -"
i += 1
while i<self.num_label: #Clear out remaining lines
while i<self.num_label: # Clear out remaining lines
self.label_list[i]._text = ""
i += 1
@ -491,6 +491,7 @@ class Show_paste(Frame):
for i in range(2,self.num_label):
self.label_list[i]._text = ""
'''
END SCENES DEFINITION
'''

View file

@ -72,6 +72,7 @@ def sendEmailNotification(recipient, mail_subject, mail_body):
traceback.print_tb(err.__traceback__)
publisher.warning(err)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Test notification sender.')
parser.add_argument("addr", help="Test mail 'to' address")

View file

@ -180,9 +180,9 @@ if __name__ == '__main__':
key_id_str = 'Key ID - '
regex_key_id = '{}.+'.format(key_id_str)
regex_pgp_public_blocs = '-----BEGIN PGP PUBLIC KEY BLOCK-----[\s\S]+?-----END PGP PUBLIC KEY BLOCK-----'
regex_pgp_signature = '-----BEGIN PGP SIGNATURE-----[\s\S]+?-----END PGP SIGNATURE-----'
regex_pgp_message = '-----BEGIN PGP MESSAGE-----[\s\S]+?-----END PGP MESSAGE-----'
regex_pgp_public_blocs = r'-----BEGIN PGP PUBLIC KEY BLOCK-----[\s\S]+?-----END PGP PUBLIC KEY BLOCK-----'
regex_pgp_signature = r'-----BEGIN PGP SIGNATURE-----[\s\S]+?-----END PGP SIGNATURE-----'
regex_pgp_message = r'-----BEGIN PGP MESSAGE-----[\s\S]+?-----END PGP MESSAGE-----'
regex_tool_version = r"\bVersion:.*\n"
regex_block_comment = r"\bComment:.*\n"

View file

@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import time
from packages import Paste
from lib.objects.Items import Item
from pubsublogger import publisher
from Helper import Process
import re
@ -45,8 +45,8 @@ if __name__ == "__main__":
time.sleep(10)
continue
paste = Paste.Paste(filepath)
content = paste.get_p_content()
item = Item(filepath)
content = item.get_content()
#signal.alarm(max_execution_time)
try:
@ -54,16 +54,16 @@ if __name__ == "__main__":
if len(releases) == 0:
continue
to_print = 'Release;{};{};{};{} releases;{}'.format(paste.p_source, paste.p_date, paste.p_name, len(releases), paste.p_rel_path)
print(to_print)
if len(releases) > 30:
publisher.warning(to_print)
else:
publisher.info(to_print)
to_print = f'Release;{item.get_source()};{item.get_date()};{item.get_basename()};{len(releases)} releases;{item.get_id()}'
print(to_print)
if len(releases) > 30:
publisher.warning(to_print)
else:
publisher.info(to_print)
except TimeoutException:
p.incr_module_timeout_statistic()
print ("{0} processing timeout".format(paste.p_rel_path))
print(f"{item.get_id()} processing timeout")
continue
else:
signal.alarm(0)

View file

@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import time
from packages import Paste
from lib.objects.Items import Item
from pubsublogger import publisher
from Helper import Process
import re
@ -13,19 +13,19 @@ if __name__ == "__main__":
p = Process(config_section)
publisher.info("Finding Source Code")
critical = 0 # AS TO BE IMPORTANT, MIGHT BE REMOVED
critical = 0 # AS TO BE IMPORTANT, MIGHT BE REMOVED
#RELEVANTS LANGUAGES
shell = "[a-zA-Z0-9]+@[a-zA-Z0-9\-]+\:\~\$"
c = "\#include\ \<[a-z\/]+.h\>"
php = "\<\?php"
python = "import\ [\w]+"
bash = "#!\/[\w]*\/bash"
javascript = "function\(\)"
ruby = "require \ [\w]+"
adr = "0x[a-f0-9]{2}"
# RELEVANT LANGUAGES
shell = r"[a-zA-Z0-9]+@[a-zA-Z0-9\-]+\:\~\$"
c = r"\#include\ \<[a-z\/]+.h\>"
php = r"\<\?php"
python = r"import\ [\w]+"
bash = r"#!\/[\w]*\/bash"
javascript = r"function\(\)"
ruby = r"require \ [\w]+"
adr = r"0x[a-f0-9]{2}"
#asm = "\"((?s).{1}x[0-9a-f]{2}){3,}" ISSUES WITH FINDALL, pattern like \x54\xaf\x23\..
# asm = r"\"((?s).{1}x[0-9a-f]{2}){3,}" ISSUES WITH FINDALL, pattern like \x54\xaf\x23\..
languages = [shell, c, php, bash, python, javascript, bash, ruby, adr]
regex = '|'.join(languages)
@ -41,13 +41,13 @@ if __name__ == "__main__":
filepath, count = message.split()
paste = Paste.Paste(filepath)
content = paste.get_p_content()
item = Item(filepath)
content = item.get_content()
match_set = set(re.findall(regex, content))
if len(match_set) == 0:
continue
to_print = 'SourceCode;{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, message)
to_print = f'SourceCode;{item.get_source()};{item.get_date()};{item.get_basename()};{item.get_id()}'
if len(match_set) > critical:
publisher.warning(to_print)

View file

@ -1,68 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
import ConfigLoader
import crawlers
# Open the "ARDB_Metadata" connection, then drop the loader reference
config_loader = ConfigLoader.ConfigLoader()
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
config_loader = None
# # TODO: launch me in core screen
# # TODO: check if already launched in tor screen
# # TODO: handle multiple splash_manager
# Script entry point: ping the Splash manager once, (re)load the splash and
# proxy lists and relaunch the crawlers when the crawler test passes, then
# keep polling the manager state in an endless loop.
if __name__ == '__main__':
is_manager_connected = crawlers.ping_splash_manager()
if not is_manager_connected:
print('Error, Can\'t connect to Splash manager')
session_uuid = None
else:
print('Splash manager connected')
session_uuid = crawlers.get_splash_manager_session_uuid()
is_manager_connected = crawlers.reload_splash_and_proxies_list()
print(is_manager_connected)
if is_manager_connected:
if crawlers.test_ail_crawlers():
crawlers.relaunch_crawlers()
last_check = int(time.time())
while True:
# # TODO: avoid multiple ping
# check if manager is connected
# (only when more than 60 seconds have passed since the last check)
if int(time.time()) - last_check > 60:
is_manager_connected = crawlers.is_splash_manager_connected()
current_session_uuid = crawlers.get_splash_manager_session_uuid()
# reload proxy and splash list
# (when the manager reports a session uuid different from the stored one)
if current_session_uuid and current_session_uuid != session_uuid:
is_manager_connected = crawlers.reload_splash_and_proxies_list()
if is_manager_connected:
print('reload proxies and splash list')
if crawlers.test_ail_crawlers():
crawlers.relaunch_crawlers()
session_uuid = current_session_uuid
if not is_manager_connected:
print('Error, Can\'t connect to Splash manager')
last_check = int(time.time())
# # TODO: launch crawlers if was never connected
# refresh splash and proxy list
# NOTE(review): the condition is the constant False, so this branch never runs
elif False:
crawlers.reload_splash_and_proxies_list()
print('list of splash and proxies refreshed')
else:
time.sleep(5)
# kill/launch new crawler / crawler manager check if already launched
# # TODO: handle multiple splash_manager
# catch reload request

331
bin/crawlers/Crawler.py Executable file
View file

@ -0,0 +1,331 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from modules.abstract_module import AbstractModule
from lib import crawlers
from lib.ConfigLoader import ConfigLoader
from lib.objects.Domains import Domain
from lib.objects import Screenshots
class Crawler(AbstractModule):
    """
    Crawler module backed by Lacus.

    Takes crawler tasks from the queue, enqueues them as Lacus captures and,
    once a capture is done, saves its content (html item, screenshot, HAR and
    children captures) and updates the crawled domain.
    """

    def __init__(self):
        super().__init__(logger_channel='Crawler')

        # Waiting time in seconds between two processed messages
        self.pending_seconds = 1

        loader = ConfigLoader()
        self.r_log_submit = loader.get_redis_conn('Redis_Log_submit')

        self.default_har = loader.get_config_boolean('Crawler', 'default_har')
        self.default_screenshot = loader.get_config_boolean('Crawler', 'default_screenshot')
        self.default_depth_limit = loader.get_config_int('Crawler', 'default_depth_limit')

        # TODO: LIMIT MAX NUMBERS OF CRAWLED PAGES

        # update hardcoded blacklist
        crawlers.load_blacklist()
        # update captures cache
        crawlers.reload_crawler_captures()

        # LACUS
        self.lacus = crawlers.get_lacus()

        # State of the capture currently being saved
        self.har = None
        self.screenshot = None
        self.root_item = None
        self.har_dir = None
        self.items_dir = None
        self.domain = None

        # Send module state to logs
        self.redis_logger.info('Crawler initialized')

    def print_crawler_start_info(self, url, domain, domain_url):
        """Print a start banner followed by the url, domain and domain url."""
        banner_lines = (
            '',
            '',
            '\033[92m------------------START CRAWLER------------------\033[0m',
            f'crawler type: {domain}',
            '\033[92m-------------------------------------------------\033[0m',
            f'url: {url}',
            f'domain: {domain}',
            f'domain_url: {domain_url}',
            '',
        )
        for banner_line in banner_lines:
            print(banner_line)

    def get_message(self):
        """
        Run one scheduling step: launch a queued task if a capture slot is
        free, then check one tracked capture and save it when it is done.
        Sleeps `pending_seconds` before returning.
        """
        # Check if a new Capture can be Launched
        if crawlers.get_nb_crawler_captures() < crawlers.get_crawler_max_captures():
            queued_task = crawlers.get_crawler_task_from_queue()
            if queued_task:
                print(queued_task)
                task_uuid, priority = queued_task
                self.enqueue_capture(task_uuid, priority)

        # Check if a Capture is Done
        capture = crawlers.get_crawler_capture()
        if capture:
            print(capture)
            capture_uuid = capture[0][0]
            status = self.lacus.get_capture_status(capture_uuid)
            if status == crawlers.CaptureStatus.DONE:
                self.compute(capture_uuid)
                crawlers.remove_crawler_capture(capture_uuid)
                print('capture', capture_uuid, 'completed')
            else:  # TODO ADD GLOBAL TIMEOUT-> Save start time
                crawlers.update_crawler_capture(capture_uuid)
                print(capture_uuid, status, int(time.time()))

        time.sleep(self.pending_seconds)

    def enqueue_capture(self, task_uuid, priority):
        """
        Enqueue the task on Lacus and register the resulting capture.

        :param task_uuid: uuid of the crawler task to launch
        :param priority: task priority; any value other than 0 forces the capture
        :return: uuid of the Lacus capture
        """
        task = crawlers.get_crawler_task(task_uuid)
        print(task)
        # Example of a task:
        # task = {
        #     'uuid': task_uuid,
        #     'url': 'https://foo.be',
        #     'domain': 'foo.be',
        #     'depth': 1,
        #     'har': True,
        #     'screenshot': True,
        #     'user_agent': crawlers.get_default_user_agent(),
        #     'cookiejar': [],
        #     'header': '',
        #     'proxy': 'force_tor',
        #     'parent': 'manual',
        # }
        # TODO unpack cookiejar
        # TODO HEADER
        lacus_options = {
            'url': task['url'],
            'depth': task['depth'],
            'user_agent': task['user_agent'],
            'proxy': task['proxy'],
            'cookies': [],
            'force': priority != 0,
            'general_timeout_in_sec': 90,
        }
        capture_uuid = self.lacus.enqueue(**lacus_options)

        crawlers.add_crawler_capture(task_uuid, capture_uuid)
        print(task_uuid, capture_uuid, 'launched')
        return capture_uuid

    # CRAWL DOMAIN
    # TODO: CATCH ERRORS
    def compute(self, capture_uuid):
        """
        Save a finished capture: fetch it from Lacus, store its content, then
        update the domain date range, origin and history and clear the task.

        :param capture_uuid: uuid of the finished Lacus capture
        """
        print('saving capture', capture_uuid)

        task_uuid = crawlers.get_crawler_capture_task_uuid(capture_uuid)
        task = crawlers.get_crawler_task(task_uuid)

        print(task['domain'])
        self.domain = Domain(task['domain'])

        # TODO CHANGE EPOCH
        epoch = int(time.time())
        parent_id = task['parent']
        print(task)

        capture = self.lacus.get_capture(capture_uuid)
        print(capture['status'])

        self.har = task['har']
        self.screenshot = task['screenshot']
        capture_date = crawlers.get_current_date(separator=True)
        self.har_dir = crawlers.get_date_har_dir(capture_date)
        self.items_dir = crawlers.get_date_crawled_items_source(capture_date)
        self.root_item = None

        # Save Capture
        self.save_capture_response(parent_id, capture)

        self.domain.update_daterange(capture_date.replace('/', ''))
        # Origin + History
        if self.root_item:
            # domain.add_ports(port)
            self.domain.set_last_origin(parent_id)
            self.domain.add_history(epoch, root_item=self.root_item)
        elif self.domain.was_up():
            # No item saved: the epoch itself is stored as root item
            self.domain.add_history(epoch, root_item=epoch)

        crawlers.update_last_crawled_domain(self.domain.get_domain_type(), self.domain.id, epoch)
        crawlers.clear_crawler_task(task_uuid, self.domain.get_domain_type())

    def save_capture_response(self, parent_id, entries):
        """
        Save one capture entry and, recursively, its children.

        :param parent_id: id of the parent of the item created for this entry
        :param entries: capture dict; the keys read here are 'error', 'html',
                        'last_redirected_url', 'png', 'har' and 'children'
        """
        print(entries.keys())
        if 'error' in entries:
            # TODO IMPROVE ERROR MESSAGE
            self.redis_logger.warning(str(entries['error']))
            print(entries['error'])
            if entries.get('html'):
                print('retrieved content')
                # print(entries.get('html'))

        # TODO LOGS IF != domain
        redirected_url = entries.get('last_redirected_url')
        if redirected_url:
            last_url = redirected_url
            current_domain = crawlers.unpack_url(last_url)['domain']
            # REDIRECTION TODO CHECK IF WEB
            if current_domain != self.domain.id and not self.root_item:
                notice = f'External redirection {self.domain.id} -> {current_domain}'
                self.redis_logger.warning(notice)
                print(notice)
                # No root item saved yet: continue under the redirected domain
                self.domain = Domain(current_domain)
        # TODO LAST URL
        # FIXME
        else:
            last_url = f'http://{self.domain.id}'

        html = entries.get('html')
        if html:
            item_id = crawlers.create_item_id(self.items_dir, self.domain.id)
            print(item_id)
            gzip64encoded = crawlers.get_gzipped_b64_item(item_id, html)
            # send item to Global
            self.send_message_to_queue(f'{item_id} {gzip64encoded}', 'Mixer')
            # increase nb of paste by feeder name
            self.r_log_submit.hincrby('mixer_cache:list_feeder', 'crawler', 1)

            # Tag
            self.send_message_to_queue(f'infoleak:submission="crawler";{item_id}', 'Tags')

            crawlers.create_item_metadata(item_id, self.domain.id, last_url, parent_id)
            if self.root_item is None:
                self.root_item = item_id
            parent_id = item_id

            # NOTE(review): screenshot and HAR saving are kept inside the html
            # branch because both need item_id, which is only created here.
            # SCREENSHOT
            if self.screenshot and entries.get('png'):
                screenshot = Screenshots.create_screenshot(entries['png'], b64=False)
                if screenshot:
                    # Create Correlations
                    screenshot.add_correlation('item', '', item_id)
                    screenshot.add_correlation('domain', '', self.domain.id)
            # HAR
            if self.har and entries.get('har'):
                crawlers.save_har(self.har_dir, item_id, entries['har'])

        # Next Children
        for child in entries.get('children') or []:
            self.save_capture_response(parent_id, child)
# Script entry point: run the Crawler module with debug enabled.
if __name__ == '__main__':
    crawler_module = Crawler()
    crawler_module.debug = True
    # module.compute(('ooooo', 0))
    crawler_module.run()
##################################
##################################
##################################
##################################
##################################
# from Helper import Process
# from pubsublogger import publisher
# ======== FUNCTIONS ========
# def update_auto_crawler():
# current_epoch = int(time.time())
# list_to_crawl = redis_crawler.zrangebyscore('crawler_auto_queue', '-inf', current_epoch)
# for elem_to_crawl in list_to_crawl:
# mess, type = elem_to_crawl.rsplit(';', 1)
# redis_crawler.sadd('{}_crawler_priority_queue'.format(type), mess)
# redis_crawler.zrem('crawler_auto_queue', elem_to_crawl)
# Extract info from url (url, domain, domain url, ...)
# def unpack_url(url):
# to_crawl = {}
# faup.decode(url)
# url_unpack = faup.get()
# to_crawl['domain'] = to_crawl['domain'].lower()
# new_url_host = url_host.lower()
# url_lower_case = url.replace(url_host, new_url_host, 1)
#
# if url_unpack['scheme'] is None:
# to_crawl['scheme'] = 'http'
# url= 'http://{}'.format(url_lower_case)
# else:
# try:
# scheme = url_unpack['scheme'].decode()
# except Exception as e:
# scheme = url_unpack['scheme']
# if scheme in default_proto_map:
# to_crawl['scheme'] = scheme
# url = url_lower_case
# else:
# redis_crawler.sadd('new_proto', '{} {}'.format(scheme, url_lower_case))
# to_crawl['scheme'] = 'http'
# url= 'http://{}'.format(url_lower_case.replace(scheme, '', 1))
#
# if url_unpack['port'] is None:
# to_crawl['port'] = default_proto_map[to_crawl['scheme']]
# else:
# try:
# port = url_unpack['port'].decode()
# except:
# port = url_unpack['port']
# # Verify port number #################### make function to verify/correct port number
# try:
# int(port)
# # Invalid port Number
# except Exception as e:
# port = default_proto_map[to_crawl['scheme']]
# to_crawl['port'] = port
#
# #if url_unpack['query_string'] is None:
# # if to_crawl['port'] == 80:
# # to_crawl['url']= '{}://{}'.format(to_crawl['scheme'], url_unpack['host'].decode())
# # else:
# # to_crawl['url']= '{}://{}:{}'.format(to_crawl['scheme'], url_unpack['host'].decode(), to_crawl['port'])
# #else:
# # to_crawl['url']= '{}://{}:{}{}'.format(to_crawl['scheme'], url_unpack['host'].decode(), to_crawl['port'], url_unpack['query_string'].decode())
#
# to_crawl['url'] = url
# if to_crawl['port'] == 80:
# to_crawl['domain_url'] = '{}://{}'.format(to_crawl['scheme'], new_url_host)
# else:
# to_crawl['domain_url'] = '{}://{}:{}'.format(to_crawl['scheme'], new_url_host, to_crawl['port'])
#
# try:
# to_crawl['tld'] = url_unpack['tld'].decode()
# except:
# to_crawl['tld'] = url_unpack['tld']
#
# return to_crawl
# ##################################################### add ftp ???
# update_auto_crawler()
# # add next auto Crawling in queue:
# if to_crawl['paste'] == 'auto':
# redis_crawler.zadd('crawler_auto_queue', int(time.time()+crawler_config['crawler_options']['time']) , '{};{}'.format(to_crawl['original_message'], to_crawl['type_service']))
# # update list, last auto crawled domains
# redis_crawler.lpush('last_auto_crawled', '{}:{};{}'.format(url_data['domain'], url_data['port'], date['epoch']))
# redis_crawler.ltrim('last_auto_crawled', 0, 9)
#

View file

@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
from packages import Paste
from lib.objects.Items import Item
from Helper import Process
import os
@ -12,11 +12,13 @@ import configparser
from collections import defaultdict
# TODO FIX ME OR REMOVE ME
def get_dict_cve(list_paste_cve, only_one_same_cve_by_paste=False):
dict_keyword = {}
for paste_cve in list_paste_cve:
paste_content = Paste.Paste(paste_cve).get_p_content()
paste_content = Item(paste_cve).get_content()
cve_list = reg_cve.findall(paste_content)
if only_one_same_cve_by_paste:

View file

@ -35,17 +35,17 @@ class ConfigLoader(object):
else:
self.cfg.read(default_config_file)
def get_redis_conn(self, redis_name, decode_responses=True): ## TODO: verify redis name
return redis.StrictRedis( host=self.cfg.get(redis_name, "host"),
def get_redis_conn(self, redis_name, decode_responses=True):
return redis.StrictRedis(host=self.cfg.get(redis_name, "host"),
port=self.cfg.getint(redis_name, "port"),
db=self.cfg.getint(redis_name, "db"),
decode_responses=decode_responses )
decode_responses=decode_responses)
def get_db_conn(self, db_name, decode_responses=True): ## TODO: verify redis name
return redis.StrictRedis( host=self.cfg.get(db_name, "host"),
def get_db_conn(self, db_name, decode_responses=True):
return redis.StrictRedis(host=self.cfg.get(db_name, "host"),
port=self.cfg.getint(db_name, "port"),
password=self.cfg.get(db_name, "password"),
decode_responses=decode_responses )
decode_responses=decode_responses)
def get_files_directory(self, key_name):
directory_path = self.cfg.get('Directories', key_name)
@ -79,3 +79,33 @@ class ConfigLoader(object):
return all_keys_values
else:
return []
# # # # Directory Config # # # #
# Resolve the items, HAR and screenshot directories once, when the module is imported.
# NOTE(review): indentation was lost in this view; which statements sit under
# each `if` must be confirmed against the original file.
config_loader = ConfigLoader()
# Items directory, read from the "pastes" key of the "Directories" section
ITEMS_FOLDER = config_loader.get_config_str("Directories", "pastes")
# The value 'PASTES' is resolved against $AIL_HOME
if ITEMS_FOLDER == 'PASTES':
ITEMS_FOLDER = os.path.join(os.environ['AIL_HOME'], ITEMS_FOLDER)
ITEMS_FOLDER = ITEMS_FOLDER + '/'
# Canonicalise the path; joining with '' keeps a trailing separator
ITEMS_FOLDER = os.path.join(os.path.realpath(ITEMS_FOLDER), '')
# HAR directory; the value 'CRAWLED_SCREENSHOT' is resolved against $AIL_HOME
HARS_DIR = config_loader.get_files_directory('har')
if HARS_DIR == 'CRAWLED_SCREENSHOT':
HARS_DIR = os.path.join(os.environ['AIL_HOME'], HARS_DIR)
# Screenshots directory; the value 'CRAWLED_SCREENSHOT/screenshot' is resolved against $AIL_HOME
SCREENSHOTS_FOLDER = config_loader.get_files_directory('screenshot')
if SCREENSHOTS_FOLDER == 'CRAWLED_SCREENSHOT/screenshot':
SCREENSHOTS_FOLDER = os.path.join(os.environ['AIL_HOME'], SCREENSHOTS_FOLDER)
# Drop the module-level reference to the loader once the paths are resolved
config_loader = None
# Return the module-level HARS_DIR value.
def get_hars_dir():
return HARS_DIR
# Return the module-level ITEMS_FOLDER value.
def get_items_dir():
return ITEMS_FOLDER
# Return the module-level SCREENSHOTS_FOLDER value.
def get_screenshots_dir():
return SCREENSHOTS_FOLDER

View file

@ -58,7 +58,6 @@ def get_item_stats_nb_by_date():
def _set_item_stats_nb_by_date(date, source):
return r_statistics.zrange(f'providers_set_{date}', )
# # TODO: load ZSET IN CACHE => FAST UPDATE
def update_item_stats_size_nb(item_id, source, size, date):
# Add/Update in Redis
@ -106,7 +105,7 @@ def update_module_stats(module_name, num, keyword, date):
# check if this keyword is eligible for progression
keyword_total_sum = 0
curr_value = r_statistics.hget(date, module+'-'+keyword)
curr_value = r_statistics.hget(date, f'{module_name}-{keyword}')
keyword_total_sum += int(curr_value) if curr_value is not None else 0
if r_statistics.zcard(f'top_{module_name}_set_{date}') < PIE_CHART_MAX_CARDINALITY:

View file

@ -22,7 +22,7 @@ def get_ail_uuid():
# # TODO: check change paste => item
def get_all_objects():
return ['domain', 'item', 'pgp', 'cryptocurrency', 'decoded', 'screenshot', 'username']
return ['cve', 'domain', 'item', 'pgp', 'cryptocurrency', 'decoded', 'screenshot', 'username']
def get_object_all_subtypes(obj_type):
if obj_type == 'cryptocurrency':

View file

@ -43,12 +43,13 @@ config_loader = None
CORRELATION_TYPES_BY_OBJ = {
"cryptocurrency" : ["domain", "item"],
"decoded" : ["domain", "item"],
"domain": ["cryptocurrency", "decoded", "item", "pgp", "username", "screenshot"],
"item": ["cryptocurrency", "decoded", "domain", "pgp", "username", "screenshot"],
"cve": ["domain", "item"],
"decoded": ["domain", "item"],
"domain": ["cve", "cryptocurrency", "decoded", "item", "pgp", "username", "screenshot"],
"item": ["cve", "cryptocurrency", "decoded", "domain", "pgp", "username", "screenshot"],
"pgp" : ["domain", "item"],
"username" : ["domain", "item"],
"screenshot" : ["domain", "item"],
"username": ["domain", "item"],
"screenshot": ["domain", "item"],
}
def get_obj_correl_types(obj_type):

File diff suppressed because it is too large Load diff

View file

@ -8,7 +8,7 @@ sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
import ConfigLoader
config_loader = ConfigLoader.ConfigLoader()
r_serv_db = config_loader.get_db_conn("Kvrocks_DB")
r_serv_db = config_loader.get_db_conn("Kvrocks_Objects")
config_loader = None
def get_first_object_date(object_type, subtype, field=''):
@ -24,15 +24,15 @@ def get_last_object_date(object_type, subtype, field=''):
return int(last_date)
def _set_first_object_date(object_type, subtype, date, field=''):
return r_serv_db.zadd('objs:first_date', f'{object_type}:{subtype}:{field}', date)
return r_serv_db.zadd('objs:first_date', {f'{object_type}:{subtype}:{field}': date})
def _set_last_object_date(object_type, subtype, date, field=''):
return r_serv_db.zadd('objs:last_date', f'{object_type}:{subtype}:{field}', date)
return r_serv_db.zadd('objs:last_date', {f'{object_type}:{subtype}:{field}': float(date)})
def update_first_object_date(object_type, subtype, date, field=''):
first_date = get_first_object_date(object_type, subtype, field=field)
if int(date) < first_date:
_set_first_object_date(object_typel, subtype, date, field=field)
_set_first_object_date(object_type, subtype, date, field=field)
return date
else:
return first_date

View file

@ -7,15 +7,15 @@ import gzip
import magic
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
import Tag
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib import ConfigLoader
from lib import Tag
config_loader = ConfigLoader.ConfigLoader()
# get and sanityze PASTE DIRECTORY
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
PASTES_FOLDER = os.path.join(os.path.realpath(PASTES_FOLDER), '')
r_cache = config_loader.get_redis_conn("Redis_Cache")
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
config_loader = None
@ -28,15 +28,15 @@ def exist_item(item_id):
return False
def get_item_filepath(item_id):
filename = os.path.join(PASTES_FOLDER, item_id)
filename = os.path.join(ConfigLoader.get_items_dir(), item_id)
return os.path.realpath(filename)
def get_item_date(item_id, add_separator=False):
l_directory = item_id.split('/')
l_dir = item_id.split('/')
if add_separator:
return '{}/{}/{}'.format(l_directory[-4], l_directory[-3], l_directory[-2])
return f'{l_dir[-4]}/{l_dir[-3]}/{l_dir[-2]}'
else:
return '{}{}{}'.format(l_directory[-4], l_directory[-3], l_directory[-2])
return f'{l_dir[-4]}{l_dir[-3]}{l_dir[-2]}'
def get_basename(item_id):
return os.path.basename(item_id)
@ -53,17 +53,17 @@ def get_item_domain(item_id):
return item_id[19:-36]
def get_item_content_binary(item_id):
item_full_path = os.path.join(PASTES_FOLDER, item_id)
item_full_path = os.path.join(ConfigLoader.get_items_dir(), item_id)
try:
with gzip.open(item_full_path, 'rb') as f:
item_content = f.read()
except Exception as e:
print(e)
item_content = ''
item_content = b''
return item_content
def get_item_content(item_id):
item_full_path = os.path.join(PASTES_FOLDER, item_id)
item_full_path = os.path.join(ConfigLoader.get_items_dir(), item_id)
try:
item_content = r_cache.get(item_full_path)
except UnicodeDecodeError:
@ -84,7 +84,7 @@ def get_item_content(item_id):
def get_item_mimetype(item_id):
return magic.from_buffer(get_item_content(item_id), mime=True)
#### TREE CHILD/FATHER ####
# # # # TREE CHILD/FATHER # # # #
def is_father(item_id):
return r_serv_metadata.exists('paste_children:{}'.format(item_id))
@ -127,6 +127,18 @@ def is_domain_root(item_id):
def get_item_url(item_id):
return r_serv_metadata.hget(f'paste_metadata:{item_id}', 'real_link')
def get_item_har(item_id):
    """
    Return the HAR path of an item, relative to the HAR directory.

    The relative path is built from the last four '/'-separated components of
    the item id, with '.json' appended.

    :return: the relative HAR path if the file exists, else None
    """
    id_tail = item_id.rsplit('/')[-4:]
    relative_har = '/'.join(id_tail) + '.json'
    if os.path.isfile(os.path.join(ConfigLoader.get_hars_dir(), relative_har)):
        return relative_har
    return None
def get_item_har_content(har):
    """
    Read a HAR file and return its raw content.

    :param har: path of the file to open
    :return: file content as bytes
    """
    with open(har, 'rb') as har_file:
        return har_file.read()
def get_nb_children(item_id):
return r_serv_metadata.scard('paste_children:{}'.format(item_id))
@ -140,14 +152,14 @@ def get_item_children(item_id):
# # TODO: handle domain last origin in domain lib
def _delete_node(item_id):
# only if item isn't deleted
#if is_crawled(item_id):
# if is_crawled(item_id):
# r_serv_metadata.hrem('paste_metadata:{}'.format(item_id), 'real_link')
for children_id in get_item_children(item_id):
r_serv_metadata.hdel('paste_metadata:{}'.format(children_id), 'father')
r_serv_metadata.delete('paste_children:{}'.format(item_id))
# delete regular
# simple if leaf
# simple if leaf
# delete item node
@ -210,7 +222,7 @@ def _get_dir_source_name(directory, source_name=None, l_sources_name=set(), filt
else:
for src_name in l_dir:
if len(src_name) == 4:
#try:
# try:
int(src_name)
to_add = os.path.join(source_name)
# filter sources, remove first directory
@ -218,7 +230,7 @@ def _get_dir_source_name(directory, source_name=None, l_sources_name=set(), filt
to_add = to_add.replace('archive/', '').replace('alerts/', '')
l_sources_name.add(to_add)
return l_sources_name
#except:
# except:
# pass
if source_name:
src_name = os.path.join(source_name, src_name)
@ -227,7 +239,7 @@ def _get_dir_source_name(directory, source_name=None, l_sources_name=set(), filt
def get_all_items_sources(filter_dir=False, r_list=False):
res = _get_dir_source_name(PASTES_FOLDER, filter_dir=filter_dir)
res = _get_dir_source_name(ConfigLoader.get_items_dir(), filter_dir=filter_dir)
if res:
if r_list:
res = list(res)

View file

@ -52,9 +52,9 @@ class CryptoCurrency(AbstractSubtypeObject):
def get_link(self, flask_context=False):
if flask_context:
url = url_for('correlation.show_correlation', object_type=self.type, type_id=self.subtype, correlation_id=self.id)
url = url_for('correlation.show_correlation', type=self.type, subtype=self.subtype, id=self.id)
else:
url = f'{baseurl}/correlation/show_correlation?object_type={self.type}&type_id={self.subtype}&correlation_id={self.id}'
url = f'{baseurl}/correlation/show?type={self.type}&subtype={self.subtype}&id={self.id}'
return url
def get_svg_icon(self):
@ -89,7 +89,11 @@ class CryptoCurrency(AbstractSubtypeObject):
return obj
def get_meta(self, options=set()):
return self._get_meta()
meta = self._get_meta()
meta['id'] = self.id
meta['subtype'] = self.subtype
meta['tags'] = self.get_tags()
return meta

99
bin/lib/objects/Cves.py Executable file
View file

@ -0,0 +1,99 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
from flask import url_for
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.ConfigLoader import ConfigLoader
from lib.objects.abstract_daterange_object import AbstractDaterangeObject
from packages import Date
config_loader = ConfigLoader()
r_objects = config_loader.get_db_conn("Kvrocks_Objects")
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
config_loader = None
################################################################################
################################################################################
################################################################################
# # TODO: COMPLETE CLASS
# # TODO: COMPLETE CLASS
class Cve(AbstractDaterangeObject):
    """
    AIL Cve Object.
    """

    def __init__(self, id):
        super().__init__('cve', id)

    # def get_ail_2_ail_payload(self):
    #     payload = {'raw': self.get_gzip_content(b64=True),
    #                'compress': 'gzip'}
    #     return payload

    # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
    def delete(self):
        # # TODO:
        pass

    def get_link(self, flask_context=False):
        """
        Return the link to the correlation view of this CVE.

        :param flask_context: if True build the url with flask `url_for`,
                              otherwise prefix it with the configured base url
        """
        if not flask_context:
            return f'{baseurl}/correlation/show?type={self.type}&id={self.id}'
        return url_for('correlation.show_correlation', type=self.type, id=self.id)

    # TODO # CHANGE COLOR
    def get_svg_icon(self):
        """Return the icon description (style, icon, color, radius) of a CVE."""
        return {'style': 'fas', 'icon': '\uf188', 'color': '#1E88E5', 'radius': 5}

    # TODO # TODO # TODO # TODO # TODO # TODO # TODO # TODO
    def get_misp_object(self):
        pass

    def get_meta(self, options=set()):
        """
        Return the object metadata, completed with its id, subtype and tags.

        :param options: optional meta fields, forwarded to `_get_meta`
        """
        meta = self._get_meta(options=options)
        meta.update({'id': self.id,
                     'subtype': self.subtype,
                     'tags': self.get_tags()})
        return meta

    def add(self, date, item_id):
        """Register this CVE as seen in `item_id` at `date` (delegates to `_add`)."""
        self._add(date, item_id)
# TODO # TODO # TODO # TODO # TODO # TODO # TODO # TODO # TODO # TODO
def get_all_cves():
    """Return all CVEs; not implemented yet, always returns a new empty list."""
    return []
def get_cves_by_date(date):
    """
    Return the set of CVE ids stored for a date.

    :param date: date used in the `cve:date:<date>` hash key
    """
    # return r_objects.zrange(f'cve:date:{date}', 0, -1)
    cve_ids = r_objects.hkeys(f'cve:date:{date}')
    return set(cve_ids)
def get_cves_by_daterange(date_from, date_to):
    """
    Return the set of CVE ids seen on any date between two dates.

    :param date_from: first date of the range, passed to `Date.substract_date`
    :param date_to: last date of the range, passed to `Date.substract_date`
    :return: union of `get_cves_by_date` over every date of the range
    """
    cves = set()
    for date in Date.substract_date(date_from, date_to):
        # In-place union: `cves | ...` built a new set and discarded it, so the
        # function always returned an empty set.
        cves |= get_cves_by_date(date)
    return cves
def get_cves_meta(cves_id, options=set()):
    """
    Return the metadata of several CVEs.

    :param cves_id: iterable of CVE ids
    :param options: optional meta fields, forwarded to `Cve.get_meta`
    :return: dict mapping each CVE id to its metadata
    """
    return {cve_id: Cve(cve_id).get_meta(options=options) for cve_id in cves_id}
def api_get_cves_meta_by_daterange(date_from, date_to):
    """
    Return the metadata, with the 'sparkline' option, of the CVEs seen in a
    date range; the range is first sanitised with `Date.sanitise_date_range`.
    """
    date_range = Date.sanitise_date_range(date_from, date_to)
    cves_id = get_cves_by_daterange(date_range['date_from'], date_range['date_to'])
    return get_cves_meta(cves_id, options=['sparkline'])
# if __name__ == '__main__':

View file

@ -69,9 +69,9 @@ class Decoded(AbstractObject):
def get_link(self, flask_context=False):
if flask_context:
url = url_for('correlation.show_correlation', object_type="decoded", correlation_id=self.id)
url = url_for('correlation.show_correlation', type="decoded", id=self.id)
else:
url = f'{baseurl}/correlation/show_correlation?object_type={self.type}&correlation_id={self.id}'
url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}'
return url
def get_svg_icon(self):
@ -90,7 +90,7 @@ class Decoded(AbstractObject):
return {'style': 'fas', 'icon': icon, 'color': '#88CCEE', 'radius':5}
'''
Return the estimed type of a given decoded item.
Return the estimated type of a given decoded item.
:param sha1_string: sha1_string
'''
@ -170,8 +170,11 @@ class Decoded(AbstractObject):
if date > last_seen:
self.set_last_seen(date)
def get_meta(self):
pass
def get_meta(self, options=set()):
meta = {'id': self.id,
'subtype': self.subtype,
'tags': self.get_tags()}
return meta
def get_meta_vt(self):
meta = {}
@ -209,7 +212,7 @@ class Decoded(AbstractObject):
def is_seen_this_day(self, date):
for decoder in get_decoders_names():
if r_metadata.zscore(f'{decoder_name}_date:{date}', self.id):
if r_metadata.zscore(f'{decoder}_date:{date}', self.id):
return True
return False
@ -324,6 +327,9 @@ class Decoded(AbstractObject):
#######################################################################################
#######################################################################################
def is_vt_enabled(self):
return VT_ENABLED
def set_vt_report(self, report):
r_metadata.hset(f'metadata_hash:{self.id}', 'vt_report', report)
@ -354,7 +360,6 @@ class Decoded(AbstractObject):
print(report)
return report
elif response.status_code == 403:
Flask_config.vt_enabled = False
return 'Virustotal key is incorrect (e.g. for public API not for virustotal intelligence), authentication failed'
elif response.status_code == 204:
return 'Rate Limited'

View file

@ -4,18 +4,31 @@
import os
import sys
import time
import zipfile
from datetime import datetime
from flask import url_for
from io import BytesIO
from pymisp import MISPObject
sys.path.append(os.environ['AIL_BIN'])
from lib.ConfigLoader import ConfigLoader
##################################
# Import Project packages
##################################
from lib import ConfigLoader
from lib.objects.abstract_object import AbstractObject
from lib.item_basic import get_item_children, get_item_date, get_item_url
from lib.item_basic import get_item_children, get_item_date, get_item_url, get_item_har
from lib import data_retention_engine
config_loader = ConfigLoader()
r_onion = config_loader.get_redis_conn("ARDB_Onion")
from packages import Date
config_loader = ConfigLoader.ConfigLoader()
r_crawler = config_loader.get_db_conn("Kvrocks_Crawler")
r_metadata = config_loader.get_redis_conn("ARDB_Metadata") ######################################
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
config_loader = None
@ -42,36 +55,37 @@ class Domain(AbstractObject):
if str(self.id).endswith('.onion'):
return 'onion'
else:
return 'regular'
return 'web'
def exists(self):
return r_onion.exists(f'{self.domain_type}_metadata:{self.id}')
return r_crawler.exists(f'domain:meta:{self.id}')
def get_first_seen(self, r_int=False, separator=True):
first_seen = r_onion.hget(f'{self.domain_type}_metadata:{self.id}', 'first_seen')
first_seen = r_crawler.hget(f'domain:meta:{self.id}', 'first_seen')
if first_seen:
if separator:
first_seen = f'{first_seen[0:4]}/{first_seen[4:6]}/{first_seen[6:8]}'
elif r_int==True:
if r_int:
first_seen = int(first_seen)
elif separator:
first_seen = f'{first_seen[0:4]}/{first_seen[4:6]}/{first_seen[6:8]}'
return first_seen
def get_last_check(self, r_int=False, separator=True):
last_check = r_onion.hget(f'{self.domain_type}_metadata:{self.id}', 'last_check')
last_check = r_crawler.hget(f'domain:meta:{self.id}', 'last_check')
if last_check is not None:
if separator:
last_check = f'{last_check[0:4]}/{last_check[4:6]}/{last_check[6:8]}'
elif r_format=="int":
if r_int:
last_check = int(last_check)
elif separator:
last_check = f'{last_check[0:4]}/{last_check[4:6]}/{last_check[6:8]}'
return last_check
def _set_first_seen(self, date):
r_onion.hset(f'{self.domain_type}_metadata:{self.id}', 'first_seen', date)
r_crawler.hset(f'domain:meta:{self.id}', 'first_seen', date)
def _set_last_check(self, date):
r_onion.hset(f'{self.domain_type}_metadata:{self.id}', 'last_check', date)
r_crawler.hset(f'domain:meta:{self.id}', 'last_check', date)
def update_daterange(self, date):
date = int(date)
first_seen = self.get_first_seen(r_int=True)
last_check = self.get_last_check(r_int=True)
if not first_seen:
@ -82,65 +96,101 @@ class Domain(AbstractObject):
elif int(last_check) < date:
self._set_last_check(date)
def get_last_origin(self):
return r_onion.hget(f'{self.domain_type}_metadata:{self.id}', 'paste_parent')
def get_last_origin(self, obj=False):
origin = {'item': r_crawler.hget(f'domain:meta:{self.id}', 'last_origin')}
if obj and origin['item']:
if origin['item'] != 'manual' and origin['item'] != 'auto':
item_id = origin['item']
origin['domain'] = r_metadata.hget(f'paste_metadata:{item_id}', 'domain')
origin['url'] = r_metadata.hget(f'paste_metadata:{item_id}', 'url')
return origin
def set_last_origin(self, origin_id):
r_onion.hset(f'{self.domain_type}_metadata:{self.id}', 'paste_parent', origin_id)
r_crawler.hset(f'domain:meta:{self.id}', 'last_origin', origin_id)
def is_up(self, ports=[]):
if not ports:
ports = self.get_ports()
for port in ports:
res = r_onion.zrevrange(f'crawler_history_{self.domain_type}:{self.id}:{port}', 0, 0, withscores=True)
if res:
item_core, epoch = res[0]
try:
epoch = int(item_core)
except:
print('True')
return True
print('False')
def is_up(self):
    """
    Return True if the latest history entry of the domain is not an integer.

    Only the highest-scored member of `domain:history:<id>` is read; a member
    that cannot be parsed as an int counts as "up".
    """
    latest = r_crawler.zrevrange(f'domain:history:{self.id}', 0, 0, withscores=True)
    if not latest:
        return False
    root_item, _ = latest[0]
    try:
        int(root_item)
    except ValueError:
        return True
    return False
def was_up(self):
return r_onion.hexists(f'{self.domain_type}_metadata:{self.id}', 'ports')
return r_crawler.exists(f'domain:history:{self.id}')
def is_up_by_month(self, date_month):
    """
    Return whether the domain id is a member of the "up" set of a month.

    :param date_month: month used in the `month_<domain_type>_up:<month>` key
    :return: False when the key does not exist, else the membership result
    """
    # FIXME DIRTY PATCH
    month_key = f'month_{self.domain_type}_up:{date_month}'
    if not r_crawler.exists(month_key):
        return False
    return r_crawler.sismember(month_key, self.get_id())
def is_up_this_month(self):
    """Return `is_up_by_month` for the current month, formatted as YYYYMM."""
    return self.is_up_by_month(datetime.now().strftime("%Y%m"))
def is_down_by_day(self, date):
    """
    Return whether the domain id is a member of the "down" set of a day.

    :param date: day used in the `<domain_type>_down:<date>` key
    :return: False when the key does not exist, else the membership result
    """
    # FIXME DIRTY PATCH
    day_key = f'{self.domain_type}_down:{date}'
    if not r_crawler.exists(day_key):
        return False
    return r_crawler.sismember(day_key, self.id)
def is_down_today(self):
    """Return `is_down_by_day` for the current day, formatted as YYYYMMDD."""
    return self.is_down_by_day(datetime.now().strftime("%Y%m%d"))
def is_up_by_epoch(self, epoch):
    """
    Return True if the history entry scored exactly `epoch` is not an integer.

    :param epoch: crawl epoch, cast to int and used as both score bounds
    :return: False when no entry has this score or when the first entry parses
             as an int, True otherwise
    """
    score = int(epoch)
    matches = r_crawler.zrevrangebyscore(f'domain:history:{self.id}', score, score)
    if not matches:
        return False
    try:
        int(matches[0])
    except ValueError:
        return True
    return False
def get_ports(self, r_set=False):
l_ports = r_onion.hget(f'{self.domain_type}_metadata:{self.id}', 'ports')
l_ports = r_crawler.hget(f'domain:meta:{self.id}', 'ports')
if l_ports:
l_ports = l_ports.split(";")
if r_set:
return set(l_ports)
else:
return l_ports
return []
else:
l_ports = []
if r_set:
return set(l_ports)
else:
return l_ports
def _set_ports(self, ports):
ports = ';'.join(ports)
r_onion.hset(f'{self.domain_type}_metadata:{self.id}', 'ports', ports)
ports = ';'.join(str(p) for p in ports)
r_crawler.hset(f'domain:meta:{self.id}', 'ports', ports)
def add_ports(self, port):
ports = self.get_ports(r_set=True)
ports.add(port)
self._set_ports(ports)
def get_history_by_port(self, port, status=False, root=False):
'''
def get_history(self, status=False, root=False):
"""
Return .
:return:
:rtype: list of tuple (item_core, epoch)
'''
history_tuple = r_onion.zrange(f'crawler_history_{self.domain_type}:{self.id}:{port}', 0, -1, withscores=True)
"""
history_tuple = r_crawler.zrange(f'domain:history:{self.id}', 0, -1, withscores=True)
history = []
for root_id, epoch in history_tuple:
dict_history = {}
epoch = int(epoch) # force int
epoch = int(epoch) # force int
dict_history["epoch"] = epoch
dict_history["date"] = time.strftime('%Y/%m/%d - %H:%M.%S', time.gmtime(epoch_val))
dict_history["date"] = time.strftime('%Y/%m/%d - %H:%M.%S', time.gmtime(epoch))
try:
int(root_item)
int(root_id)
if status:
dict_history['status'] = False
except ValueError:
@ -152,30 +202,31 @@ class Domain(AbstractObject):
return history
def get_languages(self):
return r_onion.smembers(f'domain:language:{self.id}')
return r_crawler.smembers(f'domain:language:{self.id}')
def get_meta_keys(self):
return ['type', 'first_seen', 'last_check', 'last_origin', 'ports', 'status', 'tags', 'languages']
# options: set of optional meta fields
def get_meta(self, options=set()):
meta = {}
meta['type'] = self.domain_type
meta['first_seen'] = self.get_first_seen()
meta['last_check'] = self.get_last_check()
meta['tags'] = self.get_tags(r_list=True)
meta['ports'] = self.get_ports()
meta['status'] = self.is_up(ports=meta['ports'])
meta = {'type': self.domain_type,
'id': self.id,
'domain': self.id, # TODO Remove me -> Fix templates
'first_seen': self.get_first_seen(),
'last_check': self.get_last_check(),
'tags': self.get_tags(r_list=True),
'status': self.is_up()
}
# meta['ports'] = self.get_ports()
if 'last_origin' in options:
meta['last_origin'] = self.get_last_origin()
#meta['is_tags_safe'] = ##################################
meta['last_origin'] = self.get_last_origin(obj=True)
# meta['is_tags_safe'] = ##################################
if 'languages' in options:
meta['languages'] = self.get_languages()
#meta['screenshot'] =
# meta['screenshot'] =
return meta
# # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
def delete(self):
# # TODO:
@ -196,12 +247,12 @@ class Domain(AbstractObject):
else:
style = 'fab'
icon = '\uf13b'
return {'style': style, 'icon': icon, 'color':color, 'radius':5}
return {'style': style, 'icon': icon, 'color': color, 'radius': 5}
def is_crawled_item(self, item_id):
domain_lenght = len(self.id)
if len(item_id) > (domain_lenght+48):
if item_id[-36-domain_lenght:-36] == self.id:
domain_length = len(self.id)
if len(item_id) > (domain_length+48):
if item_id[-36-domain_length:-36] == self.id:
return True
return False
@ -215,169 +266,231 @@ class Domain(AbstractObject):
for item_id in get_item_children(root_id):
if self.is_crawled_item(item_id):
crawled_items.append(item_id)
crawled_items.extend(self.get_crawled_items_children(self.id, item_id))
crawled_items.extend(self.get_crawled_items_children(item_id))
return crawled_items
def get_all_urls(self, date=False): ## parameters to add first_seen/last_seen ??????????????????????????????
def get_last_item_root(self):
    """
    Return the root item of the highest-scored (most recent) history entry.

    :return: root item id, or None when the history is empty or the latest
             member is a plain integer (placeholder stored instead of a root item)
    """
    latest = r_crawler.zrevrange(f'domain:history:{self.id}', 0, 0, withscores=True)
    if not latest:
        return None
    member = latest[0][0]
    try:
        int(member)
    except ValueError:
        # non-numeric member -> real root item id
        return member
    return None
def get_item_root_by_epoch(self, epoch):
    """
    Return the root item of the history entry whose score equals `epoch`.

    :param epoch: crawl timestamp (converted with int())
    :return: root item id, or None when no entry matches or the member is a
             plain integer (placeholder stored instead of a root item)
    """
    timestamp = int(epoch)
    matches = r_crawler.zrevrangebyscore(f'domain:history:{self.id}', timestamp, timestamp, withscores=True)
    if not matches:
        return None
    member = matches[0][0]
    try:
        int(member)
    except ValueError:
        # non-numeric member -> real root item id
        return member
    return None
def get_crawled_items_by_epoch(self, epoch=None):
    """
    Return the items crawled from the root item of one crawl.

    :param epoch: crawl timestamp; when falsy (None or 0) the latest crawl is used
    :return: result of get_crawled_items() for the selected root item,
             or None when no root item is available
    """
    root_item = self.get_item_root_by_epoch(epoch) if epoch else self.get_last_item_root()
    if not root_item:
        return None
    return self.get_crawled_items(root_item)
# TODO FIXME
def get_all_urls(self, date=False, epoch=None):
if date:
urls = {}
else:
urls = set()
for port in self.get_ports():
for history in self.get_history_by_port(port, root=True):
if history.get('root'):
for item_id in self.get_crawled_items(history.get('root')):
url = get_item_url(item_id)
if url:
if date:
item_date = int(get_item_date(item_id))
if url not in urls:
urls[url] = {'first_seen': item_date,'last_seen': item_date}
else: # update first_seen / last_seen
if item_date < urls[url]['first_seen']:
all_url[url]['first_seen'] = item_date
if item_date > urls[url]['last_seen']:
all_url[url]['last_seen'] = item_date
else:
urls.add(url)
items = self.get_crawled_items_by_epoch(epoch=epoch)
if items:
for item_id in items:
url = get_item_url(item_id)
if url:
if date:
item_date = int(get_item_date(item_id))
if url not in urls:
urls[url] = {'first_seen': item_date, 'last_seen': item_date}
else: # update first_seen / last_seen
if item_date < urls[url]['first_seen']:
urls[url]['first_seen'] = item_date
if item_date > urls[url]['last_seen']:
urls[url]['last_seen'] = item_date
else:
urls.add(url)
return urls
def get_misp_object(self):
def get_misp_object(self, epoch=None):
# create domain-ip obj
obj_attrs = []
obj = MISPObject('domain-crawled', standalone=True)
obj.first_seen = self.get_first_seen()
obj.last_seen = self.get_last_check()
obj_attrs.append( obj.add_attribute('domain', value=self.id) )
urls = self.get_all_urls(date=True)
obj_attrs.append(obj.add_attribute('domain', value=self.id))
urls = self.get_all_urls(date=True, epoch=epoch)
for url in urls:
attribute = obj.add_attribute('url', value=url)
attribute.first_seen = str(urls[url]['first_seen'])
attribute.last_seen = str(urls[url]['last_seen'])
obj_attrs.append( attribute )
obj_attrs.append(attribute)
for obj_attr in obj_attrs:
for tag in self.get_tags():
obj_attr.add_tag(tag)
return obj
# TODO ADD MISP Event Export
# TODO DOWN DOMAIN
def get_download_zip(self, epoch=None):
    """
    Build an in-memory zip archive of one crawl of this domain.

    For every crawled item the archive holds the item file ('<basename>.gz'),
    its HAR when one exists ('<basename>.json') and its screenshot when one is
    correlated ('<basename>.png'), plus a '_URL_MAP_' text file mapping item
    ids to URLs and a 'misp.json' export of the same crawl.

    :param epoch: crawl timestamp to export; the latest crawl is used when falsy
    :return: BytesIO rewound to offset 0, or None when the crawl has no items
    """
    hars_dir = ConfigLoader.get_hars_dir()
    items_dir = ConfigLoader.get_items_dir()
    screenshots_dir = ConfigLoader.get_screenshots_dir()
    items = self.get_crawled_items_by_epoch(epoch=epoch)
    if not items:
        return None
    map_lines = ['ITEM ID : URL']
    # zip buffer
    zip_buffer = BytesIO()
    with zipfile.ZipFile(zip_buffer, "a") as zf:
        for item_id in items:
            url = get_item_url(item_id)
            basename = os.path.basename(item_id)
            # Item
            _write_in_zip_buffer(zf, os.path.join(items_dir, item_id), f'{basename}.gz')
            map_lines.append(f'{item_id} : {url}')
            # HAR
            har = get_item_har(item_id)
            if har:
                _write_in_zip_buffer(zf, os.path.join(hars_dir, har), f'{basename}.json')
            # Screenshot
            correlations = self._get_external_correlation('item', '', item_id, 'screenshot')
            # Guard against a result that carries the 'screenshot' key with an empty
            # collection: pop() on it would raise instead of skipping the item.
            screenshots = correlations.get('screenshot') if correlations else None
            if screenshots:
                # NOTE(review): [1:] drops the first character of the correlation
                # value (presumably a subtype separator) - confirm against get_correlations
                screenshot = screenshots.pop()[1:]
                screenshot = os.path.join(screenshot[0:2], screenshot[2:4], screenshot[4:6], screenshot[6:8],
                                          screenshot[8:10], screenshot[10:12], screenshot[12:])
                _write_in_zip_buffer(zf, os.path.join(screenshots_dir, f'{screenshot}.png'), f'{basename}.png')
        zf.writestr('_URL_MAP_', '\n'.join(map_lines).encode())
        # Export the same crawl as the zipped items, not always the latest one
        misp_object = self.get_misp_object(epoch=epoch).to_json().encode()
        zf.writestr('misp.json', misp_object)
    zip_buffer.seek(0)
    return zip_buffer
def add_language(self, language):
r_onion.sadd('all_domains_languages', language)
r_onion.sadd(f'all_domains_languages:{self.domain_type}', language)
r_onion.sadd(f'language:domains:{self.domain_type}:{language}', self.id)
r_onion.sadd(f'domain:language:{self.id}', language)
r_crawler.sadd('all_domains_languages', language)
r_crawler.sadd(f'all_domains_languages:{self.domain_type}', language)
r_crawler.sadd(f'language:domains:{self.domain_type}:{language}', self.id)
r_crawler.sadd(f'domain:language:{self.id}', language)
############################################################################
############################################################################
def create(self, first_seen, last_check, ports, status, tags, languages):
def create(self, first_seen, last_check, status, tags, languages):
r_onion.hset(f'{self.domain_type}_metadata:{self.id}', 'first_seen', first_seen)
r_onion.hset(f'{self.domain_type}_metadata:{self.id}', 'last_check', last_check)
r_crawler.hset(f'domain:meta:{self.id}', 'first_seen', first_seen)
r_crawler.hset(f'domain:meta:{self.id}', 'last_check', last_check)
for language in languages:
self.add_language(language)
#### CRAWLER ####
# add root_item to history
# if domain down -> root_item = epoch
def _add_history_root_item(self, root_item, epoch, port):
def _add_history_root_item(self, root_item, epoch):
# Create/Update crawler history
r_onion.zadd(f'crawler_history_{self.domain_type}:{self.id}:{port}', epoch, int(root_item))
r_crawler.zadd(f'domain:history:{self.id}', {root_item: epoch})
# if domain down -> root_item = epoch
def add_history(self, epoch, port, root_item=None, date=None):
def add_history(self, epoch, root_item=None, date=None):
if not date:
date = time.strftime('%Y%m%d', time.gmtime(epoch))
try:
int(root_item)
except ValueError:
root_item = None
root_item = int(root_item)
status = False
except (ValueError, TypeError):
status = True
data_retention_engine.update_object_date('domain', self.domain_type, date)
update_first_object_date(date, self.domain_type)
update_last_object_date(date, self.domain_type)
# UP
if root_item:
r_onion.srem(f'full_{self.domain_type}_down', self.id)
r_onion.sadd(f'full_{self.domain_type}_up', self.id)
r_onion.sadd(f'{self.domain_type}_up:{date}', self.id) # # TODO: -> store first day
r_onion.sadd(f'month_{self.domain_type}_up:{date[0:6]}', self.id) # # TODO: -> store first month
self._add_history_root_item(root_item, epoch, port)
if status:
r_crawler.srem(f'full_{self.domain_type}_down', self.id)
r_crawler.sadd(f'full_{self.domain_type}_up', self.id)
r_crawler.sadd(f'{self.domain_type}_up:{date}', self.id) # # TODO: -> store first day
r_crawler.sadd(f'month_{self.domain_type}_up:{date[0:6]}', self.id) # # TODO: -> store first month
self._add_history_root_item(root_item, epoch)
else:
if port:
r_onion.sadd(f'{self.domain_type}_down:{date}', self.id) # # TODO: -> store first month
self._add_history_root_item(epoch, epoch, port)
r_crawler.sadd(f'{self.domain_type}_down:{date}', self.id)
if self.was_up():
self._add_history_root_item(epoch, epoch)
else:
r_onion.sadd(f'{self.domain_type}_down:{date}', self.id)
if not self.was_up():
r_onion.sadd(f'full_{self.domain_type}_down', self.id)
r_crawler.sadd(f'full_{self.domain_type}_down', self.id)
def add_crawled_item(self, url, port, item_id, item_father):
# TODO RENAME PASTE_METADATA
def add_crawled_item(self, url, item_id, item_father):
r_metadata.hset(f'paste_metadata:{item_id}', 'father', item_father)
r_metadata.hset(f'paste_metadata:{item_id}', 'domain', f'{self.id}:{port}')
r_metadata.hset(f'paste_metadata:{item_id}', 'domain', self.id) # FIXME REMOVE ME -> extract for real link ?????????
r_metadata.hset(f'paste_metadata:{item_id}', 'real_link', url)
# add this item_id to his father
r_metadata.sadd(f'paste_children:{item_father}', item_id)
##-- CRAWLER --##
############################################################################
# In memory zipfile
def _write_in_zip_buffer(zf, path, filename):
    """
    Copy the file at `path` into the open zip archive `zf` under `filename`.

    :param zf: writable zipfile.ZipFile
    :param path: filesystem path of the file to read (binary mode)
    :param filename: member name to create inside the archive
    """
    with open(path, "rb") as source:
        payload = source.read()
    zf.writestr(filename, payload)
############################################################################
############################################################################
############################################################################
def get_all_domains_types():
return ['onion', 'regular'] # i2p
return ['onion', 'web'] # i2p
def get_all_domains_languages():
return r_onion.smembers('all_domains_languages')
return r_crawler.smembers('all_domains_languages')
def get_domains_up_by_type(domain_type):
return r_onion.smembers(f'full_{domain_type}_up')
return r_crawler.smembers(f'full_{domain_type}_up')
def get_domains_down_by_type(domain_type):
return r_onion.smembers(f'full_{domain_type}_down')
return r_crawler.smembers(f'full_{domain_type}_down')
def get_first_object_date(subtype, field=''):
first_date = r_onion.zscore('objs:first_date', f'domain:{subtype}:{field}')
if not first_date:
first_date = 99999999
return int(first_date)
def get_domains_up_by_date(date, domain_type):
return r_crawler.smembers(f'{domain_type}_up:{date}')
def get_last_object_date(subtype, field=''):
last_date = r_onion.zscore('objs:last_date', f'domain:{subtype}:{field}')
if not last_date:
last_date = 0
return int(last_date)
def get_domains_down_by_date(date, domain_type):
return r_crawler.smembers(f'{domain_type}_down:{date}')
def _set_first_object_date(date, subtype, field=''):
return r_onion.zadd('objs:first_date', f'domain:{subtype}:{field}', date)
def _set_last_object_date(date, subtype, field=''):
return r_onion.zadd('objs:last_date', f'domain:{subtype}:{field}', date)
def update_first_object_date(date, subtype, field=''):
first_date = get_first_object_date(subtype, field=field)
if int(date) < first_date:
_set_first_object_date(date, subtype, field=field)
return date
else:
return first_date
def update_last_object_date(date, subtype, field=''):
last_date = get_last_object_date(subtype, field=field)
if int(date) > last_date:
_set_last_object_date(date, subtype, field=field)
return date
else:
return last_date
def get_domains_by_daterange(date_from, date_to, domain_type, up=True, down=False):
    """
    Collect, per day, the domains of `domain_type` seen up and/or down.

    :param date_from: first date of the range (passed to Date.substract_date)
    :param date_to: last date of the range (passed to Date.substract_date)
    :param domain_type: domain type used to select the per-day sets
    :param up: include the domains recorded as up on each day
    :param down: include the domains recorded as down on each day
    :return: dict mapping each date that has at least one domain to a list of domains
    """
    domains_by_date = {}
    for day in Date.substract_date(date_from, date_to):
        day_domains = []
        if up:
            day_domains += get_domains_up_by_date(day, domain_type)
        if down:
            day_domains += get_domains_down_by_date(day, domain_type)
        # days without any domain are left out of the result
        if day_domains:
            domains_by_date[day] = day_domains
    return domains_by_date
def get_domains_meta(domains):
    """
    Return the default metadata dict of every domain id in `domains`, in order.

    :param domains: iterable of domain ids
    :return: list of Domain.get_meta() results
    """
    return [Domain(domain_id).get_meta() for domain_id in domains]
################################################################################
################################################################################
#if __name__ == '__main__':
if __name__ == '__main__':
dom = Domain('')
dom.get_download_zip()

View file

@ -3,10 +3,10 @@
import base64
import gzip
import magic
import os
import re
import sys
import redis
import cld3
import html2text
@ -233,8 +233,9 @@ class Item(AbstractObject):
return self.id[19:-36]
def get_screenshot(self):
s = r_serv_metadata.hget(f'paste_metadata:{self.id}', 'screenshot')
s = self.get_correlation('screenshot')
if s:
s = s['screenshot'].pop()[1:]
return os.path.join(s[0:2], s[2:4], s[4:6], s[6:8], s[8:10], s[10:12], s[12:])
def get_har(self):
@ -315,6 +316,11 @@ class Item(AbstractObject):
all_languages.append(lang)
return all_languages
def get_mimetype(self, content=None):
    """
    Detect the MIME type of `content` with libmagic.

    :param content: buffer to inspect; when falsy the item content is loaded
                    through get_content()
    :return: MIME type reported by magic.from_buffer(..., mime=True)
    """
    data = content or self.get_content()
    return magic.from_buffer(data, mime=True)
############################################################################
############################################################################

View file

@ -41,14 +41,18 @@ class Pgp(AbstractSubtypeObject):
pass
# # TODO:
def get_meta(self):
return None
def get_meta(self, options=set()):
meta = self._get_meta()
meta['id'] = self.id
meta['subtype'] = self.subtype
meta['tags'] = self.get_tags()
return meta
def get_link(self, flask_context=False):
if flask_context:
url = url_for('correlation.show_correlation', object_type=self.type, type_id=self.subtype, correlation_id=self.id)
url = url_for('correlation.show_correlation', type=self.type, subtype=self.subtype, id=self.id)
else:
url = f'{baseurl}/correlation/show_correlation?object_type={self.type}&type_id={self.subtype}&correlation_id={self.id}'
url = f'{baseurl}/correlation/show?type={self.type}&subtype={self.subtype}&id={self.id}'
return url
def get_svg_icon(self):

View file

@ -1,14 +1,18 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import base64
import os
import sys
from hashlib import sha256
from io import BytesIO
from flask import url_for
sys.path.append(os.environ['AIL_BIN'])
#from lib import Tag
##################################
# Import Project packages
##################################
from lib.ConfigLoader import ConfigLoader
from lib.objects.abstract_object import AbstractObject
@ -17,14 +21,15 @@ r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
SCREENSHOT_FOLDER = config_loader.get_files_directory('screenshot')
config_loader = None
class Screenshot(AbstractObject):
"""
AIL Screenshot Object. (strings)
"""
# ID = SHA256
def __init__(self, id):
super(Screenshot, self).__init__('screenshot', id)
def __init__(self, screenshot_id):
super(Screenshot, self).__init__('screenshot', screenshot_id)
# def get_ail_2_ail_payload(self):
# payload = {'raw': self.get_gzip_content(b64=True),
@ -41,13 +46,13 @@ class Screenshot(AbstractObject):
def get_link(self, flask_context=False):
if flask_context:
url = url_for('correlation.show_correlation', object_type=self.type, correlation_id=self.id)
url = url_for('correlation.show_correlation', type=self.type, id=self.id)
else:
url = f'{baseurl}/correlation/show_correlation?object_type={self.type}&correlation_id={self.id}'
url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}'
return url
def get_svg_icon(self):
return {'style': 'fas', 'icon': '\uf03e', 'color': '#E1F5DF', 'radius':5}
return {'style': 'fas', 'icon': '\uf03e', 'color': '#E1F5DF', 'radius': 5}
def get_rel_path(self, add_extension=False):
rel_path = os.path.join(self.id[0:2], self.id[2:4], self.id[4:6], self.id[6:8], self.id[8:10], self.id[10:12], self.id[12:])
@ -77,12 +82,11 @@ class Screenshot(AbstractObject):
return obj
def get_meta(self, options=set()):
meta = {}
meta['id'] = self.id
meta['img'] = get_screenshot_rel_path(self.id) ######### # TODO: Rename ME ??????
meta = {'id': self.id}
meta['img'] = get_screenshot_rel_path(self.id) ######### # TODO: Rename ME ??????
meta['tags'] = self.get_tags(r_list=True)
# TODO: ADD IN ABSTRACT CLASS
#meta['is_tags_safe'] = Tag.is_tags_safe(metadata_dict['tags']) ################## # TODO: ADD IN ABSZTRACT CLASS
#meta['is_tags_safe'] = Tag.is_tags_safe(metadata_dict['tags']) ################## # TODO: ADD IN ABSTRACT CLASS
return meta
def get_screenshot_dir():
@ -90,7 +94,7 @@ def get_screenshot_dir():
# get screenshot relative path
def get_screenshot_rel_path(sha256_str, add_extension=False):
screenshot_path = os.path.join(sha256_str[0:2], sha256_str[2:4], sha256_str[4:6], sha256_str[6:8], sha256_str[8:10], sha256_str[10:12], sha256_str[12:])
screenshot_path = os.path.join(sha256_str[0:2], sha256_str[2:4], sha256_str[4:6], sha256_str[6:8], sha256_str[8:10], sha256_str[10:12], sha256_str[12:])
if add_extension:
screenshot_path = f'{screenshot_path}.png'
return screenshot_path
@ -106,5 +110,22 @@ def get_all_screenshots():
screenshots.append(screenshot_id)
return screenshots
# FIXME STR SIZE LIMIT
def create_screenshot(content, size_limit=5000000, b64=True, force=False):
    """
    Store a screenshot on disk (if not already present) and return its object.

    The screenshot id is the SHA256 hex digest of the decoded image bytes.

    :param content: base64 text when `b64` is True, raw image bytes otherwise
    :param size_limit: maximum decoded size in bytes; a negative value disables the check
    :param b64: whether `content` is base64-encoded text
    :param force: bypass the size check
    :return: Screenshot object, or None when the content exceeds `size_limit`
    """
    # base64 inflates data by 4/3, so the decoded size is about 3/4 of the text
    # length; raw bytes are measured as-is instead of being under-estimated.
    size = (len(content) * 3) / 4 if b64 else len(content)
    if size > size_limit and size_limit >= 0 and not force:
        return None
    if b64:
        content = base64.standard_b64decode(content.encode())
    screenshot_id = sha256(content).hexdigest()
    screenshot = Screenshot(screenshot_id)
    if not screenshot.exists():
        filepath = screenshot.get_filepath()
        # exist_ok avoids failing when the directory is created between a check and the call
        os.makedirs(os.path.dirname(filepath), exist_ok=True)
        with open(filepath, 'wb') as f:
            f.write(content)
    return screenshot
#if __name__ == '__main__':

View file

@ -10,12 +10,14 @@ from pymisp import MISPObject
# sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.ConfigLoader import ConfigLoader
from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id
config_loader = ConfigLoader.ConfigLoader()
config_loader = ConfigLoader()
config_loader = None
@ -44,9 +46,9 @@ class Username(AbstractSubtypeObject):
def get_link(self, flask_context=False):
if flask_context:
url = url_for('correlation.show_correlation', object_type=self.type, type_id=self.subtype, correlation_id=self.id)
url = url_for('correlation.show_correlation', type=self.type, subtype=self.subtype, id=self.id)
else:
url = f'{baseurl}/correlation/show_correlation?object_type={self.type}&type_id={self.subtype}&correlation_id={self.id}'
url = f'{baseurl}/correlation/show?type={self.type}&subtype={self.subtype}&id={self.id}'
return url
def get_svg_icon(self):
@ -61,6 +63,13 @@ class Username(AbstractSubtypeObject):
icon = '\uf007'
return {'style': style, 'icon': icon, 'color': '#4dffff', 'radius':5}
def get_meta(self, options=set()):
    """
    Return the object metadata: the _get_meta() fields plus id, subtype and tags.

    :param options: accepted for interface compatibility; not read here
    :return: metadata dict
    """
    meta = self._get_meta()
    meta.update({'id': self.id, 'subtype': self.subtype})
    meta['tags'] = self.get_tags()
    return meta
def get_misp_object(self):
obj_attrs = []
if self.subtype == 'telegram':

View file

@ -0,0 +1,139 @@
# -*-coding:UTF-8 -*
"""
Base Class for AIL Objects
"""
##################################
# Import External packages
##################################
import os
import sys
from abc import abstractmethod, ABC
#from flask import url_for
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.objects.abstract_object import AbstractObject
from lib.ConfigLoader import ConfigLoader
from lib.item_basic import is_crawled, get_item_domain
from packages import Date
# LOAD CONFIG
config_loader = ConfigLoader()
# r_metadata = config_loader.get_redis_conn("ARDB_Metadata")
r_object = config_loader.get_db_conn("Kvrocks_Objects")
config_loader = None
class AbstractDaterangeObject(AbstractObject, ABC):
    """
    Abstract base for AIL objects tracked over a date range.

    Metadata is stored in the hash '<type>:meta:<id>' (fields 'first_seen',
    'last_seen', 'nb'); per-day counters are stored in the hash
    '<type>:date:<date>' keyed by object id; every known id is added to the
    set '<type>:all'.
    """

    def __init__(self, obj_type, id):
        """ Abstract for all the AIL object

        :param obj_type: object type (item, ...)
        :param id: Object ID
        """
        super().__init__(obj_type, id)

    def exists(self):
        """Return a truthy value when the object's metadata hash exists."""
        return r_object.exists(f'{self.type}:meta:{self.id}')

    def get_first_seen(self, r_int=False):
        """
        Return the stored first_seen date.

        :param r_int: return an int, using 99999999 when no date is stored
        :return: stored value as-is (may be None), or an int when `r_int` is set
        """
        first_seen = r_object.hget(f'{self.type}:meta:{self.id}', 'first_seen')
        if r_int:
            if first_seen:
                return int(first_seen)
            else:
                # sentinel larger than any real date so that any date compares lower
                return 99999999
        else:
            return first_seen

    def get_last_seen(self, r_int=False):
        """
        Return the stored last_seen date.

        :param r_int: return an int, using 0 when no date is stored
        :return: stored value as-is (may be None), or an int when `r_int` is set
        """
        last_seen = r_object.hget(f'{self.type}:meta:{self.id}', 'last_seen')
        if r_int:
            if last_seen:
                return int(last_seen)
            else:
                # sentinel lower than any real date so that any date compares higher
                return 0
        else:
            return last_seen

    def get_nb_seen(self):
        """Return the stored total number of times the object was seen (raw value, may be None)."""
        return r_object.hget(f'{self.type}:meta:{self.id}', 'nb')

    def get_nb_seen_by_date(self, date):
        """Return how many times the object was seen on `date` (0 when no counter is stored)."""
        nb = r_object.hget(f'{self.type}:date:{date}', self.id)
        if nb is None:
            return 0
        else:
            return int(nb)

    def _get_meta(self, options=None):
        """
        Return the base metadata dict (first_seen, last_seen, nb_seen).

        :param options: optional container of extra fields; 'sparkline' adds the sparkline
        :return: metadata dict
        """
        if options is None:
            options = ()
        meta_dict = {'first_seen': self.get_first_seen(),
                     'last_seen': self.get_last_seen(),
                     'nb_seen': self.get_nb_seen()}
        if 'sparkline' in options:
            meta_dict['sparkline'] = self.get_sparkline()
        return meta_dict

    def set_first_seen(self, first_seen):
        """Store `first_seen` in the metadata hash."""
        r_object.hset(f'{self.type}:meta:{self.id}', 'first_seen', first_seen)

    def set_last_seen(self, last_seen):
        """Store `last_seen` in the metadata hash."""
        r_object.hset(f'{self.type}:meta:{self.id}', 'last_seen', last_seen)

    def update_daterange(self, date):
        """
        Widen the stored [first_seen, last_seen] range so that it contains `date`.

        :param date: date convertible with int()
        """
        date = int(date)
        # obj doesn't exist yet
        if not self.exists():
            self.set_first_seen(date)
            self.set_last_seen(date)
        else:
            first_seen = self.get_first_seen(r_int=True)
            last_seen = self.get_last_seen(r_int=True)
            if date < first_seen:
                self.set_first_seen(date)
            if date > last_seen:
                self.set_last_seen(date)

    def get_sparkline(self):
        """Return the per-day seen counters for the dates given by Date.get_previous_date_list(6)."""
        sparkline = []
        for date in Date.get_previous_date_list(6):
            sparkline.append(self.get_nb_seen_by_date(date))
        return sparkline

    def _add(self, date, item_id):
        """
        Record that the object was seen in `item_id` on `date`.

        Updates the date range, the per-day and total counters, and adds the
        item (and, for crawled items, the domain) correlations.

        :param date: date the object was seen
        :param item_id: id of the item containing the object
        """
        if not self.exists():
            self.set_first_seen(date)
            self.set_last_seen(date)
            r_object.sadd(f'{self.type}:all', self.id)
        else:
            self.update_daterange(date)

        # NB Object seen by day
        # Only the hash counter is kept: get_nb_seen_by_date() reads it with HGET,
        # and a ZINCRBY on this same key cannot work once the key holds a hash.
        r_object.hincrby(f'{self.type}:date:{date}', self.id, 1)
        # NB Object seen
        r_object.hincrby(f'{self.type}:meta:{self.id}', 'nb', 1)

        # Correlations
        self.add_correlation('item', '', item_id)
        if is_crawled(item_id):  # Domain
            domain = get_item_domain(item_id)
            self.add_correlation('domain', '', domain)

    # TODO:ADD objects + Stats
    def _create(self, first_seen, last_seen):
        """Create the object with the given date range and register its id in '<type>:all'."""
        self.set_first_seen(first_seen)
        self.set_last_seen(last_seen)
        r_object.sadd(f'{self.type}:all', self.id)

    # TODO
    def _delete(self):
        """Not implemented yet."""
        pass

View file

@ -144,7 +144,7 @@ class AbstractObject(ABC):
pass
@abstractmethod
def get_meta(self):
def get_meta(self, options=set()):
"""
get Object metadata
"""
@ -165,6 +165,18 @@ class AbstractObject(ABC):
def get_misp_object(self):
pass
def _get_external_correlation(self, req_type, req_subtype, req_id, obj_type):
    """
    Get the correlations of another object, restricted to one object type.

    :param req_type: type of the object to query
    :param req_subtype: subtype of the object to query
    :param req_id: id of the object to query
    :param obj_type: only correlations of this type are requested
    """
    wanted_types = [obj_type]
    return get_correlations(req_type, req_subtype, req_id, filter_types=wanted_types)
def get_correlation(self, obj_type):
    """
    Get this object's correlations, restricted to one object type.

    :param obj_type: only correlations of this type are requested
    """
    wanted_types = [obj_type]
    return get_correlations(self.type, self.subtype, self.id, filter_types=wanted_types)
def get_correlations(self):
"""
Get object correlations

View file

@ -20,6 +20,8 @@ from lib.objects.abstract_object import AbstractObject
from lib.ConfigLoader import ConfigLoader
from lib.item_basic import is_crawled, get_item_domain
from packages import Date
# LOAD CONFIG
config_loader = ConfigLoader()
r_metadata = config_loader.get_redis_conn("ARDB_Metadata")
@ -115,6 +117,11 @@ class AbstractSubtypeObject(AbstractObject):
if date > last_seen:
self.set_last_seen(date)
def get_sparkline(self):
    """Return the per-day seen counters for the dates given by Date.get_previous_date_list(6)."""
    return [self.get_nb_seen_by_date(day) for day in Date.get_previous_date_list(6)]
#
# HANDLE Others objects ????
#

View file

@ -12,11 +12,15 @@ from flask import url_for
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.ConfigLoader import ConfigLoader
from lib.ail_core import get_all_objects
from lib import correlations_engine
from lib import btc_ail
from lib import Tag
from lib.objects.CryptoCurrencies import CryptoCurrency
from lib.objects.Cves import Cve
from lib.objects.Decodeds import Decoded
from lib.objects.Domains import Domain
from lib.objects.Items import Item
@ -39,12 +43,11 @@ def is_valid_object_type(obj_type):
def sanitize_objs_types(objs):
l_types = []
print('sanitize')
print(objs)
print(get_all_objects())
for obj in objs:
if is_valid_object_type(obj):
l_types.append(obj)
if not l_types:
l_types = get_all_objects()
return l_types
def get_object(obj_type, subtype, id):
@ -54,6 +57,8 @@ def get_object(obj_type, subtype, id):
return Domain(id)
elif obj_type == 'decoded':
return Decoded(id)
elif obj_type == 'cve':
return Cve(id)
elif obj_type == 'screenshot':
return Screenshot(id)
elif obj_type == 'cryptocurrency':
@ -63,23 +68,48 @@ def get_object(obj_type, subtype, id):
elif obj_type == 'username':
return Username(id, subtype)
def exists_obj(obj_type, subtype, id):
object = get_object(obj_type, subtype, id)
return object.exists()
def exists_obj(obj_type, subtype, obj_id):
obj = get_object(obj_type, subtype, obj_id)
if obj:
return obj.exists()
else:
return False
def get_object_link(obj_type, subtype, id, flask_context=False):
object = get_object(obj_type, subtype, id)
return object.get_link(flask_context=flask_context)
obj = get_object(obj_type, subtype, id)
return obj.get_link(flask_context=flask_context)
def get_object_svg(obj_type, subtype, id):
object = get_object(obj_type, subtype, id)
return object.get_svg_icon()
obj = get_object(obj_type, subtype, id)
return obj.get_svg_icon()
def get_object_meta(obj_type, subtype, id, flask_context=False):
object = get_object(obj_type, subtype, id)
meta = object.get_meta()
meta['icon'] = object.get_svg_icon()
meta['link'] = object.get_link(flask_context=flask_context)
def get_object_meta(obj_type, subtype, id, options=[], flask_context=False):
obj = get_object(obj_type, subtype, id)
meta = obj.get_meta(options=options)
meta['icon'] = obj.get_svg_icon()
meta['link'] = obj.get_link(flask_context=flask_context)
return meta
def get_objects_meta(objs, options=[], flask_context=False):
    """
    Return the metadata of several objects, in input order.

    :param objs: iterable of dicts with 'type', 'subtype' and 'id' keys
    :param options: optional meta fields, forwarded to get_object_meta()
    :param flask_context: forwarded to get_object_meta()
    :return: list of metadata dicts
    """
    return [
        get_object_meta(entry['type'], entry['subtype'], entry['id'],
                        options=options, flask_context=flask_context)
        for entry in objs
    ]
def get_object_card_meta(obj_type, subtype, id, related_btc=False):
    """
    Build the metadata dict shown on an object's card.

    Starts from obj.get_meta() and adds the icon, then, depending on the object:
    a sparkline (objects with a subtype, or cve), related bitcoin info
    (bitcoin subtype when `related_btc` is set), VirusTotal metadata (decoded)
    and the add-tags modal (screenshot or decoded).

    :param obj_type: object type
    :param subtype: object subtype (falsy when the type has none)
    :param id: object id
    :param related_btc: include btc_ail.get_bitcoin_info() for bitcoin objects
    :return: metadata dict
    """
    obj = get_object(obj_type, subtype, id)
    card = obj.get_meta()
    card['icon'] = obj.get_svg_icon()
    if subtype or obj_type == 'cve':
        card['sparkline'] = obj.get_sparkline()
    if related_btc and subtype == 'bitcoin':
        card["related_btc"] = btc_ail.get_bitcoin_info(obj.id)
    kind = obj.get_type()
    if kind == 'decoded':
        vt_meta = obj.get_meta_vt()
        card["vt"] = vt_meta
        vt_meta["status"] = obj.is_vt_enabled()
    # TAGS MODAL
    if kind in ('screenshot', 'decoded'):
        card["add_tags_modal"] = Tag.get_modal_add_tags(obj.id, object_type=kind)
    return card
def get_ui_obj_tag_table_keys(obj_type):
@ -203,7 +233,6 @@ def create_correlation_graph_nodes(nodes_set, obj_str_id, flask_context=True):
dict_node['style']['node_radius'] = dict_node['style']['radius']
# # TODO: # FIXME: in UI
dict_node['style']
dict_node['text'] = obj_id
if node_id == obj_str_id:
dict_node["style"]["node_color"] = 'orange'

View file

@ -36,17 +36,19 @@ def _regex_findall(redis_key, regex, item_content, r_set):
all_items = re.findall(regex, item_content)
if r_set:
if len(all_items) > 1:
r_serv_cache.sadd(redis_key, *all_items)
for item in all_items:
r_serv_cache.sadd(redis_key, str(item))
r_serv_cache.expire(redis_key, 360)
elif all_items:
r_serv_cache.sadd(redis_key, all_items[0])
r_serv_cache.sadd(redis_key, str(all_items[0]))
r_serv_cache.expire(redis_key, 360)
else:
if len(all_items) > 1:
r_serv_cache.lpush(redis_key, *all_items)
for item in all_items:
r_serv_cache.lpush(redis_key, str(item))
r_serv_cache.expire(redis_key, 360)
elif all_items:
r_serv_cache.lpush(redis_key, all_items[0])
r_serv_cache.lpush(redis_key, str(all_items[0]))
r_serv_cache.expire(redis_key, 360)
def regex_findall(module_name, redis_key, regex, item_id, item_content, max_time=30, r_set=True):

View file

@ -11,16 +11,16 @@ Search for API keys on an item content.
"""
import re
import os
import re
import sys
sys.path.append(os.path.join(os.environ['AIL_BIN']))
# project packages
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from modules.abstract_module import AbstractModule
from packages.Item import Item
from lib import regex_helper
from lib.objects.Items import Item
class ApiKey(AbstractModule):
"""ApiKey module for AIL framework"""
@ -28,13 +28,11 @@ class ApiKey(AbstractModule):
def __init__(self):
super(ApiKey, self).__init__()
self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)
# # TODO: ENUM or dict
# TODO improve REGEX
#r'(?<![A-Z0-9])=[A-Z0-9]{20}(?![A-Z0-9])'
#r'(?<!=[A-Za-z0-9+])=[A-Za-z0-9+]{40}(?![A-Za-z0-9+])'
# r'(?<![A-Z0-9])=[A-Z0-9]{20}(?![A-Z0-9])'
# r'(?<!=[A-Za-z0-9+])=[A-Za-z0-9+]{40}(?![A-Za-z0-9+])'
self.re_aws_access_key = r'AKIA[0-9A-Z]{16}'
self.re_aws_secret_key = r'[0-9a-zA-Z/+]{40}'
re.compile(self.re_aws_access_key)
@ -48,15 +46,14 @@ class ApiKey(AbstractModule):
self.redis_logger.info(f"Module {self.module_name} initialized")
def compute(self, message, r_result=False):
id, score = message.split()
item = Item(id)
item_id, score = message.split()
item = Item(item_id)
item_content = item.get_content()
google_api_key = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.re_google_api_key, item.get_id(), item_content)
aws_access_key = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.re_aws_access_key, item.get_id(), item_content)
google_api_key = self.regex_findall(self.re_google_api_key, item.get_id(), item_content)
aws_access_key = self.regex_findall(self.re_aws_access_key, item.get_id(), item_content)
if aws_access_key:
aws_secret_key = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.re_aws_secret_key, item.get_id(), item_content)
aws_secret_key = self.regex_findall(self.re_aws_secret_key, item.get_id(), item_content)
if aws_access_key or google_api_key:
to_print = f'ApiKey;{item.get_source()};{item.get_date()};{item.get_basename()};'
@ -68,7 +65,7 @@ class ApiKey(AbstractModule):
msg = f'infoleak:automatic-detection="google-api-key";{item.get_id()}'
self.send_message_to_queue(msg, 'Tags')
# # TODO: # FIXME: AWS regex/validate/sanityze KEY + SECRET KEY
# # TODO: # FIXME: AWS regex/validate/sanitize KEY + SECRET KEY
if aws_access_key:
print(f'found AWS key: {to_print}')
self.redis_logger.warning(f'{to_print}Checked {len(aws_access_key)} found AWS Key;{item.get_id()}')
@ -87,7 +84,8 @@ class ApiKey(AbstractModule):
self.send_message_to_queue(item.get_id(), 'Duplicate')
if r_result:
return (google_api_key, aws_access_key, aws_secret_key)
return google_api_key, aws_access_key, aws_secret_key
if __name__ == "__main__":
module = ApiKey()

View file

@ -43,7 +43,7 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages
##################################
from modules.abstract_module import AbstractModule
from packages.Item import Item
from lib.objects.Items import Item
class Categ(AbstractModule):

View file

@ -30,7 +30,6 @@ import os
import sys
import time
import re
import redis
from datetime import datetime
from pyfaup.faup import Faup
@ -39,9 +38,8 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages
##################################
from modules.abstract_module import AbstractModule
from packages.Item import Item
from lib.objects.Items import Item
from lib import ConfigLoader
from lib import regex_helper
from lib import Statistics
@ -60,21 +58,18 @@ class Credential(AbstractModule):
REDIS_KEY_ALL_PATH_SET_REV = 'AllPathRev'
REDIS_KEY_MAP_CRED_TO_PATH = 'CredToPathMapping'
def __init__(self):
super(Credential, self).__init__()
self.faup = Faup()
self.regex_web = "((?:https?:\/\/)[\.-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)"
self.regex_cred = "[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+"
self.regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:"
self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)
self.regex_web = r"((?:https?:\/\/)[\.-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)"
self.regex_cred = r"[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+"
self.regex_site_for_stats = r"@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:"
# Database
config_loader = ConfigLoader.ConfigLoader()
#self.server_cred = config_loader.get_redis_conn("ARDB_TermCred")
# self.server_cred = config_loader.get_redis_conn("ARDB_TermCred")
self.server_statistics = config_loader.get_redis_conn("ARDB_Statistics")
# Config values
@ -83,29 +78,27 @@ class Credential(AbstractModule):
self.max_execution_time = 30
# Waiting time in secondes between to message proccessed
# Waiting time in seconds between to message processed
self.pending_seconds = 10
# Send module state to logs
self.redis_logger.info(f"Module {self.module_name} initialized")
def compute(self, message):
id, count = message.split()
item = Item(id)
item_id, count = message.split()
item = Item(item_id)
item_content = item.get_content()
# TODO: USE SETS
# Extract all credentials
all_credentials = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_cred, item.get_id(), item_content, max_time=self.max_execution_time)
all_credentials = self.regex_findall(self.regex_cred, item.get_id(), item_content)
if all_credentials:
nb_cred = len(all_credentials)
message = f'Checked {nb_cred} credentials found.'
all_sites = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_web, item.get_id(), item_content, max_time=self.max_execution_time)
all_sites = self.regex_findall(self.regex_web, item.get_id(), item_content)
if all_sites:
discovered_sites = ', '.join(all_sites)
message += f' Related websites: {discovered_sites}'
@ -114,7 +107,7 @@ class Credential(AbstractModule):
to_print = f'Credential;{item.get_source()};{item.get_date()};{item.get_basename()};{message};{item.get_id()}'
#num of creds above tresh, publish an alert
# num of creds above threshold, publish an alert
if nb_cred > self.criticalNumberToAlert:
print(f"========> Found more than 10 credentials in this file : {item.get_id()}")
self.redis_logger.warning(to_print)
@ -122,11 +115,11 @@ class Credential(AbstractModule):
msg = f'infoleak:automatic-detection="credential";{item.get_id()}'
self.send_message_to_queue(msg, 'Tags')
site_occurence = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_site_for_stats, item.get_id(), item_content, max_time=self.max_execution_time, r_set=False)
site_occurrence = self.regex_findall(self.regex_site_for_stats, item.get_id(), item_content)
creds_sites = {}
for site in site_occurence:
for site in site_occurrence:
site_domain = site[1:-1].lower()
if site_domain in creds_sites.keys():
creds_sites[site_domain] += 1
@ -136,7 +129,7 @@ class Credential(AbstractModule):
for url in all_sites:
self.faup.decode(url)
domain = self.faup.get()['domain']
## TODO: # FIXME: remove me, check faup versionb
# # TODO: # FIXME: remove me, check faup versionb
try:
domain = domain.decode()
except:
@ -159,10 +152,10 @@ class Credential(AbstractModule):
date = datetime.now().strftime("%Y%m")
nb_tlds = {}
for cred in all_credentials:
maildomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", cred.lower())[0]
maildomains = re.findall(r"@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", cred.lower())[0]
self.faup.decode(maildomains)
tld = self.faup.get()['tld']
## TODO: # FIXME: remove me
# # TODO: # FIXME: remove me
try:
tld = tld.decode()
except:

View file

@ -17,14 +17,13 @@ It apply credit card regexes on item content and warn if a valid card number is
import os
import re
import sys
import time
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from modules.abstract_module import AbstractModule
from packages.Item import Item
from lib.objects.Items import Item
from packages import lib_refine
class CreditCards(AbstractModule):
@ -53,15 +52,14 @@ class CreditCards(AbstractModule):
# Send module state to logs
self.redis_logger.info(f"Module {self.module_name} initialized")
def compute(self, message, r_result=False):
id, score = message.split()
item = Item(id)
item_id, score = message.split()
item = Item(item_id)
content = item.get_content()
all_cards = re.findall(self.regex, content)
if len(all_cards) > 0:
#self.redis_logger.debug(f'All matching {all_cards}')
# self.redis_logger.debug(f'All matching {all_cards}')
creditcard_set = set([])
for card in all_cards:
@ -70,9 +68,9 @@ class CreditCards(AbstractModule):
self.redis_logger.debug(f'{clean_card} is valid')
creditcard_set.add(clean_card)
#pprint.pprint(creditcard_set)
# pprint.pprint(creditcard_set)
to_print = f'CreditCard;{item.get_source()};{item.get_date()};{item.get_basename()};'
if (len(creditcard_set) > 0):
if len(creditcard_set) > 0:
self.redis_logger.warning(f'{to_print}Checked {len(creditcard_set)} valid number(s);{item.get_id()}')
msg = f'infoleak:automatic-detection="credit-card";{item.get_id()}'
@ -83,7 +81,7 @@ class CreditCards(AbstractModule):
else:
self.redis_logger.info(f'{to_print}CreditCard related;{item.get_id()}')
if __name__ == '__main__':
if __name__ == '__main__':
module = CreditCards()
module.run()

View file

@ -22,6 +22,7 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages
##################################
from modules.abstract_module import AbstractModule
from lib.objects import Cves
from lib.objects.Items import Item
@ -36,13 +37,12 @@ class Cve(AbstractModule):
# regex to find CVE
self.reg_cve = re.compile(r'CVE-[1-2]\d{1,4}-\d{1,5}')
# Waiting time in secondes between to message proccessed
# Waiting time in seconds between to message processed
self.pending_seconds = 1
# Send module state to logs
self.redis_logger.info(f'Module {self.module_name} initialized')
def compute(self, message):
item_id, count = message.split()
@ -51,17 +51,23 @@ class Cve(AbstractModule):
cves = self.regex_findall(self.reg_cve, item_id, item.get_content())
if cves:
print(cves)
date = item.get_date()
for cve_id in cves:
cve = Cves.Cve(cve_id)
cve.add(date, item_id)
warning = f'{item_id} contains CVEs {cves}'
print(warning)
self.redis_logger.warning(warning)
msg = f'infoleak:automatic-detection="cve";{item_id}'
# Send to Tags Queue
self.send_message_to_queue(msg, 'Tags')
if __name__ == '__main__':
    # Standalone entry point: start the module through run() instead of a
    # single compute() call on a hard-coded crawled item id, which was a
    # debugging leftover and made the script process one fixed item only.
    module = Cve()
    module.run()

View file

@ -65,49 +65,45 @@ class Decoder(AbstractModule):
#hexStr = ''.join( hex_string.split(" ") )
return bytes(bytearray([int(hexStr[i:i+2], 16) for i in range(0, len(hexStr), 2)]))
# TODO to lambda expr
def binary_decoder(self, binary_string):
    """Convert a string of binary digits into the bytes it spells out.

    The input is consumed left to right in groups of eight characters and
    each group is parsed as a base-2 integer. A shorter trailing group is
    parsed the same way and contributes one final byte.

    :param binary_string: text made of '0' and '1' characters
    :return: bytes object holding one byte per group
    """
    octets = (binary_string[start:start + 8] for start in range(0, len(binary_string), 8))
    return bytes(int(octet, 2) for octet in octets)
# TODO to lambda expr
def base64_decoder(self, base64_string):
    """Decode a base64-encoded value and return the resulting bytes.

    :param base64_string: base64 text (or bytes) to decode
    :return: decoded bytes as produced by base64.b64decode
    """
    decoded = base64.b64decode(base64_string)
    return decoded
def __init__(self):
super(Decoder, self).__init__()
regex_binary = '[0-1]{40,}'
#regex_hex = '(0[xX])?[A-Fa-f0-9]{40,}'
regex_hex = '[A-Fa-f0-9]{40,}'
regex_base64 = '(?:[A-Za-z0-9+/]{4}){2,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)'
regex_binary = r'[0-1]{40,}'
# regex_hex = r'(0[xX])?[A-Fa-f0-9]{40,}'
regex_hex = r'[A-Fa-f0-9]{40,}'
regex_base64 = r'(?:[A-Za-z0-9+/]{4}){2,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)'
cmp_regex_binary = re.compile(regex_binary)
cmp_regex_hex = re.compile(regex_hex)
cmp_regex_base64 = re.compile(regex_base64)
# map decoder function
self.decoder_function = {'binary':self.binary_decoder,'hexadecimal':self.hex_decoder, 'base64':self.base64_decoder}
self.decoder_function = {'binary': self.binary_decoder, 'hexadecimal': self.hex_decoder, 'base64': self.base64_decoder}
# list all decoder with regex,
decoder_binary = {'name': 'binary', 'regex': cmp_regex_binary, 'encoded_min_size': 300, 'max_execution_time': binary_max_execution_time}
decoder_hexadecimal = {'name': 'hexadecimal', 'regex': cmp_regex_hex, 'encoded_min_size': 300, 'max_execution_time': hex_max_execution_time}
decoder_base64 = {'name': 'base64', 'regex': cmp_regex_base64, 'encoded_min_size': 40, 'max_execution_time': base64_max_execution_time}
self.decoder_order = [ decoder_base64, decoder_binary, decoder_hexadecimal, decoder_base64]
self.decoder_order = [decoder_base64, decoder_binary, decoder_hexadecimal, decoder_base64]
for decoder in self.decoder_order:
serv_metadata.sadd('all_decoder', decoder['name'])
# Waiting time in secondes between to message proccessed
# Waiting time in seconds between to message processed
self.pending_seconds = 1
# Send module state to logs
self.redis_logger.info(f'Module {self.module_name} initialized')
def compute(self, message):
item = Item(message)
@ -128,10 +124,9 @@ class Decoder(AbstractModule):
else:
signal.alarm(0)
if(len(encoded_list) > 0):
if len(encoded_list) > 0:
content = self.decode_string(content, item.id, date, encoded_list, decoder['name'], decoder['encoded_min_size'])
def decode_string(self, content, item_id, date, encoded_list, decoder_name, encoded_min_size):
find = False
for encoded in encoded_list:
@ -153,12 +148,12 @@ class Decoder(AbstractModule):
save_item_relationship(sha1_string, item_id) ################################
#remove encoded from item content
# remove encoded from item content
content = content.replace(encoded, '', 1)
self.redis_logger.debug(f'{item_id} : {decoder_name} - {mimetype}')
print(f'{item_id} : {decoder_name} - {mimetype}')
if(find):
if find:
self.redis_logger.info(f'{decoder_name} decoded')
print(f'{decoder_name} decoded')
@ -169,6 +164,7 @@ class Decoder(AbstractModule):
# perf: remove encoded from item content
return content
if __name__ == '__main__':
# # TODO: TEST ME

View file

@ -15,7 +15,6 @@ the out output of the Global module.
##################################
import os
import sys
import time
import DomainClassifier.domainclassifier
sys.path.append(os.environ['AIL_BIN'])
@ -23,11 +22,8 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages
##################################
from modules.abstract_module import AbstractModule
from packages.Item import Item
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
import d4
import item_basic
from lib.objects.Items import Item
from lib import d4
class DomClassifier(AbstractModule):
@ -38,7 +34,7 @@ class DomClassifier(AbstractModule):
def __init__(self):
super(DomClassifier, self).__init__()
# Waiting time in secondes between to message proccessed
# Waiting time in seconds between to message processed
self.pending_seconds = 1
addr_dns = self.process.config.get("DomClassifier", "dns")
@ -51,11 +47,10 @@ class DomClassifier(AbstractModule):
# Send module state to logs
self.redis_logger.info(f"Module: {self.module_name} Launched")
def compute(self, message, r_result=False):
host, id = message.split()
host, item_id = message.split()
item = Item(id)
item = Item(item_id)
item_basename = item.get_basename()
item_date = item.get_date()
item_source = item.get_source()
@ -64,7 +59,7 @@ class DomClassifier(AbstractModule):
self.c.text(rawtext=host)
print(self.c.domain)
self.c.validdomain(passive_dns=True, extended=False)
#self.redis_logger.debug(self.c.vdomain)
# self.redis_logger.debug(self.c.vdomain)
print(self.c.vdomain)
print()

View file

@ -12,14 +12,12 @@ Its input comes from other modules, namely:
Perform comparisons with ssdeep and tlsh
"""
import redis
import os
import sys
import time
#from datetime import datetime, timedelta
# from datetime import datetime, timedelta
import datetime
sys.path.append(os.environ['AIL_BIN'])
@ -51,7 +49,6 @@ class Duplicates(AbstractModule):
self.redis_logger.info(f"Module: {self.module_name} Launched")
def compute(self, message):
# IOError: "CRC Checksum Failed on : {id}"
@ -72,7 +69,7 @@ class Duplicates(AbstractModule):
self.algos['ssdeep']['hash'] = Duplicate.get_ssdeep_hash(content)
self.algos['tlsh']['hash'] = Duplicate.get_tlsh_hash(content)
# TODO: Handle coputed duplicates
# TODO: Handle computed duplicates
nb_duplicates = 0
@ -99,7 +96,7 @@ class Duplicates(AbstractModule):
y = time.time()
print(f'{item.get_id()} Processed in {y-x} sec')
#self.redis_logger.debug('{}Processed in {} sec'.format(to_print, y-x))
# self.redis_logger.debug('{}Processed in {} sec'.format(to_print, y-x))
if __name__ == "__main__":

View file

@ -31,7 +31,6 @@ import os
import sys
import time
import datetime
import redis
from hashlib import md5
from uuid import uuid4
@ -57,19 +56,18 @@ class Global(AbstractModule):
self.processed_item = 0
self.time_last_stats = time.time()
# Get and sanityze ITEM DIRECTORY
# Get and sanitize ITEM DIRECTORY
# # TODO: rename PASTE => ITEM
self.PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], self.process.config.get("Directories", "pastes"))
self.PASTES_FOLDERS = self.PASTES_FOLDER + '/'
self.PASTES_FOLDERS = os.path.join(os.path.realpath(self.PASTES_FOLDERS), '')
# Waiting time in secondes between to message proccessed
# Waiting time in seconds between to message processed
self.pending_seconds = 0.5
# Send module state to logs
self.redis_logger.info(f"Module {self.module_name} initialized")
def computeNone(self):
difftime = time.time() - self.time_last_stats
if int(difftime) > 30:
@ -80,7 +78,6 @@ class Global(AbstractModule):
self.time_last_stats = time.time()
self.processed_item = 0
def compute(self, message, r_result=False):
# Recovering the streamed message information
splitted = message.split()
@ -129,7 +126,8 @@ class Global(AbstractModule):
item_id = item_id.replace(self.PASTES_FOLDERS, '', 1)
self.send_message_to_queue(item_id)
self.processed_item+=1
self.processed_item += 1
print(item_id)
if r_result:
return item_id
@ -137,7 +135,6 @@ class Global(AbstractModule):
self.redis_logger.debug(f"Empty Item: {message} not processed")
print(f"Empty Item: {message} not processed")
def check_filename(self, filename, new_file_content):
"""
Check if file is not a duplicated file
@ -181,10 +178,8 @@ class Global(AbstractModule):
# File not unzipped
filename = None
return filename
def gunzip_file(self, filename):
"""
Unzip a file
@ -224,7 +219,6 @@ class Global(AbstractModule):
return gunzipped_bytes_obj
def rreplace(self, s, old, new, occurrence):
    """Replace matches of *old* in *s* with *new*, counting from the right.

    The string is split on *old* starting from its end, at most
    *occurrence* times, and the pieces are joined back with *new*; text
    to the left of those splits is left untouched.

    :param s: string to rewrite
    :param old: separator/substring to replace
    :param new: replacement text
    :param occurrence: maximum number of right-most matches to replace
    :return: the rewritten string
    """
    return new.join(s.rsplit(old, occurrence))

View file

@ -17,7 +17,6 @@ It is looking for Hosts
import os
import re
import sys
import time
sys.path.append(os.environ['AIL_BIN'])
##################################
@ -25,9 +24,7 @@ sys.path.append(os.environ['AIL_BIN'])
##################################
from modules.abstract_module import AbstractModule
from lib.ConfigLoader import ConfigLoader
from lib import regex_helper
#from lib.objects.Items import Item
from packages.Item import Item
from lib.objects.Items import Item
class Hosts(AbstractModule):
"""
@ -40,12 +37,10 @@ class Hosts(AbstractModule):
config_loader = ConfigLoader()
self.r_cache = config_loader.get_redis_conn("Redis_Cache")
self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)
# regex timeout
self.regex_timeout = 30
# Waiting time in secondes between to message proccessed
# Waiting time in seconds between to message processed
self.pending_seconds = 1
self.host_regex = r'\b([a-zA-Z\d-]{,63}(?:\.[a-zA-Z\d-]{,63})+)\b'
@ -53,7 +48,6 @@ class Hosts(AbstractModule):
self.redis_logger.info(f"Module: {self.module_name} Launched")
def compute(self, message):
item = Item(message)
@ -61,18 +55,16 @@ class Hosts(AbstractModule):
# if mimetype.split('/')[0] == "text":
content = item.get_content()
hosts = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.host_regex, item.get_id(), content)
hosts = self.regex_findall(self.host_regex, item.get_id(), content)
if hosts:
print(f'{len(hosts)} host {item.get_id()}')
for host in hosts:
#print(host)
# print(host)
msg = f'{host} {item.get_id()}'
self.send_message_to_queue(msg, 'Host')
if __name__ == '__main__':
module = Hosts()

View file

@ -34,7 +34,7 @@ class Iban(AbstractModule):
"""
_LETTERS_IBAN = chain(enumerate(string.digits + string.ascii_uppercase),
enumerate(string.ascii_lowercase, 10))
enumerate(string.ascii_lowercase, 10))
LETTERS_IBAN = {ord(d): str(i) for i, d in _LETTERS_IBAN}
def __init__(self):
@ -44,7 +44,7 @@ class Iban(AbstractModule):
self.pending_seconds = 10
self.regex_timeout = 30
#iban_regex = re.compile(r'\b[A-Za-z]{2}[0-9]{2}(?:[ ]?[0-9]{4}){4}(?:[ ]?[0-9]{1,2})?\b')
# iban_regex = re.compile(r'\b[A-Za-z]{2}[0-9]{2}(?:[ ]?[0-9]{4}){4}(?:[ ]?[0-9]{1,2})?\b')
self.iban_regex = re.compile(r'\b([A-Za-z]{2}[ \-]?[0-9]{2})(?=(?:[ \-]?[A-Za-z0-9]){9,30})((?:[ \-]?[A-Za-z0-9]{3,5}){2,6})([ \-]?[A-Za-z0-9]{1,3})\b')
self.iban_regex_verify = re.compile(r'^([A-Z]{2})([0-9]{2})([A-Z0-9]{9,30})$')
@ -90,6 +90,7 @@ class Iban(AbstractModule):
msg = f'infoleak:automatic-detection="iban";{item_id}'
self.send_message_to_queue(msg, 'Tags')
if __name__ == '__main__':
module = Iban()

View file

@ -26,7 +26,7 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages
##################################
from modules.abstract_module import AbstractModule
from packages.Item import Item
from lib.objects.Items import Item
class Indexer(AbstractModule):
@ -57,9 +57,7 @@ class Indexer(AbstractModule):
self.ix = None
if self.indexertype == "whoosh":
self.schema = Schema(title=TEXT(stored=True), path=ID(stored=True,
unique=True),
content=TEXT)
self.schema = Schema(title=TEXT(stored=True), path=ID(stored=True, unique=True), content=TEXT)
if not os.path.exists(self.baseindexpath):
os.mkdir(self.baseindexpath)
@ -96,7 +94,6 @@ class Indexer(AbstractModule):
self.last_refresh = time_now
def compute(self, message):
docpath = message.split(" ", -1)[-1]
@ -109,7 +106,7 @@ class Indexer(AbstractModule):
try:
# Avoid calculating the index's size at each message
if(time.time() - self.last_refresh > self.TIME_WAIT):
if time.time() - self.last_refresh > self.TIME_WAIT:
self.last_refresh = time.time()
if self.check_index_size() >= self.INDEX_SIZE_THRESHOLD*(1000*1000):
timestamp = int(time.time())
@ -145,10 +142,8 @@ class Indexer(AbstractModule):
cur_sum = 0
for root, dirs, files in os.walk(the_index_name):
cur_sum += sum(getsize(join(root, name)) for name in files)
return cur_sum
def move_index_into_old_index_folder(self):
for cur_file in os.listdir(self.baseindexpath):
if not cur_file == "old_index":

View file

@ -17,7 +17,6 @@ RSA private key, certificate messages
##################################
import os
import sys
import time
from enum import Enum
sys.path.append(os.environ['AIL_BIN'])
@ -25,7 +24,7 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages
##################################
from modules.abstract_module import AbstractModule
from packages.Item import Item
from lib.objects.Items import Item
class KeyEnum(Enum):
@ -53,10 +52,9 @@ class Keys(AbstractModule):
def __init__(self):
super(Keys, self).__init__()
# Waiting time in secondes between to message proccessed
# Waiting time in seconds between to message processed
self.pending_seconds = 1
def compute(self, message):
item = Item(message)
content = item.get_content()
@ -169,11 +167,12 @@ class Keys(AbstractModule):
if get_pgp_content:
self.send_message_to_queue(item.get_id(), 'PgpDump')
if find :
#Send to duplicate
self.send_message_to_queue(item.get_id(), 'Duplicate')
self.redis_logger.debug(f'{item.get_id()} has key(s)')
print(f'{item.get_id()} has key(s)')
# if find :
# # Send to duplicate
# self.send_message_to_queue(item.get_id(), 'Duplicate')
# self.redis_logger.debug(f'{item.get_id()} has key(s)')
# print(f'{item.get_id()} has key(s)')
if __name__ == '__main__':

View file

@ -11,7 +11,7 @@ sys.path.append(os.environ['AIL_BIN'])
from modules.abstract_module import AbstractModule
from lib.objects.Domains import Domain
from lib.objects.Items import Item
#from lib.ConfigLoader import ConfigLoader
# from lib.ConfigLoader import ConfigLoader
class Languages(AbstractModule):
"""
@ -31,6 +31,7 @@ class Languages(AbstractModule):
for lang in item.get_languages(min_probability=0.8):
domain.add_language(lang.language)
if __name__ == '__main__':
module = Languages()
module.run()

View file

@ -13,12 +13,12 @@ It tries to identify SQL Injections with libinjection.
import os
import sys
import redis
import urllib.request
import pylibinjection
from datetime import datetime
from pyfaup.faup import Faup
from urllib.parse import unquote
sys.path.append(os.environ['AIL_BIN'])
@ -27,7 +27,7 @@ sys.path.append(os.environ['AIL_BIN'])
##################################
from modules.abstract_module import AbstractModule
from lib.ConfigLoader import ConfigLoader
from packages.Item import Item
from lib.objects.Items import Item
class LibInjection(AbstractModule):
"""docstring for LibInjection module."""
@ -43,38 +43,38 @@ class LibInjection(AbstractModule):
self.redis_logger.info(f"Module: {self.module_name} Launched")
def compute(self, message):
url, id = message.split()
url, item_id = message.split()
self.faup.decode(url)
url_parsed = self.faup.get()
## TODO: # FIXME: remove me
# # TODO: # FIXME: remove me
try:
resource_path = url_parsed['resource_path'].encode()
except:
resource_path = url_parsed['resource_path']
## TODO: # FIXME: remove me
# # TODO: # FIXME: remove me
try:
query_string = url_parsed['query_string'].encode()
except:
query_string = url_parsed['query_string']
result_path = {'sqli' : False}
result_query = {'sqli' : False}
result_path = {'sqli': False}
result_query = {'sqli': False}
if resource_path is not None:
result_path = pylibinjection.detect_sqli(resource_path)
#print(f'path is sqli : {result_path}')
# print(f'path is sqli : {result_path}')
if query_string is not None:
result_query = pylibinjection.detect_sqli(query_string)
#print(f'query is sqli : {result_query}')
# print(f'query is sqli : {result_query}')
if result_path['sqli'] is True or result_query['sqli'] is True:
item = Item(id)
item = Item(item_id)
item_id = item.get_id()
print(f"Detected (libinjection) SQL in URL: {item_id}")
print(urllib.request.unquote(url))
print(unquote(url))
to_print = f'LibInjection;{item.get_source()};{item.get_date()};{item.get_basename()};Detected SQL in URL;{item_id}'
self.redis_logger.warning(to_print)
@ -86,8 +86,8 @@ class LibInjection(AbstractModule):
msg = f'infoleak:automatic-detection="sql-injection";{item_id}'
self.send_message_to_queue(msg, 'Tags')
#statistics
## TODO: # FIXME: remove me
# statistics
# # TODO: # FIXME: remove me
try:
tld = url_parsed['tld'].decode()
except:
@ -96,7 +96,7 @@ class LibInjection(AbstractModule):
date = datetime.now().strftime("%Y%m")
self.server_statistics.hincrby(f'SQLInjection_by_tld:{date}', tld, 1)
if __name__ == "__main__":
if __name__ == "__main__":
module = LibInjection()
module.run()

View file

@ -13,9 +13,7 @@ It apply mail regexes on item content and warn if above a threshold.
import os
import re
import redis
import sys
import time
import datetime
import dns.resolver
@ -52,7 +50,7 @@ class Mail(AbstractModule):
self.mail_threshold = 10
self.regex_timeout = 30
self.email_regex = "[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}"
self.email_regex = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}"
re.compile(self.email_regex)
def is_mxdomain_in_cache(self, mxdomain):
@ -64,8 +62,8 @@ class Mail(AbstractModule):
def check_mx_record(self, set_mxdomains):
"""Check if emails MX domains are responding.
:param adress_set: -- (set) This is a set of emails domains
:return: (int) Number of adress with a responding and valid MX domains
:param set_mxdomains: -- (set) This is a set of emails domains
:return: (int) Number of address with a responding and valid MX domains
"""
resolver = dns.resolver.Resolver()
@ -107,7 +105,7 @@ class Mail(AbstractModule):
self.redis_logger.debug('SyntaxError: EmptyLabel')
print('SyntaxError: EmptyLabel')
except dns.resolver.NXDOMAIN:
#save_mxdomain_in_cache(mxdomain)
# save_mxdomain_in_cache(mxdomain)
self.redis_logger.debug('The query name does not exist.')
print('The query name does not exist.')
except dns.name.LabelTooLong:
@ -115,12 +113,12 @@ class Mail(AbstractModule):
print('The Label is too long')
except dns.exception.Timeout:
print('dns timeout')
#save_mxdomain_in_cache(mxdomain)
# save_mxdomain_in_cache(mxdomain)
except Exception as e:
print(e)
return valid_mxdomain
# # TODO: sanityze mails
# # TODO: sanitize mails
def compute(self, message):
item_id, score = message.split()
item = Item(item_id)
@ -134,7 +132,7 @@ class Mail(AbstractModule):
mxdomains_email[mxdomain] = set()
mxdomains_email[mxdomain].add(mail)
## TODO: add MAIL trackers
# # TODO: add MAIL trackers
valid_mx = self.check_mx_record(mxdomains_email.keys())
print(f'valid_mx: {valid_mx}')
@ -144,7 +142,7 @@ class Mail(AbstractModule):
nb_mails = len(mxdomains_email[domain_mx])
num_valid_email += nb_mails
# Create doamin_mail stats
# Create domain_mail stats
msg = f'mail;{nb_mails};{domain_mx};{item_date}'
self.send_message_to_queue(msg, 'ModuleStats')
@ -159,8 +157,8 @@ class Mail(AbstractModule):
for tld in mx_tlds:
Statistics.add_module_tld_stats_by_date('mail', item_date, tld, mx_tlds[tld])
msg = f'Mails;{item.get_source()};{item_date};{item.get_basename()};Checked {num_valid_email} e-mail(s);{item_id}'
if num_valid_email > self.mail_threshold:
msg = f'Mails;{item.get_source()};{item_date};{item.get_basename()};Checked {num_valid_email} e-mail(s);{item_id}'
print(f'{item_id} Checked {num_valid_email} e-mail(s)')
self.redis_logger.warning(msg)
# Tags
@ -170,8 +168,6 @@ class Mail(AbstractModule):
self.redis_logger.info(msg)
if __name__ == '__main__':
module = Mail()
#module.compute('tests/2021/01/01/mails.gz 50')
module.run()

View file

@ -25,12 +25,11 @@ class ModuleStats(AbstractModule):
Module Statistics module for AIL framework
"""
def __init__(self):
super(ModuleStats, self).__init__()
# Waiting time in secondes between to message proccessed
# Waiting time in seconds between to message processed
self.pending_seconds = 20
def compute(self, message):
@ -38,9 +37,10 @@ class ModuleStats(AbstractModule):
# MODULE STATS
if len(message.split(';')) > 1:
module_name, num, keyword, date = message.split(';')
Statisticsupdate_module_stats(module_name, num, keyword, date)
Statistics.update_module_stats(module_name, num, keyword, date)
# ITEM STATS
else:
item_id = message
item = Item(item_id)
source = item.get_source()
date = item.get_date()

View file

@ -13,8 +13,6 @@ Requirements
*Need running Redis instances. (Redis)
"""
import time
import datetime
import os
import sys
import re
@ -25,68 +23,8 @@ sys.path.append(os.environ['AIL_BIN'])
##################################
from modules.abstract_module import AbstractModule
from lib.ConfigLoader import ConfigLoader
from lib.objects.Items import Item
from lib import crawlers
from lib import regex_helper
from packages.Item import Item
## Manually fetch first page if crawler is disabled
# import base64
# import subprocess
#
# torclient_host = '127.0.0.1'
# torclient_port = 9050
#
# def fetch(p, r_cache, urls, domains):
# now = datetime.datetime.now()
# path = os.path.join('onions', str(now.year).zfill(4),
# str(now.month).zfill(2),
# str(now.day).zfill(2),
# str(int(time.mktime(now.utctimetuple()))))
# failed = []
# downloaded = []
# print('{} Urls to fetch'.format(len(urls)))
# for url, domain in zip(urls, domains):
# if r_cache.exists(url) or url in failed:
# continue
# to_fetch = base64.standard_b64encode(url.encode('utf8'))
# print('fetching url: {}'.format(to_fetch))
# process = subprocess.Popen(["python", './tor_fetcher.py', to_fetch],
# stdout=subprocess.PIPE)
# while process.poll() is None:
# time.sleep(1)
#
# if process.returncode == 0:
# r_cache.setbit(url, 0, 1)
# r_cache.expire(url, 360000)
# downloaded.append(url)
# print('downloaded : {}'.format(downloaded))
# '''tempfile = process.stdout.read().strip()
# tempfile = tempfile.decode('utf8')
# #with open(tempfile, 'r') as f:
# filename = path + domain + '.gz'
# fetched = f.read()
# content = base64.standard_b64decode(fetched)
# save_path = os.path.join(os.environ['AIL_HOME'],
# p.config.get("Directories", "pastes"),
# filename)
# dirname = os.path.dirname(save_path)
# if not os.path.exists(dirname):
# os.makedirs(dirname)
# with open(save_path, 'w') as ff:
# ff.write(content)
# p.populate_set_out(save_path, 'Global')
# p.populate_set_out(url, 'ValidOnion')
# p.populate_set_out(fetched, 'FetchedOnion')'''
# yield url
# #os.unlink(tempfile)
# else:
# r_cache.setbit(url, 0, 0)
# r_cache.expire(url, 3600)
# failed.append(url)
# print('Failed at downloading', url)
# print(process.stdout.read())
# print('Failed:', len(failed), 'Downloaded:', len(downloaded))
class Onion(AbstractModule):
"""docstring for Onion module."""
@ -103,68 +41,63 @@ class Onion(AbstractModule):
self.regex_timeout = 30
self.faup = crawlers.get_faup()
self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)
# activate_crawler = p.config.get("Crawler", "activate_crawler")
self.url_regex = "((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.onion)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
self.i2p_regex = "((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.i2p)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
re.compile(self.url_regex)
re.compile(self.i2p_regex)
self.onion_regex = r"((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.onion)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
# self.i2p_regex = r"((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.i2p)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
re.compile(self.onion_regex)
# re.compile(self.i2p_regex)
self.redis_logger.info(f"Module: {self.module_name} Launched")
# TEMP var: SAVE I2P Domain (future I2P crawler)
self.save_i2p = config_loader.get_config_boolean("Onion", "save_i2p")
# self.save_i2p = config_loader.get_config_boolean("Onion", "save_i2p")
def compute(self, message):
# list of tuples: (url, subdomains, domain)
urls_to_crawl = []
onion_urls = []
domains = []
id, score = message.split()
item = Item(id)
item_id, score = message.split()
item = Item(item_id)
item_content = item.get_content()
# max execution time on regex
res = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.url_regex, item.get_id(), item_content)
res = self.regex_findall(self.onion_regex, item.get_id(), item_content)
for x in res:
# String to tuple
x = x[2:-2].replace(" '", "").split("',")
url = x[0]
subdomain = x[4].lower()
self.faup.decode(url)
url_unpack = self.faup.get()
try: ## TODO: # FIXME: check faup version
domain = url_unpack['domain'].decode().lower()
except Exception as e:
domain = url_unpack['domain'].lower()
print(url)
# TODO Crawl subdomain
url_unpack = crawlers.unpack_url(url)
domain = url_unpack['domain']
if crawlers.is_valid_onion_domain(domain):
urls_to_crawl.append((url, subdomain, domain))
domains.append(domain)
onion_urls.append(url)
to_print = f'Onion;{item.get_source()};{item.get_date()};{item.get_basename()};'
if not urls_to_crawl:
self.redis_logger.info(f'{to_print}Onion related;{item.get_id()}')
return
if onion_urls:
if crawlers.is_crawler_activated():
for domain in domains:# TODO LOAD DEFAULT SCREENSHOT + HAR
task_uuid = crawlers.add_crawler_task(domain, parent=item.get_id())
if task_uuid:
print(f'{domain} added to crawler queue: {task_uuid}')
else:
to_print = f'Onion;{item.get_source()};{item.get_date()};{item.get_basename()};'
print(f'{to_print}Detected {len(domains)} .onion(s);{item.get_id()}')
self.redis_logger.warning(f'{to_print}Detected {len(domains)} .onion(s);{item.get_id()}')
# TAG Item
msg = f'infoleak:automatic-detection="onion";{item.get_id()}'
self.send_message_to_queue(msg, 'Tags')
# TAG Item
msg = f'infoleak:automatic-detection="onion";{item.get_id()}'
self.send_message_to_queue(msg, 'Tags')
if crawlers.is_crawler_activated():
for to_crawl in urls_to_crawl:
print(f'{to_crawl[2]} added to crawler queue: {to_crawl[0]}')
crawlers.add_item_to_discovery_queue('onion', to_crawl[2], to_crawl[1], to_crawl[0], item.get_id())
else:
print(f'{to_print}Detected {len(urls_to_crawl)} .onion(s);{item.get_id()}')
self.redis_logger.warning(f'{to_print}Detected {len(urls_to_crawl)} .onion(s);{item.get_id()}')
# keep manual fetcher ????
## Manually fetch first page if crawler is disabled
# for url in fetch(p, r_cache, urls, domains_list):
# publisher.info('{}Checked {};{}'.format(to_print, url, PST.p_rel_path))
if __name__ == "__main__":
module = Onion()
# module.compute('submitted/2022/10/10/submitted_705d1d92-7e9a-4a44-8c21-ccd167bfb7db.gz 9')
module.run()
# 5ajw6aqf3ep7sijnscdzw77t7xq4xjpsy335yb2wiwgouo7yfxtjlmid.onion to debian.org

View file

@ -17,7 +17,6 @@ It apply phone number regexes on item content and warn if above a threshold.
import os
import re
import sys
import time
import phonenumbers
sys.path.append(os.environ['AIL_BIN'])
@ -25,7 +24,7 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages
##################################
from modules.abstract_module import AbstractModule
from packages.Item import Item
from lib.objects.Items import Item
# # TODO: # FIXME: improve regex / filter false positives
class Phone(AbstractModule):
@ -37,14 +36,12 @@ class Phone(AbstractModule):
# reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})')
REG_PHONE = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\(?\d{2,4}\)?){3,4})')
def __init__(self):
super(Phone, self).__init__()
# Waiting time in secondes between to message proccessed
# Waiting time in seconds between to message processed
self.pending_seconds = 1
def compute(self, message):
item = Item(message)
content = item.get_content()
@ -79,6 +76,5 @@ class Phone(AbstractModule):
if __name__ == '__main__':
module = Phone()
module.run()

View file

@ -14,11 +14,11 @@ It test different possibility to makes some sqlInjection.
import os
import sys
import re
import redis
import urllib.request
from datetime import datetime
from pyfaup.faup import Faup
from urllib.parse import unquote
sys.path.append(os.environ['AIL_BIN'])
##################################
@ -26,7 +26,7 @@ sys.path.append(os.environ['AIL_BIN'])
##################################
from modules.abstract_module import AbstractModule
from lib.ConfigLoader import ConfigLoader
from packages.Item import Item
from lib.objects.Items import Item
class SQLInjectionDetection(AbstractModule):
"""docstring for SQLInjectionDetection module."""
@ -46,13 +46,13 @@ class SQLInjectionDetection(AbstractModule):
self.redis_logger.info(f"Module: {self.module_name} Launched")
def compute(self, message):
url, id = message.split()
url, item_id = message.split()
if self.is_sql_injection(url):
self.faup.decode(url)
url_parsed = self.faup.get()
item = Item(id)
item = Item(item_id)
item_id = item.get_id()
print(f"Detected SQL in URL: {item_id}")
print(urllib.request.unquote(url))
@ -69,7 +69,7 @@ class SQLInjectionDetection(AbstractModule):
# statistics
tld = url_parsed['tld']
if tld is not None:
## TODO: # FIXME: remove me
# # TODO: # FIXME: remove me
try:
tld = tld.decode()
except:
@ -77,15 +77,13 @@ class SQLInjectionDetection(AbstractModule):
date = datetime.now().strftime("%Y%m")
self.server_statistics.hincrby(f'SQLInjection_by_tld:{date}', tld, 1)
# Try to detect if the url passed might be an sql injection by appliying the regex
# Try to detect if the url passed might be an sql injection by applying the regex
# defined above on it.
def is_sql_injection(self, url_parsed):
line = urllib.request.unquote(url_parsed)
line = unquote(url_parsed)
return re.search(SQLInjectionDetection.SQLI_REGEX, line, re.I) is not None
if __name__ == "__main__":
module = SQLInjectionDetection()
module.run()

View file

@ -34,9 +34,8 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages
##################################
from modules.abstract_module import AbstractModule
from packages import Paste
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
from lib.objects.Items import Item
from lib import ConfigLoader
class TimeoutException(Exception):
@ -53,12 +52,10 @@ class SentimentAnalysis(AbstractModule):
SentimentAnalysis module for AIL framework
"""
# Config Variables
accepted_Mime_type = ['text/plain']
line_max_length_threshold = 1000
def __init__(self):
super(SentimentAnalysis, self).__init__()
@ -75,7 +72,6 @@ class SentimentAnalysis(AbstractModule):
# Send module state to logs
self.redis_logger.info(f"Module {self.module_name} initialized")
def compute(self, message):
# Max time to compute one entry
signal.alarm(60)
@ -87,16 +83,31 @@ class SentimentAnalysis(AbstractModule):
else:
signal.alarm(0)
def get_p_content_with_removed_lines(self, threshold, item_content):
    """Drop over-long lines from an item's content.

    :param threshold: a line is kept only when its length (line terminator
        included) is strictly below this value
    :param item_content: the content, either as one string or as an
        iterable of lines
    :return: tuple ``(num_line_removed, string_content)`` where
        ``string_content`` is the concatenation of the kept lines
    """
    import io

    # Iterating a ``str`` directly yields single characters, so no "line"
    # would ever reach the threshold. Wrap it in a StringIO to iterate real
    # lines (terminators kept, so kept lines are reproduced unchanged).
    if isinstance(item_content, str):
        lines = io.StringIO(item_content)
    else:
        lines = item_content

    kept_lines = []
    num_line_removed = 0
    for line in lines:
        if len(line) < threshold:
            kept_lines.append(line)
        else:
            num_line_removed += 1
    return num_line_removed, "".join(kept_lines)
def analyse(self, message):
paste = Paste.Paste(message)
item = Item(message)
# get content with removed line + number of them
num_line_removed, p_content = paste.get_p_content_with_removed_lines(SentimentAnalysis.line_max_length_threshold)
provider = paste.p_source
p_date = str(paste._get_p_date())
p_MimeType = paste._get_p_encoding()
num_line_removed, p_content = self.get_p_content_with_removed_lines(SentimentAnalysis.line_max_length_threshold,
item.get_content())
provider = item.get_source()
p_date = item.get_date()
p_MimeType = item.get_mimetype()
# Perform further analysis
if p_MimeType == "text/plain":

View file

@ -20,7 +20,7 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages
##################################
from modules.abstract_module import AbstractModule
from packages.Item import Item
from lib.objects.Items import Item
from lib import Tag
@ -32,13 +32,12 @@ class Tags(AbstractModule):
def __init__(self):
super(Tags, self).__init__()
# Waiting time in secondes between to message proccessed
# Waiting time in seconds between to message processed
self.pending_seconds = 10
# Send module state to logs
self.redis_logger.info(f'Module {self.module_name} initialized')
def compute(self, message):
# Extract item ID and tag from message
mess_split = message.split(';')
@ -62,6 +61,5 @@ class Tags(AbstractModule):
if __name__ == '__main__':
module = Tags()
module.run()

View file

@ -16,7 +16,7 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages
##################################
from modules.abstract_module import AbstractModule
from packages.Item import Item
from lib.objects.Items import Item
from lib import regex_helper
from lib import telegram
@ -78,7 +78,7 @@ class Telegram(AbstractModule):
# CREATE TAG
if invite_code_found:
#tags
# tags
msg = f'infoleak:automatic-detection="telegram-invite-hash";{item.get_id()}'
self.send_message_to_queue(msg, 'Tags')

View file

@ -13,7 +13,6 @@ This module extract URLs from an item and send them to others modules.
# Import External packages
##################################
import os
import re
import sys
from pyfaup.faup import Faup
@ -23,8 +22,7 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages
##################################
from modules.abstract_module import AbstractModule
from packages.Item import Item
from lib import regex_helper
from lib.objects.Items import Item
# # TODO: Faup packages: Add new binding: Check TLD
@ -40,7 +38,6 @@ class Urls(AbstractModule):
super(Urls, self).__init__()
self.faup = Faup()
self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)
# Protocol file path
protocolsfile_path = os.path.join(os.environ['AIL_HOME'],
@ -58,21 +55,26 @@ class Urls(AbstractModule):
# Send module state to logs
self.redis_logger.info(f"Module {self.module_name} initialized")
def compute(self, message):
"""
Search for Web links from given message
"""
# Extract item
id, score = message.split()
item_id, score = message.split()
item = Item(id)
item = Item(item_id)
item_content = item.get_content()
l_urls = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.url_regex, item.get_id(), item_content)
# TODO Handle invalid URL
l_urls = self.regex_findall(self.url_regex, item.get_id(), item_content)
for url in l_urls:
self.faup.decode(url)
unpack_url = self.faup.get()
url_decoded = self.faup.get()
# decode URL
try:
url = url_decoded['url'].decode()
except AttributeError:
url = url_decoded['url']
to_send = f"{url} {item.get_id()}"
print(to_send)
@ -83,7 +85,7 @@ class Urls(AbstractModule):
to_print = f'Urls;{item.get_source()};{item.get_date()};{item.get_basename()};'
self.redis_logger.info(f'{to_print}Detected {len(l_urls)} URL;{item.get_id()}')
if __name__ == '__main__':
if __name__ == '__main__':
module = Urls()
module.run()

View file

@ -11,9 +11,8 @@ This module spots zerobins-like services for further processing
##################################
import os
import sys
import time
import pdb
import re
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
@ -42,33 +41,31 @@ class Zerobins(AbstractModule):
# Send module state to logs
self.redis_logger.info(f'Module {self.module_name} initialized')
def computeNone(self):
"""
Compute when no message in queue
"""
self.redis_logger.debug("No message in queue")
def compute(self, message):
"""regex_helper.regex_findall(self.module_name, self.redis_cache_key
"""
Compute a message in queue
"""
print(message)
url, id = message.split()
url, item_id = message.split()
# Extract zerobins addresses
matching_binz = self.regex_findall(self.regex, id, url)
matching_binz = self.regex_findall(self.regex, item_id, url)
if len(matching_binz) > 0:
for bin in matching_binz:
print("send {} to crawler".format(bin))
crawlers.create_crawler_task(bin, screenshot=False, har=False, depth_limit=1, max_pages=1, auto_crawler=False, crawler_delta=3600, crawler_type=None, cookiejar_uuid=None, user_agent=None)
for bin_url in matching_binz:
print(f'send {bin_url} to crawler')
crawlers.add_crawler_task(bin_url, depth=0, har=False, screenshot=False, proxy='force_tor',
parent='manual', priority=10)
self.redis_logger.debug("Compute message in queue")
# TODO TEST ME
if __name__ == '__main__':
module = Zerobins()
module.run()
module.run()

View file

@ -59,6 +59,9 @@ class AbstractModule(ABC):
# Setup the I/O queues
self.process = Process(self.queue_name)
# Debug Mode
self.debug = False
def get_message(self):
"""
Get message from the Redis Queue (QueueIn)
@ -104,6 +107,8 @@ class AbstractModule(ABC):
# Module processing with the message from the queue
self.compute(message)
except Exception as err:
if self.debug:
raise err
trace = traceback.format_tb(err.__traceback__)
trace = ''.join(trace)
self.redis_logger.critical(f"Error in module {self.module_name}: {err}")

View file

@ -16,7 +16,6 @@ import os
import sys
import gzip
import io
import redis
import base64
import datetime
import time
@ -51,6 +50,7 @@ class SubmitPaste(AbstractModule):
"""
super(SubmitPaste, self).__init__(queue_name='submit_paste')
# TODO KVROCKS
self.r_serv_db = ConfigLoader.ConfigLoader().get_redis_conn("ARDB_DB")
self.r_serv_log_submit = ConfigLoader.ConfigLoader().get_redis_conn("Redis_Log_submit")
self.r_serv_tags = ConfigLoader.ConfigLoader().get_redis_conn("ARDB_Tags")
@ -61,7 +61,6 @@ class SubmitPaste(AbstractModule):
self.PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], ConfigLoader.ConfigLoader().get_config_str("Directories", "pastes")) + '/'
def compute(self, uuid):
"""
Main method of the Module to implement
@ -129,7 +128,6 @@ class SubmitPaste(AbstractModule):
self.redis_logger.debug(f'{self.module_name}, waiting for new message, Idling {self.pending_seconds}s')
time.sleep(self.pending_seconds)
def _manage_text(self, uuid, paste_content, ltags, ltagsgalaxies, source):
"""
Create a paste for given text
@ -141,7 +139,6 @@ class SubmitPaste(AbstractModule):
else:
self.abord_file_submission(uuid, f'Text size is over {SubmitPaste.TEXT_MAX_SIZE} bytes')
def _manage_file(self, uuid, file_full_path, ltags, ltagsgalaxies, source):
"""
Create a paste for given file
@ -230,7 +227,6 @@ class SubmitPaste(AbstractModule):
else:
self.abord_file_submission(uuid, "Server Error, the archive can't be found")
def _is_compressed_type(self, file_type):
"""
Check if file type is in the list of compressed file extensions format
@ -239,7 +235,6 @@ class SubmitPaste(AbstractModule):
return file_type in compressed_type
def remove_submit_uuid(self, uuid):
# save temp value on disk
self.r_serv_db.delete(f'{uuid}:ltags')
@ -262,7 +257,6 @@ class SubmitPaste(AbstractModule):
self.redis_logger.debug(f'{uuid} all file submitted')
print(f'{uuid} all file submitted')
def create_paste(self, uuid, paste_content, ltags, ltagsgalaxies, name, source=None):
# # TODO: Use Item create
@ -272,8 +266,8 @@ class SubmitPaste(AbstractModule):
source = source if source else 'submitted'
save_path = source + '/' + now.strftime("%Y") + '/' + now.strftime("%m") + '/' + now.strftime("%d") + '/submitted_' + name + '.gz'
full_path = filename = os.path.join(os.environ['AIL_HOME'],
self.process.config.get("Directories", "pastes"), save_path)
full_path = os.path.join(os.environ['AIL_HOME'],
self.process.config.get("Directories", "pastes"), save_path)
self.redis_logger.debug(f'file path of the paste {full_path}')
@ -281,7 +275,7 @@ class SubmitPaste(AbstractModule):
# file not exists in AIL paste directory
self.redis_logger.debug(f"new paste {paste_content}")
gzip64encoded = self._compress_encode_content(paste_content)
gzip64encoded = self._compress_encode_content(paste_content, uuid)
if gzip64encoded:
@ -321,36 +315,30 @@ class SubmitPaste(AbstractModule):
return result
def _compress_encode_content(self, content):
def _compress_encode_content(self, content, uuid):
gzip64encoded = None
try:
gzipencoded = gzip.compress(content)
gzip64encoded = base64.standard_b64encode(gzipencoded).decode()
except:
self.abord_file_submission(uuid, "file error")
return gzip64encoded
def addError(self, uuid, errorMessage):
self.redis_logger.debug(errorMessage)
print(errorMessage)
error = self.r_serv_log_submit.get(f'{uuid}:error')
if error != None:
if error is not None:
self.r_serv_log_submit.set(f'{uuid}:error', error + '<br></br>' + errorMessage)
self.r_serv_log_submit.incr(f'{uuid}:nb_end')
def abord_file_submission(self, uuid, errorMessage):
self.redis_logger.debug(f'abord {uuid}, {errorMessage}')
self.addError(uuid, errorMessage)
self.r_serv_log_submit.set(f'{uuid}:end', 1)
curr_date = datetime.date.today()
self.serv_statistics.hincrby(curr_date.strftime("%Y%m%d"),'submit_abord', 1)
self.serv_statistics.hincrby(curr_date.strftime("%Y%m%d"), 'submit_abord', 1)
self.remove_submit_uuid(uuid)
# # TODO: use Item function
@ -358,14 +346,13 @@ class SubmitPaste(AbstractModule):
l_directory = item_filename.split('/')
return f'{l_directory[-4]}{l_directory[-3]}{l_directory[-2]}'
def verify_extention_filename(self, filename):
if not '.' in filename:
return True
else:
file_type = filename.rsplit('.', 1)[1]
#txt file
# txt file
if file_type in SubmitPaste.ALLOWED_EXTENSIONS:
return True
else:
@ -373,6 +360,5 @@ class SubmitPaste(AbstractModule):
if __name__ == '__main__':
module = SubmitPaste()
module.run()

View file

@ -153,6 +153,9 @@ def sanitise_date_range(date_from, date_to, separator='', date_type='str'):
date_from = date_to
elif not date_to and date_from:
date_to = date_from
elif not date_to and not date_from:
date = datetime.date.today().strftime("%Y%m%d")
return {"date_from": date, "date_to": date}
if date_type=='str':
# remove separators

View file

@ -320,39 +320,6 @@ class HiddenServices(object):
har_path = os.path.join(self.screenshot_directory, item_path) + '.json'
return har_path
def create_domain_basic_archive(self, l_pastes):
    """Build an in-memory zip archive for the given items.

    The archive receives the files returned by ``get_all_har``,
    ``get_all_domain_screenshot`` and ``get_all_item_full_path`` (in that
    order), plus a ``_URL_MAP_`` entry holding the encoded output of
    ``get_metadata_file``.

    :param l_pastes: items to archive, forwarded as-is to the helpers above
    :return: the ``BytesIO`` holding the archive, rewound to position 0
    """
    # Resolve every file list before the archive is opened.
    file_groups = (
        self.get_all_har(l_pastes, filename=True),
        self.get_all_domain_screenshot(l_pastes, filename=True),
        self.get_all_item_full_path(l_pastes, filename=True),
    )

    archive = BytesIO()
    with zipfile.ZipFile(archive, "a") as zf:
        for group in file_groups:
            self.write_in_zip_buffer(zf, group)

        # write map url
        url_map = self.get_metadata_file(l_pastes).encode()
        zf.writestr('_URL_MAP_', url_map)

    # The archive is finalised once the ZipFile is closed; rewind for readers.
    archive.seek(0)
    return archive
def write_in_zip_buffer(self, zf, list_file):
    """Copy files from disk into an open zip archive.

    :param zf: writable ``zipfile.ZipFile``
    :param list_file: iterable of ``(file_path, file_name)`` pairs;
        ``file_path`` is read from disk, ``file_name`` is the entry name
        used inside the archive
    """
    for source_path, archive_name in list_file:
        with open(source_path, "rb") as handle:
            payload = handle.read()
        zf.writestr(archive_name, payload)
def get_metadata_file(self, list_items):
file_content = ''

View file

@ -20,7 +20,7 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages
##################################
from modules.abstract_module import AbstractModule
# from lib.objects.Items import Item
class Template(AbstractModule):
"""
@ -36,19 +36,20 @@ class Template(AbstractModule):
# Send module state to logs
self.redis_logger.info(f'Module {self.module_name} initialized')
def computeNone(self):
"""
Compute when no message in queue
Do something when there is no message in the queue
"""
self.redis_logger.debug("No message in queue")
def compute(self, message):
"""
Compute a message in queue
Compute a message in queue / process the message (item_id, ...)
"""
self.redis_logger.debug("Compute message in queue")
# # if message is an item_id:
# item = Item(message)
# content = item.get_content()
if __name__ == '__main__':

View file

@ -1,71 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import socks
import socket
import urllib.request
import io
import gzip
import base64
import sys
import tempfile
# Max size in Mb
max_size = 5
def create_connection(address, timeout=None, source_address=None):
    """Open a connection to *address* through the default SOCKS proxy.

    Mirrors the signature of ``socket.create_connection`` so it can be
    installed in its place.

    :param address: ``(host, port)`` tuple to connect to
    :param timeout: numeric timeout in seconds applied to the socket; any
        non-numeric value (``None``, sentinel objects) leaves the socket's
        default untouched
    :param source_address: accepted for signature compatibility, not used
    :return: the connected ``socks.socksocket``
    """
    sock = socks.socksocket()
    try:
        # Honour the caller's timeout instead of silently dropping it.
        if isinstance(timeout, (int, float)):
            sock.settimeout(timeout)
        sock.connect(address)
    except Exception:
        # Do not leak the socket when the connection cannot be established.
        sock.close()
        raise
    return sock
def get_page(url, torclient_host='127.0.0.1', torclient_port=9050):
    """Fetch *url* and return the beginning of the response body.

    :param url: URL to request
    :param torclient_host: not used by this function
    :param torclient_port: not used by this function
    :return: at most ``max_size * 100000`` bytes of the body
    """
    # NOTE(review): max_size is commented as "Mb" but the multiplier is
    # 100000, i.e. 0.1 MB per unit - confirm which one is intended.
    request = urllib.request.Request(url)
    # UA of the Tor browser bundle
    request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; rv:24.0) Gecko/20100101 Firefox/24.0')
    # Close the response (and its socket) as soon as the body has been read.
    with urllib.request.urlopen(request, timeout=5) as response:
        return response.read(max_size * 100000)
# FIXME: does not work at all
def makegzip64(s):
    """Return the gzip-compressed form of the base64 encoding of *s*.

    :param s: bytes-like payload
    :return: gzip stream (bytes) whose decompressed content is
        ``base64.standard_b64encode(s)``
    """
    encoded = base64.standard_b64encode(s)
    buffer = io.BytesIO()
    gz = gzip.GzipFile(fileobj=buffer, mode='ab')
    try:
        gz.write(encoded)
    finally:
        # Closing flushes the gzip trailer into the buffer.
        gz.close()
    return buffer.getvalue()
if __name__ == "__main__":
    # Command-line entry point: fetch one URL (passed base64-encoded as the
    # only argument) through the local Tor SOCKS proxy.
    if len(sys.argv) != 2:
        print('usage:', 'tor_fetcher.py', 'URL (base64 encoded)')
        sys.exit(1)

    try:
        url = base64.standard_b64decode(sys.argv[1]).decode('utf8')
        print(url)
    except ValueError:
        # binascii.Error and UnicodeError are both ValueError subclasses.
        print('unable to decode')
        sys.exit(1)

    torclient_host = '127.0.0.1'
    torclient_port = 9050
    # Setup Proxy: route every socket created from here on through Tor.
    socks.set_default_proxy(socks.SOCKS5, torclient_host, torclient_port, True)
    socket.socket = socks.socksocket
    socket.create_connection = create_connection

    try:
        page = get_page(url)
    except Exception:
        # Any fetch failure is reported the same way; Ctrl-C and
        # SystemExit are no longer swallowed.
        print('unable to fetch')
        sys.exit(1)

    to_write = makegzip64(page)
    # Reserve a temporary file and close its handle right away so no file
    # descriptor is leaked.
    with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
        path = tmp_file.name
    # NOTE: the payload is currently not written to the file:
    # with open(path, 'w') as f:
    #     f.write(to_write)
    print(path)
    sys.exit(0)

View file

@ -1,328 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import uuid
import datetime
import redis
import json
import time
from hashlib import sha256
from scrapy.spidermiddlewares.httperror import HttpError
from twisted.internet.error import DNSLookupError
from twisted.internet.error import TimeoutError
from twisted.web._newclient import ResponseNeverReceived
from scrapy import Spider
from scrapy.linkextractors import LinkExtractor
from scrapy.crawler import CrawlerProcess, Crawler
from scrapy_splash import SplashRequest, SplashJsonResponse
sys.path.append(os.environ['AIL_BIN'])
from Helper import Process
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
import ConfigLoader
import Screenshot
import crawlers
script_cookie = """
function main(splash, args)
-- Default values
splash.js_enabled = true
splash.private_mode_enabled = true
splash.images_enabled = true
splash.webgl_enabled = true
splash.media_source_enabled = true
-- Force enable things
splash.plugins_enabled = true
splash.request_body_enabled = true
splash.response_body_enabled = true
splash.indexeddb_enabled = true
splash.html5_media_enabled = true
splash.http2_enabled = true
-- User Agent
splash:set_user_agent(args.user_agent)
-- User defined
splash.resource_timeout = args.resource_timeout
splash.timeout = args.timeout
-- Allow to pass cookies
splash:init_cookies(args.cookies)
-- Run
ok, reason = splash:go{args.url}
if not ok and not reason:find("http") then
return {
error = reason,
last_url = splash:url()
}
end
if reason == "http504" then
splash:set_result_status_code(504)
return ''
end
splash:wait{args.wait}
-- Page instrumentation
-- splash.scroll_position = {y=1000}
-- splash:wait{args.wait}
-- Response
return {
har = splash:har(),
html = splash:html(),
png = splash:png{render_all=true},
cookies = splash:get_cookies(),
last_url = splash:url(),
}
end
"""
class TorSplashCrawler():
def __init__(self, splash_url, crawler_options):
    """Create the Scrapy process and the crawler bound to TorSplashSpider.

    :param splash_url: value stored in the ``SPLASH_URL`` setting
    :param crawler_options: mapping read for ``user_agent``,
        ``closespider_pagecount`` and ``depth_limit``
    """
    self.process = CrawlerProcess({'LOG_ENABLED': True})

    downloader_middlewares = {
        'scrapy_splash.SplashCookiesMiddleware': 723,
        'scrapy_splash.SplashMiddleware': 725,
        'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware': 810,
        'scrapy_splash.SplashDeduplicateArgsMiddleware': 100,
    }
    spider_middlewares = {'scrapy_splash.SplashDeduplicateArgsMiddleware': 100}

    crawler_settings = {
        'USER_AGENT': crawler_options['user_agent'],  # /!\ overwritten by lua script
        'SPLASH_URL': splash_url,
        'ROBOTSTXT_OBEY': False,
        'DOWNLOADER_MIDDLEWARES': downloader_middlewares,
        'SPIDER_MIDDLEWARES': spider_middlewares,
        'DUPEFILTER_CLASS': 'scrapy_splash.SplashAwareDupeFilter',
        'HTTPERROR_ALLOW_ALL': True,
        'RETRY_TIMES': 2,
        'CLOSESPIDER_PAGECOUNT': crawler_options['closespider_pagecount'],
        'DEPTH_LIMIT': crawler_options['depth_limit'],
        'SPLASH_COOKIES_DEBUG': False,
    }
    self.crawler = Crawler(self.TorSplashSpider, crawler_settings)
def crawl(self, splash_url, type, crawler_options, date, requested_mode, url, domain, port, cookies, original_item):
    """Schedule the spider with the given arguments and start the process.

    Every parameter is forwarded unchanged, by keyword, to the spider
    constructor through ``self.process.crawl``.
    """
    spider_kwargs = {
        'splash_url': splash_url,
        'type': type,
        'crawler_options': crawler_options,
        'date': date,
        'requested_mode': requested_mode,
        'url': url,
        'domain': domain,
        'port': port,
        'cookies': cookies,
        'original_item': original_item,
    }
    self.process.crawl(self.crawler, **spider_kwargs)
    self.process.start()
class TorSplashSpider(Spider):
name = 'TorSplashSpider'
def __init__(self, splash_url, type, crawler_options, date, requested_mode, url, domain, port, cookies, original_item, *args, **kwargs):
    """Store the crawl parameters and open the resources the spider needs.

    :param splash_url: Splash endpoint, kept for reachability checks
    :param type: domain type, stored as ``self.domain_type``
    :param crawler_options: mapping read for ``user_agent``, ``png``, ``har``
    :param date: mapping read for ``date_day`` (sliced as YYYYMMDD),
        ``date_month`` and ``epoch``
    :param requested_mode: crawl mode; ``'test'`` is special-cased by the
        response handling
    :param url: start URL
    :param domain: the only domain stored in ``self.domains``
    :param port: port, stored as a string
    :param cookies: cookies sent with the first request
    :param original_item: used as the ``father`` of the first request
    """
    # Run the base Spider initialisation before setting our own state.
    super().__init__(*args, **kwargs)

    self.splash_url = splash_url
    self.domain_type = type
    self.requested_mode = requested_mode
    self.original_item = original_item
    # Identifier of the first item saved for this crawl; set on first use.
    self.root_key = None
    self.start_urls = url
    self.domains = [domain]
    self.port = str(port)

    date_str = '{}/{}/{}'.format(date['date_day'][0:4], date['date_day'][4:6], date['date_day'][6:8])
    self.full_date = date['date_day']
    self.date_month = date['date_month']
    self.date_epoch = int(date['epoch'])

    self.user_agent = crawler_options['user_agent']
    self.png = crawler_options['png']
    self.har = crawler_options['har']
    self.cookies = cookies

    config_section = 'Crawler'
    self.p = Process(config_section)
    # Crawled items and HAR files are both stored under a YYYY/MM/DD folder.
    self.item_dir = os.path.join(self.p.config.get("Directories", "crawled"), date_str)

    config_loader = ConfigLoader.ConfigLoader()
    self.har_dir = os.path.join(config_loader.get_files_directory('har'), date_str)

    self.r_serv_log_submit = redis.StrictRedis(
        host=self.p.config.get("Redis_Log_submit", "host"),
        port=self.p.config.getint("Redis_Log_submit", "port"),
        db=self.p.config.getint("Redis_Log_submit", "db"),
        decode_responses=True)
def build_request_arg(self, cookies):
    """Assemble the argument dict passed along with a Splash request.

    :param cookies: value stored under the ``cookies`` key
    :return: dict with the wait/timeouts, user agent, cookies and Lua source
    """
    request_args = {'wait': 10}
    # /!\ Weird behaviour if timeout < resource_timeout /!\
    request_args['resource_timeout'] = 30
    request_args['timeout'] = 30
    request_args['user_agent'] = self.user_agent
    request_args['cookies'] = cookies
    request_args['lua_source'] = script_cookie
    return request_args
def start_requests(self):
    """Yield the single initial SplashRequest for the start URL."""
    splash_args = self.build_request_arg(self.cookies)
    first_url = self.start_urls
    request_meta = {'father': self.original_item, 'current_url': first_url}
    yield SplashRequest(
        first_url,
        self.parse,
        errback=self.errback_catcher,
        endpoint='execute',
        meta=request_meta,
        args=splash_args
    )
# # TODO: remove duplicate and anchor
def parse(self,response):
#print(response.headers)
#print(response.status)
#print(response.meta)
#print(response.data) # # TODO: handle lua script error
#{'type': 'ScriptError', 'info': {'error': "'}' expected (to close '{' at line 47) near 'error_retry'",
#'message': '[string "..."]:53: \'}\' expected (to close \'{\' at line 47) near \'error_retry\'',
#'type': 'LUA_INIT_ERROR', 'source': '[string "..."]', 'line_number': 53},
#'error': 400, 'description': 'Error happened while executing Lua script'}
if response.status == 504:
# no response
#print('504 detected')
pass
# LUA ERROR # # TODO: logs errors
elif 'error' in response.data:
if(response.data['error'] == 'network99'):
## splash restart ##
error_retry = response.meta.get('error_retry', 0)
if error_retry < 3:
error_retry += 1
url = response.data['last_url']
father = response.meta['father']
self.logger.error('Splash, ResponseNeverReceived for %s, retry in 10s ...', url)
time.sleep(10)
if 'cookies' in response.data:
all_cookies = response.data['cookies'] # # TODO: use initial cookie ?????
else:
all_cookies = []
l_cookies = self.build_request_arg(all_cookies)
yield SplashRequest(
url,
self.parse,
errback=self.errback_catcher,
endpoint='execute',
dont_filter=True,
meta={'father': father, 'current_url': url, 'error_retry': error_retry},
args=l_cookies
)
else:
if self.requested_mode == 'test':
crawlers.save_test_ail_crawlers_result(False, 'Connection to proxy refused')
print('Connection to proxy refused')
elif response.data['error'] == 'network3':
if self.requested_mode == 'test':
crawlers.save_test_ail_crawlers_result(False, 'HostNotFoundError: the remote host name was not found (invalid hostname)')
print('HostNotFoundError: the remote host name was not found (invalid hostname)')
else:
if self.requested_mode == 'test':
crawlers.save_test_ail_crawlers_result(False, response.data['error'])
print(response.data['error'])
elif response.status != 200:
print('other response: {}'.format(response.status))
# detect connection to proxy refused
error_log = (json.loads(response.body.decode()))
print(error_log)
#elif crawlers.is_redirection(self.domains[0], response.data['last_url']):
# pass # ignore response
else:
## TEST MODE ##
if self.requested_mode == 'test':
if 'It works!' in response.data['html']:
crawlers.save_test_ail_crawlers_result(True, 'It works!')
else:
print('TEST ERROR')
crawlers.save_test_ail_crawlers_result(False, 'TEST ERROR')
return
## -- ##
item_id = crawlers.create_item_id(self.item_dir, self.domains[0])
self.save_crawled_item(item_id, response.data['html'])
crawlers.create_item_metadata(item_id, self.domains[0], response.data['last_url'], self.port, response.meta['father'])
if self.root_key is None:
self.root_key = item_id
crawlers.add_domain_root_item(item_id, self.domain_type, self.domains[0], self.date_epoch, self.port)
crawlers.create_domain_metadata(self.domain_type, self.domains[0], self.port, self.full_date, self.date_month)
if 'cookies' in response.data:
all_cookies = response.data['cookies']
else:
all_cookies = []
# SCREENSHOT
if 'png' in response.data and self.png:
sha256_string = Screenshot.save_crawled_screeshot(response.data['png'], 5000000, f_save=self.requested_mode)
if sha256_string:
Screenshot.save_item_relationship(sha256_string, item_id)
Screenshot.save_domain_relationship(sha256_string, self.domains[0])
# HAR
if 'har' in response.data and self.har:
crawlers.save_har(self.har_dir, item_id, response.data['har'])
le = LinkExtractor(allow_domains=self.domains, unique=True)
for link in le.extract_links(response):
l_cookies = self.build_request_arg(all_cookies)
yield SplashRequest(
link.url,
self.parse,
errback=self.errback_catcher,
endpoint='execute',
meta={'father': item_id, 'current_url': link.url},
args=l_cookies
)
def errback_catcher(self, failure):
# catch all errback failures,
self.logger.error(repr(failure))
if failure.check(ResponseNeverReceived):
## DEBUG ##
self.logger.error(failure.request)
if failure.value.response:
self.logger.error(failure.value.response)
## ----- ##
# Extract request metadata
url = failure.request.meta['current_url']
father = failure.request.meta['father']
l_cookies = self.build_request_arg(failure.request.meta['splash']['args']['cookies'])
# Check if Splash restarted
if not crawlers.is_splash_reachable(self.splash_url):
self.logger.error('Splash, ResponseNeverReceived for %s, retry in 30s ...', url)
time.sleep(30)
yield SplashRequest(
url,
self.parse,
errback=self.errback_catcher,
endpoint='execute',
meta={'father': father, 'current_url': url},
args=l_cookies
)
else:
self.logger.error(failure.type)
self.logger.error(failure.getErrorMessage())
def save_crawled_item(self, item_id, item_content):
    """Save a crawled page, then announce it on the output queues.

    :param item_id: identifier of the item
    :param item_content: page content handed to ``crawlers.save_crawled_item``
    """
    encoded_content = crawlers.save_crawled_item(item_id, item_content)

    # Relay "<item_id> <encoded content>" to the 'Mixer' queue.
    mixer_message = "{0} {1}".format(item_id, encoded_content)
    self.p.populate_set_out(mixer_message, 'Mixer')

    # increase nb of paste by feeder name
    self.r_serv_log_submit.hincrby("mixer_cache:list_feeder", "crawler", 1)

    # tag crawled paste
    tag_message = 'infoleak:submission="crawler";{}'.format(item_id)
    self.p.populate_set_out(tag_message, 'Tags')

View file

@ -1,80 +0,0 @@
#!/bin/bash
#
# Start <n> Splash docker containers, one per window of a detached
# "Docker_Splash" screen session, published on consecutive host ports
# starting at <p>. Must be run as root.

RED="\\033[1;31m"
DEFAULT="\\033[0;39m"
GREEN="\\033[1;32m"
WHITE="\\033[0;02m"

# PID of an already running "Docker_Splash" screen session (empty when none).
issplashed=$(screen -ls | grep -E '[0-9]+\.Docker_Splash' | cut -d. -f1)

# Print the whole help text on stderr and exit with status 1.
usage() {
    {
        echo "Usage: sudo $0 [-f <config_absolute_path>] [-p <port_start>] [-n <number_of_splash_servers>]"
        echo " -f: absolute path to splash docker proxy-profiles directory (used for proxy configuration)"
        echo " -p: number of the first splash server port number. This number is incremented for the others splash server"
        echo " -n: number of splash servers to start"
        echo ""
        echo " -options:"
        echo " -u: max unbound in-memory cache (Mb, Restart Splash when full, default=3000 Mb)"
        echo ""
        echo "example:"
        echo "sudo ./launch_splash_crawler.sh -f /home/my_user/AIL-framework/configs/docker/splash_onion/etc/splash/proxy-profiles/ -p 8050 -n 3"
    } 1>&2
    exit 1
}

while getopts ":p:f:n:u:" o; do
    case "${o}" in
        p)
            p=${OPTARG}
            ;;
        f)
            f=${OPTARG}
            ;;
        n)
            n=${OPTARG}
            ;;
        u)
            u=${OPTARG}
            ;;
        *)
            usage
            ;;
    esac
done
shift "$((OPTIND - 1))"

if [ -z "${p}" ] || [ -z "${f}" ] || [ -z "${n}" ]; then
    usage
fi

# Default memory limit (Mb) handed to Splash through --maxrss.
if [ -z "${u}" ]; then
    u=3000
fi

# These values are spliced into a command line executed as root: only plain
# integers are accepted.
for value in "${p}" "${n}" "${u}"; do
    if ! [[ "${value}" =~ ^[0-9]+$ ]]; then
        printf "${RED}\n Error: -p, -n and -u expect an integer, got: ${WHITE}%s${DEFAULT}\n" "${value}"
        exit 1
    fi
done

if [ "$EUID" -ne 0 ]; then
    echo -e "${RED}\t* Please run as root or sudo.\n${DEFAULT}"
    exit 1
fi

if [ ! -d "${f}" ]; then
    printf "${RED}\n Error -f, proxy-profiles directory: ${WHITE}%s${RED} not found\n${DEFAULT} Please check if you enter the correct path\n" "${f}"
    exit 1
fi

# Accept the directory with or without a trailing slash.
f="${f%/}/"

if [ ! -f "${f}default.ini" ]; then
    printf "${RED}\n Error -f, proxy configuration file:${WHITE} default.ini${RED} not found\n${DEFAULT} Please check if you enter the correct path\n"
    exit 1
fi

if [[ $issplashed ]]; then
    echo -e "${RED}\t* A screen is already launched, please kill it before creating another one.${DEFAULT}"
    exit 1
fi

# Shell-quote the directory so that spaces or metacharacters in the path
# survive the "bash -c" string built below.
printf -v f_quoted '%q' "${f}"

screen -dmS "Docker_Splash"
sleep 0.1

for ((i = 0; i < n; i++)); do
    port_number=$((p + i))
    # "read x" keeps the screen window open after docker returns.
    docker_cmd="sudo docker run -d -p ${port_number}:8050 --restart=always --cpus=1 --memory=2G -v ${f_quoted}:/etc/splash/proxy-profiles/ --net=bridge scrapinghub/splash --maxrss ${u}; read x"
    screen -S "Docker_Splash" -X screen -t "docker_splash:${port_number}" bash -c "${docker_cmd}"
    sleep 0.1
    printf "${GREEN} Splash server launched on port %s${DEFAULT}\n" "${port_number}"
done

View file

@ -1,53 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import json
import redis
from TorSplashCrawler import TorSplashCrawler
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
import crawlers
if __name__ == '__main__':
    # Entry point: run a single Splash crawl described by the JSON document
    # stored in Redis_Cache under 'crawler_request:<uuid>'.

    if len(sys.argv) != 2:
        print('usage:', 'tor_crawler.py', 'uuid')
        # sys.exit, not the site-provided exit() helper which may be absent
        sys.exit(1)

    config_loader = ConfigLoader.ConfigLoader()
    redis_cache = config_loader.get_redis_conn("Redis_Cache")
    config_loader = None

    # get crawler config key
    request_uuid = sys.argv[1]
    request_key = 'crawler_request:{}'.format(request_uuid)

    # get configs
    raw_request = redis_cache.get(request_key)
    if raw_request is None:
        # unknown or already consumed request: fail with a clear message
        # instead of a TypeError raised by json.loads(None)
        print('crawler request not found: {}'.format(request_key), file=sys.stderr)
        sys.exit(1)
    crawler_json = json.loads(raw_request)

    splash_url = crawler_json['splash_url']
    service_type = crawler_json['service_type']
    url = crawler_json['url']
    domain = crawler_json['domain']
    port = crawler_json['port']
    original_item = crawler_json['item']
    crawler_options = crawler_json['crawler_options']
    date = crawler_json['date']
    requested_mode = crawler_json['requested']

    # cookies are only loaded when a cookiejar is attached to the request
    if crawler_options['cookiejar_uuid']:
        cookies = crawlers.load_crawler_cookies(crawler_options['cookiejar_uuid'], domain, crawler_type=service_type)
    else:
        cookies = []

    # the request is consumed: drop it from the cache before crawling
    redis_cache.delete(request_key)

    try:
        crawler = TorSplashCrawler(splash_url, crawler_options)
        crawler.crawl(splash_url, service_type, crawler_options, date, requested_mode, url, domain, port, cookies, original_item)
    except Exception as e:
        # best effort: report the error on both stdout and stderr
        print(e)
        print(e, file=sys.stderr)

View file

@ -10,7 +10,6 @@ The Retro_Hunt trackers module
# Import External packages
##################################
import os
import re
import sys
import time
import yara
@ -20,15 +19,15 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages
##################################
from modules.abstract_module import AbstractModule
from packages.Item import Item
from packages.Item import Date
from lib.objects.Items import Item
from packages import Date
from lib import Tracker
import NotificationHelper # # TODO: refractor
class Retro_Hunt(AbstractModule):
#mail_body_template = "AIL Framework,\nNew YARA match: {}\nitem id: {}\nurl: {}{}"
# mail_body_template = "AIL Framework,\nNew YARA match: {}\nitem id: {}\nurl: {}{}"
"""
Retro_Hunt module for AIL framework
@ -39,9 +38,6 @@ class Retro_Hunt(AbstractModule):
self.full_item_url = self.process.config.get("Notifications", "ail_domain") + "/object/item?id="
self.refresh_deleta = 10
self.last_refresh = 0
# reset on each loop
self.task_uuid = None
self.date_from = 0
@ -49,13 +45,12 @@ class Retro_Hunt(AbstractModule):
self.nb_src_done = 0
self.progress = 0
self.item = None
self.tags = []
self.redis_logger.info(f"Module: {self.module_name} Launched")
# # TODO: send mails
# # TODO: # start_time
# end_time
# # TODO: # start_time # end_time
def compute(self, task_uuid):
self.redis_logger.warning(f'{self.module_name}, starting Retro hunt task {task_uuid}')
@ -75,7 +70,7 @@ class Retro_Hunt(AbstractModule):
self.tags = Tracker.get_retro_hunt_task_tags(task_uuid)
curr_date = Tracker.get_retro_hunt_task_current_date(task_uuid)
self.nb_src_done = Tracker.get_retro_hunt_task_nb_src_done(task_uuid, sources=sources)
self.progress = self.update_progress(sources, curr_date)
self.update_progress(sources, curr_date)
# iterate on date
filter_last = True
while int(curr_date) <= int(self.date_to):
@ -91,14 +86,15 @@ class Retro_Hunt(AbstractModule):
self.redis_logger.debug(f'{self.module_name}, Retro Hunt searching in directory {dir}')
l_obj = Tracker.get_items_to_analyze(dir)
for id in l_obj:
#print(f'{dir} / {id}')
# print(f'{dir} / {id}')
self.item = Item(id)
# save current item in cache
Tracker.set_cache_retro_hunt_task_id(task_uuid, id)
self.redis_logger.debug(f'{self.module_name}, Retro Hunt rule {task_uuid}, searching item {id}')
yara_match = rule.match(data=self.item.get_content(), callback=self.yara_rules_match, which_callbacks=yara.CALLBACK_MATCHES, timeout=timeout)
yara_match = rule.match(data=self.item.get_content(), callback=self.yara_rules_match,
which_callbacks=yara.CALLBACK_MATCHES, timeout=timeout)
# save last item
if nb_id % 10 == 0: # # TODO: Add nb before save in DB
@ -110,7 +106,7 @@ class Retro_Hunt(AbstractModule):
self.update_progress(sources, curr_date)
if Tracker.check_retro_hunt_pause(task_uuid):
Tracker.set_retro_hunt_last_analyzed(task_uuid, id)
#self.update_progress(sources, curr_date, save_db=True)
# self.update_progress(sources, curr_date, save_db=True)
Tracker.pause_retro_hunt_task(task_uuid)
Tracker.clear_retro_hunt_task_cache(task_uuid)
return None
@ -142,7 +138,7 @@ class Retro_Hunt(AbstractModule):
def yara_rules_match(self, data):
id = self.item.get_id()
#print(data)
# print(data)
task_uuid = data['namespace']
self.redis_logger.info(f'{self.module_name}, Retro hunt {task_uuid} match found: {id}')
@ -177,9 +173,9 @@ class Retro_Hunt(AbstractModule):
if task_uuid:
# Module processing with the message from the queue
self.redis_logger.debug(task_uuid)
#try:
# try:
self.compute(task_uuid)
#except Exception as err:
# except Exception as err:
# self.redis_logger.error(f'Error in module {self.module_name}: {err}')
# # Remove uuid ref
# self.remove_submit_uuid(uuid)

View file

@ -9,7 +9,6 @@ It processes every item coming from the global module and test the regex
"""
import os
import re
import sys
import time
import requests
@ -19,10 +18,9 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages
##################################
from modules.abstract_module import AbstractModule
from packages.Item import Item
from lib.objects.Items import Item
from packages import Term
from lib import Tracker
from lib import regex_helper
import NotificationHelper
@ -42,8 +40,6 @@ class Tracker_Regex(AbstractModule):
self.full_item_url = self.process.config.get("Notifications", "ail_domain") + "/object/item?id="
self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)
# refresh Tracked Regex
self.dict_regex_tracked = Term.get_regex_tracked_words_dict()
self.last_refresh = time.time()
@ -63,7 +59,7 @@ class Tracker_Regex(AbstractModule):
item_content = item.get_content()
for regex in self.dict_regex_tracked:
matched = regex_helper.regex_search(self.module_name, self.redis_cache_key, self.dict_regex_tracked[regex], item_id, item_content, max_time=self.max_execution_time)
matched = self.regex_findall(self.dict_regex_tracked[regex], item_id, item_content)
if matched:
self.new_tracker_found(regex, 'regex', item)
@ -92,8 +88,8 @@ class Tracker_Regex(AbstractModule):
if mail_to_notify:
mail_subject = Tracker.get_email_subject(tracker_uuid)
mail_body = Tracker_Regex.mail_body_template.format(tracker, item_id, self.full_item_url, item_id)
for mail in mail_to_notify:
NotificationHelper.sendEmailNotification(mail, mail_subject, mail_body)
for mail in mail_to_notify:
NotificationHelper.sendEmailNotification(mail, mail_subject, mail_body)
# Webhook
webhook_to_post = Term.get_term_webhook(tracker_uuid)

View file

@ -22,7 +22,7 @@ sys.path.append(os.environ['AIL_BIN'])
##################################
from modules.abstract_module import AbstractModule
import NotificationHelper
from packages.Item import Item
from lib.objects.Items import Item
from packages import Term
from lib import Tracker
@ -96,7 +96,7 @@ class Tracker_Term(AbstractModule):
# Term.create_token_statistics(item_date, word, dict_words_freq[word])
# check solo words
####### # TODO: check if source needed #######
# ###### # TODO: check if source needed #######
for word in self.list_tracked_words:
if word in dict_words_freq:
self.new_term_found(word, 'word', item)
@ -136,10 +136,10 @@ class Tracker_Term(AbstractModule):
if mail_to_notify:
mail_subject = Tracker.get_email_subject(term_uuid)
mail_body = Tracker_Term.mail_body_template.format(term, item_id, self.full_item_url, item_id)
for mail in mail_to_notify:
self.redis_logger.debug(f'Send Mail {mail_subject}')
print(f'S print(item_content)end Mail {mail_subject}')
NotificationHelper.sendEmailNotification(mail, mail_subject, mail_body)
for mail in mail_to_notify:
self.redis_logger.debug(f'Send Mail {mail_subject}')
print(f'S print(item_content)end Mail {mail_subject}')
NotificationHelper.sendEmailNotification(mail, mail_subject, mail_body)
# Webhook
webhook_to_post = Term.get_term_webhook(term_uuid)
@ -162,7 +162,6 @@ class Tracker_Term(AbstractModule):
self.redis_logger.error(f"Webhook request failed for {webhook_to_post}\nReason: Something went wrong")
if __name__ == '__main__':
module = Tracker_Term()
module.run()

View file

@ -8,7 +8,6 @@
# Import External packages
##################################
import os
import re
import sys
import time
import yara
@ -20,10 +19,10 @@ sys.path.append(os.environ['AIL_BIN'])
##################################
from modules.abstract_module import AbstractModule
from packages import Term
from packages.Item import Item
from lib.objects.Items import Item
from lib import Tracker
import NotificationHelper # # TODO: refactor
import NotificationHelper # # TODO: refactor
class Tracker_Yara(AbstractModule):
@ -46,7 +45,6 @@ class Tracker_Yara(AbstractModule):
self.redis_logger.info(f"Module: {self.module_name} Launched")
def compute(self, item_id):
# refresh YARA list
if self.last_refresh < Tracker.get_tracker_last_updated_by_type('yara'):
@ -58,7 +56,8 @@ class Tracker_Yara(AbstractModule):
self.item = Item(item_id)
item_content = self.item.get_content()
try:
yara_match = self.rules.match(data=item_content, callback=self.yara_rules_match, which_callbacks=yara.CALLBACK_MATCHES, timeout=60)
yara_match = self.rules.match(data=item_content, callback=self.yara_rules_match,
which_callbacks=yara.CALLBACK_MATCHES, timeout=60)
if yara_match:
self.redis_logger.info(f'{self.item.get_id()}: {yara_match}')
print(f'{self.item.get_id()}: {yara_match}')
@ -91,10 +90,10 @@ class Tracker_Yara(AbstractModule):
if mail_to_notify:
mail_subject = Tracker.get_email_subject(tracker_uuid)
mail_body = Tracker_Yara.mail_body_template.format(data['rule'], item_id, self.full_item_url, item_id)
for mail in mail_to_notify:
self.redis_logger.debug(f'Send Mail {mail_subject}')
print(f'Send Mail {mail_subject}')
NotificationHelper.sendEmailNotification(mail, mail_subject, mail_body)
for mail in mail_to_notify:
self.redis_logger.debug(f'Send Mail {mail_subject}')
print(f'Send Mail {mail_subject}')
NotificationHelper.sendEmailNotification(mail, mail_subject, mail_body)
# Webhook
webhook_to_post = Term.get_term_webhook(tracker_uuid)
@ -116,7 +115,6 @@ class Tracker_Yara(AbstractModule):
except:
self.redis_logger.error(f"Webhook request failed for {webhook_to_post}\nReason: Something went wrong")
return yara.CALLBACK_CONTINUE

View file

@ -262,14 +262,10 @@ db = 0
[Crawler]
activate_crawler = False
crawler_depth_limit = 1
default_crawler_har = True
default_crawler_png = True
default_crawler_closespider_pagecount = 50
default_crawler_user_agent = Mozilla/5.0 (Windows NT 10.0; rv:78.0) Gecko/20100101 Firefox/78.0
splash_url = http://127.0.0.1
splash_port = 8050-8052
domain_proxy = onion.foundation
default_depth_limit = 1
default_har = True
default_screenshot = True
onion_proxy = onion.foundation
[IP]
# list of comma-separated CIDR that you wish to be alerted for. e.g:

View file

@ -94,7 +94,7 @@ DEFAULT_HOME=$(pwd)
#### KVROCKS ####
test ! -d kvrocks/ && git clone https://github.com/apache/incubator-kvrocks.git kvrocks
pushd kvrocks
./build.sh build
./x.py build
popd
DEFAULT_KVROCKS_DATA=$DEFAULT_HOME/DATA_KVROCKS

View file

@ -1,11 +1,12 @@
pyail
pylacus
pymisp>=2.4.144
d4-pyclient>=0.1.6
thehive4py
# Core
redis==2.10.6
redis==3.0.0
python-magic>0.4.15
yara-python>4.0.2

View file

@ -40,6 +40,7 @@ class Test_Module_ApiKey(unittest.TestCase):
def setUp(self):
self.module_obj = ApiKey()
self.module_obj.debug = True
def test_module(self):
item_id = 'tests/2021/01/01/api_keys.gz'
@ -56,6 +57,7 @@ class Test_Module_Categ(unittest.TestCase):
def setUp(self):
self.module_obj = Categ()
self.module_obj.debug = True
def test_module(self):
item_id = 'tests/2021/01/01/categ.gz'
@ -69,14 +71,15 @@ class Test_Module_CreditCards(unittest.TestCase):
def setUp(self):
self.module_obj = CreditCards()
self.module_obj.debug = True
def test_module(self):
item_id = 'tests/2021/01/01/credit_cards.gz 7'
test_cards = ['341039324930797', # American Express
'6011613905509166', # Discover Card
'3547151714018657', # Japan Credit Bureau (JCB)
'5492981206527330', # 16 digits MasterCard
'4024007132849695', # '4532525919781' # 16-digit VISA, with separators
test_cards = ['341039324930797', # American Express
'6011613905509166', # Discover Card
'3547151714018657', # Japan Credit Bureau (JCB)
'5492981206527330', # 16 digits MasterCard
'4024007132849695', # '4532525919781' # 16-digit VISA, with separators
]
result = self.module_obj.compute(item_id, r_result=True)
@ -86,6 +89,7 @@ class Test_Module_DomClassifier(unittest.TestCase):
def setUp(self):
self.module_obj = DomClassifier()
self.module_obj.debug = True
def test_module(self):
test_host = 'foo.be'
@ -98,6 +102,7 @@ class Test_Module_Global(unittest.TestCase):
def setUp(self):
self.module_obj = Global()
self.module_obj.debug = True
def test_module(self):
# # TODO: delete item
@ -138,6 +143,7 @@ class Test_Module_Keys(unittest.TestCase):
def setUp(self):
self.module_obj = Keys()
self.module_obj.debug = True
def test_module(self):
item_id = 'tests/2021/01/01/keys.gz'
@ -148,6 +154,7 @@ class Test_Module_Onion(unittest.TestCase):
def setUp(self):
self.module_obj = Onion()
self.module_obj.debug = True
def test_module(self):
item_id = 'tests/2021/01/01/onion.gz'
@ -157,7 +164,7 @@ class Test_Module_Onion(unittest.TestCase):
self.module_obj.compute(f'{item_id} 3')
if crawlers.is_crawler_activated():
## check domain queues
# # check domain queues
# all domains queue
self.assertTrue(crawlers.is_domain_in_queue('onion', domain_1))
# all url/item queue
@ -177,11 +184,13 @@ class Test_Module_Telegram(unittest.TestCase):
def setUp(self):
self.module_obj = Telegram()
self.module_obj.debug = True
def test_module(self):
item_id = 'tests/2021/01/01/keys.gz'
# # TODO: check results
result = self.module_obj.compute(item_id)
if __name__ == '__main__':
unittest.main()

View file

@ -12,10 +12,8 @@ Requirements:
"""
import redis
import os
import time
from packages import Paste
from pubsublogger import publisher
from Helper import Process

View file

@ -37,8 +37,9 @@ def get_object_correlation_json(correlation_id, subtype, max_nodes):
object_type = 'cryptocurrency'
max_nodes = sanitise_nb_max_nodes(max_nodes)
# FIXME
# ALL correlations
correlation_names = Correlate_object.sanitise_correlation_names('')
#correlation_names = Correlate_object.sanitise_correlation_names('')
#correlation_objects = Correlate_object.sanitise_correlation_objects('')
correlation_objects = ['domain']

View file

@ -29,12 +29,10 @@ sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.ConfigLoader import ConfigLoader
from lib.Users import User
from lib import Tag
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
# Import config
import Flask_config
@ -50,14 +48,14 @@ from blueprints.hunters import hunters
from blueprints.old_endpoints import old_endpoints
from blueprints.ail_2_ail_sync import ail_2_ail_sync
from blueprints.settings_b import settings_b
from blueprints.objects_cve import objects_cve
from blueprints.objects_decoded import objects_decoded
from blueprints.objects_range import objects_range
Flask_dir = os.environ['AIL_FLASK']
# CONFIG #
config_loader = ConfigLoader.ConfigLoader()
config_loader = ConfigLoader()
baseUrl = config_loader.get_config_str("Flask", "baseurl")
host = config_loader.get_config_str("Flask", "host")
baseUrl = baseUrl.replace('/', '')
@ -111,8 +109,8 @@ app.register_blueprint(hunters, url_prefix=baseUrl)
app.register_blueprint(old_endpoints, url_prefix=baseUrl)
app.register_blueprint(ail_2_ail_sync, url_prefix=baseUrl)
app.register_blueprint(settings_b, url_prefix=baseUrl)
app.register_blueprint(objects_cve, url_prefix=baseUrl)
app.register_blueprint(objects_decoded, url_prefix=baseUrl)
app.register_blueprint(objects_range, url_prefix=baseUrl)
# ========= =========#
# ========= Cookie name ========
@ -162,33 +160,32 @@ for root, dirs, files in os.walk(os.path.join(Flask_dir, 'modules')):
if name == 'Flask_config.py':
continue
name = name.strip('.py')
#print('importing {}'.format(name))
importlib.import_module(name)
elif name == 'header_{}.html'.format(module_name):
with open(join(root, name), 'r') as f:
to_add_to_header_dico[module_name] = f.read()
#create header.html
# create header.html
complete_header = ""
with open(os.path.join(Flask_dir, 'templates', 'header_base.html'), 'r') as f:
complete_header = f.read()
modified_header = complete_header
#Add the header in the supplied order
# Add the header in the supplied order
for module_name, txt in list(to_add_to_header_dico.items()):
to_replace = '<!--{}-->'.format(module_name)
if to_replace in complete_header:
modified_header = modified_header.replace(to_replace, txt)
del to_add_to_header_dico[module_name]
#Add the header for no-supplied order
# Add the header for no-supplied order
to_add_to_header = []
for module_name, txt in to_add_to_header_dico.items():
to_add_to_header.append(txt)
modified_header = modified_header.replace('<!--insert here-->', '\n'.join(to_add_to_header))
#Write the header.html file
# Write the header.html file
with open(os.path.join(Flask_dir, 'templates', 'header.html'), 'w') as f:
f.write(modified_header)
@ -250,6 +247,7 @@ def page_not_found(e):
# avoid endpoint enumeration
return render_template('error/404.html'), 404
# ========== INITIAL taxonomies ============
default_taxonomies = ["infoleak", "gdpr", "fpf", "dark-web"]

View file

@ -26,22 +26,6 @@ sys.path.append(os.environ['AIL_BIN'])
##################################
from lib.objects import ail_objects
################################################################################
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
import Correlate_object
import Domain
import Screenshot
import btc_ail
import Username
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
import Cryptocurrency
import Pgp
import Decoded
import Tag
bootstrap_label = Flask_config.bootstrap_label
vt_enabled = Flask_config.vt_enabled
@ -74,77 +58,15 @@ def sanitise_nb_max_nodes(nb_max_nodes):
nb_max_nodes = 300
return nb_max_nodes
def sanitise_correlation_names(correlation_names):
    '''
    Filter a comma-separated list of correlation names.

    correlation_names ex = 'pgp,crypto'

    Returns the requested names that Correlate_object knows about. Every known
    correlation name is returned when the argument is None or when none of the
    requested names is known.
    '''
    known_names = Correlate_object.get_all_correlation_names()
    if correlation_names is None:
        return known_names

    selected = [name for name in correlation_names.split(',') if name in known_names]
    return selected if selected else known_names
def sanitise_correlation_objects(correlation_objects):
    '''
    Filter a comma-separated list of correlation object types.

    correlation_objects ex = 'domain,decoded'

    Returns the requested object types that Correlate_object knows about. Every
    known object type is returned when the argument is None or when none of the
    requested types is known.
    '''
    known_objects = Correlate_object.get_all_correlation_objects()
    if correlation_objects is None:
        return known_objects

    selected = [obj for obj in correlation_objects.split(',') if obj in known_objects]
    return selected if selected else known_objects
def get_card_metadata(object_type, correlation_id, type_id=None, expand_card=False):
    """Build the dict of extra fields shown on the correlation card of an object.

    The keys that are filled depend on ``object_type``; an object type not
    handled below yields an empty dict. ``expand_card`` only matters for
    bitcoin cryptocurrency objects, where it adds the "related_btc" entry.
    """
    card = {}

    if object_type == 'cryptocurrency':
        card["sparkline"] = Cryptocurrency.cryptocurrency.get_list_nb_previous_correlation_object(type_id, correlation_id, 6)
        card["icon"] = Correlate_object.get_correlation_node_icon(object_type, type_id)
        if expand_card and type_id == 'bitcoin':
            card["related_btc"] = btc_ail.get_bitcoin_info(correlation_id)

    elif object_type == 'pgp':
        card["sparkline"] = Pgp.pgp.get_list_nb_previous_correlation_object(type_id, correlation_id, 6)
        card["icon"] = Correlate_object.get_correlation_node_icon(object_type, type_id)

    elif object_type == 'username':
        card["sparkline"] = Username.correlation.get_list_nb_previous_correlation_object(type_id, correlation_id, 6)
        card["icon"] = Correlate_object.get_correlation_node_icon(object_type, type_id)

    elif object_type == 'decoded':
        card["sparkline"] = Decoded.get_list_nb_previous_hash(correlation_id, 6)
        card["icon"] = Correlate_object.get_correlation_node_icon(object_type, value=correlation_id)
        vt_report = Decoded.get_decoded_vt_report(correlation_id)
        card["vt"] = vt_report
        # the VirusTotal status flag is written into the report dict itself
        vt_report["status"] = vt_enabled
        card["add_tags_modal"] = Tag.get_modal_add_tags(correlation_id, object_type='decoded')

    elif object_type == 'domain':
        card["icon"] = Correlate_object.get_correlation_node_icon(object_type, value=correlation_id)
        card["tags"] = Domain.get_domain_tags(correlation_id)

    elif object_type == 'screenshot':
        # screenshots are tagged under the 'image' object type
        card["add_tags_modal"] = Tag.get_modal_add_tags(correlation_id, object_type='image')

    elif object_type == 'paste':
        card["icon"] = Correlate_object.get_correlation_node_icon(object_type, value=correlation_id)

    return card
# ============= ROUTES ==============
@correlation.route('/correlation/show_correlation', methods=['GET', 'POST']) # GET + POST
@correlation.route('/correlation/show', methods=['GET', 'POST']) # GET + POST
@login_required
@login_read_only
def show_correlation():
if request.method == 'POST':
object_type = request.form.get('object_type')
type_id = request.form.get('type_id')
correlation_id = request.form.get('correlation_id')
object_type = request.form.get('obj_type')
subtype = request.form.get('subtype')
obj_id = request.form.get('obj_id')
max_nodes = request.form.get('max_nb_nodes_in')
mode = request.form.get('mode')
if mode:
@ -153,73 +75,71 @@ def show_correlation():
mode = 'union'
## get all selected correlations
correlation_names = []
correlation_objects = []
#correlation_names
filter_types = []
correl_option = request.form.get('CveCheck')
if correl_option:
filter_types.append('cve')
correl_option = request.form.get('CryptocurrencyCheck')
if correl_option:
correlation_names.append('cryptocurrency')
filter_types.append('cryptocurrency')
correl_option = request.form.get('PgpCheck')
if correl_option:
correlation_names.append('pgp')
filter_types.append('pgp')
correl_option = request.form.get('UsernameCheck')
if correl_option:
correlation_names.append('username')
filter_types.append('username')
correl_option = request.form.get('DecodedCheck')
if correl_option:
correlation_names.append('decoded')
filter_types.append('decoded')
correl_option = request.form.get('ScreenshotCheck')
if correl_option:
correlation_names.append('screenshot')
filter_types.append('screenshot')
# correlation_objects
correl_option = request.form.get('DomainCheck')
if correl_option:
correlation_objects.append('domain')
correl_option = request.form.get('PasteCheck')
filter_types.append('domain')
correl_option = request.form.get('ItemCheck')
if correl_option:
correlation_objects.append('item')
filter_types.append('item')
# list as params
correlation_names = ",".join(correlation_names)
correlation_objects = ",".join(correlation_objects)
filter_types = ",".join(filter_types)
# redirect to keep history and bookmark
return redirect(url_for('correlation.show_correlation', object_type=object_type, type_id=type_id, correlation_id=correlation_id, mode=mode,
max_nodes=max_nodes, correlation_names=correlation_names, correlation_objects=correlation_objects))
return redirect(url_for('correlation.show_correlation', type=object_type, subtype=subtype, id=obj_id, mode=mode,
max_nodes=max_nodes, filter=filter_types))
# request.method == 'GET'
else:
object_type = request.args.get('object_type')
type_id = request.args.get('type_id')
correlation_id = request.args.get('correlation_id')
obj_type = request.args.get('type')
subtype = request.args.get('subtype', '')
obj_id = request.args.get('id')
max_nodes = sanitise_nb_max_nodes(request.args.get('max_nodes'))
mode = sanitise_graph_mode(request.args.get('mode'))
expand_card = request.args.get('expand_card')
related_btc = bool(request.args.get('expand_card', False))
correlation_names = ail_objects.sanitize_objs_types(request.args.get('correlation_names', '').split(','))
correlation_objects = ail_objects.sanitize_objs_types(request.args.get('correlation_objects', '').split(','))
filter_types = ail_objects.sanitize_objs_types(request.args.get('filter', '').split(','))
# # TODO: remove me, rename screenshot to image
if object_type == 'image':
object_type == 'screenshot'
if obj_type == 'image':
obj_type = 'screenshot'
# check if correlation_id exist
if not Correlate_object.exist_object(object_type, correlation_id, type_id=type_id):
# check if obj_id exist
if not ail_objects.exists_obj(obj_type, subtype, obj_id):
abort(404) # return 404
# oject exist
# object exist
else:
dict_object = {"object_type": object_type, "correlation_id": correlation_id}
dict_object["max_nodes"] = max_nodes
dict_object["mode"] = mode
dict_object["correlation_names"] = correlation_names
dict_object["correlation_names_str"] = ",".join(correlation_names)
dict_object["correlation_objects"] = correlation_objects
dict_object["correlation_objects_str"] = ",".join(correlation_objects)
dict_object["metadata"] = Correlate_object.get_object_metadata(object_type, correlation_id, type_id=type_id)
if type_id:
dict_object["metadata"]['type_id'] = type_id
dict_object["metadata_card"] = get_card_metadata(object_type, correlation_id, type_id=type_id, expand_card=expand_card)
dict_object = {"object_type": obj_type,
"correlation_id": obj_id,
"max_nodes": max_nodes, "mode": mode,
"filter": filter_types, "filter_str": ",".join(filter_types),
"metadata": ail_objects.get_object_meta(obj_type, subtype, obj_id, flask_context=True)
}
print(dict_object)
if subtype:
dict_object["metadata"]['type_id'] = subtype
dict_object["metadata_card"] = ail_objects.get_object_card_meta(obj_type, subtype, obj_id, related_btc=related_btc)
return render_template("show_correlation.html", dict_object=dict_object, bootstrap_label=bootstrap_label)
@correlation.route('/correlation/get/description')
@ -254,19 +174,17 @@ def get_description():
@login_required
@login_read_only
def graph_node_json():
obj_id = request.args.get('correlation_id') #######################3
subtype = request.args.get('type_id') #######################
obj_type = request.args.get('object_type') #######################
obj_id = request.args.get('id')
subtype = request.args.get('subtype')
obj_type = request.args.get('type')
max_nodes = sanitise_nb_max_nodes(request.args.get('max_nodes'))
correlation_names = ail_objects.sanitize_objs_types(request.args.get('correlation_names', '').split(','))
correlation_objects = ail_objects.sanitize_objs_types(request.args.get('correlation_objects', '').split(','))
filter_types = ail_objects.sanitize_objs_types(request.args.get('filter', '').split(','))
# # TODO: remove me, rename screenshot
if obj_type == 'image':
obj_type == 'screenshot'
obj_type = 'screenshot'
filter_types = correlation_names + correlation_objects
json_graph = ail_objects.get_correlations_graph_node(obj_type, subtype, obj_id, filter_types=filter_types, max_nodes=max_nodes, level=2, flask_context=True)
#json_graph = Correlate_object.get_graph_node_object_correlation(obj_type, obj_id, 'union', correlation_names, correlation_objects, requested_correl_type=subtype, max_nodes=max_nodes)
return jsonify(json_graph)

View file

@ -6,11 +6,13 @@
'''
import os
import sys
import json
import random
import sys
import time
from datetime import datetime
from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response, make_response
from flask import render_template, jsonify, request, Blueprint, redirect, url_for, Response, send_file, abort
from flask_login import login_required, current_user, login_user, logout_user
sys.path.append('modules')
@ -19,15 +21,6 @@ import Flask_config
# Import Role_Manager
from Role_Manager import login_admin, login_analyst, login_read_only
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
import Tag
sys.path.append(os.environ['AIL_BIN'])
##################################
@ -36,6 +29,10 @@ sys.path.append(os.environ['AIL_BIN'])
from lib import crawlers
from lib import Language
from lib.objects import Domains
from lib.objects.Items import Item
from lib import Tag
from packages import Date
from lib import Domain # # # # # # # # # # # # # # # # TODO:
@ -50,9 +47,9 @@ crawler_splash = Blueprint('crawler_splash', __name__, template_folder=os.path.j
# ============ FUNCTIONS ============
def api_validator(api_response):
if api_response:
return Response(json.dumps(api_response[0], indent=2, sort_keys=True), mimetype='application/json'), api_response[1]
def api_validator(message, code):
if message and code:
return Response(json.dumps(message, indent=2, sort_keys=True), mimetype='application/json'), code
def create_json_response(data, status_code):
return Response(json.dumps(data, indent=2, sort_keys=True), mimetype='application/json'), status_code
@ -62,26 +59,26 @@ def create_json_response(data, status_code):
@login_required
@login_read_only
def crawlers_dashboard():
# # TODO: get splash manager status
is_manager_connected = crawlers.get_splash_manager_connection_metadata()
all_splash_crawler_status = crawlers.get_all_spash_crawler_status()
splash_crawlers_latest_stats = crawlers.get_splash_crawler_latest_stats()
is_manager_connected = crawlers.get_lacus_connection_metadata()
crawlers_status = crawlers.get_crawler_capture_status()
print(crawlers_status)
crawlers_latest_stats = crawlers.get_crawlers_stats()
print(crawlers_latest_stats)
date = crawlers.get_current_date()
return render_template("dashboard_splash_crawler.html", all_splash_crawler_status = all_splash_crawler_status,
is_manager_connected=is_manager_connected, date=date,
splash_crawlers_latest_stats=splash_crawlers_latest_stats)
return render_template("dashboard_crawler.html", date=date,
is_manager_connected=is_manager_connected,
crawlers_status=crawlers_status,
crawlers_latest_stats=crawlers_latest_stats)
@crawler_splash.route("/crawlers/crawler_dashboard_json", methods=['GET'])
@login_required
@login_read_only
def crawler_dashboard_json():
crawlers_status = crawlers.get_crawler_capture_status()
crawlers_latest_stats = crawlers.get_crawlers_stats()
all_splash_crawler_status = crawlers.get_all_spash_crawler_status()
splash_crawlers_latest_stats = crawlers.get_splash_crawler_latest_stats()
return jsonify({'all_splash_crawler_status': all_splash_crawler_status,
'splash_crawlers_latest_stats':splash_crawlers_latest_stats})
return jsonify({'crawlers_status': crawlers_status,
'stats': crawlers_latest_stats})
@crawler_splash.route("/crawlers/manual", methods=['GET'])
@login_required
@ -89,12 +86,12 @@ def crawler_dashboard_json():
def manual():
user_id = current_user.get_id()
l_cookiejar = crawlers.api_get_cookies_list_select(user_id)
all_crawlers_types = crawlers.get_all_crawlers_queues_types()
all_splash_name = crawlers.get_all_crawlers_to_launch_splash_name()
crawlers_types = crawlers.get_crawler_all_types()
proxies = [] # TODO HANDLE PROXIES
return render_template("crawler_manual.html",
is_manager_connected=crawlers.get_splash_manager_connection_metadata(),
all_crawlers_types=all_crawlers_types,
all_splash_name=all_splash_name,
is_manager_connected=crawlers.get_lacus_connection_metadata(),
crawlers_types=crawlers_types,
proxies=proxies,
l_cookiejar=l_cookiejar)
@crawler_splash.route("/crawlers/send_to_spider", methods=['POST'])
@ -106,17 +103,16 @@ def send_to_spider():
# POST val
url = request.form.get('url_to_crawl')
crawler_type = request.form.get('crawler_queue_type')
splash_name = request.form.get('splash_name')
auto_crawler = request.form.get('crawler_type')
crawler_delta = request.form.get('crawler_epoch')
proxy = request.form.get('proxy_name')
auto_crawler = request.form.get('crawler_type') # TODO Auto Crawler
crawler_delta = request.form.get('crawler_epoch') # TODO Auto Crawler
screenshot = request.form.get('screenshot')
har = request.form.get('har')
depth_limit = request.form.get('depth_limit')
max_pages = request.form.get('max_pages')
cookiejar_uuid = request.form.get('cookiejar')
if splash_name:
crawler_type = splash_name
if crawler_type == 'onion':
proxy = 'force_tor'
if cookiejar_uuid:
if cookiejar_uuid == 'None':
@ -125,13 +121,55 @@ def send_to_spider():
cookiejar_uuid = cookiejar_uuid.rsplit(':')
cookiejar_uuid = cookiejar_uuid[-1].replace(' ', '')
res = crawlers.api_create_crawler_task(user_id, url, screenshot=screenshot, har=har, depth_limit=depth_limit, max_pages=max_pages,
crawler_type=crawler_type,
auto_crawler=auto_crawler, crawler_delta=crawler_delta, cookiejar_uuid=cookiejar_uuid)
if res:
data = {'url': url, 'depth': depth_limit, 'har': har, 'screenshot': screenshot}
if proxy:
data['proxy'] = proxy
if cookiejar_uuid:
data['cookiejar'] = cookiejar_uuid
res = crawlers.api_add_crawler_task(data, user_id=user_id)
if res[1] != 200:
return create_json_response(res[0], res[1])
return redirect(url_for('crawler_splash.manual'))
@crawler_splash.route("/crawlers/last/domains", methods=['GET'])
@login_required
@login_read_only
def crawlers_last_domains():
    """Render the list of last crawled domains for one crawler type.

    The ``type`` query argument must be one of ``crawlers.get_crawler_all_types()``;
    otherwise a JSON error with status 400 is returned.
    """
    domain_type = request.args.get('type')
    if domain_type not in crawlers.get_crawler_all_types():
        return jsonify({'error': 'Invalid domain type'}), 400

    # TODO STAT by EPOCH
    domains = []
    for row in crawlers.get_last_crawled_domains(domain_type):
        # Each row is "<domain>:<epoch>"; only the first ':' is a separator.
        domain_id, epoch = row.split(':', 1)
        dom = Domains.Domain(domain_id)
        meta = dom.get_meta()
        meta['epoch'] = epoch
        meta['status_epoch'] = dom.is_up_by_epoch(epoch)
        domains.append(meta)
    crawler_stats = crawlers.get_crawlers_stats(domain_type=domain_type)

    # Today's date, used for both ends of the date-range form.
    date_string = datetime.now().strftime("%Y-%m-%d")
    is_manager_connected = crawlers.get_lacus_connection_metadata()
    return render_template("last_crawled.html",
                           domains=domains,
                           type=domain_type,
                           is_manager_connected=is_manager_connected,
                           date_from=date_string,
                           date_to=date_string,
                           crawler_stats=crawler_stats)
@crawler_splash.route('/crawlers/last/domains/json')
@login_required
@login_read_only
def crawlers_last_domains_json():
    """Return, as JSON, the per-day crawler stats for ``Date.get_date_range(7)``.

    The ``type`` query argument must be a known crawler type; otherwise a
    JSON error with status 400 is returned.
    """
    domain_type = request.args.get('type')
    if domain_type not in crawlers.get_crawler_all_types():
        return jsonify({'error': 'Invalid domain type'}), 400
    stats = [crawlers.get_crawlers_stats_by_day(day, domain_type)
             for day in Date.get_date_range(7)]
    return jsonify(stats)
#### Domains ####
@ -143,36 +181,69 @@ def showDomain():
if request.method == 'POST':
domain_name = request.form.get('in_show_domain')
epoch = None
port = None
else:
domain_name = request.args.get('domain')
epoch = request.args.get('epoch')
port = request.args.get('port')
res = api_validator(Domain.api_verify_if_domain_exist(domain_name))
if res:
return res
try:
epoch = int(epoch)
except (ValueError, TypeError):
epoch = None
domain = Domains.Domain(domain_name)
dom = Domain.Domain(domain_name, port=port)
if not domain.exists():
abort(404)
dict_domain = dom.get_domain_metadata()
dict_domain['domain'] = domain_name
if dom.domain_was_up():
dict_domain = domain.get_meta(options=['last_origin', 'languages'])
dict_domain['domain'] = domain.id
if domain.was_up():
dict_domain = {**dict_domain, **domain.get_correlations()}
print(dict_domain)
dict_domain['correlation_nb'] = len(dict_domain['decoded']) + len(dict_domain['username']) + len(dict_domain['pgp']) + len(dict_domain['cryptocurrency']) + len(dict_domain['screenshot'])
dict_domain['father'] = dom.get_domain_father()
dict_domain['languages'] = Language.get_languages_from_iso(dom.get_domain_languages(), sort=True)
dict_domain['tags'] = dom.get_domain_tags()
dict_domain['tags_safe'] = Tag.is_tags_safe(dict_domain['tags'])
dict_domain['history'] = dom.get_domain_history_with_status()
dict_domain['crawler_history'] = dom.get_domain_items_crawled(items_link=True, epoch=epoch, item_screenshot=True, item_tag=True) # # TODO: handle multiple port
if dict_domain['crawler_history'].get('items', []):
dict_domain['crawler_history']['random_item'] = random.choice(dict_domain['crawler_history']['items'])
dict_domain['history'] = domain.get_history(status=True)
curr_epoch = None
# Select valid epoch
if epoch:
for row in dict_domain['history']:
if row['epoch'] == epoch:
curr_epoch = row['epoch']
break
else:
curr_epoch = -1
for row in dict_domain['history']:
if row['epoch'] > curr_epoch:
curr_epoch = row['epoch']
dict_domain['epoch'] = curr_epoch
dict_domain["date"] = time.strftime('%Y/%m/%d - %H:%M.%S', time.gmtime(curr_epoch))
return render_template("showDomain.html", dict_domain=dict_domain, bootstrap_label=bootstrap_label,
modal_add_tags=Tag.get_modal_add_tags(dict_domain['domain'], object_type="domain"))
print(dict_domain['epoch'])
dict_domain['crawler_history_items'] = []
for item_id in domain.get_crawled_items_by_epoch(epoch):
dict_domain['crawler_history_items'].append(Item(item_id).get_meta(options=['crawler']))
if dict_domain['crawler_history_items']:
dict_domain['random_item'] = random.choice(dict_domain['crawler_history_items'])
return render_template("showDomain.html",
dict_domain=dict_domain, bootstrap_label=bootstrap_label,
modal_add_tags=Tag.get_modal_add_tags(dict_domain['domain'], object_type="domain"))
@crawler_splash.route('/crawlers/domain/download', methods=['GET'])
@login_required
@login_read_only
def crawlers_domain_download():
    """Send a zip archive of a crawled domain as an attachment.

    Query arguments: ``domain`` (domain id) and optional ``epoch``; an
    ``epoch`` that is missing or not an integer is treated as ``None``.
    Aborts with 404 when the domain does not exist or no archive is produced.
    """
    domain_id = request.args.get('domain')
    raw_epoch = request.args.get('epoch')
    try:
        epoch = int(raw_epoch)
    except (ValueError, TypeError):
        epoch = None

    dom = Domains.Domain(domain_id)
    if not dom.exists():
        abort(404)

    zip_file = dom.get_download_zip(epoch=epoch)
    if not zip_file:
        abort(404)

    archive_name = f'{dom.get_id()}.zip'
    return send_file(zip_file, download_name=archive_name, as_attachment=True)
@crawler_splash.route('/domains/explorer/domain_type_post', methods=['POST'])
@login_required
@ -304,13 +375,36 @@ def domains_search_name():
l_dict_domains=l_dict_domains, bootstrap_label=bootstrap_label,
domains_types=domains_types)
@crawler_splash.route('/domains/TODO', methods=['GET'])
@crawler_splash.route('/domains/date', methods=['GET'])
@login_required
@login_analyst
def domains_todo():
def domains_search_date():
# TODO sanitize type + date
domain_type = request.args.get('type')
last_domains = Domain.get_last_crawled_domains(domain_type)
date_from = request.args.get('date_from')
date_to = request.args.get('date_to')
# page = request.args.get('page')
date = Date.sanitise_date_range(date_from, date_to)
domains_date = Domains.get_domains_by_daterange(date['date_from'], date['date_to'], domain_type)
dict_domains = {}
for d in domains_date:
dict_domains[d] = Domains.get_domains_meta(domains_date[d])
date_from = f"{date['date_from'][0:4]}-{date['date_from'][4:6]}-{date['date_from'][6:8]}"
date_to = f"{date['date_to'][0:4]}-{date['date_to'][4:6]}-{date['date_to'][6:8]}"
return render_template("domains_daterange.html", date_from=date_from, date_to=date_to,
bootstrap_label=bootstrap_label,
dict_domains=dict_domains, type=domain_type)
@crawler_splash.route('/domains/date/post', methods=['POST'])
@login_required
@login_analyst
def domains_search_date_post():
    """Redirect a POSTed date-range search form to the GET ``domains_search_date`` view."""
    form = request.form
    query = {
        'date_from': form.get('date_from'),
        'date_to': form.get('date_to'),
        'type': form.get('type'),
    }
    return redirect(url_for('crawler_splash.domains_search_date', **query))
##-- --##
@ -521,49 +615,8 @@ def crawler_cookiejar_cookie_json_add_post():
return redirect(url_for('crawler_splash.crawler_cookiejar_cookie_add', cookiejar_uuid=cookiejar_uuid))
@crawler_splash.route('/crawler/settings', methods=['GET'])
@login_required
@login_analyst
def crawler_splash_setings():
    """Render the Splash crawler settings page.

    Gathers proxy and Splash metadata, the manager URL, the API key as
    returned by ``get_hidden_splash_api_key()``, the manager connection
    status (with a forced ping), the number of crawlers to launch and the
    outcome of the last crawler test.
    """
    # The dict literal is evaluated top to bottom, so the lookups keep their order.
    context = {
        'all_proxies': crawlers.get_all_proxies_metadata(),
        'all_splash': crawlers.get_all_splash_crawler_metadata(),
        'splash_manager_url': crawlers.get_splash_manager_url(),
        'api_key': crawlers.get_hidden_splash_api_key(),
        'is_manager_connected': crawlers.get_splash_manager_connection_metadata(force_ping=True),
        'nb_crawlers_to_launch': crawlers.get_nb_crawlers_to_launch(),
        'is_crawler_working': crawlers.is_test_ail_crawlers_successful(),
        'crawler_error_mess': crawlers.get_test_ail_crawlers_message(),
    }
    # The full 'crawler' config section is deliberately not passed to the template.
    return render_template("settings_splash_crawler.html", **context)
@crawler_splash.route('/crawler/settings/crawler_manager', methods=['GET', 'POST'])
@login_required
@login_admin
def crawler_splash_setings_crawler_manager():
    """Show or update the Splash manager URL and API key.

    GET renders the edit form with the current values. POST saves the
    submitted values; a non-200 result is returned as JSON with its status
    code, a 200 result redirects to the settings page.
    """
    if request.method != 'POST':
        return render_template("settings_edit_splash_crawler_manager.html",
                               splash_manager_url=crawlers.get_splash_manager_url(),
                               api_key=crawlers.get_splash_api_key())

    submitted = {'url': request.form.get('splash_manager_url'),
                 'api_key': request.form.get('api_key')}
    res = crawlers.api_save_splash_manager_url_api(submitted)
    if res[1] == 200:
        return redirect(url_for('crawler_splash.crawler_splash_setings'))
    body = json.dumps(res[0], indent=2, sort_keys=True)
    return Response(body, mimetype='application/json'), res[1]
@crawler_splash.route('/crawler/settings/crawlers_to_lauch', methods=['GET', 'POST'])
@login_required
@ -583,13 +636,6 @@ def crawler_splash_setings_crawlers_to_lauch():
return render_template("settings_edit_crawlers_to_launch.html",
nb_crawlers_to_launch=nb_crawlers_to_launch)
@crawler_splash.route('/crawler/settings/test_crawler', methods=['GET'])
@login_required
@login_admin
def crawler_splash_setings_test_crawler():
    """Trigger ``crawlers.test_ail_crawlers()`` and go back to the settings page."""
    crawlers.test_ail_crawlers()
    settings_url = url_for('crawler_splash.crawler_splash_setings')
    return redirect(settings_url)
@crawler_splash.route('/crawler/settings/relaunch_crawler', methods=['GET'])
@login_required
@login_admin
@ -598,3 +644,59 @@ def crawler_splash_setings_relaunch_crawler():
return redirect(url_for('crawler_splash.crawler_splash_setings'))
## - - ##
#### LACUS ####
@crawler_splash.route('/crawler/settings', methods=['GET'])
@login_required
@login_analyst
def crawler_settings():
    """Render the crawler (Lacus) settings page.

    Passes the Lacus URL, the API key as returned by
    ``get_hidden_lacus_api_key()``, the connection status (with a forced
    ping) and the outcome of the last crawler test to the template.
    """
    # The dict literal is evaluated top to bottom, so the lookups keep their order.
    context = {
        'lacus_url': crawlers.get_lacus_url(),
        'api_key': crawlers.get_hidden_lacus_api_key(),
        'is_manager_connected': crawlers.get_lacus_connection_metadata(force_ping=True),
        'is_crawler_working': crawlers.is_test_ail_crawlers_successful(),
        'crawler_error_mess': crawlers.get_test_ail_crawlers_message(),
    }

    # TODO REGISTER PROXY
    # Proxies metadata, number of crawlers to launch and the full 'crawler'
    # config section are not passed to the template yet.
    return render_template("settings_crawler.html", **context)
@crawler_splash.route('/crawler/settings/crawler/manager', methods=['GET', 'POST'])
@login_required
@login_admin
def crawler_lacus_settings_crawler_manager():
    """Show or update the Lacus URL and API key.

    POST: saves the ``lacus_url`` and ``api_key`` form fields through
    ``crawlers.api_save_lacus_url_key()``. A result whose status is not 200
    is returned as JSON with that status; otherwise the client is redirected
    to the crawler settings page.
    GET: renders the edit form with the currently stored values.
    """
    if request.method == 'POST':
        lacus_url = request.form.get('lacus_url')
        api_key = request.form.get('api_key')
        res = crawlers.api_save_lacus_url_key({'url': lacus_url, 'api_key': api_key})
        # The save result is intentionally not printed: this handler deals
        # with credentials and must not write API output to stdout.
        if res[1] != 200:
            return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1]
        return redirect(url_for('crawler_splash.crawler_settings'))

    lacus_url = crawlers.get_lacus_url()
    api_key = crawlers.get_lacus_api_key()
    return render_template("settings_edit_lacus_crawler.html", lacus_url=lacus_url, api_key=api_key)
@crawler_splash.route('/crawler/settings/crawler/test', methods=['GET'])
@login_required
@login_admin
def crawler_settings_crawler_test():
    """Trigger ``crawlers.test_ail_crawlers()`` and go back to the crawler settings page."""
    crawlers.test_ail_crawlers()
    settings_url = url_for('crawler_splash.crawler_settings')
    return redirect(settings_url)
#--- LACUS ---#

View file

@ -53,7 +53,7 @@ def show_investigation():
investigation_uuid = request.args.get("uuid")
investigation = Investigations.Investigation(investigation_uuid)
metadata = investigation.get_metadata(r_str=True)
objs = ail_objects.get_objects_meta(investigation.get_objects(), icon=True, url=True, flask_context=True)
objs = ail_objects.get_objects_meta(investigation.get_objects(), flask_context=True)
return render_template("view_investigation.html", bootstrap_label=bootstrap_label,
metadata=metadata, investigation_objs=objs)

View file

@ -0,0 +1,82 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
'''
Blueprint Flask: CVE objects endpoints: daterange listing, search ...
'''
import os
import sys
import json
from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response, abort, send_file
from flask_login import login_required, current_user
# Import Role_Manager
from Role_Manager import login_admin, login_analyst, login_read_only
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.objects import Cves
from packages import Date
# ============ BLUEPRINT ============
objects_cve = Blueprint('objects_cve', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/objects/cve'))
# ============ VARIABLES ============
bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info']
# ============ FUNCTIONS ============
@objects_cve.route("/objects/cve", methods=['GET'])
@login_required
@login_read_only
def objects_cves():
    """Render the CVE objects found in a date range.

    Query arguments: ``date_from``, ``date_to`` (passed through
    ``Date.sanitise_date_range``) and ``show_objects`` (forwarded to the
    template unchanged).
    """
    date_from = request.args.get('date_from')
    date_to = request.args.get('date_to')
    show_objects = request.args.get('show_objects')
    date = Date.sanitise_date_range(date_from, date_to)
    date_from = date['date_from']
    date_to = date['date_to']

    # TODO: barchart_type
    # TODO: correlation_type_search_endpoint

    # No debug print of the result here: it would dump the whole object
    # listing to stdout on every page view.
    dict_objects = Cves.api_get_cves_meta_by_daterange(date_from, date_to)
    return render_template("CveDaterange.html", date_from=date_from, date_to=date_to,
                           dict_objects=dict_objects, show_objects=show_objects)
@objects_cve.route("/objects/cve/post", methods=['POST'])
@login_required
@login_read_only
def objects_cves_post():
    """Redirect a POSTed CVE date-range form to the GET ``objects_cves`` view."""
    form = request.form
    query = {
        'date_from': form.get('date_from'),
        'date_to': form.get('date_to'),
        'show_objects': form.get('show_objects'),
    }
    return redirect(url_for('objects_cve.objects_cves', **query))
@objects_cve.route("/objects/cve/range/json", methods=['GET'])
@login_required
@login_read_only
def objects_cve_range_json():
    """Placeholder for the CVE date-range JSON endpoint.

    Not implemented yet. A Flask view must not return ``None`` (Flask raises
    a ``TypeError`` and the client gets a 500), so an explicit
    501 Not Implemented JSON answer is returned instead.
    """
    # TODO: return the CVE statistics for the requested date range.
    return jsonify({'error': 'Not implemented'}), 501
@objects_cve.route("/objects/cve/search", methods=['POST'])
@login_required
@login_read_only
def objects_cve_search():
    """Redirect to the page of the CVE whose id was submitted in ``object_id``.

    Aborts with 400 when the form field is missing or empty, and with 404
    when the CVE object does not exist.
    """
    to_search = request.form.get('object_id')
    # Reject a missing/empty id before building the object, so that None is
    # never handed to Cves.Cve().
    if not to_search:
        abort(400)

    # TODO SANITIZE ID
    # TODO Search all
    cve = Cves.Cve(to_search)
    if not cve.exists():
        abort(404)
    return redirect(cve.get_link(flask_context=True))
# ============= ROUTES ==============

View file

@ -36,16 +36,16 @@ bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info']
# ============= ROUTES ==============
@objects_item.route("/object/item") #completely shows the paste in a new tab
@objects_item.route("/object/item")
@login_required
@login_read_only
def showItem(): # # TODO: support post
def showItem(): # # TODO: support post
item_id = request.args.get('id')
if not item_id or not item_basic.exist_item(item_id):
abort(404)
item = Item(item_id)
meta = item.get_meta(options=set(['content', 'crawler', 'duplicates', 'lines', 'size']))
meta = item.get_meta(options=['content', 'crawler', 'duplicates', 'lines', 'size'])
meta['name'] = meta['id'].replace('/', ' / ')
meta['father'] = item_basic.get_item_parent(item_id)
@ -94,4 +94,4 @@ def item_download(): # # TODO: support post
if not item_id or not item_basic.exist_item(item_id):
abort(404)
item = Item(item_id)
return send_file(item.get_raw_content(), attachment_filename=item_id, as_attachment=True)
return send_file(item.get_raw_content(), download_name=item_id, as_attachment=True)

View file

@ -17,7 +17,6 @@ import redis
import unicodedata
import uuid
from io import BytesIO
from Date import Date
from functools import wraps
@ -31,9 +30,9 @@ from flask_login import login_required
# Import Project packages
##################################
from lib import Tag
from lib.objects.Items import Item
import Paste
import Import_helper
from packages import Import_helper
from pytaxonomies import Taxonomies
from pymispgalaxies import Galaxies, Clusters
@ -98,8 +97,6 @@ def limit_content_length():
# ============ FUNCTIONS ============
def one():
    """Return the constant ``1``."""
    result = 1
    return result
def allowed_file(filename):
if not '.' in filename:
@ -126,15 +123,14 @@ def date_to_str(date):
def misp_create_event(distribution, threat_level_id, analysis, info, l_tags, publish, path):
paste = Paste.Paste(path)
source = path.split('/')[-6:]
source = '/'.join(source)[:-3]
item = Item(path)
source = item.get_source()
ail_uuid = r_serv_db.get('ail:uuid')
pseudofile = BytesIO(paste.get_p_content().encode())
pseudofile = BytesIO(item.get_content(binary=True))
temp = paste._get_p_duplicate()
temp = item.get_duplicates()
#beautifier
# beautifier
if not temp:
temp = ''
@ -181,7 +177,7 @@ def misp_create_event(distribution, threat_level_id, analysis, info, l_tags, pub
leak_obj = MISPObject(obj_name)
leak_obj.add_attribute('sensor', value=ail_uuid, type="text")
leak_obj.add_attribute('origin', value=source, type='text')
leak_obj.add_attribute('last-seen', value=date_to_str(paste.p_date), type='datetime')
leak_obj.add_attribute('last-seen', value=date_to_str(item.get_date()), type='datetime')
leak_obj.add_attribute('raw-data', value=source, data=pseudofile, type="attachment")
if p_duplicate_number > 0:
@ -192,7 +188,8 @@ def misp_create_event(distribution, threat_level_id, analysis, info, l_tags, pub
templateID = [x['ObjectTemplate']['id'] for x in pymisp.get_object_templates_list()['response'] if x['ObjectTemplate']['name'] == obj_name][0]
except IndexError:
valid_types = ", ".join([x['ObjectTemplate']['name'] for x in pymisp.get_object_templates_list()])
print ("Template for type {} not found! Valid types are: {%s}".format(obj_name, valid_types))
print (f"Template for type {obj_name} not found! Valid types are: {valid_types}")
return False
r = pymisp.add_object(eventid, templateID, leak_obj)
if 'errors' in r:
print(r)
@ -206,7 +203,7 @@ def hive_create_case(hive_tlp, threat_level, hive_description, hive_case_title,
ail_uuid = r_serv_db.get('ail:uuid')
source = path.split('/')[-6:]
source = '/'.join(source)[:-3]
# get paste date
# get item date
var = path.split('/')
last_seen = "{0}-{1}-{2}".format(var[-4], var[-3], var[-2])

View file

@ -41,45 +41,6 @@ dic_type_name={'onion':'Onion', 'regular':'Website'}
# ============ FUNCTIONS ============
def one():
    """Return the constant ``1``."""
    result = 1
    return result
def get_date_range(num_day):
    """Return ``Date.substract_day(i)`` for ``i`` in ``0..num_day-1``, in reverse order.

    The reference date is today; the entry for the largest offset comes
    first and the entry for offset 0 comes last.
    """
    today = datetime.date.today()
    date = Date(today.strftime('%Y%m%d'))
    days = [date.substract_day(offset) for offset in range(num_day)]
    days.reverse()
    return days
def substract_date(date_from, date_to):
    """List every day from ``date_from`` to ``date_to`` inclusive.

    :param date_from: first day, as a ``YYYYMMDD`` string.
    :param date_to: last day, as a ``YYYYMMDD`` string.
    :return: list of ``YYYYMMDD`` strings in chronological order; empty when
        ``date_to`` is before ``date_from``.
    """
    def _to_date(yyyymmdd):
        # Only the first eight characters are interpreted.
        return datetime.date(int(yyyymmdd[0:4]), int(yyyymmdd[4:6]), int(yyyymmdd[6:8]))

    first_day = _to_date(date_from)
    last_day = _to_date(date_to)
    nb_days = (last_day - first_day).days
    return [(first_day + datetime.timedelta(offset)).strftime('%Y%m%d')
            for offset in range(nb_days + 1)]
def unpack_paste_tags(p_tags):
    """Pair each tag with a short display label.

    The tag is split on ``=``:
      * no ``=``: the whole tag is the label (used for custom tags);
      * non-empty second part: that part without its first and last character;
      * empty second part (no value): the first part without its first and
        last character.

    :return: list of ``(label, complete_tag)`` tuples, in input order.
    """
    unpacked = []
    for complete_tag in p_tags:
        parts = complete_tag.split('=')
        if len(parts) == 1:
            label = parts[0]
        elif parts[1] != '':
            label = parts[1][1:-1]
        else:
            label = parts[0][1:-1]
        unpacked.append((label, complete_tag))
    return unpacked
def is_valid_domain(domain):
faup.decode(domain)
@ -89,26 +50,6 @@ def is_valid_domain(domain):
else:
return False
def is_valid_service_type(service_type):
    """Return ``True`` when ``service_type`` is ``'onion'`` or ``'regular'``."""
    return service_type in ('onion', 'regular')
def get_onion_status(domain, date):
    """Return ``True`` when ``domain`` is a member of the ``onion_up:<date>`` set."""
    up_key = 'onion_up:'+date
    return bool(r_serv_onion.sismember(up_key, domain))
def get_domain_type(domain):
    """Return ``'onion'`` when the text after the last ``:`` is ``onion``, else ``'regular'``."""
    suffix = domain.rsplit(':', 1)[-1]
    return 'onion' if suffix == 'onion' else 'regular'
def get_type_domain(domain):
if domain is None:
type = 'regular'
@ -133,18 +74,6 @@ def get_domain_from_url(url):
def get_last_domains_crawled(type): # DONE
    """Return every element of the ``last_<type>`` Redis list."""
    list_key = f'last_{type}'
    return r_serv_onion.lrange(list_key, 0, -1)
def get_nb_domains_inqueue(type):
    """Return the combined size of the normal and priority crawler queues for ``type``."""
    normal_queue = r_serv_onion.scard('{}_crawler_queue'.format(type))
    priority_queue = r_serv_onion.scard('{}_crawler_priority_queue'.format(type))
    return normal_queue + priority_queue
def get_stats_last_crawled_domains(type, date):
    """Return up/down/total/queued domain counts for ``type`` on ``date``.

    :return: dict with keys ``domains_up``, ``domains_down``, ``total`` and
        ``domains_queue``.
    """
    nb_up = r_serv_onion.scard('{}_up:{}'.format(type, date))
    nb_down = r_serv_onion.scard('{}_down:{}'.format(type, date))
    return {
        'domains_up': nb_up,
        'domains_down': nb_down,
        'total': nb_up + nb_down,
        'domains_queue': get_nb_domains_inqueue(type),
    }
def get_last_crawled_domains_metadata(list_domains_crawled, date, type=None, auto_mode=False):
list_crawled_metadata = []
@ -201,22 +130,6 @@ def get_last_crawled_domains_metadata(list_domains_crawled, date, type=None, aut
list_crawled_metadata.append(metadata_domain)
return list_crawled_metadata
def get_crawler_splash_status(type):
    """Describe every crawler registered in the ``<type>_crawlers`` cache set.

    :return: list of dicts with keys ``crawler_info``, ``crawling_domain``,
        ``status_info`` and ``status``; ``status`` is ``True`` when the stored
        status is ``Waiting`` or ``Crawling``.
    """
    crawler_metadata = []
    for crawler in r_cache.smembers('{}_crawlers'.format(type)):
        meta_key = 'metadata_crawler:{}'.format(crawler)
        crawling_domain = r_cache.hget(meta_key, 'crawling_domain')
        started_time = r_cache.hget(meta_key, 'started_time')
        status_info = r_cache.hget(meta_key, 'status')
        crawler_metadata.append({
            'crawler_info': '{} - {}'.format(crawler, started_time),
            'crawling_domain': crawling_domain,
            'status_info': status_info,
            'status': status_info in ('Waiting', 'Crawling'),
        })
    return crawler_metadata
def delete_auto_crawler(url):
domain = get_domain_from_url(url)
type = get_type_domain(domain)
@ -231,67 +144,6 @@ def delete_auto_crawler(url):
# ============= ROUTES ==============
# @hiddenServices.route("/crawlers/", methods=['GET'])
# @login_required
# @login_read_only
# def dashboard():
# crawler_metadata_onion = get_crawler_splash_status('onion')
# crawler_metadata_regular = get_crawler_splash_status('regular')
#
# now = datetime.datetime.now()
# date = now.strftime("%Y%m%d")
# statDomains_onion = get_stats_last_crawled_domains('onion', date)
# statDomains_regular = get_stats_last_crawled_domains('regular', date)
#
# return render_template("Crawler_dashboard.html", crawler_metadata_onion = crawler_metadata_onion,
# date=date,
# crawler_metadata_regular=crawler_metadata_regular,
# statDomains_onion=statDomains_onion, statDomains_regular=statDomains_regular)
@hiddenServices.route("/crawlers/crawler_splash_onion", methods=['GET'])
@login_required
@login_read_only
def crawler_splash_onion():
    """Render the onion crawler page: last crawled onions, today's stats and crawler status."""
    domain_type = 'onion'
    last_onions = get_last_domains_crawled(domain_type)

    date = datetime.datetime.now().strftime("%Y%m%d")
    statDomains = get_stats_last_crawled_domains(domain_type, date)
    list_onion = get_last_crawled_domains_metadata(last_onions, date, type=domain_type)
    crawler_metadata = get_crawler_splash_status(domain_type)

    # Today's date as YYYY-MM-DD, used for both ends of the date-range form.
    date_string = '-'.join((date[0:4], date[4:6], date[6:8]))
    return render_template("Crawler_Splash_onion.html",
                           last_onions=list_onion,
                           statDomains=statDomains,
                           crawler_metadata=crawler_metadata,
                           date_from=date_string,
                           date_to=date_string)
@hiddenServices.route("/crawlers/Crawler_Splash_last_by_type", methods=['GET'])
@login_required
@login_read_only
def Crawler_Splash_last_by_type():
    """Render the last crawled domains page for the requested domain type.

    The ``type`` query argument falls back to ``'onion'`` when it is not in
    ``list_types``.
    """
    domain_type = request.args.get('type')

    # verify user input
    if domain_type not in list_types:
        domain_type = 'onion'
    type_name = dic_type_name[domain_type]

    date = datetime.datetime.now().strftime("%Y%m%d")
    # Today's date as YYYY-MM-DD, used for both ends of the date-range form.
    date_string = '-'.join((date[0:4], date[4:6], date[6:8]))

    statDomains = get_stats_last_crawled_domains(domain_type, date)
    last_domains = get_last_domains_crawled(domain_type)
    list_domains = get_last_crawled_domains_metadata(last_domains, date, type=domain_type)
    crawler_metadata = get_crawler_splash_status(domain_type)
    is_manager_connected = crawlers.get_splash_manager_connection_metadata()

    return render_template("Crawler_Splash_last_by_type.html",
                           type=domain_type,
                           type_name=type_name,
                           is_manager_connected=is_manager_connected,
                           last_domains=list_domains,
                           statDomains=statDomains,
                           crawler_metadata=crawler_metadata,
                           date_from=date_string,
                           date_to=date_string)
@hiddenServices.route("/crawlers/blacklisted_domains", methods=['GET'])
@login_required
@login_read_only
@ -424,7 +276,7 @@ def auto_crawler():
return render_template("Crawler_auto.html", page=page, nb_page_max=nb_page_max,
last_domains=last_domains,
is_manager_connected=crawlers.get_splash_manager_connection_metadata(),
is_manager_connected=crawlers.get_lacus_connection_metadata(),
auto_crawler_domain_onions_metadata=auto_crawler_domain_onions_metadata,
auto_crawler_domain_regular_metadata=auto_crawler_domain_regular_metadata)
@ -439,285 +291,6 @@ def remove_auto_crawler():
delete_auto_crawler(url)
return redirect(url_for('hiddenServices.auto_crawler', page=page))
# # TODO: refactor
@hiddenServices.route("/hiddenServices/last_crawled_domains_with_stats_json", methods=['GET'])
@login_required
@login_read_only
def last_crawled_domains_with_stats_json():
    """Return last crawled onions, today's onion stats and crawler status as JSON.

    The JSON object has the keys ``last_onions``, ``statDomains`` and
    ``crawler_metadata``.
    """
    last_onions = r_serv_onion.lrange('last_onion', 0, -1)
    date = datetime.datetime.now().strftime("%Y%m%d")

    # Today's counters.
    nb_up = r_serv_onion.scard('onion_up:{}'.format(date))
    nb_down = r_serv_onion.scard('onion_down:{}'.format(date))
    statDomains = {
        'domains_up': nb_up,
        'domains_down': nb_down,
        'total': nb_up + nb_down,
        'domains_queue': r_serv_onion.scard('onion_domain_crawler_queue'),
    }

    # Per-domain metadata; missing dates are shown as a '********' placeholder.
    list_onion = []
    for onion in last_onions:
        meta_key = 'onion_metadata:{}'.format(onion)
        last_check = r_serv_onion.hget(meta_key, 'last_check')
        if last_check is None:
            last_check = '********'
        first_seen = r_serv_onion.hget(meta_key, 'first_seen')
        if first_seen is None:
            first_seen = '********'

        metadata_onion = {
            'domain': onion,
            'last_check': last_check,
            'first_seen': first_seen,
        }
        if get_onion_status(onion, last_check):
            metadata_onion['status_text'] = 'UP'
            metadata_onion['status_color'] = 'Green'
            metadata_onion['status_icon'] = 'fa-check-circle'
        else:
            metadata_onion['status_text'] = 'DOWN'
            metadata_onion['status_color'] = 'Red'
            metadata_onion['status_icon'] = 'fa-times-circle'
        list_onion.append(metadata_onion)

    # Status of every crawler registered in the 'all_crawler:onion' cache set.
    crawler_metadata = []
    for crawler in r_cache.smembers('all_crawler:onion'):
        crawler_key = 'metadata_crawler:{}'.format(crawler)
        crawling_domain = r_cache.hget(crawler_key, 'crawling_domain')
        started_time = r_cache.hget(crawler_key, 'started_time')
        status_info = r_cache.hget(crawler_key, 'status')
        crawler_metadata.append({
            'crawler_info': '{} - {}'.format(crawler, started_time),
            'crawling_domain': crawling_domain,
            'status_info': status_info,
            'status': status_info in ('Waiting', 'Crawling'),
        })

    return jsonify({'last_onions': list_onion, 'statDomains': statDomains, 'crawler_metadata':crawler_metadata})
@hiddenServices.route("/hiddenServices/get_onions_by_daterange", methods=['POST'])
@login_required
@login_read_only
def get_onions_by_daterange():
    """Redirect a POSTed date-range form to the GET ``show_domains_by_daterange`` view."""
    form_fields = ('date_from', 'date_to', 'service_type',
                   'domains_up', 'domains_down', 'domains_tags')
    query = {field: request.form.get(field) for field in form_fields}
    return redirect(url_for('hiddenServices.show_domains_by_daterange', **query))
# Render "domains.html": for every day of a date range, list the domains that
# were up and/or down on that day, with per-day and overall counters.
# Query arguments: date_from, date_to, service_type, domains_up, domains_down,
# domains_tags (the last three act as flags: any non-empty value enables them).
@hiddenServices.route("/hiddenServices/show_domains_by_daterange", methods=['GET'])
@login_required
@login_read_only
def show_domains_by_daterange():
date_from = request.args.get('date_from')
date_to = request.args.get('date_to')
service_type = request.args.get('service_type')
domains_up = request.args.get('domains_up')
domains_down = request.args.get('domains_down')
domains_tags = request.args.get('domains_tags')
# incorrect service type
# An unknown service type silently falls back to 'onion'.
if not is_valid_service_type(service_type):
service_type = 'onion'
type_name = dic_type_name[service_type]
date_range = []
if date_from is not None and date_to is not None:
#change format
# Dates that are not 8 characters long are compacted by dropping the
# characters at index 4 and 7 (presumably 'YYYY-MM-DD' -> 'YYYYMMDD'; confirm).
# Any error leaves date_range empty, which triggers the fallback below.
try:
if len(date_from) != 8:
date_from = date_from[0:4] + date_from[5:7] + date_from[8:10]
date_to = date_to[0:4] + date_to[5:7] + date_to[8:10]
date_range = substract_date(date_from, date_to)
except:
pass
# Fallback: no usable range -> show today only.
if not date_range:
date_range.append(datetime.date.today().strftime("%Y%m%d"))
date_from = date_range[0][0:4] + '-' + date_range[0][4:6] + '-' + date_range[0][6:8]
date_to = date_from
else:
date_from = date_from[0:4] + '-' + date_from[4:6] + '-' + date_from[6:8]
date_to = date_to[0:4] + '-' + date_to[4:6] + '-' + date_to[6:8]
# Overall counters for the whole range.
statDomains = {}
statDomains['domains_up'] = 0
statDomains['domains_down'] = 0
statDomains['total'] = 0
statDomains['domains_queue'] = get_nb_domains_inqueue(service_type)
domains_by_day = {}
# domain_metadata is keyed by domain only: a domain met several times keeps
# the entry written last.
domain_metadata = {}
stats_by_date = {}
for date in date_range:
stats_by_date[date] = {}
stats_by_date[date]['domain_up'] = 0
stats_by_date[date]['domain_down'] = 0
if domains_up:
# Normalise the flag to a boolean; it is passed to the template below.
domains_up = True
domains_by_day[date] = list(r_serv_onion.smembers('{}_up:{}'.format(service_type, date)))
for domain in domains_by_day[date]:
# NOTE(review): the type passed here is always 'onion', whatever
# service_type is - confirm this is intended.
h = HiddenServices(domain, 'onion')
domain_metadata[domain] = {}
if domains_tags:
domains_tags = True
domain_metadata[domain]['tags'] = h.get_domain_tags(update=True)
# Missing dates are shown as a '********' placeholder.
domain_metadata[domain]['last_check'] = r_serv_onion.hget('{}_metadata:{}'.format(service_type, domain), 'last_check')
if domain_metadata[domain]['last_check'] is None:
domain_metadata[domain]['last_check'] = '********'
domain_metadata[domain]['first_seen'] = r_serv_onion.hget('{}_metadata:{}'.format(service_type, domain), 'first_seen')
if domain_metadata[domain]['first_seen'] is None:
domain_metadata[domain]['first_seen'] = '********'
domain_metadata[domain]['status_text'] = 'UP'
domain_metadata[domain]['status_color'] = 'Green'
domain_metadata[domain]['status_icon'] = 'fa-check-circle'
statDomains['domains_up'] += 1
stats_by_date[date]['domain_up'] += 1
if domains_down:
domains_down = True
domains_by_day_down = list(r_serv_onion.smembers('{}_down:{}'.format(service_type, date)))
# Down domains are appended to the day's up list when both flags are set,
# otherwise they become the day's list.
if domains_up:
domains_by_day[date].extend(domains_by_day_down)
else:
domains_by_day[date] = domains_by_day_down
for domain in domains_by_day_down:
#h = HiddenServices(onion_domain, 'onion')
domain_metadata[domain] = {}
#domain_metadata[domain]['tags'] = h.get_domain_tags()
domain_metadata[domain]['last_check'] = r_serv_onion.hget('{}_metadata:{}'.format(service_type, domain), 'last_check')
if domain_metadata[domain]['last_check'] is None:
domain_metadata[domain]['last_check'] = '********'
domain_metadata[domain]['first_seen'] = r_serv_onion.hget('{}_metadata:{}'.format(service_type, domain), 'first_seen')
if domain_metadata[domain]['first_seen'] is None:
domain_metadata[domain]['first_seen'] = '********'
domain_metadata[domain]['status_text'] = 'DOWN'
domain_metadata[domain]['status_color'] = 'Red'
domain_metadata[domain]['status_icon'] = 'fa-times-circle'
statDomains['domains_down'] += 1
stats_by_date[date]['domain_down'] += 1
statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down']
return render_template("domains.html", date_range=date_range, domains_by_day=domains_by_day,
statDomains=statDomains, type_name=type_name,
domain_metadata=domain_metadata,
stats_by_date=stats_by_date,
date_from=date_from, date_to=date_to, domains_up=domains_up, domains_down=domains_down,
domains_tags=domains_tags, type=service_type, bootstrap_label=bootstrap_label)
@hiddenServices.route("/crawlers/download_domain", methods=['GET'])
@login_required
@login_read_only
@no_cache
def download_domain():
    """Send a zip archive of the crawled items of a domain.

    Query parameters:
        domain: domain (or URL) to export; it is parsed with faup and only
                the extracted domain is used.
        epoch:  optional crawl epoch; ignored when it is not an integer.
        port:   optional port; defaults to the port parsed from ``domain``,
                then to 80.

    Returns:
        The string '404' when no domain is given or the domain has no
        metadata key, otherwise the archive as an attachment named
        ``<domain>.zip``.
    """
    domain = request.args.get('domain')
    # Nothing to parse: answer like an unknown domain instead of crashing in faup.
    if not domain:
        return '404'

    try:
        epoch = int(request.args.get('epoch'))
    except (TypeError, ValueError):
        # missing or non numeric epoch
        epoch = None

    port = request.args.get('port')

    faup.decode(domain)
    unpack_url = faup.get()

    ## TODO: # FIXME: remove me
    # The parsed value may be bytes, or already a str / None (no .decode()).
    try:
        domain = unpack_url['domain'].decode()
    except (AttributeError, UnicodeDecodeError):
        domain = unpack_url['domain']

    if not port:
        port = unpack_url['port'] or 80
    try:
        # int() accepts str, bytes and int alike, so no separate decode is needed
        port = int(port)
    except (TypeError, ValueError):
        port = 80

    domain_type = get_type_domain(domain)
    if domain is None or not r_serv_onion.exists('{}_metadata:{}'.format(domain_type, domain)):
        # # TODO: FIXME return a real HTTP 404 status
        return '404'

    h = HiddenServices(domain, domain_type, port=port)
    item_core = h.get_domain_crawled_core_item(epoch=epoch)
    if item_core:
        l_pastes = h.get_last_crawled_pastes(item_root=item_core['root_item'])
    else:
        l_pastes = []

    zip_file = h.create_domain_basic_archive(l_pastes)
    filename = domain + '.zip'

    return send_file(zip_file, attachment_filename=filename, as_attachment=True)
@hiddenServices.route("/hiddenServices/onion_son", methods=['GET'])
@login_required
@login_analyst
def onion_son():
    """Compute the sons of an onion domain from its last crawled pastes.

    Reads the ``onion_domain`` query parameter. The computed value is not
    part of the response: the route always answers with the literal
    string 'l_son'.
    """
    service = HiddenServices(request.args.get('onion_domain'), 'onion')
    crawled_pastes = service.get_last_crawled_pastes()
    # NOTE(review): the result is discarded and the literal below is returned - confirm intent
    service.get_domain_son(crawled_pastes)
    return 'l_son'
# ============= JSON ==============
@hiddenServices.route("/hiddenServices/domain_crawled_7days_json", methods=['GET'])
@login_required
@login_read_only
def domain_crawled_7days_json():
    """Return per-day counts of onion domains seen up and down.

    One entry is produced for each date returned by ``get_date_range(7)``:
    ``{'date': 'YYYY-MM-DD', 'value': <nb up>, 'nb_domain_down': <nb down>}``.
    """
    service_type = 'onion'
    ## TODO: # FIXME: 404 error

    json_domain_stats = []
    for date in get_date_range(7):
        nb_domain_up = r_serv_onion.scard('{}_up:{}'.format(service_type, date))
        # down domains live in their own '<type>_down:<date>' set
        nb_domain_down = r_serv_onion.scard('{}_down:{}'.format(service_type, date))
        json_domain_stats.append({
            'date': date[0:4] + '-' + date[4:6] + '-' + date[6:8],
            'value': int(nb_domain_up),
            'nb_domain_down': int(nb_domain_down),
        })

    return jsonify(json_domain_stats)
@hiddenServices.route('/hiddenServices/domain_crawled_by_type_json')
@login_required
@login_read_only
def domain_crawled_by_type_json():
    """Return the UP / DOWN domain counts of one service type for 7 days.

    The ``type`` query parameter must be one of ``list_types``; any other
    value yields the JSON string 'Incorrect Type'. Each entry of the
    response has the keys 'date' (YYYY-MM-DD), 'UP' and 'DOWN'.
    """
    requested_type = request.args.get('type')
    if requested_type not in list_types:
        return jsonify('Incorrect Type')

    range_decoder = []
    for day in get_date_range(7):
        range_decoder.append({
            'date': '-'.join((day[0:4], day[4:6], day[6:8])),
            'UP': r_serv_onion.scard('{}_up:{}'.format(requested_type, day)),
            'DOWN': r_serv_onion.scard('{}_down:{}'.format(requested_type, day)),
        })

    return jsonify(range_decoder)
# ========= REGISTRATION =========
# Attach the hiddenServices blueprint (all routes above) to the Flask app,
# prefixing every route with baseUrl.
app.register_blueprint(hiddenServices, url_prefix=baseUrl)

View file

@ -1,476 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<title>AIL-Framework</title>
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png')}}">
<!-- Core CSS -->
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/daterangepicker.min.css') }}" rel="stylesheet">
<!-- JS -->
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
<script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
<script language="javascript" src="{{ url_for('static', filename='js/moment.min.js') }}"></script>
<script language="javascript" src="{{ url_for('static', filename='js/jquery.daterangepicker.min.js') }}"></script>
<script language="javascript" src="{{ url_for('static', filename='js/d3.min.js') }}"></script>
<style>
.bar {
fill: steelblue;
}
.bar:hover{
fill: brown;
cursor: pointer;
}
.bar_stack:hover{
cursor: pointer;
}
div.tooltip {
position: absolute;
text-align: center;
padding: 2px;
font: 12px sans-serif;
background: #ebf4fb;
border: 2px solid #b7ddf2;
border-radius: 8px;
pointer-events: none;
color: #000000;
}
</style>
</head>
<body>
{% include 'nav_bar.html' %}
<div class="container-fluid">
<div class="row">
{% include 'crawler/menu_sidebar.html' %}
<div class="col-12 col-lg-10" id="core_content">
<div class="row">
<div class="col-12 col-xl-6">
<div class="table-responsive mt-1 table-hover table-borderless table-striped">
<table class="table">
<thead class="thead-dark">
<tr>
<th>Domain</th>
<th>First Seen</th>
<th>Last Check</th>
<th>Status</th>
</tr>
</thead>
<tbody id="tbody_last_crawled">
{% for metadata_onion in last_onions %}
<tr>
<td><a target="_blank" href="{{ url_for('hiddenServices.onion_domain') }}?onion_domain={{ metadata_onion['domain'] }}">{{ metadata_onion['domain'] }}</a></td>
<td>{{'{}/{}/{}'.format(metadata_onion['first_seen'][0:4], metadata_onion['first_seen'][4:6], metadata_onion['first_seen'][6:8])}}</td>
<td>{{'{}/{}/{}'.format(metadata_onion['last_check'][0:4], metadata_onion['last_check'][4:6], metadata_onion['last_check'][6:8])}}</td>
<td><div style="color:{{metadata_onion['status_color']}}; display:inline-block">
<i class="fas {{metadata_onion['status_icon']}} "></i>
{{metadata_onion['status_text']}}
</div>
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
<a href="{{ url_for('hiddenServices.blacklisted_onion') }}">
<button type="button" class="btn btn-outline-danger">Show Blacklisted Onion</button>
</a>
</div>
<div class="col-12 col-xl-6">
<div class="card text-white bg-dark mb-3 mt-1">
<div class="card-header">
<div class="row">
<div class="col-6">
<span class="badge badge-success">{{ statDomains['domains_up'] }}</span> UP
<span class="badge badge-danger ml-md-3">{{ statDomains['domains_down'] }}</span> DOWN
</div>
<div class="col-6">
<span class="badge badge-success">{{ statDomains['total'] }}</span> Crawled
<span class="badge badge-warning ml-md-3">{{ statDomains['domains_queue'] }}</span> Queue
</div>
</div>
</div>
<div class="card-body">
<h5 class="card-title">Select domains by date range :</h5>
<p class="card-text">Pick a start and end date, then choose which domains (UP, DOWN) to list and whether their tags should be shown.</p>
<form action="{{ url_for('hiddenServices.get_onions_by_daterange') }}" id="hash_selector_form" method='post'>
<div class="row">
<div class="col-6">
<div class="input-group" id="date-range-from">
<div class="input-group-prepend"><span class="input-group-text"><i class="far fa-calendar-alt" aria-hidden="true"></i></span></div>
<input class="form-control" id="date-range-from-input" placeholder="yyyy-mm-dd" value="{{ date_from }}" name="date_from">
</div>
<div class="input-group" id="date-range-to">
<div class="input-group-prepend"><span class="input-group-text"><i class="far fa-calendar-alt" aria-hidden="true"></i></span></div>
<input class="form-control" id="date-range-to-input" placeholder="yyyy-mm-dd" value="{{ date_to }}" name="date_to">
</div>
</div>
<div class="col-6">
<div class="custom-control custom-switch">
<input class="custom-control-input" type="checkbox" name="domains_up" value="True" id="domains_up_id" checked>
<label class="custom-control-label" for="domains_up_id">
<span class="badge badge-success"><i class="fas fa-check-circle"></i> Domains UP </span>
</label>
</div>
<div class="custom-control custom-switch">
<input class="custom-control-input" type="checkbox" name="domains_down" value="True" id="domains_down_id">
<label class="custom-control-label" for="domains_down_id">
<span class="badge badge-danger"><i class="fas fa-times-circle"></i> Domains DOWN</span>
</label>
</div>
<div class="custom-control custom-switch mt-2">
<input class="custom-control-input" type="checkbox" name="domains_tags" value="True" id="domains_tags_id">
<label class="custom-control-label" for="domains_tags_id">
<span class="badge badge-dark"><i class="fas fa-tags"></i> Domains Tags</span>
</label>
</div>
</div>
</div>
<button class="btn btn-primary">
<i class="fas fa-eye"></i> Show Onions
</button>
</form>
</div>
</div>
<div id="barchart_type">
</div>
<div class="card mt-1 mb-1">
<div class="card-header text-white bg-dark">
Crawlers Status
</div>
<div class="card-body px-0 py-0 ">
<table class="table">
<tbody id="tbody_crawler_info">
{% for crawler in crawler_metadata %}
<tr>
<td>
<i class="fas fa-{%if crawler['status']%}check{%else%}times{%endif%}-circle" style="color:{%if crawler['status']%}Green{%else%}Red{%endif%};"></i> {{crawler['crawler_info']}}
</td>
<td>
{{crawler['crawling_domain']}}
</td>
<td style="color:{%if crawler['status']%}Green{%else%}Red{%endif%};">
{{crawler['status_info']}}
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</body>
<script>
var chart = {};
$(document).ready(function(){
    $("#page-Crawler").addClass("active");
    $("#nav_onion_crawler").addClass("active");

    // Both date inputs open the same range picker: build its options in one place.
    // A fresh object is returned on every call so the two pickers share nothing.
    function rangePickerOptions() {
        return {
            separator : ' to ',
            getValue: function(){
                var from = $('#date-range-from-input').val();
                var to = $('#date-range-to-input').val();
                if (from && to) {
                    return $('#date-range-from-input').val() + ' to ' + $('#date-range-to-input').val();
                }
                return '';
            },
            setValue: function(s,s1,s2){
                $('#date-range-from-input').val(s1);
                $('#date-range-to-input').val(s2);
            }
        };
    }

    $('#date-range-from').dateRangePicker(rangePickerOptions());
    $('#date-range-to').dateRangePicker(rangePickerOptions());

    // Draw the stacked bar chart, then keep it sized to its container.
    chart.stackBarChart = barchart_type_stack("{{ url_for('hiddenServices.automatic_onion_crawler_json') }}", 'id');

    chart.onResize();
    $(window).on("resize", function() {
        chart.onResize();
    });
});
// Show or hide the side navigation menu and give the freed columns
// back to (or take them from) the main content area.
function toggle_sidebar(){
    var navMenu = $('#nav_menu');
    var sideMenu = $('#side_menu');
    var coreContent = $('#core_content');

    if (navMenu.is(':visible')) {
        navMenu.hide();
        sideMenu.removeClass('border-right col-lg-2');
        coreContent.removeClass('col-lg-10');
        return;
    }

    navMenu.show();
    sideMenu.addClass('border-right col-lg-2');
    coreContent.addClass('col-lg-10');
}
</script>
<script>/*
function refresh_list_crawled(){
$.getJSON("{{ url_for('hiddenServices.last_crawled_domains_with_stats_json') }}",
function(data) {
var tableRef = document.getElementById('tbody_last_crawled');
$("#tbody_last_crawled").empty()
for (var i = 0; i < data.last_onions.length; i++) {
var data_domain = data.last_onions[i]
var newRow = tableRef.insertRow(tableRef.rows.length);
var newCell = newRow.insertCell(0);
newCell.innerHTML = "<td><a target=\"_blank\" href=\"{{ url_for('hiddenServices.onion_domain') }}?onion_domain="+data_domain['domain']+"\">"+data_domain['domain']+"</a></td>";
newCell = newRow.insertCell(1);
newCell.innerHTML = "<td>"+data_domain['first_seen'].substr(0, 4)+"/"+data_domain['first_seen'].substr(4, 2)+"/"+data_domain['first_seen'].substr(6, 2)+"</td>"
newCell = newRow.insertCell(2);
newCell.innerHTML = "<td>"+data_domain['last_check'].substr(0, 4)+"/"+data_domain['last_check'].substr(4, 2)+"/"+data_domain['last_check'].substr(6, 2)+"</td>"
newCell = newRow.insertCell(3);
newCell.innerHTML = "<td><div style=\"color:"+data_domain['status_color']+"; display:inline-block\"><i class=\"fa "+data_domain['status_icon']+" fa-2x\"></i>"+data_domain['status_text']+"</div></td>"
}
var statDomains = data.statDomains
document.getElementById('text_domain_up').innerHTML = statDomains['domains_up']
document.getElementById('text_domain_down').innerHTML = statDomains['domains_down']
document.getElementById('text_domain_queue').innerHTML = statDomains['domains_queue']
document.getElementById('text_total_domains').innerHTML = statDomains['total']
if(data.crawler_metadata.length!=0){
$("#tbody_crawler_info").empty();
var tableRef = document.getElementById('tbody_crawler_info');
for (var i = 0; i < data.crawler_metadata.length; i++) {
var crawler = data.crawler_metadata[i];
var newRow = tableRef.insertRow(tableRef.rows.length);
var text_color;
var icon;
if(crawler['status']){
text_color = 'Green';
icon = 'check';
} else {
text_color = 'Red';
icon = 'times';
}
var newCell = newRow.insertCell(0);
newCell.innerHTML = "<td><i class=\"fa fa-"+icon+"-circle\" style=\"color:"+text_color+";\"></i>"+crawler['crawler_info']+"</td>";
newCell = newRow.insertCell(1);
newCell.innerHTML = "<td><a target=\"_blank\" href=\"{{ url_for('hiddenServices.onion_domain') }}?onion_domain="+crawler['crawling_domain']+"\">"+crawler['crawling_domain']+"</a></td>";
newCell = newRow.insertCell(2);
newCell.innerHTML = "<td><div style=\"color:"+text_color+";\">"+crawler['status_info']+"</div></td>";
$("#panel_crawler").show();
}
} else {
$("#panel_crawler").hide();
}
}
);
if (to_refresh) {
setTimeout("refresh_list_crawled()", 10000);
}
}*/
</script>
<script>
// Shared state of the stacked bar chart drawn inside #barchart_type:
// margins, drawing size, scales, axes, colour scale and the root <g> element.
var margin = {top: 20, right: 90, bottom: 55, left: 0},
    width = parseInt(d3.select('#barchart_type').style('width'), 10);
// NOTE(review): the width measured above is overwritten right away by the fixed
// value below. The previous statement ended with ';', so `height` is assigned
// here without a `var` declaration.
width = 1000 - margin.left - margin.right,
height = 500 - margin.top - margin.bottom;
// x: one band per date; y: linear, inverted so that 0 sits at the bottom of the drawing
var x = d3.scaleBand().rangeRound([0, width]).padding(0.1);
var y = d3.scaleLinear().rangeRound([height, 0]);
var xAxis = d3.axisBottom(x);
var yAxis = d3.axisLeft(y);
// one colour per series name
var color = d3.scaleOrdinal(d3.schemeSet3);
// `svg` refers to the inner <g>, already shifted by the margins
var svg = d3.select("#barchart_type").append("svg")
    .attr("id", "thesvg")
    .attr("viewBox", "0 0 "+width+" 500")
    .attr("width", width + margin.left + margin.right)
    .attr("height", height + margin.top + margin.bottom)
    .append("g")
    .attr("transform", "translate(" + margin.left + "," + margin.top + ")");
// Fetch JSON from `url` and draw it as a stacked bar chart in the shared `svg`.
// Each element of the response is read as one bar: its `date` key is the label,
// every other key of the first element is treated as a series to stack.
// `id` is not used by this function. Nothing is returned.
function barchart_type_stack(url, id) {
    d3.json(url)
        .then(function(data){
            var labelVar = 'date'; // key holding the bar label
            var varNames = d3.keys(data[0])
                .filter(function (key) { return key !== labelVar;}); // series names: every key except the label
            data.forEach(function (d) { // compute the stacked segments of each bar
                var y0 = 0;
                d.mapping = varNames.map(function (name) {
                    return {
                        name: name,
                        label: d[labelVar],
                        y0: y0,
                        // `+d[name]` coerces to a number; y0 then carries the running total to the next segment
                        y1: y0 += +d[name]
                    };
                });
                // top of the last segment = total height of the bar
                d.total = d.mapping[d.mapping.length - 1].y1;
            });
            x.domain(data.map(function (d) { return (d.date); })); // one band per date
            y.domain([0, d3.max(data, function (d) { return d.total; })]);
            // x axis, with slightly rotated tick labels
            svg.append("g")
                .attr("class", "x axis")
                .attr("transform", "translate(0," + height + ")")
                .call(xAxis)
                .selectAll("text")
                .attr("class", "bar")
                .on("click", function (d) { window.location.href = "#" })
                .attr("transform", "rotate(-18)" )
                //.attr("transform", "rotate(-40)" )
                .style("text-anchor", "end");
            // y axis
            svg.append("g")
                .attr("class", "y axis")
                .call(yAxis)
                .append("text")
                .attr("transform", "rotate(-90)")
                .attr("y", 6)
                .attr("dy", ".71em")
                .style("text-anchor", "end");
            // one <g> per date, positioned on its band
            var selection = svg.selectAll(".series")
                .data(data)
                .enter().append("g")
                .attr("class", "series")
                .attr("transform", function (d) { return "translate(" + x((d.date)) + ",0)"; });
            // one <rect> per stacked segment
            selection.selectAll("rect")
                .data(function (d) { return d.mapping; })
                .enter().append("rect")
                .attr("class", "bar_stack")
                .attr("width", x.bandwidth())
                .attr("y", function (d) { return y(d.y1); })
                .attr("height", function (d) { return y(d.y0) - y(d.y1); })
                .style("fill", function (d) { return color(d.name); })
                .style("stroke", "grey")
                .on("mouseover", function (d) { showPopover.call(this, d); })
                .on("mouseout", function (d) { removePopovers(); })
                .on("click", function(d){ window.location.href = "#" });
            // print the total above every non-empty bar
            data.forEach(function(d) {
                if(d.total != 0){
                    svg.append("text")
                        .attr("class", "bar")
                        .attr("dy", "-.35em")
                        .attr('x', x(d.date) + x.bandwidth()/2)
                        .attr('y', y(d.total))
                        .on("click", function () {window.location.href = "#" })
                        .style("text-anchor", "middle")
                        .text(d.total);
                }
            });
            drawLegend(varNames);
        });
}
// Draw one legend row (colour swatch + series name) per entry of varNames,
// listed in reverse order, 20px apart.
function drawLegend (varNames) {
    var entries = varNames.slice().reverse();

    var legend = svg.selectAll(".legend")
        .data(entries)
        .enter()
        .append("g")
        .attr("class", "legend")
        .attr("transform", function (name, row) {
            var offset = row * 20;
            return "translate(0," + offset + ")";
        });

    // colour swatch
    var swatch = legend.append("rect");
    swatch.attr("x", 943)
        .attr("width", 10)
        .attr("height", 10)
        .style("fill", color)
        .style("stroke", "grey");

    // series name, right-aligned against the swatch
    var caption = legend.append("text");
    caption.attr("class", "svgText")
        .attr("x", 941)
        .attr("y", 6)
        .attr("dy", ".35em")
        .style("text-anchor", "end")
        .text(function (name) { return name; });
}
// Remove every element carrying the `popover` class from the page.
function removePopovers () {
    $('.popover').remove();
}
// Show a popover on the element bound to `this`, describing the segment `d`:
// series name as title, label and count as body.
function showPopover (d) {
    var target = $(this);

    function popoverBody() {
        // fall back to the segment height when the datum has no explicit value
        var count = d.value ? d.value : d.y1 - d.y0;
        return d.label +
            "<br/>num: " + d3.format(",")(count);
    }

    target.popover({
        title: d.name,
        placement: 'top',
        container: 'body',
        trigger: 'manual',
        html : true,
        content: popoverBody
    });
    target.popover('show');
}
// Resize the chart <svg> to the width of its parent, with a height of half that width.
chart.onResize = function () {
    var svgElement = $("#thesvg");
    var targetWidth = svgElement.parent().width();
    svgElement.attr("width", targetWidth);
    svgElement.attr("height", targetWidth / 2);
};
window.chart = chart;
</script>

View file

@ -4,7 +4,6 @@
'''
Flask functions and routes for the trending modules page
'''
import redis
import json
import os
import datetime
@ -14,11 +13,12 @@ from flask import Flask, render_template, jsonify, request, Blueprint
from Role_Manager import login_admin, login_analyst
from flask_login import login_required
import Paste
from whoosh import index
from whoosh.fields import Schema, TEXT, ID
from whoosh.qparser import QueryParser
from lib.objects.Items import Item
import time
# ============ VARIABLES ============
@ -27,7 +27,6 @@ import Flask_config
app = Flask_config.app
config_loader = Flask_config.config_loader
baseUrl = Flask_config.baseUrl
r_serv_metadata = Flask_config.r_serv_metadata
max_preview_char = Flask_config.max_preview_char
max_preview_modal = Flask_config.max_preview_modal
bootstrap_label = Flask_config.bootstrap_label
@ -128,15 +127,14 @@ def search():
for x in results:
r.append(x.items()[0][1].replace(PASTES_FOLDER, '', 1))
path = x.items()[0][1].replace(PASTES_FOLDER, '', 1)
paste = Paste.Paste(path)
content = paste.get_p_content()
item = Item(path)
content = item.get_content()
content_range = max_preview_char if len(content)>max_preview_char else len(content)-1
c.append(content[0:content_range])
curr_date = str(paste._get_p_date())
curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:]
curr_date = item.get_date(separator=True)
paste_date.append(curr_date)
paste_size.append(paste._get_p_size())
p_tags = r_serv_metadata.smembers('tag:'+path)
paste_size.append(item.get_size())
p_tags = item.get_tags()
l_tags = []
for tag in p_tags:
complete_tag = tag
@ -205,15 +203,14 @@ def get_more_search_result():
path = x.items()[0][1]
path = path.replace(PASTES_FOLDER, '', 1)
path_array.append(path)
paste = Paste.Paste(path)
content = paste.get_p_content()
item = Item(path)
content = item.get_content()
content_range = max_preview_char if len(content)>max_preview_char else len(content)-1
preview_array.append(content[0:content_range])
curr_date = str(paste._get_p_date())
curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:]
curr_date = item.get_date(separator=True)
date_array.append(curr_date)
size_array.append(paste._get_p_size())
p_tags = r_serv_metadata.smembers('tag:'+path)
size_array.append(item.get_size())
p_tags = item.get_tags()
l_tags = []
for tag in p_tags:
complete_tag = tag

View file

@ -4,7 +4,6 @@
'''
Flask functions and routes for the trending modules page
'''
import redis
import json
import os
import sys
@ -15,17 +14,14 @@ from Role_Manager import login_admin, login_analyst, login_read_only, no_cache
from flask_login import login_required
import difflib
import ssdeep
import Paste
import requests
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
import Tag
import Item
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import Domain
# ============ VARIABLES ============
import Flask_config
@ -52,214 +48,11 @@ showsavedpastes = Blueprint('showsavedpastes', __name__, template_folder='templa
def get_item_screenshot_path(item):
    """Return the relative screenshot path of an item, or '' when it has none.

    The value stored in the 'screenshot' field of ``paste_metadata:<item>``
    is split exactly once into six two-character directory levels followed
    by the remaining characters.
    """
    screenshot = r_serv_metadata.hget('paste_metadata:{}'.format(item), 'screenshot')
    if not screenshot:
        return ''
    return os.path.join(screenshot[0:2], screenshot[2:4], screenshot[4:6], screenshot[6:8],
                        screenshot[8:10], screenshot[10:12], screenshot[12:])
# Render the "show_saved_paste.html" page for one stored paste.
#   content_range:  number of leading characters of the content to display; 0 displays everything.
#   requested_path: path of the paste, with or without the PASTES_FOLDER prefix.
# Returns the rendered template, or the string 'path transversal detected'
# when the prefix check fails. abort(404) is called when the paste file is missing.
def showpaste(content_range, requested_path):
if PASTES_FOLDER not in requested_path:
# relative path received: build the full path
# NOTE(review): PASTES_FOLDER is passed as the *last* argument of os.path.join; if it is an
# absolute path the result is PASTES_FOLDER itself, so the check below can never fail for
# relative input - confirm the intended argument order.
requested_path_full = os.path.join(requested_path, PASTES_FOLDER)
else:
requested_path_full = requested_path
requested_path = requested_path.replace(PASTES_FOLDER, '', 1)
# escape directory transversal
# NOTE(review): os.path.commonprefix compares character by character and the path is not
# resolved first - verify this is a sufficient guard.
if os.path.commonprefix((requested_path_full,PASTES_FOLDER)) != PASTES_FOLDER:
return 'path transversal detected'
vt_enabled = Flask_config.vt_enabled
try:
paste = Paste.Paste(requested_path)
except FileNotFoundError:
abort(404)
# date is displayed as DD/MM/YYYY
p_date = str(paste._get_p_date())
p_date = p_date[6:]+'/'+p_date[4:6]+'/'+p_date[0:4]
p_source = paste.p_source
p_encoding = paste._get_p_encoding()
p_language = 'None'
p_size = paste.p_size
p_mime = paste.p_mime
p_lineinfo = paste.get_lines_info()
p_content = paste.get_p_content()
p_duplicate_str_full_list = paste._get_p_duplicate()
p_duplicate_full_list = []
p_duplicate_list = []
p_simil_list = []
p_date_list = []
p_hashtype_list = []
# Each duplicate entry is a stringified list: strip brackets, quotes and spaces, split on ','.
# Index 0 is the hash type, index 1 the path, index 2 a score (and index 3, when present, a date).
for dup_list in p_duplicate_str_full_list:
dup_list = dup_list[1:-1].replace('\'', '').replace(' ', '').split(',')
# the tlsh score is inverted (100 - value); other hash types keep their value
if dup_list[0] == "tlsh":
dup_list[2] = 100 - int(dup_list[2])
else:
dup_list[2] = int(dup_list[2])
p_duplicate_full_list.append(dup_list)
#p_duplicate_full_list.sort(lambda x,y: cmp(x[2], y[2]), reverse=True)
# Combine multiple duplicate paste name and format for display
new_dup_list = []
dup_list_removed = []
for dup_list_index in range(0, len(p_duplicate_full_list)):
if dup_list_index in dup_list_removed:
continue
# every entry pointing to the same path is merged into one row
indices = [i for i, x in enumerate(p_duplicate_full_list) if x[1] == p_duplicate_full_list[dup_list_index][1]]
hash_types = []
comp_vals = []
for i in indices:
hash_types.append(p_duplicate_full_list[i][0])
comp_vals.append(p_duplicate_full_list[i][2])
dup_list_removed.append(i)
#hash_types = str(hash_types).replace("[","").replace("]","") if len(hash_types)==1 else str(hash_types)
#comp_vals = str(comp_vals).replace("[","").replace("]","") if len(comp_vals)==1 else str(comp_vals)
if len(p_duplicate_full_list[dup_list_index]) > 3:
# numeric date -> YYYY-MM-DD, anything else is shown as is
try:
date_paste = str(int(p_duplicate_full_list[dup_list_index][3]))
date_paste = date_paste[0:4]+"-"+date_paste[4:6]+"-"+date_paste[6:8]
except ValueError:
date_paste = str(p_duplicate_full_list[dup_list_index][3])
else:
date_paste = "No date available"
new_dup_list.append([hash_types, p_duplicate_full_list[dup_list_index][1], comp_vals, date_paste])
# Create the list to pass to the webpage
for dup_list in new_dup_list:
hash_type, path, simil_percent, date_paste = dup_list
p_duplicate_list.append(path)
p_simil_list.append(simil_percent)
p_hashtype_list.append(hash_type)
p_date_list.append(date_paste)
if content_range != 0:
p_content = p_content[0:content_range]
#active taxonomies
active_taxonomies = r_serv_tags.smembers('active_taxonomies')
l_tags = r_serv_metadata.smembers('tag:'+requested_path)
tags_safe = Tag.is_tags_safe(l_tags)
#active galaxies
active_galaxies = r_serv_tags.smembers('active_galaxies')
# list_tags holds (tag, automatic, true positive, false positive) tuples
list_tags = []
for tag in l_tags:
# characters 9..27 of the tag are compared; presumably this skips a 9-character
# prefix such as 'infoleak:' - TODO confirm
if(tag[9:28] == 'automatic-detection'):
automatic = True
else:
automatic = False
if r_serv_statistics.sismember('tp:'+tag, requested_path):
tag_status_tp = True
else:
tag_status_tp = False
if r_serv_statistics.sismember('fp:'+tag, requested_path):
tag_status_fp = True
else:
tag_status_fp = False
list_tags.append( (tag, automatic, tag_status_tp, tag_status_fp) )
l_64 = []
# load hash files
if r_serv_metadata.scard('hash_paste:'+requested_path) > 0:
set_b64 = r_serv_metadata.smembers('hash_paste:'+requested_path)
for hash in set_b64:
nb_in_file = r_serv_metadata.zscore('nb_seen_hash:'+hash, requested_path)
# item list not updated
if nb_in_file is None:
l_pastes = r_serv_metadata.zrange('nb_seen_hash:'+hash, 0, -1)
for paste_name in l_pastes:
# dynamic update
# members stored with the PASTES_FOLDER prefix are re-added under their relative path, keeping the score
if PASTES_FOLDER in paste_name:
score = r_serv_metadata.zscore('nb_seen_hash:{}'.format(hash), paste_name)
r_serv_metadata.zrem('nb_seen_hash:{}'.format(hash), paste_name)
paste_name = paste_name.replace(PASTES_FOLDER, '', 1)
r_serv_metadata.zadd('nb_seen_hash:{}'.format(hash), score, paste_name)
nb_in_file = r_serv_metadata.zscore('nb_seen_hash:'+hash, requested_path)
nb_in_file = int(nb_in_file)
estimated_type = r_serv_metadata.hget('metadata_hash:'+hash, 'estimated_type')
# the part before '/' in the estimated type selects the icon
file_type = estimated_type.split('/')[0]
# set file icon
if file_type == 'application':
file_icon = 'fa-file-o '
elif file_type == 'audio':
file_icon = 'fa-file-video-o '
elif file_type == 'image':
file_icon = 'fa-file-image-o'
elif file_type == 'text':
file_icon = 'fa-file-text-o'
else:
file_icon = 'fa-file'
saved_path = r_serv_metadata.hget('metadata_hash:'+hash, 'saved_path')
if r_serv_metadata.hexists('metadata_hash:'+hash, 'vt_link'):
b64_vt = True
b64_vt_link = r_serv_metadata.hget('metadata_hash:'+hash, 'vt_link')
b64_vt_report = r_serv_metadata.hget('metadata_hash:'+hash, 'vt_report')
else:
b64_vt = False
b64_vt_link = ''
b64_vt_report = r_serv_metadata.hget('metadata_hash:'+hash, 'vt_report')
# hash never refreshed
if b64_vt_report is None:
b64_vt_report = ''
l_64.append( (file_icon, estimated_type, hash, saved_path, nb_in_file, b64_vt, b64_vt_link, b64_vt_report) )
# crawler metadata is only loaded for pastes tagged as submitted by the crawler
crawler_metadata = {}
if 'infoleak:submission="crawler"' in l_tags:
crawler_metadata['get_metadata'] = True
crawler_metadata['domain'] = r_serv_metadata.hget('paste_metadata:'+requested_path, 'domain')
# drop whatever follows the last ':' in the stored domain
crawler_metadata['domain'] = crawler_metadata['domain'].rsplit(':', 1)[0]
# the page is only safe if the tags of the domain are safe too
if tags_safe:
tags_safe = Tag.is_tags_safe(Domain.get_domain_tags(crawler_metadata['domain']))
crawler_metadata['paste_father'] = r_serv_metadata.hget('paste_metadata:'+requested_path, 'father')
crawler_metadata['real_link'] = r_serv_metadata.hget('paste_metadata:'+requested_path,'real_link')
crawler_metadata['screenshot'] = get_item_screenshot_path(requested_path)
else:
crawler_metadata['get_metadata'] = False
item_parent = Item.get_item_parent(requested_path)
# MISP / TheHive are flagged as available unless their Flask_config attribute is False
if Flask_config.pymisp is False:
misp = False
else:
misp = True
if Flask_config.HiveApi is False:
hive = False
else:
hive = True
# misp_event_url and hive_case_url are not defined in this function
misp_event = r_serv_metadata.get('misp_events:' + requested_path)
if misp_event is None:
misp_eventid = False
misp_url = ''
else:
misp_eventid = True
misp_url = misp_event_url + misp_event
hive_case = r_serv_metadata.get('hive_cases:' + requested_path)
if hive_case is None:
hive_caseid = False
hive_url = ''
else:
hive_caseid = True
hive_url = hive_case_url.replace('id_here', hive_case)
return render_template("show_saved_paste.html", date=p_date, bootstrap_label=bootstrap_label, active_taxonomies=active_taxonomies, active_galaxies=active_galaxies, list_tags=list_tags, source=p_source, encoding=p_encoding, language=p_language, size=p_size, mime=p_mime, lineinfo=p_lineinfo, content=p_content, initsize=len(p_content), duplicate_list = p_duplicate_list, simil_list = p_simil_list, hashtype_list = p_hashtype_list, date_list=p_date_list,
crawler_metadata=crawler_metadata, tags_safe=tags_safe, item_parent=item_parent,
l_64=l_64, vt_enabled=vt_enabled, misp=misp, hive=hive, misp_eventid=misp_eventid, misp_url=misp_url, hive_caseid=hive_caseid, hive_url=hive_url)
def get_item_basic_info(item):
item_basic_info = {}
item_basic_info['date'] = str(item.get_p_date())
@ -286,7 +79,7 @@ def show_item_min(requested_path , content_range=0):
else:
relative_path = requested_path.replace(PASTES_FOLDER, '', 1)
# remove old full path
#requested_path = requested_path.replace(PASTES_FOLDER, '')
# requested_path = requested_path.replace(PASTES_FOLDER, '')
# escape directory transversal
if os.path.commonprefix((os.path.realpath(requested_path),PASTES_FOLDER)) != PASTES_FOLDER:
return 'path transversal detected'
@ -370,7 +163,7 @@ def show_item_min(requested_path , content_range=0):
crawler_metadata['paste_father'] = r_serv_metadata.hget('paste_metadata:'+relative_path, 'father')
crawler_metadata['real_link'] = r_serv_metadata.hget('paste_metadata:'+relative_path,'real_link')
crawler_metadata['screenshot'] = get_item_screenshot_path(relative_path)
#crawler_metadata['har_file'] = Item.get_item_har(relative_path)
# crawler_metadata['har_file'] = Item.get_item_har(relative_path)
else:
crawler_metadata['get_metadata'] = False
@ -462,13 +255,6 @@ def showDiff():
def screenshot(filename):
return send_from_directory(SCREENSHOT_FOLDER, filename+'.png', as_attachment=True)
# @showsavedpastes.route('/har/paste/<path:filename>')
# @login_required
# @login_read_only
# def har(filename):
# har_file = Item.get_item_har(filename)
# return jsonify(har_file)
@showsavedpastes.route('/send_file_to_vt/', methods=['POST'])
@login_required
@login_analyst

View file

@ -15,14 +15,11 @@ from flask import Flask, render_template, jsonify, request, Blueprint, url_for,
from Role_Manager import login_admin, login_analyst, login_user_no_api, login_read_only
from flask_login import login_required, current_user
import re
from pprint import pprint
import Levenshtein
# ---------------------------------------------------------------
import Paste
from lib.objects.Items import Item
import Term
# ============ VARIABLES ============
@ -262,21 +259,21 @@ def credentials_tracker():
@login_required
@login_user_no_api
def credentials_management_query_paste():
cred = request.args.get('cred')
cred = request.args.get('cred')
allPath = request.json['allPath']
paste_info = []
for pathNum in allPath:
path = r_serv_cred.hget(REDIS_KEY_ALL_PATH_SET_REV, pathNum)
paste = Paste.Paste(path)
p_date = str(paste._get_p_date())
p_date = p_date[0:4]+'/'+p_date[4:6]+'/'+p_date[6:8]
p_source = paste.p_source
p_encoding = paste._get_p_encoding()
p_size = paste.p_size
p_mime = paste.p_mime
p_lineinfo = paste.get_lines_info()
p_content = paste.get_p_content()
item = Item(path)
p_date = item.get_date(separator=True)
p_source = item.get_source()
p_content = item.get_content()
p_encoding = item.get_mimetype()
p_size = item.get_size()
p_mime = p_encoding
lineinfo = item.get_meta_lines(content=p_content)
p_lineinfo = lineinfo['nb'], lineinfo['max_length']
if p_content != 0:
p_content = p_content[0:400]
paste_info.append({"path": path, "date": p_date, "source": p_source, "encoding": p_encoding, "size": p_size, "mime": p_mime, "lineinfo": p_lineinfo, "content": p_content})

View file

@ -51,7 +51,7 @@
<th>Total sent</th>
<th>Balance</th>
<th>Inputs address seen in AIL</th>
<th>Ouputs address seen in AIL</th>
<th>Outputs address seen in AIL</th>
</tr>
</thead>
<tbody>
@ -62,12 +62,12 @@
<td>{{ dict_object["metadata_card"]["related_btc"]["final_balance"] }}</td>
<td>
{% for btc_addr in dict_object["metadata_card"]["related_btc"]["btc_in"] %}
<a target="_blank" href="{{ url_for('correlation.show_correlation') }}?object_type=cryptocurrency&type_id=bitcoin&correlation_id={{ btc_addr }}&correlation_objects=paste">{{ btc_addr }}</a>
<a target="_blank" href="{{ url_for('correlation.show_correlation') }}?type=cryptocurrency&subtype=bitcoin&id={{ btc_addr }}">{{ btc_addr }}</a>
{% endfor %}
</td>
<td>
{% for btc_addr in dict_object["metadata_card"]["related_btc"]["btc_out"] %}
<a target="_blank" href="{{ url_for('correlation.show_correlation') }}?object_type=cryptocurrency&type_id=bitcoin&correlation_id={{ btc_addr }}&correlation_objects=paste">{{ btc_addr }}</a>
<a target="_blank" href="{{ url_for('correlation.show_correlation') }}?type=cryptocurrency&subtype=bitcoin&id={{ btc_addr }}">{{ btc_addr }}</a>
{% endfor %}
</td>
</tr>
@ -75,7 +75,7 @@
</table>
</div>
{% else %}
<a class="btn btn-secondary" href="{{ url_for('correlation.show_correlation') }}?object_type=cryptocurrency&type_id=bitcoin&correlation_id={{ dict_object['correlation_id'] }}&expand_card=True&correlation_objects=paste">Expand Bitcoin address</a>
<a class="btn btn-secondary" href="{{ url_for('correlation.show_correlation') }}?type=cryptocurrency&subtype=bitcoin&id={{ dict_object['correlation_id'] }}&related_btc=True">Expand Bitcoin address</a>
{% endif %}
{% endif %}

View file

@ -0,0 +1,172 @@
<link href="{{ url_for('static', filename='css/tags.css') }}" rel="stylesheet" type="text/css" />
<script src="{{ url_for('static', filename='js/tags.js') }}"></script>
{#{% with modal_add_tags=dict_object['metadata_card']['add_tags_modal']%}#}
{# {% include 'modals/add_tags.html' %}#}
{#{% endwith %}#}
{% include 'modals/edit_tag.html' %}
<div class="card my-3">
<div class="card-header" style="background-color:#d9edf7;font-size: 15px">
<h4 class="text-secondary">{{ dict_object["correlation_id"] }} :</h4>
<ul class="list-group mb-2">
<li class="list-group-item py-0">
<div class="row">
<div class="col-md-10">
<table class="table">
<thead>
<tr>
<th>Object type</th>
<th></th>
<th>First seen</th>
<th>Last seen</th>
<th>Nb seen</th>
</tr>
</thead>
<tbody>
<tr>
<td>{{ dict_object["object_type"] }}</td>
<td>
<svg height="26" width="26">
<g class="nodes">
<circle cx="13" cy="13" r="13" fill="orange"></circle>
<text x="13" y="13" text-anchor="middle" dominant-baseline="central" class="graph_node_icon {{ dict_object["metadata_card"]["icon"]["icon_class"] }}" font-size="16px">{{ dict_object["metadata_card"]["icon"]["icon_text"] }}</text>
</g>
</svg>
</td>
<td>{{ dict_object["metadata"]['first_seen'] }}</td>
<td>{{ dict_object["metadata"]['last_seen'] }}</td>
<td>{{ dict_object["metadata"]['nb_seen'] }}</td>
</tr>
</tbody>
</table>
</div>
<div class="col-md-1">
<div id="sparkline"></div>
</div>
</div>
</li>
{# <li class="list-group-item py-0">#}
{# <br>#}
{# <div class="mb-3">#}
{# Tags:#}
{# {% for tag in dict_object["metadata"]['tags'] %}#}
{# <button class="btn btn-{{ bootstrap_label[loop.index0 % 5] }}" data-toggle="modal" data-target="#edit_tags_modal"#}
{# data-tagid="{{ tag }}" data-objtype="decoded" data-objid="{{ dict_object["correlation_id"] }}">#}
{# {{ tag }}#}
{# </button>#}
{# {% endfor %}#}
{# <button type="button" class="btn btn-light" data-toggle="modal" data-target="#add_tags_modal">#}
{# <i class="far fa-plus-square"></i>#}
{# </button>#}
{# </div>#}
{# </li>#}
</ul>
{# Include the investigations registration modal, passing it the type, id and (empty) subtype of the displayed object. #}
{# NOTE(review): obj_type is hard-coded to 'decoded', yet this template appears to be the new CVE metadata card - looks copied from the decoded card; confirm whether it should be 'cve'. #}
{% with obj_type='decoded', obj_id=dict_object['correlation_id'], obj_subtype='' %}
{% include 'modals/investigations_register_obj.html' %}
{% endwith %}
<button type="button" class="btn btn-primary" data-toggle="modal" data-target="#investigations_register_obj_modal">
<i class="fas fa-microscope"></i> Investigations
</button>
</div>
</div>
<script src="{{ url_for('static', filename='js/d3/sparklines.js')}}"></script>
<script>
sparkline("sparkline", {{ dict_object["metadata_card"]["sparkline"] }}, {});
</script>
<script>
// Draw a date/value line chart, with hover tooltips, inside the element whose id is `id`.
//
//   id  - id of the DOM element that receives the <svg> (selected as '#' + id)
//   url - endpoint fetched with d3.json(); the handler expects an array of
//         objects carrying a `date` string in "%Y-%m-%d" format and a `value`
//
// Returns nothing: the chart is rendered asynchronously once the JSON arrives.
// NOTE(review): combines the promise form of d3.json() with d3.event, which
// matches the d3 v5 API - confirm against the d3 version bundled with the page.
function create_line_chart(id, url){
  var width = 900;
  var height = Math.round(width / 4);
  var margin = {top: 20, right: 55, bottom: 50, left: 40};

  // Scales are created up front; their domains are set once the data is loaded.
  var x = d3.scaleTime().range([0, width]);
  var y = d3.scaleLinear().rangeRound([height, 0]);

  var parseTime = d3.timeParse("%Y-%m-%d");

  var line = d3.line()
    .x(function(d) {
      return x(d.date);
    }).y(function(d) {
      return y(d.value);
    });

  var svg_line = d3.select('#'+id).append('svg')
    .attr("id", "graph_div")
    .attr("width", width + margin.left + margin.right)
    .attr("height", height + margin.top + margin.bottom)
    .append('g')
    .attr('transform', "translate("+ margin.left +","+ margin.top +")");

  // Tooltip container, appended to <body> and hidden until a point is hovered.
  var div = d3.select('body').append('div')
    .attr('class', 'tooltip')
    .style('opacity', 0);

  d3.json(url)
    .then(function(data){

      data.forEach(function(d) {
        // Keep the raw date string for the tooltip before parsing it into a Date.
        d.date_label = d.date;
        d.date = parseTime(d.date);
        d.value = +d.value;
      });

      // fit the data
      x.domain(d3.extent(data, function(d) { return d.date; }));
      y.domain([0, d3.max(data, function(d){ return d.value ; })]);

      // line
      svg_line.append("path")
        .data([data])
        .attr("class", "line_graph")
        .attr("d", line);

      // add X axis, with labels rotated so consecutive dates do not overlap
      svg_line.append("g")
        .attr("transform", "translate(0," + height + ")")
        .call(d3.axisBottom(x))
        .selectAll("text")
        .style("text-anchor", "end")
        .attr("transform", "rotate(-45)" );

      // Add the Y Axis
      svg_line.append("g")
        .call(d3.axisLeft(y));

      // add a dot circle per data point; hovering it shows the tooltip
      svg_line.selectAll('dot')
        .data(data).enter()
        .append('circle')
        .attr('r', 2)
        .attr('cx', function(d) { return x(d.date); })
        .attr('cy', function(d) { return y(d.value); })
        .on('mouseover', function(d) {
          div.transition().style('opacity', .9);
          div.html('' + d.date_label+ '<br/>' + d.value)
            .style("left", (d3.event.pageX) + "px")
            .style("top", (d3.event.pageY - 28) + "px");
        })
        .on('mouseout', function(d)
        {
          div.transition().style('opacity', 0);
        });
    });
}
</script>

View file

@ -39,7 +39,7 @@
</div>
<div class="mb-2 float-right">
<a href="{{ url_for('objects_item.showItem')}}?id={{ dict_object["correlation_id"] }}" target="_blank" style="font-size: 15px">
<button class="btn btn-info"><i class="fas fa-search"></i> Show Paste
<button class="btn btn-info"><i class="fas fa-search"></i> Show Item
</button>
</a>
</div>

View file

@ -99,12 +99,14 @@
{% include 'correlation/metadata_card_username.html' %}
{% elif dict_object["object_type"] == "decoded" %}
{% include 'correlation/metadata_card_decoded.html' %}
{% elif dict_object["object_type"] == "cve" %}
{% include 'correlation/metadata_card_cve.html' %}
{% elif dict_object["object_type"] == "domain" %}
{% include 'correlation/metadata_card_domain.html' %}
{% elif dict_object["object_type"] == "screenshot" %}
{% include 'correlation/metadata_card_screenshot.html' %}
{% elif dict_object["object_type"] == "paste" %}
{% include 'correlation/metadata_card_paste.html' %}
{% elif dict_object["object_type"] == "item" %}
{% include 'correlation/metadata_card_item.html' %}
{% endif %}
<div class="row">
@ -146,82 +148,87 @@
<div class="card-body text-center px-0 py-0">
<ul class="list-group">
<li class="list-group-item list-group-item-info">Select Correlation</i></li>
<li class="list-group-item text-left">
<li class="list-group-item list-group-item-info">Select Correlation</li>
<form action="{{ url_for('correlation.show_correlation') }}" method="post">
<li class="list-group-item text-left">
<form action="{{ url_for('correlation.show_correlation') }}" method="post">
<input type="hidden" id="object_type" name="object_type" value="{{ dict_object["object_type"] }}">
<input type="hidden" id="type_id" name="type_id" value="{{ dict_object["metadata"]["type_id"] }}">
<input type="hidden" id="correlation_id" name="correlation_id" value="{{ dict_object["correlation_id"] }}">
<input type="hidden" id="obj_type" name="obj_type" value="{{ dict_object["object_type"] }}">
<input type="hidden" id="subtype" name="subtype" value="{{ dict_object["metadata"]["type_id"] }}">
<input type="hidden" id="obj_id" name="obj_id" value="{{ dict_object["correlation_id"] }}">
<div class="form-check">
<input class="form-check-input" type="checkbox" value="True" id="CryptocurrencyCheck" name="CryptocurrencyCheck" {%if "cryptocurrency" in dict_object["correlation_names"]%}checked{%endif%}>
<label class="form-check-label" for="CryptocurrencyCheck">Cryptocurrency</label>
</div>
<div class="form-check">
<input class="form-check-input" type="checkbox" value="True" id="DecodedCheck" name="DecodedCheck" {%if "decoded" in dict_object["correlation_names"]%}checked{%endif%}>
<label class="form-check-label" for="DecodedCheck">Decoded</label>
</div>
<div class="form-check">
<input class="form-check-input" type="checkbox" value="True" id="ScreenshotCheck" name="ScreenshotCheck" {%if "screenshot" in dict_object["correlation_names"]%}checked{%endif%}>
<label class="form-check-label" for="ScreenshotCheck">Screenshot</label>
</div>
<div class="form-check">
<input class="form-check-input" type="checkbox" value="True" id="PgpCheck" name="PgpCheck" {%if "pgp" in dict_object["correlation_names"]%}checked{%endif%}>
<label class="form-check-label" for="PgpCheck">PGP</label>
</div>
<div class="form-check">
<input class="form-check-input" type="checkbox" value="True" id="UsernameCheck" name="UsernameCheck" {%if "username" in dict_object["correlation_names"]%}checked{%endif%}>
<label class="form-check-label" for="UsernameCheck">Username</label>
</div>
<div class="form-check">
<input class="form-check-input" type="checkbox" value="True" id="DomainCheck" name="DomainCheck" {%if "domain" in dict_object["correlation_objects"]%}checked{%endif%}>
<label class="form-check-label" for="DomainCheck">Domain</label>
</div>
<div class="form-check">
<input class="form-check-input" type="checkbox" value="True" id="PasteCheck" name="PasteCheck" {%if "item" in dict_object["correlation_objects"]%}checked{%endif%}>
<label class="form-check-label" for="PasteCheck">Item</label>
</div>
</li>
<li class="list-group-item text-left">
<div class="d-flex mt-1">
Union&nbsp;&nbsp;
<div class="custom-control custom-switch">
<input class="custom-control-input" type="checkbox" name="mode" value="True" id="mode" {%if dict_object["mode"]=="inter"%}checked{%endif%}>
<label class="custom-control-label" for="mode">Intersection</label>
</div>
</div>
</li>
<li class="list-group-item text-left">
<div class="form-check">
<input class="form-check-input" type="checkbox" value="True" id="CveCheck" name="CveCheck" {%if "cve" in dict_object["filter"]%}checked{%endif%}>
<label class="form-check-label" for="CveCheck">Cve</label>
</div>
<div class="form-check">
<input class="form-check-input" type="checkbox" value="True" id="CryptocurrencyCheck" name="CryptocurrencyCheck" {%if "cryptocurrency" in dict_object["filter"]%}checked{%endif%}>
<label class="form-check-label" for="CryptocurrencyCheck">Cryptocurrency</label>
</div>
<div class="form-check">
<input class="form-check-input" type="checkbox" value="True" id="DecodedCheck" name="DecodedCheck" {%if "decoded" in dict_object["filter"]%}checked{%endif%}>
<label class="form-check-label" for="DecodedCheck">Decoded</label>
</div>
<div class="form-check">
<input class="form-check-input" type="checkbox" value="True" id="ScreenshotCheck" name="ScreenshotCheck" {%if "screenshot" in dict_object["filter"]%}checked{%endif%}>
<label class="form-check-label" for="ScreenshotCheck">Screenshot</label>
</div>
<div class="form-check">
<input class="form-check-input" type="checkbox" value="True" id="PgpCheck" name="PgpCheck" {%if "pgp" in dict_object["filter"]%}checked{%endif%}>
<label class="form-check-label" for="PgpCheck">PGP</label>
</div>
<div class="form-check">
<input class="form-check-input" type="checkbox" value="True" id="UsernameCheck" name="UsernameCheck" {%if "username" in dict_object["filter"]%}checked{%endif%}>
<label class="form-check-label" for="UsernameCheck">Username</label>
</div>
<div class="form-check">
<input class="form-check-input" type="checkbox" value="True" id="DomainCheck" name="DomainCheck" {%if "domain" in dict_object["filter"]%}checked{%endif%}>
<label class="form-check-label" for="DomainCheck">Domain</label>
</div>
<div class="form-check">
<input class="form-check-input" type="checkbox" value="True" id="ItemCheck" name="ItemCheck" {%if "item" in dict_object["filter"]%}checked{%endif%}>
<label class="form-check-label" for="ItemCheck">Item</label>
</div>
<div class="form-group">
<label for="max_nb_nodes_in">Max number of nodes:</label>
<input class="form-control" type="number" value="{{dict_object["max_nodes"]}}" min="2" id="max_nb_nodes_in" name="max_nb_nodes_in">
</div>
</li>
<li class="list-group-item text-left">
<div class="d-flex mt-1">
Union&nbsp;&nbsp;
<div class="custom-control custom-switch">
<input class="custom-control-input" type="checkbox" name="mode" value="True" id="mode" {%if dict_object["mode"]=="inter"%}checked{%endif%}>
<label class="custom-control-label" for="mode">Intersection</label>
</div>
</div>
</li>
<li class="list-group-item text-left">
<div class="text-center">
<input class="btn btn-primary" type="submit" value="Redraw Graph">
</div>
<div class="form-group">
<label for="max_nb_nodes_in">Max number of nodes:</label>
<input class="form-control" type="number" value="{{dict_object["max_nodes"]}}" min="2" id="max_nb_nodes_in" name="max_nb_nodes_in">
</div>
</form>
<div class="text-center">
<input class="btn btn-primary" type="submit" value="Redraw Graph">
</div>
</li>
</li>
</form>
</ul>
<ul class="list-group">
<li class="list-group-item list-group-item-info"><i class="fas fa-info-circle fa-2x"></i></li>
<li class="list-group-item text-left">
<p>Double click on a node to open this object<br><br>
<svg height="26" width="26">
<g class="nodes">
<circle cx="13" cy="13" r="13" fill="orange"></circle>
<li class="list-group-item list-group-item-info"><i class="fas fa-info-circle fa-2x"></i></li>
<li class="list-group-item text-left">
<p>Double click on a node to open this object<br><br>
<svg height="26" width="26">
<g class="nodes">
<circle cx="13" cy="13" r="13" fill="orange"></circle>
</g>
</svg>
Current Correlation<br>
</p>
</li>
</ul>
Current Correlation<br>
</p>
</li>
</ul>
</div>
</div>
@ -236,7 +243,7 @@
</div>
</div>
{% if dict_object["object_type"] in ["decoded", "pgp", "cryptocurrency"] %}
{% if dict_object["object_type"] in ["cve", "decoded", "pgp", "cryptocurrency"] %}
<div class="card">
<div class="card-header">
<i class="fas fa-chart-bar"></i> Graph
@ -257,14 +264,16 @@ var all_graph = {};
$(document).ready(function(){
$("#page-Decoded").addClass("active");
all_graph.node_graph = create_graph("{{ url_for('correlation.graph_node_json') }}?correlation_id={{ dict_object["correlation_id"] }}&object_type={{ dict_object["object_type"] }}&mode={{ dict_object["mode"] }}&correlation_names={{ dict_object["correlation_names_str"] }}&correlation_objects={{ dict_object["correlation_objects_str"] }}&max_nodes={{dict_object["max_nodes"]}}{% if 'type_id' in dict_object["metadata"] %}&type_id={{ dict_object["metadata"]["type_id"] }}{% endif %}");
all_graph.node_graph = create_graph("{{ url_for('correlation.graph_node_json') }}?id={{ dict_object["correlation_id"] }}&type={{ dict_object["object_type"] }}&mode={{ dict_object["mode"] }}&filter={{ dict_object["filter_str"] }}&max_nodes={{dict_object["max_nodes"]}}{% if 'type_id' in dict_object["metadata"] %}&subtype={{ dict_object["metadata"]["type_id"] }}{% endif %}");
{% if dict_object["object_type"] == "pgp" %}
all_graph.line_chart = create_line_chart('graph_line', "{{ url_for('hashDecoded.pgpdump_graph_line_json') }}?type_id={{dict_object["metadata"]["type_id"]}}&key_id={{dict_object["correlation_id"]}}");
{% elif dict_object["object_type"] == "cryptocurrency" %}
all_graph.line_chart = create_line_chart('graph_line', "{{ url_for('hashDecoded.cryptocurrency_graph_line_json') }}?type_id={{dict_object["metadata"]["type_id"]}}&key_id={{dict_object["correlation_id"]}}");
{% elif dict_object["object_type"] == "decoded" %}
all_graph.line_chart = create_line_chart('graph_line', "{{ url_for('hashDecoded.hash_graph_line_json') }}?hash={{dict_object["correlation_id"]}}");
{% endif %}
{% elif dict_object["object_type"] == "cve" %}
all_graph.line_chart = create_line_chart('graph_line', "{{ url_for('hashDecoded.hash_graph_line_json') }}?hash={{dict_object["correlation_id"]}}");
{% endif %}
all_graph.onResize();
});

View file

@ -37,7 +37,7 @@
<h5 class="card-title">Crawl a Domain</h5>
</div>
<div class="card-body">
<p class="card-text">Enter a domain and choose what kind of data you want.</p>
<p class="card-text">Enter a URL or a domain and choose the options you want.</p>
<form action="{{ url_for('crawler_splash.send_to_spider') }}" method='post'>
<div class="row">
<div class="col-12 col-lg-6">
@ -49,22 +49,22 @@
<div class="custom-control custom-switch">
<input class="custom-control-input" type="checkbox" name="queue_type_selector" value="True" id="queue_type_selector">
<label class="custom-control-label" for="queue_type_selector">
<i class="fas fa-splotch"></i> &nbsp;Splash Name
<i class="fas fa-splotch"></i> &nbsp;Proxy
</label>
</div>
</div>
<div id="div_crawler_queue_type">
<select class="custom-select form-control" name="crawler_queue_type" id="crawler_queue_type">
{%for crawler_type in all_crawlers_types%}
{%for crawler_type in crawlers_types%}
<option value="{{crawler_type}}" {%if crawler_type=='tor'%}selected{%endif%}>{{crawler_type}}</option>
{%endfor%}
</select>
</div>
<div id="div_splash_name">
<select class="custom-select form-control" name="splash_name" id="splash_name">
<option value="None" selected>Don't use a special splash crawler</option>
{%for splash_name in all_splash_name%}
<option value="{{splash_name}}">{{splash_name}}</option>
<div id="div_proxy_name">
<select class="custom-select form-control" name="proxy_name" id="proxy_name">
<option value="None" selected>Use a proxy</option>
{%for proxy in proxies%}
<option value="{{proxy}}">{{proxy}}</option>
{%endfor%}
</select>
</div>
@ -122,15 +122,16 @@
<span class="input-group-text">Depth Limit</span>
</div>
</div>
<div class="input-group mt-2">
<div class="input-group-prepend">
<span class="input-group-text bg-light"><i class="fas fa-copy"></i>&nbsp;</span>
</div>
<input class="form-control" type="number" id="max_pages" name="max_pages" min="1" value="1" required>
<div class="input-group-append">
<span class="input-group-text">Max Pages</span>
</div>
</div>
{# TEMPORARILY DISABLED #}
{# <div class="input-group mt-2">#}
{# <div class="input-group-prepend">#}
{# <span class="input-group-text bg-light"><i class="fas fa-copy"></i>&nbsp;</span>#}
{# </div>#}
{# <input class="form-control" type="number" id="max_pages" name="max_pages" min="1" value="1" required>#}
{# <div class="input-group-append">#}
{# <span class="input-group-text">Max Pages</span>#}
{# </div>#}
{# </div>#}
</div>
</div>
@ -204,10 +205,10 @@ function manual_crawler_input_controler() {
function queue_type_selector_input_controler() {
if($('#queue_type_selector').is(':checked')){
$("#div_crawler_queue_type").hide();
$("#div_splash_name").show();
$("#div_proxy_name").show();
}else{
$("#div_crawler_queue_type").show();
$("#div_splash_name").hide();
$("#div_proxy_name").hide();
}
}

View file

@ -7,10 +7,13 @@
<!-- Core CSS -->
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/daterangepicker.min.css') }}" rel="stylesheet">
<!-- JS -->
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/moment.min.js') }}"></script>
<script src="{{ url_for('static', filename='js/jquery.daterangepicker.min.js') }}"></script>
</head>
@ -33,15 +36,15 @@
<div class="card mt-1 mb-1">
<div class="card-header text-white bg-dark">
<h5><a class="text-info" href="{{ url_for('hiddenServices.Crawler_Splash_last_by_type')}}?type=onion"><i class="fas fa-user-secret"></i> Onions Crawlers</a></h5>
<h5><a class="text-info" href="{{ url_for('crawler_splash.crawlers_last_domains')}}?type=onion"><i class="fas fa-user-secret"></i> Onions Crawlers</a></h5>
<div class="row">
<div class="col-6">
<a href="{{ url_for('hiddenServices.show_domains_by_daterange') }}?service_type=onion&domains_up=True&date_from={{date}}&date_to={{date}}" class="badge badge-success" id="stat_onion_domain_up">{{ splash_crawlers_latest_stats['onion']['domains_up'] }}</a> UP
<a href="{{ url_for('hiddenServices.show_domains_by_daterange') }}?service_type=onion&domains_down=True&date_from={{date}}&date_to={{date}}" class="badge badge-danger ml-md-3" id="stat_onion_domain_down">{{ splash_crawlers_latest_stats['onion']['domains_down'] }}</a> DOWN
<a href="{{ url_for('crawler_splash.domains_search_date') }}?service_type=onion&domains_up=True&date_from={{date}}&date_to={{date}}" class="badge badge-success" id="stat_onion_domain_up">{{ crawlers_latest_stats['onion']['up'] }}</a> UP
<a href="{{ url_for('crawler_splash.domains_search_date') }}?service_type=onion&domains_down=True&date_from={{date}}&date_to={{date}}" class="badge badge-danger ml-md-3" id="stat_onion_domain_down">{{ crawlers_latest_stats['onion']['down'] }}</a> DOWN
</div>
<div class="col-6">
<a href="{{ url_for('hiddenServices.show_domains_by_daterange') }}?service_type=onion&domains_up=True&domains_down=True&date_from={{date}}&date_to={{date}}" class="badge badge-success" id="stat_onion_total">{{ splash_crawlers_latest_stats['onion']['total'] }}</a> Crawled
<span class="badge badge-warning ml-md-3" id="stat_onion_queue">{{ splash_crawlers_latest_stats['onion']['domains_queue'] }}</span> Queue
<a href="{{ url_for('crawler_splash.domains_search_date') }}?service_type=onion&domains_up=True&domains_down=True&date_from={{date}}&date_to={{date}}" class="badge badge-success" id="stat_onion_total">{{ crawlers_latest_stats['onion']['crawled'] }}</a> Crawled
<span class="badge badge-warning ml-md-3" id="stat_onion_queue">{{ crawlers_latest_stats['onion']['queue'] }}</span> Queue
</div>
</div>
</div>
@ -51,15 +54,15 @@
<div class="col-xl-6">
<div class="card mt-1 mb-1">
<div class="card-header text-white bg-dark">
<h5><a class="text-info" href="{{ url_for('hiddenServices.Crawler_Splash_last_by_type')}}?type=regular"><i class="fab fa-html5"></i> Regular Crawlers</a></h5>
<h5><a class="text-info" href="{{ url_for('crawler_splash.crawlers_last_domains')}}?type=web"><i class="fab fa-html5"></i> Web Crawlers</a></h5>
<div class="row">
<div class="col-6">
<a href="{{ url_for('hiddenServices.show_domains_by_daterange') }}?service_type=regular&domains_up=True&date_from={{date}}&date_to={{date}}" class="badge badge-success" id="stat_regular_domain_up">{{ splash_crawlers_latest_stats['regular']['domains_up'] }}</a> UP
<a href="{{ url_for('hiddenServices.show_domains_by_daterange') }}?service_type=regular&domains_down=True&date_from={{date}}&date_to={{date}}" class="badge badge-danger ml-md-3" id="stat_regular_domain_down">{{ splash_crawlers_latest_stats['regular']['domains_down'] }}</a> DOWN
<a href="{{ url_for('crawler_splash.domains_search_date') }}?service_type=web&domains_up=True&date_from={{date}}&date_to={{date}}" class="badge badge-success" id="stat_web_domain_up">{{ crawlers_latest_stats['web']['up'] }}</a> UP
<a href="{{ url_for('crawler_splash.domains_search_date') }}?service_type=web&domains_down=True&date_from={{date}}&date_to={{date}}" class="badge badge-danger ml-md-3" id="stat_web_domain_down">{{ crawlers_latest_stats['web']['down'] }}</a> DOWN
</div>
<div class="col-6">
<a href="{{ url_for('hiddenServices.show_domains_by_daterange') }}?service_type=regular&domains_up=True&domains_down=True&date_from={{date}}&date_to={{date}}" class="badge badge-success" id="stat_regular_total">{{ splash_crawlers_latest_stats['regular']['total'] }}</a> Crawled
<span class="badge badge-warning ml-md-3" id="stat_regular_queue">{{ splash_crawlers_latest_stats['regular']['domains_queue'] }}</span> Queue
<a href="{{ url_for('crawler_splash.domains_search_date') }}?service_type=web&domains_up=True&domains_down=True&date_from={{date}}&date_to={{date}}" class="badge badge-success" id="stat_web_total">{{ crawlers_latest_stats['web']['crawled'] }}</a> Crawled
<span class="badge badge-warning ml-md-3" id="stat_web_queue">{{ crawlers_latest_stats['web']['queue'] }}</span> Queue
</div>
</div>
</div>
@ -69,23 +72,23 @@
<table class="table">
<tbody id="tbody_crawler_onion_info">
{% for splash_crawler in all_splash_crawler_status %}
{% for crawler in crawlers_status %}
<tr>
<td>
<i class="fas fa-{%if splash_crawler['status']%}check{%else%}times{%endif%}-circle" style="color:{%if splash_crawler['status']%}Green{%else%}Red{%endif%};"></i> {{splash_crawler['crawler_info']}}
<i class="fas fa-{%if crawler['status']%}check{%else%}times{%endif%}-circle" style="color:{%if crawler['status']%}Green{%else%}Red{%endif%};"></i> {{crawler['start_time']}}
</td>
<td>
{%if splash_crawler['type']=='onion'%}
{%if crawler['type']=='onion'%}
<i class="fas fa-user-secret"></i>
{%else%}
<i class="fab fa-html5"></i>
{%endif%}
</td>
<td>
{{splash_crawler['crawling_domain']}}
{{crawler['domain']}}
</td>
<td style="color:{%if splash_crawler['status']%}Green{%else%}Red{%endif%};">
{{splash_crawler['status_info']}}
<td style="color:{%if crawler['status']%}Green{%else%}Red{%endif%};">
{{crawler['status']}}
</td>
</tr>
{% endfor %}
@ -93,6 +96,9 @@
</table>
{% include 'domains/block_domains_name_search.html' %}
<div class="d-flex justify-content-center my-4">
{% include 'crawler/show_domains_by_daterange.html' %}
</div>
<hr>
<div class="row mb-3">
@ -134,6 +140,32 @@ var to_refresh = false
$(document).ready(function(){
$("#page-Crawler").addClass("active");
$("#nav_dashboard").addClass("active");
$('#date-range-from').dateRangePicker({
separator : ' to ',
getValue: function(){
if ($('#date-range-from-input').val() && $('#date-range-to-input').val() )
return $('#date-range-from-input').val() + ' to ' + $('#date-range-to-input').val();
else
return '';
},
setValue: function(s,s1,s2){
$('#date-range-from-input').val(s1);
$('#date-range-to-input').val(s2);
}
});
$('#date-range-to').dateRangePicker({
separator : ' to ',
getValue: function(){
if ($('#date-range-from-input').val() && $('#date-range-to-input').val() )
return $('#date-range-from-input').val() + ' to ' + $('#date-range-to-input').val();
else
return '';
},
setValue: function(s,s1,s2){
$('#date-range-from-input').val(s1);
$('#date-range-to-input').val(s2);
}
});
$( window ).on("focus", function() {
to_refresh = true
refresh_crawler_status();
@ -144,6 +176,7 @@ $(document).ready(function(){
to_refresh = true
refresh_crawler_status();
});
function toggle_sidebar(){
@ -165,21 +198,21 @@ function refresh_crawler_status(){
$.getJSON("{{ url_for('crawler_splash.crawler_dashboard_json') }}",
function(data) {
$('#stat_onion_domain_up').text(data.splash_crawlers_latest_stats['onion']['domains_up']);
$('#stat_onion_domain_down').text(data.splash_crawlers_latest_stats['onion']['domains_down']);
$('#stat_onion_total').text(data.splash_crawlers_latest_stats['onion']['total']);
$('#stat_onion_queue').text(data.splash_crawlers_latest_stats['onion']['domains_queue']);
$('#stat_onion_domain_up').text(data.stats['onion']['up']);
$('#stat_onion_domain_down').text(data.stats['onion']['down']);
$('#stat_onion_total').text(data.stats['onion']['crawled']);
$('#stat_onion_queue').text(data.stats['onion']['queue']);
$('#stat_regular_domain_up').text(data.splash_crawlers_latest_stats['regular']['domains_up']);
$('#stat_regular_domain_down').text(data.splash_crawlers_latest_stats['regular']['domains_down']);
$('#stat_regular_total').text(data.splash_crawlers_latest_stats['regular']['total']);
$('#stat_regular_queue').text(data.splash_crawlers_latest_stats['regular']['domains_queue']);
$('#stat_web_domain_up').text(data.stats['web']['up']);
$('#stat_web_domain_down').text(data.stats['web']['down']);
$('#stat_web_total').text(data.stats['web']['crawled']);
$('#stat_web_queue').text(data.stats['web']['queue']);
if(data.all_splash_crawler_status.length!=0){
if(data.crawlers_status.length!=0){
$("#tbody_crawler_onion_info").empty();
var tableRef = document.getElementById('tbody_crawler_onion_info');
for (var i = 0; i < data.all_splash_crawler_status.length; i++) {
var crawler = data.all_splash_crawler_status[i];
for (var i = 0; i < data.crawlers_status.length; i++) {
var crawler = data.crawlers_status[i];
var newRow = tableRef.insertRow(tableRef.rows.length);
var text_color;
var icon;
@ -198,16 +231,16 @@ function refresh_crawler_status(){
}
var newCell = newRow.insertCell(0);
newCell.innerHTML = "<td><i class=\"fas fa-"+icon+"-circle\" style=\"color:"+text_color+";\"></i> "+crawler['crawler_info']+"</td>";
newCell.innerHTML = "<td><i class=\"fas fa-"+icon+"-circle\" style=\"color:"+text_color+";\"></i> "+crawler['start_time']+"</td>";
var newCell = newRow.insertCell(1);
newCell.innerHTML = "<td><i class=\""+icon_t+"\"></i></td>";
newCell = newRow.insertCell(2);
newCell.innerHTML = "<td>"+crawler['crawling_domain']+"</td>";
newCell.innerHTML = "<td>"+crawler['domain']+"</td>";
newCell = newRow.insertCell(3);
newCell.innerHTML = "<td><div style=\"color:"+text_color+";\">"+crawler['status_info']+"</div></td>";
newCell.innerHTML = "<td><div style=\"color:"+text_color+";\">"+crawler['status']+"</div></td>";
//$("#panel_crawler").show();
}

View file

@ -0,0 +1,154 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>AIL-Framework</title>
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png')}}">
<!-- Core CSS -->
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/daterangepicker.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/dataTables.bootstrap.min.css') }}" rel="stylesheet">
<!-- JS -->
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
<script src="{{ url_for('static', filename='js/jquery.dataTables.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/dataTables.bootstrap.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/moment.min.js') }}"></script>
<script src="{{ url_for('static', filename='js/jquery.daterangepicker.min.js') }}"></script>
</head>
<body>
{% include 'nav_bar.html' %}
<div class="container-fluid">
<div class="row">
{% include 'crawler/menu_sidebar.html' %}
<div class="col-12 col-lg-10" id="core_content">
{% include 'crawler/show_domains_by_daterange.html' %}
{% for date in dict_domains %}
<div class="card text-center mt-1 mb-3">
<div class="card-header bg-dark text-white">
<h3 style="text-align:center;">{{'{}/{}/{}'.format(date[0:4], date[4:6], date[6:8])}}</h3>
</div>
<div class="card-body px-1">
<table id="table_{{date}}" class="table table-striped table-bordered">
<thead class="bg-dark text-white">
<tr>
<th>Domain</th>
<th>First Seen</th>
<th>Last Check</th>
<th>Status</th>
</tr>
</thead>
<tbody>
{% for dict_domain in dict_domains[date] %}
<tr>
<td>
<a target="_blank" href="{{ url_for('crawler_splash.showDomain') }}?domain={{ dict_domain['domain'] }}">{{ dict_domain['domain'] }}</a>
<div>
{% for tag in dict_domain['tags'] %}
<a href="{{ url_for('tags_ui.get_obj_by_tags') }}?object_type=domain&ltags={{ tag }}">
<span class="badge badge-{{ bootstrap_label[loop.index0 % 5] }}">{{ tag }}</span>
</a>
{% endfor %}
</div>
</td>
<td>{{dict_domain['first_seen']}}</td>
<td>{{dict_domain['last_check']}}</td>
<td>
{% if dict_domain['status'] %}
<div style="color:Green; display:inline-block">
<i class="fas fa-check-circle"></i> UP
</div>
{% else %}
<div style="color:Red; display:inline-block">
<i class="fas fa-times-circle"></i> DOWN
</div>
{% endif %}
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
{% endfor %}
</div>
</div>
</div>
</body>
<script>
$(document).ready(function(){
$("#page-Crawler").addClass("active");
$('#date-range-from').dateRangePicker({
separator : ' to ',
getValue: function(){
if ($('#date-range-from-input').val() && $('#date-range-to-input').val() )
return $('#date-range-from-input').val() + ' to ' + $('#date-range-to-input').val();
else
return '';
},
setValue: function(s,s1,s2){
$('#date-range-from-input').val(s1);
$('#date-range-to-input').val(s2);
}
});
$('#date-range-to').dateRangePicker({
separator : ' to ',
getValue: function(){
if ($('#date-range-from-input').val() && $('#date-range-to-input').val() )
return $('#date-range-from-input').val() + ' to ' + $('#date-range-to-input').val();
else
return '';
},
setValue: function(s,s1,s2){
$('#date-range-from-input').val(s1);
$('#date-range-to-input').val(s2);
}
});
{% for date in dict_domains %}
$('#table_{{date}}').DataTable({
"aLengthMenu": [[5, 15, 30, -1], [5, 15, 30, "All"]],
"iDisplayLength": 15,
"order": [[ 0, "asc" ]]
});
{% endfor %}
});
// Collapses the side navigation when it is visible, expands it otherwise,
// and adjusts the column classes of the sidebar and main content to match.
function toggle_sidebar() {
    var navMenu = $('#nav_menu');
    var sideMenu = $('#side_menu');
    var coreContent = $('#core_content');

    if (navMenu.is(':visible')) {
        navMenu.hide();
        sideMenu.removeClass('border-right col-lg-2');
        coreContent.removeClass('col-lg-10');
        return;
    }

    navMenu.show();
    sideMenu.addClass('border-right col-lg-2');
    coreContent.addClass('col-lg-10');
}
</script>
</html>

View file

@ -0,0 +1,338 @@
<!DOCTYPE html>
<html>
<head>
<title>AIL-Framework</title>
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png')}}">
<!-- Core CSS -->
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/daterangepicker.min.css') }}" rel="stylesheet">
<!-- JS -->
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
<script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
<script language="javascript" src="{{ url_for('static', filename='js/moment.min.js') }}"></script>
<script language="javascript" src="{{ url_for('static', filename='js/jquery.daterangepicker.min.js') }}"></script>
<script language="javascript" src="{{ url_for('static', filename='js/d3.min.js') }}"></script>
<style>
.bar {
fill: steelblue;
}
.bar:hover{
fill: brown;
cursor: pointer;
}
.bar_stack:hover{
cursor: pointer;
}
.popover{
max-width: 100%;
}
.domain_name {
display:inline-block;
overflow: hidden;
white-space: nowrap;
text-overflow: ellipsis;
max-width: 400px;
}
</style>
</head>
<body>
{% include 'nav_bar.html' %}
<div class="container-fluid">
<div class="row">
{% include 'crawler/menu_sidebar.html' %}
<div class="col-12 col-lg-10" id="core_content">
{% include 'crawler/crawler_disabled.html' %}
<div class="row">
<div class="col-12 col-xl-6">
<div class="table-responsive mt-1 table-hover table-borderless table-striped">
<table class="table">
<thead class="thead-dark">
<tr>
<th>Domain</th>
<th>First Seen</th>
<th>Last Check</th>
<th>Status</th>
</tr>
</thead>
<tbody id="tbody_last_crawled">
{% for domain in domains %}
<tr data-toggle="popover" data-trigger="hover"
title="<span class='badge badge-dark'>{{domain['domain']}}</span>"
data-content="epoch: {{domain['epoch']}}<br>last status: {{ domain['status'] }}">
<td><a target="_blank" class="domain_name" href="{{ url_for('crawler_splash.showDomain') }}?domain={{ domain['domain'] }}&epoch={{domain['epoch']}}">{{ domain['domain'] }}</a></td>
<td>{{domain['first_seen']}}</td>
<td>{{domain['last_check']}}</td>
<td>
{% if domain['status_epoch'] %}
<div style="color:Green; display:inline-block">
<i class="fas fa-check-circle"></i> UP
</div>
{% else %}
<div style="color:Red; display:inline-block">
<i class="fas fa-times-circle"></i> DOWN
</div>
{% endif %}
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
<a href="{{ url_for('hiddenServices.blacklisted_domains') }}?type={{type}}">
<button type="button" class="btn btn-outline-danger">Show Blacklisted {{type_name}}s</button>
</a>
</div>
<div class="col-12 col-xl-6">
{% include 'crawler/show_domains_by_daterange.html' %}
<div id="barchart_type"></div>
</div>
</div>
</div>
</div>
</div>
</body>
<script>
// Shared holder for chart helpers; `onResize` is attached by the chart script of this page.
var chart = {};

// Page initialisation: marks the active nav entries, wires both date-range
// pickers, requests the bar chart data and enables the row popovers.
$(document).ready(function () {
    // Returns a fresh option object per picker; both pickers read and write
    // the same pair of inputs.
    function rangePickerOptions() {
        return {
            separator : ' to ',
            getValue: function () {
                var fromValue = $('#date-range-from-input').val();
                var toValue = $('#date-range-to-input').val();
                // Only report a range once both ends are filled in.
                return (fromValue && toValue) ? fromValue + ' to ' + toValue : '';
            },
            setValue: function (s, s1, s2) {
                $('#date-range-from-input').val(s1);
                $('#date-range-to-input').val(s2);
            }
        };
    }

    $("#page-Crawler").addClass("active");
    $("#nav_{{type}}_crawler").addClass("active");

    $('#date-range-from').dateRangePicker(rangePickerOptions());
    $('#date-range-to').dateRangePicker(rangePickerOptions());

    chart.stackBarChart =barchart_type_stack("{{ url_for('crawler_splash.crawlers_last_domains_json') }}?type={{type}}", 'id');

    // Size the chart once now, then keep it in sync with the window.
    chart.onResize();
    $(window).on("resize", function () {
        chart.onResize();
    });

    var popoverOptions = {
        placement: 'top',
        container: 'body',
        html : true,
    };
    $('[data-toggle="popover"]').popover(popoverOptions);
});
// Collapses the side navigation when it is visible, expands it otherwise,
// and adjusts the column classes of the sidebar and main content to match.
function toggle_sidebar() {
    var navMenu = $('#nav_menu');
    var sideMenu = $('#side_menu');
    var coreContent = $('#core_content');

    if (navMenu.is(':visible')) {
        navMenu.hide();
        sideMenu.removeClass('border-right col-lg-2');
        coreContent.removeClass('col-lg-10');
        return;
    }

    navMenu.show();
    sideMenu.addClass('border-right col-lg-2');
    coreContent.addClass('col-lg-10');
}
</script>
<script>
// Fixed drawing geometry for the stacked bar chart. The drawing area is a
// 1000x500 box minus the margins; on-screen scaling is left to chart.onResize.
// `width` and `height` are declared in the same `var` statement so that
// `height` is no longer created as an undeclared implicit global, and the
// container-width measurement that was immediately overwritten is gone.
var margin = {top: 20, right: 90, bottom: 55, left: 0},
    width = 1000 - margin.left - margin.right,
    height = 500 - margin.top - margin.bottom;

// x: one band per date; y: linear, with the range inverted because SVG y grows downwards.
var x = d3.scaleBand().rangeRound([0, width]).padding(0.1);
var y = d3.scaleLinear().rangeRound([height, 0]);

var xAxis = d3.axisBottom(x);
var yAxis = d3.axisLeft(y);

// Maps a series name to a fill colour.
var color = d3.scaleOrdinal(d3.schemeSet3);

// Root <svg> inside #barchart_type; `svg` refers to the inner <g> shifted by the margins.
var svg = d3.select("#barchart_type").append("svg")
    .attr("id", "thesvg")
    .attr("viewBox", "0 0 "+width+" 500")
    .attr("width", width + margin.left + margin.right)
    .attr("height", height + margin.top + margin.bottom)
  .append("g")
    .attr("transform", "translate(" + margin.left + "," + margin.top + ")");
// Fetches JSON from `url` and draws a stacked bar chart into the shared `svg`
// group: one stack per data row (placed by its `date`), one segment per other key.
// The `id` argument is accepted but not used here. Nothing is returned; all
// drawing happens asynchronously once the request resolves.
function barchart_type_stack(url, id) {
d3.json(url)
.then(function(data){
var labelVar = 'date'; // key that holds the x-axis label of each row
// Series names: every key of the first row except the label key.
var varNames = d3.keys(data[0])
.filter(function (key) { return key !== labelVar;});
// For every row, compute the cumulative [y0, y1] extent of each series segment.
data.forEach(function (d) {
var y0 = 0;
d.mapping = varNames.map(function (name) {
return {
name: name,
label: d[labelVar],
y0: y0,
// unary + coerces the value to a number before it is added to the running total
y1: y0 += +d[name]
};
});
// Stack height is the upper bound of the last segment.
d.total = d.mapping[d.mapping.length - 1].y1;
});
x.domain(data.map(function (d) { return (d.date); })); // one band per date
y.domain([0, d3.max(data, function (d) { return d.total; })]);
// Bottom axis; tick labels are rotated and made clickable (click only navigates to "#").
svg.append("g")
.attr("class", "x axis")
.attr("transform", "translate(0," + height + ")")
.call(xAxis)
.selectAll("text")
.attr("class", "bar")
.on("click", function (d) { window.location.href = "#" })
.attr("transform", "rotate(-18)" )
//.attr("transform", "rotate(-40)" )
.style("text-anchor", "end");
// Left axis, with an (empty) rotated text element appended to it.
svg.append("g")
.attr("class", "y axis")
.call(yAxis)
.append("text")
.attr("transform", "rotate(-90)")
.attr("y", 6)
.attr("dy", ".71em")
.style("text-anchor", "end");
// One <g> per row, shifted horizontally to that row's band.
var selection = svg.selectAll(".series")
.data(data)
.enter().append("g")
.attr("class", "series")
.attr("transform", function (d) { return "translate(" + x((d.date)) + ",0)"; });
// One <rect> per series segment; hovering shows a popover with the segment details.
selection.selectAll("rect")
.data(function (d) { return d.mapping; })
.enter().append("rect")
.attr("class", "bar_stack")
.attr("width", x.bandwidth())
.attr("y", function (d) { return y(d.y1); })
.attr("height", function (d) { return y(d.y0) - y(d.y1); })
.style("fill", function (d) { return color(d.name); })
.style("stroke", "grey")
.on("mouseover", function (d) { showPopover.call(this, d); })
.on("mouseout", function (d) { removePopovers(); })
.on("click", function(d){ window.location.href = "#" });
// Print the total above every non-empty stack, centred on its band.
data.forEach(function(d) {
if(d.total != 0){
svg.append("text")
.attr("class", "bar")
.attr("dy", "-.35em")
.attr('x', x(d.date) + x.bandwidth()/2)
.attr('y', y(d.total))
.on("click", function () {window.location.href = "#" })
.style("text-anchor", "middle")
.text(d.total);
}
});
drawLegend(varNames);
});
}
// Draws the chart legend: one row per series name, listed in reverse order,
// each made of a small colour swatch and the name right-aligned next to it.
function drawLegend (varNames) {
    var reversedNames = varNames.slice().reverse();

    // One <g> per name, stacked vertically 20 units apart.
    var rows = svg.selectAll(".legend")
        .data(reversedNames)
        .enter().append("g");
    rows.attr("class", "legend");
    rows.attr("transform", function (name, index) { return "translate(0," + index * 20 + ")"; });

    // Colour swatch, filled through the shared colour scale.
    var swatches = rows.append("rect");
    swatches.attr("x", 943);
    swatches.attr("width", 10);
    swatches.attr("height", 10);
    swatches.style("fill", color);
    swatches.style("stroke", "grey");

    // Series name, anchored at its end so it sits left of the swatch.
    var labels = rows.append("text");
    labels.attr("class", "svgText");
    labels.attr("x", 941);
    labels.attr("y", 6);
    labels.attr("dy", ".35em");
    labels.style("text-anchor", "end");
    labels.text(function (name) { return name; });
}
// Removes every element carrying the `popover` class from the document.
function removePopovers () {
    $('.popover').remove();
}
// Attaches a manually-triggered popover to the element bound as `this` and
// shows it. The title is the segment name; the body is the segment label
// followed by its count (d.value when truthy, otherwise the y1 - y0 extent).
function showPopover (d) {
    var target = $(this);
    var popoverOptions = {
        title: d.name,
        placement: 'top',
        container: 'body',
        trigger: 'manual',
        html : true,
        content: function () {
            var count = d.value ? d.value : d.y1 - d.y0;
            return d.label + "<br/>num: " + d3.format(",")(count);
        }
    };
    target.popover(popoverOptions);
    target.popover('show');
}
// Resizes the chart's <svg> to the width of its parent element, with a height
// of half that width. The local is named `svgElement` so it no longer shadows
// the outer `chart` object, and the previously unused `aspect` local is gone.
chart.onResize = function () {
    var svgElement = $("#thesvg");
    var targetWidth = svgElement.parent().width();
    svgElement.attr("width", targetWidth);
    svgElement.attr("height", targetWidth / 2);
};
window.chart = chart;
</script>

View file

@ -68,17 +68,17 @@
<table class="table table-sm">
<tbody>
<tr>
<td>Splash Manager URL</td>
<td>{{splash_manager_url}}</td>
<td>Lacus URL</td>
<td>{{lacus_url}}</td>
</tr>
<tr>
<td>API Key</td>
{# <td>API Key</td>#}
{# <td>#}
{# {{api_key}}#}
{# <!-- <a class="ml-3" href="/settings/new_token"><i class="fa fa-random"></i></a> -->#}
{# </td>#}
<td>
{{api_key}}
<!-- <a class="ml-3" href="/settings/new_token"><i class="fa fa-random"></i></a> -->
</td>
<td>
<a href="{{ url_for('crawler_splash.crawler_splash_setings_crawler_manager') }}">
<a href="{{ url_for('crawler_splash.crawler_lacus_settings_crawler_manager') }}">
<button type="button" class="btn btn-info">
Edit <i class="fas fa-pencil-alt"></i>
</button>
@ -92,126 +92,52 @@
</div>
</div>
<div {%if not is_manager_connected%}class="hidden"{%endif%}>
<div class="card border-secondary mb-4">
<div class="card-body text-dark">
<h5 class="card-title">All Splash Crawlers:</h5>
<table class="table table-striped">
<thead class="bg-info text-white">
<th>
Splash name
</th>
<th>
Proxy
</th>
<th>
Crawler type
</th>
<th>
Description
</th>
<th></th>
</thead>
<tbody>
{% for splash_name in all_splash %}
<tr>
<td>
{{splash_name}}
</td>
<td>
{{all_splash[splash_name]['proxy']}}
</td>
<td>
{%if all_splash[splash_name]['type']=='tor'%}
<i class="fas fa-user-secret"></i>
{%else%}
<i class="fab fa-html5">
{%endif%}
{{all_splash[splash_name]['type']}}
</td>
<td>
{{all_splash[splash_name]['description']}}
</td>
<td>
<div class="d-flex justify-content-end">
<!-- <button class="btn btn-outline-dark px-1 py-0">
<i class="fas fa-pencil-alt"></i>
</button> -->
</div>
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
<div class="card border-secondary">
<div class="card-body text-dark">
<h5 class="card-title">All Proxies:</h5>
<table class="table table-striped">
<thead class="bg-info text-white">
<th>
Proxy name
</th>
<th>
Host
</th>
<th>
Port
</th>
<th>
Type
</th>
<th>
Crawler Type
</th>
<th>
Description
</th>
<th></th>
</thead>
<tbody>
{% for proxy_name in all_proxies %}
<tr>
<td>
{{proxy_name}}
</td>
<td>
{{all_proxies[proxy_name]['host']}}
</td>
<td>
{{all_proxies[proxy_name]['port']}}
</td>
<td>
{{all_proxies[proxy_name]['type']}}
</td>
<td>
{%if all_proxies[proxy_name]['crawler_type']=='tor'%}
<i class="fas fa-user-secret"></i>
{%else%}
<i class="fab fa-html5">
{%endif%}
{{all_proxies[proxy_name]['crawler_type']}}
</td>
<td>
{{all_proxies[proxy_name]['description']}}
</td>
<td>
<div class="d-flex justify-content-end">
<!-- <button class="btn btn-outline-dark px-1 py-0">
<i class="fas fa-pencil-alt"></i>
</button> -->
</div>
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
</div>
<div class="card border-secondary">
<div class="card-body text-dark">
<h5 class="card-title">All Proxies:</h5>
<table class="table table-striped">
<thead class="bg-info text-white">
<tr>
<th>Proxy name</th>
<th>URL</th>
<th>Crawler Type</th>
<th>Description</th>
<th></th>
</tr>
</thead>
<tbody>
{% for proxy_name in all_proxies %}
<tr>
<td>
{{proxy_name}}
</td>
<td>
{{all_proxies[proxy_name]['url']}}
</td>
<td>
{%if all_proxies[proxy_name]['crawler_type']=='tor'%}
<i class="fas fa-user-secret"></i>
{%else%}
<i class="fab fa-html5"></i>
{%endif%}
{{all_proxies[proxy_name]['crawler_type']}}
</td>
<td>
{{all_proxies[proxy_name]['description']}}
</td>
<td>
<div class="d-flex justify-content-end">
<!-- <button class="btn btn-outline-dark px-1 py-0">
<i class="fas fa-pencil-alt"></i>
</button> -->
</div>
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
</div>
</div>
@ -242,7 +168,7 @@
{{crawler_error_mess}}
</pre>
<a href="{{ url_for('crawler_splash.crawler_splash_setings_test_crawler') }}">
<a href="{{ url_for('crawler_splash.crawler_settings_crawler_test') }}">
<button type="button" class="btn btn-primary">
ReRun Test <i class="fas fa-rocket"></i>
</button>

View file

@ -0,0 +1,61 @@
<!DOCTYPE html>
<html>
<head>
<title>AIL-Framework</title>
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png')}}">
<!-- Core CSS -->
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
<!-- JS -->
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
</head>
<body>
{% include 'nav_bar.html' %}
<div class="container-fluid">
<div class="row">
{% include 'crawler/menu_sidebar.html' %}
<div class="col-12 col-lg-10" id="core_content">
<div class="card my-2">
<div class="card-header bg-dark text-white">
<h3 class="card-title"> Lacus Config:</h3>
<form action="{{ url_for('crawler_splash.crawler_lacus_settings_crawler_manager') }}" method="post" enctype="multipart/form-data">
<div class="form-group">
<label for="splash_manager_url">Lacus Server URL</label>
<input type="text" class="form-control" id="splash_manager_url" required placeholder="https://lacus_url" name="lacus_url" {%if lacus_url%}value="{{lacus_url}}"{%endif%}>
</div>
{# <div class="form-group">#}
{# <label for="api_key">API Key</label>#}
{# <input type="text" class="form-control" id="api_key" placeholder="API Key" name="api_key" {%if api_key%}value="{{api_key}}"{%endif%}>#}
{# </div>#}
<button type="submit" class="btn btn-primary">Edit Lacus <i class="fas fa-pencil-alt"></i></button>
</form>
</div>
</div>
</div>
</div>
</div>
</body>
<script>
// Page-level flag, initialised to false; it is not read within this script block.
var to_refresh = false;
// Highlight the Crawler page and the Settings entry in the navigation once the DOM is ready.
$(document).ready(function () {
    $("#page-Crawler, #nav_settings").addClass("active");
});
</script>

View file

@ -1,55 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<title>AIL-Framework</title>
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png')}}">
<!-- Core CSS -->
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
<!-- JS -->
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
</head>
<body>
{% include 'nav_bar.html' %}
<div class="container-fluid">
<div class="row">
{% include 'crawler/menu_sidebar.html' %}
<div class="col-12 col-lg-10" id="core_content">
<form action="{{ url_for('crawler_splash.crawler_splash_setings_crawler_manager') }}" method="post" enctype="multipart/form-data">
<div class="form-group">
<label for="splash_manager_url">Splash Manager URL</label>
<input type="text" class="form-control" id="splash_manager_url" placeholder="https://splash_manager_url" name="splash_manager_url" {%if splash_manager_url%}value="{{splash_manager_url}}"{%endif%}>
</div>
<div class="form-group">
<label for="api_key">API Key</label>
<input type="text" class="form-control" id="api_key" placeholder="API Key" name="api_key" {%if api_key%}value="{{api_key}}"{%endif%}>
</div>
<button type="submit" class="btn btn-primary">Edit <i class="fas fa-pencil-alt"></i></button>
</form>
</div>
</div>
</div>
</body>
<script>
// Page-level flag, initialised to false; it is not read within this script block.
var to_refresh = false;
// Highlight the Crawler page and the Settings entry in the navigation once the DOM is ready.
$(document).ready(function () {
    $("#page-Crawler, #nav_settings").addClass("active");
});
</script>

Some files were not shown because too many files have changed in this diff Show more