chg: [crawler + core + cve] migrate crawler to lacus + add new CVE object and correlation + migrate core

Terrtia 2022-10-25 16:25:19 +02:00
parent eeff786ea5
commit 104eaae793
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
109 changed files with 4310 additions and 4551 deletions


@@ -1,457 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*

import os
import sys
import re
import uuid
import json
import redis
import datetime
import time
import subprocess
import requests

from collections import deque
from pyfaup.faup import Faup

sys.path.append(os.environ['AIL_BIN'])
from Helper import Process
from pubsublogger import publisher

sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
import crawlers

# ======== FUNCTIONS ========

def load_blacklist(service_type):
    try:
        with open(os.environ['AIL_BIN']+'/torcrawler/blacklist_{}.txt'.format(service_type), 'r') as f:
            redis_crawler.delete('blacklist_{}'.format(service_type))
            lines = f.read().splitlines()
            for line in lines:
                redis_crawler.sadd('blacklist_{}'.format(service_type), line)
    except Exception:
        pass

def update_auto_crawler():
    current_epoch = int(time.time())
    list_to_crawl = redis_crawler.zrangebyscore('crawler_auto_queue', '-inf', current_epoch)
    for elem_to_crawl in list_to_crawl:
        mess, type = elem_to_crawl.rsplit(';', 1)
        redis_crawler.sadd('{}_crawler_priority_queue'.format(type), mess)
        redis_crawler.zrem('crawler_auto_queue', elem_to_crawl)

# Extract info form url (url, domain, domain url, ...)
def unpack_url(url):
    to_crawl = {}
    faup.decode(url)
    url_unpack = faup.get()
    # # FIXME: # TODO: remove me
    try:
        to_crawl['domain'] = url_unpack['domain'].decode()
    except:
        to_crawl['domain'] = url_unpack['domain']
    to_crawl['domain'] = to_crawl['domain'].lower()

    # force lower case domain/subdomain (rfc4343)
    # # FIXME: # TODO: remove me
    try:
        url_host = url_unpack['host'].decode()
    except:
        url_host = url_unpack['host']
    new_url_host = url_host.lower()
    url_lower_case = url.replace(url_host, new_url_host, 1)

    if url_unpack['scheme'] is None:
        to_crawl['scheme'] = 'http'
        url= 'http://{}'.format(url_lower_case)
    else:
        # # FIXME: # TODO: remove me
        try:
            scheme = url_unpack['scheme'].decode()
        except Exception as e:
            scheme = url_unpack['scheme']
        if scheme in default_proto_map:
            to_crawl['scheme'] = scheme
            url = url_lower_case
        else:
            redis_crawler.sadd('new_proto', '{} {}'.format(scheme, url_lower_case))
            to_crawl['scheme'] = 'http'
            url= 'http://{}'.format(url_lower_case.replace(scheme, '', 1))

    if url_unpack['port'] is None:
        to_crawl['port'] = default_proto_map[to_crawl['scheme']]
    else:
        # # FIXME: # TODO: remove me
        try:
            port = url_unpack['port'].decode()
        except:
            port = url_unpack['port']
        # Verify port number #################### make function to verify/correct port number
        try:
            int(port)
        # Invalid port Number
        except Exception as e:
            port = default_proto_map[to_crawl['scheme']]
        to_crawl['port'] = port

    #if url_unpack['query_string'] is None:
    #    if to_crawl['port'] == 80:
    #        to_crawl['url']= '{}://{}'.format(to_crawl['scheme'], url_unpack['host'].decode())
    #    else:
    #        to_crawl['url']= '{}://{}:{}'.format(to_crawl['scheme'], url_unpack['host'].decode(), to_crawl['port'])
    #else:
    #    to_crawl['url']= '{}://{}:{}{}'.format(to_crawl['scheme'], url_unpack['host'].decode(), to_crawl['port'], url_unpack['query_string'].decode())

    to_crawl['url'] = url
    if to_crawl['port'] == 80:
        to_crawl['domain_url'] = '{}://{}'.format(to_crawl['scheme'], new_url_host)
    else:
        to_crawl['domain_url'] = '{}://{}:{}'.format(to_crawl['scheme'], new_url_host, to_crawl['port'])

    # # FIXME: # TODO: remove me
    try:
        to_crawl['tld'] = url_unpack['tld'].decode()
    except:
        to_crawl['tld'] = url_unpack['tld']

    return to_crawl

def get_crawler_config(redis_server, mode, service_type, domain, url=None):
    crawler_options = {}
    if mode=='auto':
        config = redis_server.get('crawler_config:{}:{}:{}:{}'.format(mode, service_type, domain, url))
    else:
        config = redis_server.get('crawler_config:{}:{}:{}'.format(mode, service_type, domain))
    if config is None:
        config = {}
    else:
        config = json.loads(config)
    for option in default_crawler_config:
        if option in config:
            crawler_options[option] = config[option]
        else:
            crawler_options[option] = default_crawler_config[option]
    if mode == 'auto':
        crawler_options['time'] = int(config['time'])
    elif mode == 'manual':
        redis_server.delete('crawler_config:{}:{}:{}'.format(mode, service_type, domain))
    return crawler_options

def load_crawler_config(queue_type, service_type, domain, paste, url, date):
    crawler_config = {}
    crawler_config['splash_url'] = f'http://{splash_url}'
    crawler_config['item'] = paste
    crawler_config['service_type'] = service_type
    crawler_config['domain'] = domain
    crawler_config['date'] = date

    if queue_type and queue_type != 'tor':
        service_type = queue_type

    # Auto and Manual Crawling
    # Auto ################################################# create new entry, next crawling => here or when ended ?
    if paste == 'auto':
        crawler_config['crawler_options'] = get_crawler_config(redis_crawler, 'auto', service_type, domain, url=url)
        crawler_config['requested'] = True
    # Manual
    elif paste == 'manual':
        crawler_config['crawler_options'] = get_crawler_config(r_cache, 'manual', service_type, domain)
        crawler_config['requested'] = True
    # default crawler
    else:
        crawler_config['crawler_options'] = get_crawler_config(redis_crawler, 'default', service_type, domain)
        crawler_config['requested'] = False
    return crawler_config

def is_domain_up_day(domain, type_service, date_day):
    if redis_crawler.sismember('{}_up:{}'.format(type_service, date_day), domain):
        return True
    else:
        return False

def set_crawled_domain_metadata(type_service, date, domain, father_item):
    # first seen
    if not redis_crawler.hexists('{}_metadata:{}'.format(type_service, domain), 'first_seen'):
        redis_crawler.hset('{}_metadata:{}'.format(type_service, domain), 'first_seen', date['date_day'])

    redis_crawler.hset('{}_metadata:{}'.format(type_service, domain), 'paste_parent', father_item)
    # last check
    redis_crawler.hset('{}_metadata:{}'.format(type_service, domain), 'last_check', date['date_day'])

# Put message back on queue
def on_error_send_message_back_in_queue(type_service, domain, message):
    if not redis_crawler.sismember('{}_domain_crawler_queue'.format(type_service), domain):
        redis_crawler.sadd('{}_domain_crawler_queue'.format(type_service), domain)
        redis_crawler.sadd('{}_crawler_priority_queue'.format(type_service), message)

def crawl_onion(url, domain, port, type_service, message, crawler_config):
    crawler_config['url'] = url
    crawler_config['port'] = port
    print('Launching Crawler: {}'.format(url))

    r_cache.hset('metadata_crawler:{}'.format(splash_url), 'crawling_domain', domain)
    r_cache.hset('metadata_crawler:{}'.format(splash_url), 'started_time', datetime.datetime.now().strftime("%Y/%m/%d - %H:%M.%S"))

    retry = True
    nb_retry = 0
    while retry:
        try:
            r = requests.get(f'http://{splash_url}' , timeout=30.0)
            retry = False
        except Exception:
            # TODO: relaunch docker or send error message
            nb_retry += 1

            if nb_retry == 2:
                crawlers.restart_splash_docker(splash_url, splash_name)
                time.sleep(20)

            if nb_retry == 6:
                on_error_send_message_back_in_queue(type_service, domain, message)
                publisher.error('{} SPASH DOWN'.format(splash_url))
                print('--------------------------------------')
                print(' \033[91m DOCKER SPLASH DOWN\033[0m')
                print(' {} DOWN'.format(splash_url))
                r_cache.hset('metadata_crawler:{}'.format(splash_url), 'status', 'SPLASH DOWN')
                nb_retry == 0

            print(' \033[91m DOCKER SPLASH NOT AVAILABLE\033[0m')
            print(' Retry({}) in 10 seconds'.format(nb_retry))
            time.sleep(10)

    if r.status_code == 200:
        r_cache.hset('metadata_crawler:{}'.format(splash_url), 'status', 'Crawling')
        # save config in cash
        UUID = str(uuid.uuid4())
        r_cache.set('crawler_request:{}'.format(UUID), json.dumps(crawler_config))

        process = subprocess.Popen(["python", './torcrawler/tor_crawler.py', UUID],
                                   stdout=subprocess.PIPE)
        while process.poll() is None:
            time.sleep(1)

        if process.returncode == 0:
            output = process.stdout.read().decode()
            print(output)
            # error: splash:Connection to proxy refused
            if 'Connection to proxy refused' in output:
                on_error_send_message_back_in_queue(type_service, domain, message)
                publisher.error('{} SPASH, PROXY DOWN OR BAD CONFIGURATION'.format(splash_url))
                print('------------------------------------------------------------------------')
                print(' \033[91m SPLASH: Connection to proxy refused')
                print('')
                print(' PROXY DOWN OR BAD CONFIGURATION\033[0m'.format(splash_url))
                print('------------------------------------------------------------------------')
                r_cache.hset('metadata_crawler:{}'.format(splash_url), 'status', 'Error')
                exit(-2)
            else:
                crawlers.update_splash_manager_connection_status(True)
        else:
            print(process.stdout.read())
            exit(-1)
    else:
        on_error_send_message_back_in_queue(type_service, domain, message)
        print('--------------------------------------')
        print(' \033[91m DOCKER SPLASH DOWN\033[0m')
        print(' {} DOWN'.format(splash_url))
        r_cache.hset('metadata_crawler:{}'.format(splash_url), 'status', 'Crawling')
        exit(1)

# check external links (full_crawl)
def search_potential_source_domain(type_service, domain):
    external_domains = set()
    for link in redis_crawler.smembers('domain_{}_external_links:{}'.format(type_service, domain)):
        # unpack url
        url_data = unpack_url(link)
        if url_data['domain'] != domain:
            if url_data['tld'] == 'onion' or url_data['tld'] == 'i2p':
                external_domains.add(url_data['domain'])
    # # TODO: add special tag ?
    if len(external_domains) >= 20:
        redis_crawler.sadd('{}_potential_source'.format(type_service), domain)
        print('New potential source found: domain')
    redis_crawler.delete('domain_{}_external_links:{}'.format(type_service, domain))

if __name__ == '__main__':

    if len(sys.argv) != 2:
        print('usage:', 'Crawler.py', 'splash_url')
        exit(1)
    ##################################################
    splash_url = sys.argv[1]

    splash_name = crawlers.get_splash_name_by_url(splash_url)
    proxy_name = crawlers.get_splash_proxy(splash_name)
    crawler_type = crawlers.get_splash_crawler_type(splash_name)

    print(f'SPLASH Name: {splash_name}')
    print(f'Proxy Name: {proxy_name}')
    print(f'Crawler Type: {crawler_type}')

    #time.sleep(10)
    #sys.exit(0)

    #rotation_mode = deque(['onion', 'regular'])
    all_crawler_queues = crawlers.get_crawler_queue_types_by_splash_name(splash_name)
    rotation_mode = deque(all_crawler_queues)
    print(rotation_mode)

    default_proto_map = {'http': 80, 'https': 443}
    ######################################################## add ftp ???

    publisher.port = 6380
    publisher.channel = "Script"
    publisher.info("Script Crawler started")
    config_section = 'Crawler'

    # Setup the I/O queues
    p = Process(config_section)

    print('splash url: {}'.format(splash_url))

    r_cache = redis.StrictRedis(
        host=p.config.get("Redis_Cache", "host"),
        port=p.config.getint("Redis_Cache", "port"),
        db=p.config.getint("Redis_Cache", "db"),
        decode_responses=True)

    redis_crawler = redis.StrictRedis(
        host=p.config.get("ARDB_Onion", "host"),
        port=p.config.getint("ARDB_Onion", "port"),
        db=p.config.getint("ARDB_Onion", "db"),
        decode_responses=True)

    faup = crawlers.get_faup()

    # get HAR files
    default_crawler_har = p.config.getboolean("Crawler", "default_crawler_har")
    if default_crawler_har:
        default_crawler_har = True
    else:
        default_crawler_har = False

    # get PNG files
    default_crawler_png = p.config.getboolean("Crawler", "default_crawler_png")
    if default_crawler_png:
        default_crawler_png = True
    else:
        default_crawler_png = False

    # Default crawler options
    default_crawler_config = {'html': True,
                              'har': default_crawler_har,
                              'png': default_crawler_png,
                              'depth_limit': p.config.getint("Crawler", "crawler_depth_limit"),
                              'closespider_pagecount': p.config.getint("Crawler", "default_crawler_closespider_pagecount"),
                              'cookiejar_uuid': None,
                              'user_agent': p.config.get("Crawler", "default_crawler_user_agent")}

    # Track launched crawler
    r_cache.sadd('all_splash_crawlers', splash_url)
    r_cache.hset('metadata_crawler:{}'.format(splash_url), 'status', 'Waiting')
    r_cache.hset('metadata_crawler:{}'.format(splash_url), 'started_time', datetime.datetime.now().strftime("%Y/%m/%d - %H:%M.%S"))

    # update hardcoded blacklist
    load_blacklist('onion')
    load_blacklist('regular')

    while True:

        update_auto_crawler()

        rotation_mode.rotate()
        to_crawl = crawlers.get_elem_to_crawl_by_queue_type(rotation_mode)
        if to_crawl:
            url_data = unpack_url(to_crawl['url'])
            # remove domain from queue
            redis_crawler.srem('{}_domain_crawler_queue'.format(to_crawl['type_service']), url_data['domain'])

            print()
            print()
            print('\033[92m------------------START CRAWLER------------------\033[0m')
            print('crawler type: {}'.format(to_crawl['type_service']))
            print('\033[92m-------------------------------------------------\033[0m')
            print('url: {}'.format(url_data['url']))
            print('domain: {}'.format(url_data['domain']))
            print('domain_url: {}'.format(url_data['domain_url']))
            print()

            # Check blacklist
            if not redis_crawler.sismember('blacklist_{}'.format(to_crawl['type_service']), url_data['domain']):
                date = {'date_day': datetime.datetime.now().strftime("%Y%m%d"),
                        'date_month': datetime.datetime.now().strftime("%Y%m"),
                        'epoch': int(time.time())}

                # Update crawler status type
                r_cache.hset('metadata_crawler:{}'.format(splash_url), 'type', to_crawl['type_service'])

                crawler_config = load_crawler_config(to_crawl['queue_type'], to_crawl['type_service'], url_data['domain'], to_crawl['paste'], to_crawl['url'], date)
                # check if default crawler
                if not crawler_config['requested']:
                    # Auto crawl only if service not up this month
                    if redis_crawler.sismember('month_{}_up:{}'.format(to_crawl['type_service'], date['date_month']), url_data['domain']):
                        continue

                set_crawled_domain_metadata(to_crawl['type_service'], date, url_data['domain'], to_crawl['paste'])

                #### CRAWLER ####
                # Manual and Auto Crawler
                if crawler_config['requested']:
                    ######################################################crawler strategy
                    # CRAWL domain
                    crawl_onion(url_data['url'], url_data['domain'], url_data['port'], to_crawl['type_service'], to_crawl['original_message'], crawler_config)
                # Default Crawler
                else:
                    # CRAWL domain
                    crawl_onion(url_data['domain_url'], url_data['domain'], url_data['port'], to_crawl['type_service'], to_crawl['original_message'], crawler_config)
                    #if url != domain_url and not is_domain_up_day(url_data['domain'], to_crawl['type_service'], date['date_day']):
                    #    crawl_onion(url_data['url'], url_data['domain'], to_crawl['original_message'])

                # Save last_status day (DOWN)
                if not is_domain_up_day(url_data['domain'], to_crawl['type_service'], date['date_day']):
                    redis_crawler.sadd('{}_down:{}'.format(to_crawl['type_service'], date['date_day']), url_data['domain'])

                # if domain was UP at least one time
                if redis_crawler.exists('crawler_history_{}:{}:{}'.format(to_crawl['type_service'], url_data['domain'], url_data['port'])):
                    # add crawler history (if domain is down)
                    if not redis_crawler.zrangebyscore('crawler_history_{}:{}:{}'.format(to_crawl['type_service'], url_data['domain'], url_data['port']), date['epoch'], date['epoch']):
                        # Domain is down
                        redis_crawler.zadd('crawler_history_{}:{}:{}'.format(to_crawl['type_service'], url_data['domain'], url_data['port']), int(date['epoch']), int(date['epoch']))

                ############################
                # extract page content
                ############################

                # update list, last crawled domains
                redis_crawler.lpush('last_{}'.format(to_crawl['type_service']), '{}:{};{}'.format(url_data['domain'], url_data['port'], date['epoch']))
                redis_crawler.ltrim('last_{}'.format(to_crawl['type_service']), 0, 15)

                #update crawler status
                r_cache.hset('metadata_crawler:{}'.format(splash_url), 'status', 'Waiting')
                r_cache.hdel('metadata_crawler:{}'.format(splash_url), 'crawling_domain')

                # Update crawler status type
                r_cache.hdel('metadata_crawler:{}'.format(splash_url), 'type', to_crawl['type_service'])

                # add next auto Crawling in queue:
                if to_crawl['paste'] == 'auto':
                    redis_crawler.zadd('crawler_auto_queue', int(time.time()+crawler_config['crawler_options']['time']) , '{};{}'.format(to_crawl['original_message'], to_crawl['type_service']))
                    # update list, last auto crawled domains
                    redis_crawler.lpush('last_auto_crawled', '{}:{};{}'.format(url_data['domain'], url_data['port'], date['epoch']))
                    redis_crawler.ltrim('last_auto_crawled', 0, 9)
            else:
                print(' Blacklisted Domain')
                print()
                print()

        else:
            time.sleep(1)


@@ -119,11 +119,11 @@ def core_migration():
     # Auto Export Migration
     ail_misp = r_serv_db.get('ail:misp')
     if ail_misp != 'True':
-        ail_misp == 'False'
+        ail_misp = 'False'
     r_kvrocks.set('ail:misp', ail_misp)
     ail_thehive = r_serv_db.get('ail:thehive')
     if ail_thehive != 'True':
-        ail_thehive == 'False'
+        ail_thehive = 'False'
     r_kvrocks.set('ail:thehive', ail_thehive)
@@ -494,7 +494,7 @@ def domain_migration():
         domain = Domains.Domain(dom)
         domain.update_daterange(first_seen)
         domain.update_daterange(last_check)
-        domain._set_ports(ports)
+        domain._set_ports(ports) # TODO ############################################################################
         if last_origin:
             domain.set_last_origin(last_origin)
         for language in languages:
@@ -520,13 +520,13 @@ def domain_migration():
             epoch = history['epoch']
             # DOMAIN DOWN
             if not history.get('status'): # domain DOWN
-                domain.add_history(epoch, port)
+                domain.add_history(epoch)
                 print(f'DOWN {epoch}')
             # DOMAIN UP
             else:
                 root_id = history.get('root')
                 if root_id:
-                    domain.add_history(epoch, port, root_item=root_id)
+                    domain.add_history(epoch, root_item=root_id)
                     print(f'UP {root_id}')
                     crawled_items = get_crawled_items(dom, root_id)
                     for item_id in crawled_items:
@@ -534,7 +534,7 @@ def domain_migration():
                         item_father = get_item_father(item_id)
                         if item_father and url:
                             print(f'{url} {item_id}')
-                            domain.add_crawled_item(url, port, item_id, item_father)
+                            domain.add_crawled_item(url, item_id, item_father)

                             #print()


@@ -18,17 +18,18 @@ import time
 import re
 import sys
 from pubsublogger import publisher
-from packages import Paste
+from lib.objects.Items import Item
 from Helper import Process
 from ipaddress import IPv4Network, IPv4Address

+# TODO REWRITE ME -> IMPROVE + MIGRATE TO MODULE

 def search_ip(message):
-    paste = Paste.Paste(message)
-    content = paste.get_p_content()
+    item = Item(message)
+    content = item.get_content()
     # regex to find IPs
     reg_ip = re.compile(r'^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)', flags=re.MULTILINE)
-    # list of the regex results in the Paste, may be null
+    # list of the regex results in the Item, may be null
     results = reg_ip.findall(content)
     matching_ips = []
@@ -40,14 +41,13 @@ def search_ip(message):
             matching_ips.append(address)

     if len(matching_ips) > 0:
-        print('{} contains {} IPs'.format(paste.p_name, len(matching_ips)))
-        publisher.warning('{} contains {} IPs'.format(paste.p_name, len(matching_ips)))
+        print(f'{item.get_id()} contains {len(matching_ips)} IPs')
+        publisher.warning(f'{item.get_id()} contains {item.get_id()} IPs')

-        #Tag message with IP
-        msg = 'infoleak:automatic-detection="ip";{}'.format(message)
+        # Tag message with IP
+        msg = f'infoleak:automatic-detection="ip";{item.get_id()}'
         p.populate_set_out(msg, 'Tags')
-        #Send to duplicate
-        p.populate_set_out(message, 'Duplicate')

 if __name__ == '__main__':
     # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
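The same Paste-to-Item substitution recurs in most of the module diffs below (Release.py, SourceCode.py, the CVE helper, the monitoring tool). A comment-only summary of the mapping as it appears in these hunks, as a sketch rather than an exhaustive API list:

# Old packages.Paste API          ->  New lib.objects.Items.Item API (per the hunks in this commit)
# Paste.Paste(item_id)            ->  Item(item_id)
# paste.get_p_content()           ->  item.get_content()
# paste.p_name                    ->  item.get_basename()
# paste.p_source                  ->  item.get_source()
# paste.p_date                    ->  item.get_date()
# paste.p_rel_path                ->  item.get_id()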


@@ -40,7 +40,6 @@ is_ail_core=`screen -ls | egrep '[0-9]+.Core_AIL' | cut -d. -f1`
 is_ail_2_ail=`screen -ls | egrep '[0-9]+.AIL_2_AIL' | cut -d. -f1`
 isscripted=`screen -ls | egrep '[0-9]+.Script_AIL' | cut -d. -f1`
 isflasked=`screen -ls | egrep '[0-9]+.Flask_AIL' | cut -d. -f1`
-iscrawler=`screen -ls | egrep '[0-9]+.Crawler_AIL' | cut -d. -f1`
 isfeeded=`screen -ls | egrep '[0-9]+.Feeder_Pystemon' | cut -d. -f1`

 function helptext {
@@ -126,6 +125,8 @@ function launching_logs {
     screen -S "Logging_AIL" -X screen -t "LogScript" bash -c "cd ${AIL_BIN}; ${AIL_VENV}/bin/log_subscriber -p 6380 -c Script -l ../logs/; read x"
     sleep 0.1
     screen -S "Logging_AIL" -X screen -t "LogScript" bash -c "cd ${AIL_BIN}; ${AIL_VENV}/bin/log_subscriber -p 6380 -c Sync -l ../logs/; read x"
+    sleep 0.1
+    screen -S "Logging_AIL" -X screen -t "LogScript" bash -c "cd ${AIL_BIN}; ${AIL_VENV}/bin/log_subscriber -p 6380 -c Crawler -l ../logs/; read x"
 }

 function launching_queues {
@@ -174,8 +175,6 @@ function launching_scripts {
     screen -S "Script_AIL" -X screen -t "JSON_importer" bash -c "cd ${AIL_BIN}/import; ${ENV_PY} ./JSON_importer.py; read x"
     sleep 0.1
-    screen -S "Script_AIL" -X screen -t "Crawler_manager" bash -c "cd ${AIL_BIN}/core; ${ENV_PY} ./Crawler_manager.py; read x"
-    sleep 0.1
     screen -S "Script_AIL" -X screen -t "D4_client" bash -c "cd ${AIL_BIN}/core; ${ENV_PY} ./D4_client.py; read x"
     sleep 0.1
     screen -S "Script_AIL" -X screen -t "DbCleaner" bash -c "cd ${AIL_BIN}/core; ${ENV_PY} ./DbCleaner.py; read x"
@@ -202,6 +201,9 @@ function launching_scripts {
     screen -S "Script_AIL" -X screen -t "SubmitPaste" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./submit_paste.py; read x"
     sleep 0.1
+    screen -S "Script_AIL" -X screen -t "Crawler" bash -c "cd ${AIL_BIN}/crawlers; ${ENV_PY} ./Crawler.py; read x"
+    sleep 0.1
+
     screen -S "Script_AIL" -X screen -t "Sync_module" bash -c "cd ${AIL_BIN}/core; ${ENV_PY} ./Sync_module.py; read x"
     sleep 0.1
@@ -225,8 +227,6 @@ function launching_scripts {
     sleep 0.1
     screen -S "Script_AIL" -X screen -t "Mail" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Mail.py; read x"
     sleep 0.1
-    # screen -S "Script_AIL" -X screen -t "SentimentAnalysis" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./SentimentAnalysis.py; read x"
-    # sleep 0.1
     screen -S "Script_AIL" -X screen -t "ModuleStats" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./ModuleStats.py; read x"
     sleep 0.1
     screen -S "Script_AIL" -X screen -t "Onion" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Onion.py; read x"
@@ -265,8 +265,12 @@ function launching_scripts {
     ##################################
     #       DISABLED MODULES         #
     ##################################
-    #screen -S "Script_AIL" -X screen -t "Phone" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Phone.py; read x"
-    #sleep 0.1
+    # screen -S "Script_AIL" -X screen -t "Phone" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Phone.py; read x"
+    # sleep 0.1
+    # screen -S "Script_AIL" -X screen -t "SentimentAnalysis" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./SentimentAnalysis.py; read x"
+    # sleep 0.1
+    # screen -S "Script_AIL" -X screen -t "Release" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Release.py; read x"
+    # sleep 0.1

     ##################################
     #              #                 #
@@ -285,8 +289,6 @@ function launching_scripts {
     sleep 0.1
     screen -S "Script_AIL" -X screen -t "IPAddress" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./IPAddress.py; read x"
-    #screen -S "Script_AIL" -X screen -t "Release" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Release.py; read x"
-    #sleep 0.1

 }
@@ -476,19 +478,19 @@ function launch_feeder {
 }

 function killscript {
-    if [[ $islogged || $isqueued || $is_ail_core || $isscripted || $isflasked || $isfeeded || $iscrawler || $is_ail_2_ail ]]; then
+    if [[ $islogged || $isqueued || $is_ail_core || $isscripted || $isflasked || $isfeeded || $is_ail_2_ail ]]; then
         echo -e $GREEN"Killing Script"$DEFAULT
-        kill $islogged $isqueued $is_ail_core $isscripted $isflasked $isfeeded $iscrawler $is_ail_2_ail
+        kill $islogged $isqueued $is_ail_core $isscripted $isflasked $isfeeded $is_ail_2_ail
         sleep 0.2
         echo -e $ROSE`screen -ls`$DEFAULT
-        echo -e $GREEN"\t* $islogged $isqueued $is_ail_core $isscripted $isflasked $isfeeded $iscrawler $is_ail_2_ail killed."$DEFAULT
+        echo -e $GREEN"\t* $islogged $isqueued $is_ail_core $isscripted $isflasked $isfeeded $is_ail_2_ail killed."$DEFAULT
     else
         echo -e $RED"\t* No script to kill"$DEFAULT
     fi
 }

 function killall {
-    if [[ $isredis || $isardb || $iskvrocks || $islogged || $isqueued || $is_ail_2_ail || $isscripted || $isflasked || $isfeeded || $iscrawler || $is_ail_core || $is_ail_2_ail ]]; then
+    if [[ $isredis || $isardb || $iskvrocks || $islogged || $isqueued || $is_ail_2_ail || $isscripted || $isflasked || $isfeeded || $is_ail_core || $is_ail_2_ail ]]; then
         if [[ $isredis ]]; then
             echo -e $GREEN"Gracefully closing redis servers"$DEFAULT
             shutting_down_redis;
@@ -503,10 +505,10 @@ function killall {
             shutting_down_kvrocks;
         fi
         echo -e $GREEN"Killing all"$DEFAULT
-        kill $isredis $isardb $iskvrocks $islogged $isqueued $is_ail_core $isscripted $isflasked $isfeeded $iscrawler $is_ail_2_ail
+        kill $isredis $isardb $iskvrocks $islogged $isqueued $is_ail_core $isscripted $isflasked $isfeeded $is_ail_2_ail
         sleep 0.2
         echo -e $ROSE`screen -ls`$DEFAULT
-        echo -e $GREEN"\t* $isredis $isardb $iskvrocks $islogged $isqueued $isscripted $is_ail_2_ail $isflasked $isfeeded $iscrawler $is_ail_core killed."$DEFAULT
+        echo -e $GREEN"\t* $isredis $isardb $iskvrocks $islogged $isqueued $isscripted $is_ail_2_ail $isflasked $isfeeded $is_ail_core killed."$DEFAULT
     else
         echo -e $RED"\t* No screen to kill"$DEFAULT
     fi


@@ -15,7 +15,7 @@ import json
 import redis
 import psutil
 from subprocess import PIPE, Popen
-from packages import Paste
+from lib.objects.Items import Item

 sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
 import ConfigLoader
@@ -51,7 +51,7 @@ QUEUE_STATUS = {}
 CPU_TABLE = {}
 CPU_OBJECT_TABLE = {}

-# Path of the current paste for a pid
+# Path of the current item for a pid
 COMPLETE_PASTE_PATH_PER_PID = {}

 '''
@@ -443,10 +443,10 @@ class Show_paste(Frame):
                     self.label_list[i]._text = ""
                 return

-            paste = Paste.Paste(COMPLETE_PASTE_PATH_PER_PID[current_selected_value])
-            old_content = paste.get_p_content()[0:4000] # Limit number of char to be displayed
+            item = Item(COMPLETE_PASTE_PATH_PER_PID[current_selected_value])
+            old_content = item.get_content()[0:4000] # Limit number of char to be displayed

-            #Replace unprintable char by ?
+            # Replace unprintable char by ?
             content = ""
             for i, c in enumerate(old_content):
                 if ord(c) > 127: # Used to avoid printing unprintable char
@@ -456,7 +456,7 @@ class Show_paste(Frame):
                 else:
                     content += c

-            #Print in the correct label, END or more
+            # Print in the correct label, END or more
             to_print = ""
             i = 0
             for line in content.split("\n"):
@@ -472,7 +472,7 @@ class Show_paste(Frame):
             self.label_list[i]._text = "- END of PASTE -"
             i += 1

-            while i<self.num_label: #Clear out remaining lines
+            while i<self.num_label: # Clear out remaining lines
                 self.label_list[i]._text = ""
                 i += 1
@@ -491,6 +491,7 @@ class Show_paste(Frame):
             for i in range(2,self.num_label):
                 self.label_list[i]._text = ""

+
 '''
 END SCENES DEFINITION
 '''


@@ -72,6 +72,7 @@ def sendEmailNotification(recipient, mail_subject, mail_body):
         traceback.print_tb(err.__traceback__)
         publisher.warning(err)

+
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='Test notification sender.')
     parser.add_argument("addr", help="Test mail 'to' address")


@@ -180,9 +180,9 @@ if __name__ == '__main__':
     key_id_str = 'Key ID - '
     regex_key_id = '{}.+'.format(key_id_str)
-    regex_pgp_public_blocs = '-----BEGIN PGP PUBLIC KEY BLOCK-----[\s\S]+?-----END PGP PUBLIC KEY BLOCK-----'
-    regex_pgp_signature = '-----BEGIN PGP SIGNATURE-----[\s\S]+?-----END PGP SIGNATURE-----'
-    regex_pgp_message = '-----BEGIN PGP MESSAGE-----[\s\S]+?-----END PGP MESSAGE-----'
+    regex_pgp_public_blocs = r'-----BEGIN PGP PUBLIC KEY BLOCK-----[\s\S]+?-----END PGP PUBLIC KEY BLOCK-----'
+    regex_pgp_signature = r'-----BEGIN PGP SIGNATURE-----[\s\S]+?-----END PGP SIGNATURE-----'
+    regex_pgp_message = r'-----BEGIN PGP MESSAGE-----[\s\S]+?-----END PGP MESSAGE-----'
     regex_tool_version = r"\bVersion:.*\n"
     regex_block_comment = r"\bComment:.*\n"


@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 import time
-from packages import Paste
+from lib.objects.Items import Item
 from pubsublogger import publisher
 from Helper import Process
 import re
@@ -45,8 +45,8 @@ if __name__ == "__main__":
             time.sleep(10)
             continue

-        paste = Paste.Paste(filepath)
-        content = paste.get_p_content()
+        item = Item(filepath)
+        content = item.get_content()

         #signal.alarm(max_execution_time)
         try:
@@ -54,16 +54,16 @@ if __name__ == "__main__":
             if len(releases) == 0:
                 continue

-            to_print = 'Release;{};{};{};{} releases;{}'.format(paste.p_source, paste.p_date, paste.p_name, len(releases), paste.p_rel_path)
+            to_print = f'Release;{item.get_source()};{item.get_date()};{item.get_basename()};{len(releases)} releases;{item.get_id()}'
             print(to_print)
             if len(releases) > 30:
                 publisher.warning(to_print)
             else:
                 publisher.info(to_print)

         except TimeoutException:
             p.incr_module_timeout_statistic()
-            print ("{0} processing timeout".format(paste.p_rel_path))
+            print(f"{item.get_id()} processing timeout")
             continue
         else:
             signal.alarm(0)


@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 import time
-from packages import Paste
+from lib.objects.Items import Item
 from pubsublogger import publisher
 from Helper import Process
 import re
@@ -13,19 +13,19 @@ if __name__ == "__main__":
     p = Process(config_section)
     publisher.info("Finding Source Code")

     critical = 0 # AS TO BE IMPORTANT, MIGHT BE REMOVED

-    #RELEVANTS LANGUAGES
-    shell = "[a-zA-Z0-9]+@[a-zA-Z0-9\-]+\:\~\$"
-    c = "\#include\ \<[a-z\/]+.h\>"
-    php = "\<\?php"
-    python = "import\ [\w]+"
-    bash = "#!\/[\w]*\/bash"
-    javascript = "function\(\)"
-    ruby = "require \ [\w]+"
-    adr = "0x[a-f0-9]{2}"
-    #asm = "\"((?s).{1}x[0-9a-f]{2}){3,}" ISSUES WITH FINDALL, pattern like \x54\xaf\x23\..
+    # RELEVANT LANGUAGES
+    shell = r"[a-zA-Z0-9]+@[a-zA-Z0-9\-]+\:\~\$"
+    c = r"\#include\ \<[a-z\/]+.h\>"
+    php = r"\<\?php"
+    python = r"import\ [\w]+"
+    bash = r"#!\/[\w]*\/bash"
+    javascript = r"function\(\)"
+    ruby = r"require \ [\w]+"
+    adr = r"0x[a-f0-9]{2}"
+    # asm = r"\"((?s).{1}x[0-9a-f]{2}){3,}" ISSUES WITH FINDALL, pattern like \x54\xaf\x23\..

     languages = [shell, c, php, bash, python, javascript, bash, ruby, adr]
     regex = '|'.join(languages)
@@ -41,13 +41,13 @@ if __name__ == "__main__":
         filepath, count = message.split()

-        paste = Paste.Paste(filepath)
-        content = paste.get_p_content()
+        item = Item(filepath)
+        content = item.get_content()

         match_set = set(re.findall(regex, content))
         if len(match_set) == 0:
             continue

-        to_print = 'SourceCode;{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, message)
+        to_print = f'SourceCode;{item.get_source()};{item.get_date()};{item.get_basename()};{item.get_id()}'

         if len(match_set) > critical:
             publisher.warning(to_print)


@@ -1,68 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*

import os
import sys
import time

sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
import ConfigLoader
import crawlers

config_loader = ConfigLoader.ConfigLoader()
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
config_loader = None

# # TODO: lauch me in core screen
# # TODO: check if already launched in tor screen

# # TODO: handle mutltiple splash_manager
if __name__ == '__main__':

    is_manager_connected = crawlers.ping_splash_manager()
    if not is_manager_connected:
        print('Error, Can\'t connect to Splash manager')
        session_uuid = None
    else:
        print('Splash manager connected')
        session_uuid = crawlers.get_splash_manager_session_uuid()
        is_manager_connected = crawlers.reload_splash_and_proxies_list()
        print(is_manager_connected)
        if is_manager_connected:
            if crawlers.test_ail_crawlers():
                crawlers.relaunch_crawlers()
    last_check = int(time.time())

    while True:

        # # TODO: avoid multiple ping

        # check if manager is connected
        if int(time.time()) - last_check > 60:
            is_manager_connected = crawlers.is_splash_manager_connected()
            current_session_uuid = crawlers.get_splash_manager_session_uuid()
            # reload proxy and splash list
            if current_session_uuid and current_session_uuid != session_uuid:
                is_manager_connected = crawlers.reload_splash_and_proxies_list()
                if is_manager_connected:
                    print('reload proxies and splash list')
                    if crawlers.test_ail_crawlers():
                        crawlers.relaunch_crawlers()
                session_uuid = current_session_uuid
            if not is_manager_connected:
                print('Error, Can\'t connect to Splash manager')
            last_check = int(time.time())

        # # TODO: lauch crawlers if was never connected
        # refresh splash and proxy list
        elif False:
            crawlers.reload_splash_and_proxies_list()
            print('list of splash and proxies refreshed')
        else:
            time.sleep(5)

        # kill/launch new crawler / crawler manager check if already launched

    # # TODO: handle mutltiple splash_manager
    # catch reload request

bin/crawlers/Crawler.py (new executable file, 331 lines)

@@ -0,0 +1,331 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*

import os
import sys
import time

sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from modules.abstract_module import AbstractModule
from lib import crawlers
from lib.ConfigLoader import ConfigLoader
from lib.objects.Domains import Domain
from lib.objects import Screenshots


class Crawler(AbstractModule):

    def __init__(self):
        super(Crawler, self, ).__init__(logger_channel='Crawler')

        # Waiting time in seconds between to message processed
        self.pending_seconds = 1

        config_loader = ConfigLoader()
        self.r_log_submit = config_loader.get_redis_conn('Redis_Log_submit')

        self.default_har = config_loader.get_config_boolean('Crawler', 'default_har')
        self.default_screenshot = config_loader.get_config_boolean('Crawler', 'default_screenshot')
        self.default_depth_limit = config_loader.get_config_int('Crawler', 'default_depth_limit')

        # TODO: LIMIT MAX NUMBERS OF CRAWLED PAGES

        # update hardcoded blacklist
        crawlers.load_blacklist()
        # update captures cache
        crawlers.reload_crawler_captures()

        # LACUS
        self.lacus = crawlers.get_lacus()

        # Capture
        self.har = None
        self.screenshot = None
        self.root_item = None
        self.har_dir = None
        self.items_dir = None
        self.domain = None

        # Send module state to logs
        self.redis_logger.info('Crawler initialized')

    def print_crawler_start_info(self, url, domain, domain_url):
        print()
        print()
        print('\033[92m------------------START CRAWLER------------------\033[0m')
        print(f'crawler type: {domain}')
        print('\033[92m-------------------------------------------------\033[0m')
        print(f'url: {url}')
        print(f'domain: {domain}')
        print(f'domain_url: {domain_url}')
        print()

    def get_message(self):
        # Check if a new Capture can be Launched
        if crawlers.get_nb_crawler_captures() < crawlers.get_crawler_max_captures():
            task_row = crawlers.get_crawler_task_from_queue()
            if task_row:
                print(task_row)
                task_uuid, priority = task_row
                self.enqueue_capture(task_uuid, priority)

        # Check if a Capture is Done
        capture = crawlers.get_crawler_capture()
        if capture:
            print(capture)
            capture_uuid = capture[0][0]
            capture_status = self.lacus.get_capture_status(capture_uuid)
            if capture_status != crawlers.CaptureStatus.DONE:  # TODO ADD GLOBAL TIMEOUT-> Save start time
                crawlers.update_crawler_capture(capture_uuid)
                print(capture_uuid, capture_status, int(time.time()))
            else:
                self.compute(capture_uuid)
                crawlers.remove_crawler_capture(capture_uuid)
                print('capture', capture_uuid, 'completed')

        time.sleep(self.pending_seconds)

    def enqueue_capture(self, task_uuid, priority):
        task = crawlers.get_crawler_task(task_uuid)
        print(task)
        # task = {
        #     'uuid': task_uuid,
        #     'url': 'https://foo.be',
        #     'domain': 'foo.be',
        #     'depth': 1,
        #     'har': True,
        #     'screenshot': True,
        #     'user_agent': crawlers.get_default_user_agent(),
        #     'cookiejar': [],
        #     'header': '',
        #     'proxy': 'force_tor',
        #     'parent': 'manual',
        # }
        url = task['url']
        force = priority != 0

        # TODO unpack cookiejar

        # TODO HEADER

        capture_uuid = self.lacus.enqueue(url=url,
                                          depth=task['depth'],
                                          user_agent=task['user_agent'],
                                          proxy=task['proxy'],
                                          cookies=[],
                                          force=force,
                                          general_timeout_in_sec=90)

        crawlers.add_crawler_capture(task_uuid, capture_uuid)
        print(task_uuid, capture_uuid, 'launched')
        return capture_uuid

    # CRAWL DOMAIN
    # TODO: CATCH ERRORS
    def compute(self, capture_uuid):
        print('saving capture', capture_uuid)

        task_uuid = crawlers.get_crawler_capture_task_uuid(capture_uuid)
        task = crawlers.get_crawler_task(task_uuid)

        print(task['domain'])

        self.domain = Domain(task['domain'])

        # TODO CHANGE EPOCH
        epoch = int(time.time())
        parent_id = task['parent']

        print(task)

        entries = self.lacus.get_capture(capture_uuid)
        print(entries['status'])
        self.har = task['har']
        self.screenshot = task['screenshot']
        str_date = crawlers.get_current_date(separator=True)
        self.har_dir = crawlers.get_date_har_dir(str_date)
        self.items_dir = crawlers.get_date_crawled_items_source(str_date)
        self.root_item = None

        # Save Capture
        self.save_capture_response(parent_id, entries)

        self.domain.update_daterange(str_date.replace('/', ''))
        # Origin + History
        if self.root_item:
            # domain.add_ports(port)
            self.domain.set_last_origin(parent_id)
            self.domain.add_history(epoch, root_item=self.root_item)
        elif self.domain.was_up():
            self.domain.add_history(epoch, root_item=epoch)

        crawlers.update_last_crawled_domain(self.domain.get_domain_type(), self.domain.id, epoch)
        crawlers.clear_crawler_task(task_uuid, self.domain.get_domain_type())

    def save_capture_response(self, parent_id, entries):
        print(entries.keys())
        if 'error' in entries:
            # TODO IMPROVE ERROR MESSAGE
            self.redis_logger.warning(str(entries['error']))
            print(entries['error'])

        if entries.get('html'):
            print('retrieved content')
            # print(entries.get('html'))

        # TODO LOGS IF != domain
        if 'last_redirected_url' in entries and entries['last_redirected_url']:
            last_url = entries['last_redirected_url']
            unpacked_last_url = crawlers.unpack_url(last_url)
            current_domain = unpacked_last_url['domain']
            # REDIRECTION TODO CHECK IF WEB
            if current_domain != self.domain.id and not self.root_item:
                self.redis_logger.warning(f'External redirection {self.domain.id} -> {current_domain}')
                print(f'External redirection {self.domain.id} -> {current_domain}')
                if not self.root_item:
                    self.domain = Domain(current_domain)
        # TODO LAST URL
        # FIXME
        else:
            last_url = f'http://{self.domain.id}'

        if 'html' in entries and entries['html']:
            item_id = crawlers.create_item_id(self.items_dir, self.domain.id)
            print(item_id)
            gzip64encoded = crawlers.get_gzipped_b64_item(item_id, entries['html'])
            # send item to Global
            relay_message = f'{item_id} {gzip64encoded}'
            self.send_message_to_queue(relay_message, 'Mixer')

            # increase nb of paste by feeder name
            self.r_log_submit.hincrby('mixer_cache:list_feeder', 'crawler', 1)

            # Tag
            msg = f'infoleak:submission="crawler";{item_id}'
            self.send_message_to_queue(msg, 'Tags')

            crawlers.create_item_metadata(item_id, self.domain.id, last_url, parent_id)
            if self.root_item is None:
                self.root_item = item_id
            parent_id = item_id

            # SCREENSHOT
            if self.screenshot:
                if 'png' in entries and entries['png']:
                    screenshot = Screenshots.create_screenshot(entries['png'], b64=False)
                    if screenshot:
                        # Create Correlations
                        screenshot.add_correlation('item', '', item_id)
                        screenshot.add_correlation('domain', '', self.domain.id)
            # HAR
            if self.har:
                if 'har' in entries and entries['har']:
                    crawlers.save_har(self.har_dir, item_id, entries['har'])

        # Next Children
        entries_children = entries.get('children')
        if entries_children:
            for children in entries_children:
                self.save_capture_response(parent_id, children)


if __name__ == '__main__':
    module = Crawler()
    module.debug = True
    # module.compute(('ooooo', 0))
    module.run()
##################################
##################################
##################################
##################################
##################################
# from Helper import Process
# from pubsublogger import publisher
# ======== FUNCTIONS ========
# def update_auto_crawler():
# current_epoch = int(time.time())
# list_to_crawl = redis_crawler.zrangebyscore('crawler_auto_queue', '-inf', current_epoch)
# for elem_to_crawl in list_to_crawl:
# mess, type = elem_to_crawl.rsplit(';', 1)
# redis_crawler.sadd('{}_crawler_priority_queue'.format(type), mess)
# redis_crawler.zrem('crawler_auto_queue', elem_to_crawl)
# Extract info form url (url, domain, domain url, ...)
# def unpack_url(url):
# to_crawl = {}
# faup.decode(url)
# url_unpack = faup.get()
# to_crawl['domain'] = to_crawl['domain'].lower()
# new_url_host = url_host.lower()
# url_lower_case = url.replace(url_host, new_url_host, 1)
#
# if url_unpack['scheme'] is None:
# to_crawl['scheme'] = 'http'
# url= 'http://{}'.format(url_lower_case)
# else:
# try:
# scheme = url_unpack['scheme'].decode()
# except Exception as e:
# scheme = url_unpack['scheme']
# if scheme in default_proto_map:
# to_crawl['scheme'] = scheme
# url = url_lower_case
# else:
# redis_crawler.sadd('new_proto', '{} {}'.format(scheme, url_lower_case))
# to_crawl['scheme'] = 'http'
# url= 'http://{}'.format(url_lower_case.replace(scheme, '', 1))
#
# if url_unpack['port'] is None:
# to_crawl['port'] = default_proto_map[to_crawl['scheme']]
# else:
# try:
# port = url_unpack['port'].decode()
# except:
# port = url_unpack['port']
# # Verify port number #################### make function to verify/correct port number
# try:
# int(port)
# # Invalid port Number
# except Exception as e:
# port = default_proto_map[to_crawl['scheme']]
# to_crawl['port'] = port
#
# #if url_unpack['query_string'] is None:
# # if to_crawl['port'] == 80:
# # to_crawl['url']= '{}://{}'.format(to_crawl['scheme'], url_unpack['host'].decode())
# # else:
# # to_crawl['url']= '{}://{}:{}'.format(to_crawl['scheme'], url_unpack['host'].decode(), to_crawl['port'])
# #else:
# # to_crawl['url']= '{}://{}:{}{}'.format(to_crawl['scheme'], url_unpack['host'].decode(), to_crawl['port'], url_unpack['query_string'].decode())
#
# to_crawl['url'] = url
# if to_crawl['port'] == 80:
# to_crawl['domain_url'] = '{}://{}'.format(to_crawl['scheme'], new_url_host)
# else:
# to_crawl['domain_url'] = '{}://{}:{}'.format(to_crawl['scheme'], new_url_host, to_crawl['port'])
#
# try:
# to_crawl['tld'] = url_unpack['tld'].decode()
# except:
# to_crawl['tld'] = url_unpack['tld']
#
# return to_crawl
# ##################################################### add ftp ???
# update_auto_crawler()
# # add next auto Crawling in queue:
# if to_crawl['paste'] == 'auto':
# redis_crawler.zadd('crawler_auto_queue', int(time.time()+crawler_config['crawler_options']['time']) , '{};{}'.format(to_crawl['original_message'], to_crawl['type_service']))
# # update list, last auto crawled domains
# redis_crawler.lpush('last_auto_crawled', '{}:{};{}'.format(url_data['domain'], url_data['port'], date['epoch']))
# redis_crawler.ltrim('last_auto_crawled', 0, 9)
#
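The module above drives Lacus asynchronously: it enqueues a capture, polls its status on each get_message() pass, and only calls compute() once the capture is DONE. Below is a minimal synchronous sketch of that cycle, assuming the pylacus client API wrapped by lib/crawlers.py; the URL, proxy and timeout values are illustrative, not AIL defaults.

import time
from pylacus import PyLacus, CaptureStatus  # assumed client API, as wrapped by lib/crawlers.py

lacus = PyLacus('http://127.0.0.1:7100')    # hypothetical local Lacus instance

capture_uuid = lacus.enqueue(url='http://example.onion', depth=1,
                             proxy='force_tor', force=True,
                             general_timeout_in_sec=90)

# The AIL module does not block here: it re-queues the capture and checks it again on the next pass.
while lacus.get_capture_status(capture_uuid) != CaptureStatus.DONE:
    time.sleep(1)

entries = lacus.get_capture(capture_uuid)   # dict with status, html, png, har, last_redirected_url, children, ...
print(entries.get('last_redirected_url'))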


@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 # -*-coding:UTF-8 -*

-from packages import Paste
+from lib.objects.Items import Item
 from Helper import Process

 import os
@@ -12,11 +12,13 @@ import configparser
 from collections import defaultdict

+# TODO FIX ME OR REMOVE ME
+
 def get_dict_cve(list_paste_cve, only_one_same_cve_by_paste=False):
     dict_keyword = {}

     for paste_cve in list_paste_cve:
-        paste_content = Paste.Paste(paste_cve).get_p_content()
+        paste_content = Item(paste_cve).get_content()

         cve_list = reg_cve.findall(paste_content)

         if only_one_same_cve_by_paste:


@@ -35,17 +35,17 @@ class ConfigLoader(object):
         else:
             self.cfg.read(default_config_file)

-    def get_redis_conn(self, redis_name, decode_responses=True): ## TODO: verify redis name
-        return redis.StrictRedis( host=self.cfg.get(redis_name, "host"),
-                                  port=self.cfg.getint(redis_name, "port"),
-                                  db=self.cfg.getint(redis_name, "db"),
-                                  decode_responses=decode_responses )
+    def get_redis_conn(self, redis_name, decode_responses=True):
+        return redis.StrictRedis(host=self.cfg.get(redis_name, "host"),
+                                 port=self.cfg.getint(redis_name, "port"),
+                                 db=self.cfg.getint(redis_name, "db"),
+                                 decode_responses=decode_responses)

-    def get_db_conn(self, db_name, decode_responses=True): ## TODO: verify redis name
-        return redis.StrictRedis( host=self.cfg.get(db_name, "host"),
-                                  port=self.cfg.getint(db_name, "port"),
-                                  password=self.cfg.get(db_name, "password"),
-                                  decode_responses=decode_responses )
+    def get_db_conn(self, db_name, decode_responses=True):
+        return redis.StrictRedis(host=self.cfg.get(db_name, "host"),
+                                 port=self.cfg.getint(db_name, "port"),
+                                 password=self.cfg.get(db_name, "password"),
+                                 decode_responses=decode_responses)

     def get_files_directory(self, key_name):
         directory_path = self.cfg.get('Directories', key_name)
@@ -79,3 +79,33 @@ class ConfigLoader(object):
             return all_keys_values
         else:
             return []
+
+
+# # # # Directory Config # # # #
+
+config_loader = ConfigLoader()
+
+ITEMS_FOLDER = config_loader.get_config_str("Directories", "pastes")
+if ITEMS_FOLDER == 'PASTES':
+    ITEMS_FOLDER = os.path.join(os.environ['AIL_HOME'], ITEMS_FOLDER)
+ITEMS_FOLDER = ITEMS_FOLDER + '/'
+ITEMS_FOLDER = os.path.join(os.path.realpath(ITEMS_FOLDER), '')
+
+HARS_DIR = config_loader.get_files_directory('har')
+if HARS_DIR == 'CRAWLED_SCREENSHOT':
+    HARS_DIR = os.path.join(os.environ['AIL_HOME'], HARS_DIR)
+
+SCREENSHOTS_FOLDER = config_loader.get_files_directory('screenshot')
+if SCREENSHOTS_FOLDER == 'CRAWLED_SCREENSHOT/screenshot':
+    SCREENSHOTS_FOLDER = os.path.join(os.environ['AIL_HOME'], SCREENSHOTS_FOLDER)
+
+config_loader = None
+
+
+def get_hars_dir():
+    return HARS_DIR
+
+def get_items_dir():
+    return ITEMS_FOLDER
+
+def get_screenshots_dir():
+    return SCREENSHOTS_FOLDER
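A short usage sketch of the new module-level directory helpers; the item id below is hypothetical, and callers such as lib/item_basic.py resolve paths in this way:

import os
from lib import ConfigLoader  # assumes AIL_BIN is already on sys.path

item_id = 'crawled/2022/10/25/example.onion/aaaaaaaa-bbbb'    # hypothetical item id
item_path = os.path.join(ConfigLoader.get_items_dir(), item_id)
har_path = os.path.join(ConfigLoader.get_hars_dir(), f'{item_id}.json')
screenshots_dir = ConfigLoader.get_screenshots_dir()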


@@ -58,7 +58,6 @@ def get_item_stats_nb_by_date():
 def _set_item_stats_nb_by_date(date, source):
     return r_statistics.zrange(f'providers_set_{date}', )

-
 # # TODO: load ZSET IN CACHE => FAST UPDATE
 def update_item_stats_size_nb(item_id, source, size, date):
     # Add/Update in Redis
@@ -106,7 +105,7 @@ def update_module_stats(module_name, num, keyword, date):
     # check if this keyword is eligible for progression
     keyword_total_sum = 0

-    curr_value = r_statistics.hget(date, module+'-'+keyword)
+    curr_value = r_statistics.hget(date, f'{module_name}-{keyword}')
     keyword_total_sum += int(curr_value) if curr_value is not None else 0

     if r_statistics.zcard(f'top_{module_name}_set_{date}') < PIE_CHART_MAX_CARDINALITY:


@@ -22,7 +22,7 @@ def get_ail_uuid():

 # # TODO: check change paste => item
 def get_all_objects():
-    return ['domain', 'item', 'pgp', 'cryptocurrency', 'decoded', 'screenshot', 'username']
+    return ['cve', 'domain', 'item', 'pgp', 'cryptocurrency', 'decoded', 'screenshot', 'username']

 def get_object_all_subtypes(obj_type):
     if obj_type == 'cryptocurrency':


@@ -43,12 +43,13 @@ config_loader = None

 CORRELATION_TYPES_BY_OBJ = {
     "cryptocurrency" : ["domain", "item"],
-    "decoded" : ["domain", "item"],
-    "domain": ["cryptocurrency", "decoded", "item", "pgp", "username", "screenshot"],
-    "item": ["cryptocurrency", "decoded", "domain", "pgp", "username", "screenshot"],
+    "cve": ["domain", "item"],
+    "decoded": ["domain", "item"],
+    "domain": ["cve", "cryptocurrency", "decoded", "item", "pgp", "username", "screenshot"],
+    "item": ["cve", "cryptocurrency", "decoded", "domain", "pgp", "username", "screenshot"],
     "pgp" : ["domain", "item"],
-    "username" : ["domain", "item"],
-    "screenshot" : ["domain", "item"],
+    "username": ["domain", "item"],
+    "screenshot": ["domain", "item"],
 }

 def get_obj_correl_types(obj_type):
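With "cve" registered in CORRELATION_TYPES_BY_OBJ, the engine can now pair CVE objects with items and domains. A hedged sketch of the lookup, assuming get_obj_correl_types() simply returns the mapping above and that the module is imported as lib.correlations_engine, as elsewhere in AIL:

from lib import correlations_engine

correlations_engine.get_obj_correl_types('cve')     # expected: ['domain', 'item']
correlations_engine.get_obj_correl_types('domain')  # the returned list now includes 'cve'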

File diff suppressed because it is too large


@@ -8,7 +8,7 @@ sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
 import ConfigLoader

 config_loader = ConfigLoader.ConfigLoader()
-r_serv_db = config_loader.get_db_conn("Kvrocks_DB")
+r_serv_db = config_loader.get_db_conn("Kvrocks_Objects")
 config_loader = None

 def get_first_object_date(object_type, subtype, field=''):
@@ -24,15 +24,15 @@ def get_last_object_date(object_type, subtype, field=''):
     return int(last_date)

 def _set_first_object_date(object_type, subtype, date, field=''):
-    return r_serv_db.zadd('objs:first_date', f'{object_type}:{subtype}:{field}', date)
+    return r_serv_db.zadd('objs:first_date', {f'{object_type}:{subtype}:{field}': date})

 def _set_last_object_date(object_type, subtype, date, field=''):
-    return r_serv_db.zadd('objs:last_date', f'{object_type}:{subtype}:{field}', date)
+    return r_serv_db.zadd('objs:last_date', {f'{object_type}:{subtype}:{field}': float(date)})

 def update_first_object_date(object_type, subtype, date, field=''):
     first_date = get_first_object_date(object_type, subtype, field=field)
     if int(date) < first_date:
-        _set_first_object_date(object_typel, subtype, date, field=field)
+        _set_first_object_date(object_type, subtype, date, field=field)
         return date
     else:
         return first_date


@@ -7,15 +7,15 @@ import gzip
 import magic

-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
-import ConfigLoader
-import Tag
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib import ConfigLoader
+from lib import Tag

 config_loader = ConfigLoader.ConfigLoader()
-# get and sanityze PASTE DIRECTORY
-PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
-PASTES_FOLDER = os.path.join(os.path.realpath(PASTES_FOLDER), '')
 r_cache = config_loader.get_redis_conn("Redis_Cache")
 r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
 config_loader = None
@@ -28,15 +28,15 @@ def exist_item(item_id):
         return False

 def get_item_filepath(item_id):
-    filename = os.path.join(PASTES_FOLDER, item_id)
+    filename = os.path.join(ConfigLoader.get_items_dir(), item_id)
     return os.path.realpath(filename)

 def get_item_date(item_id, add_separator=False):
-    l_directory = item_id.split('/')
+    l_dir = item_id.split('/')
     if add_separator:
-        return '{}/{}/{}'.format(l_directory[-4], l_directory[-3], l_directory[-2])
+        return f'{l_dir[-4]}/{l_dir[-3]}/{l_dir[-2]}'
     else:
-        return '{}{}{}'.format(l_directory[-4], l_directory[-3], l_directory[-2])
+        return f'{l_dir[-4]}{l_dir[-3]}{l_dir[-2]}'

 def get_basename(item_id):
     return os.path.basename(item_id)
@@ -53,17 +53,17 @@ def get_item_domain(item_id):
     return item_id[19:-36]

 def get_item_content_binary(item_id):
-    item_full_path = os.path.join(PASTES_FOLDER, item_id)
+    item_full_path = os.path.join(ConfigLoader.get_items_dir(), item_id)
     try:
         with gzip.open(item_full_path, 'rb') as f:
             item_content = f.read()
     except Exception as e:
         print(e)
-        item_content = ''
+        item_content = b''
     return item_content

 def get_item_content(item_id):
-    item_full_path = os.path.join(PASTES_FOLDER, item_id)
+    item_full_path = os.path.join(ConfigLoader.get_items_dir(), item_id)
     try:
         item_content = r_cache.get(item_full_path)
     except UnicodeDecodeError:
@@ -84,7 +84,7 @@ def get_item_content(item_id):
 def get_item_mimetype(item_id):
     return magic.from_buffer(get_item_content(item_id), mime=True)

-#### TREE CHILD/FATHER ####
+# # # # TREE CHILD/FATHER # # # #
 def is_father(item_id):
     return r_serv_metadata.exists('paste_children:{}'.format(item_id))
@@ -127,6 +127,18 @@ def is_domain_root(item_id):
 def get_item_url(item_id):
     return r_serv_metadata.hget(f'paste_metadata:{item_id}', 'real_link')

+def get_item_har(item_id):
+    har = '/'.join(item_id.rsplit('/')[-4:])
+    har = f'{har}.json'
+    path = os.path.join(ConfigLoader.get_hars_dir(), har)
+    if os.path.isfile(path):
+        return har
+
+def get_item_har_content(har):
+    with open(har, 'rb') as f:
+        har_content = f.read()
+    return har_content
+
 def get_nb_children(item_id):
     return r_serv_metadata.scard('paste_children:{}'.format(item_id))
@@ -140,14 +152,14 @@ def get_item_children(item_id):
 # # TODO: handle domain last origin in domain lib
 def _delete_node(item_id):
     # only if item isn't deleted
-    #if is_crawled(item_id):
+    # if is_crawled(item_id):
     #     r_serv_metadata.hrem('paste_metadata:{}'.format(item_id), 'real_link')
     for children_id in get_item_children(item_id):
         r_serv_metadata.hdel('paste_metadata:{}'.format(children_id), 'father')
     r_serv_metadata.delete('paste_children:{}'.format(item_id))

     # delete regular
     # simple if leaf

     # delete item node
@@ -210,7 +222,7 @@ def _get_dir_source_name(directory, source_name=None, l_sources_name=set(), filt
     else:
         for src_name in l_dir:
             if len(src_name) == 4:
-                #try:
+                # try:
                 int(src_name)
                 to_add = os.path.join(source_name)
                 # filter sources, remove first directory
@@ -218,7 +230,7 @@ def _get_dir_source_name(directory, source_name=None, l_sources_name=set(), filt
                 to_add = to_add.replace('archive/', '').replace('alerts/', '')
l_sources_name.add(to_add) l_sources_name.add(to_add)
return l_sources_name return l_sources_name
#except: # except:
# pass # pass
if source_name: if source_name:
src_name = os.path.join(source_name, src_name) src_name = os.path.join(source_name, src_name)
@ -227,7 +239,7 @@ def _get_dir_source_name(directory, source_name=None, l_sources_name=set(), filt
def get_all_items_sources(filter_dir=False, r_list=False): def get_all_items_sources(filter_dir=False, r_list=False):
res = _get_dir_source_name(PASTES_FOLDER, filter_dir=filter_dir) res = _get_dir_source_name(ConfigLoader.get_items_dir(), filter_dir=filter_dir)
if res: if res:
if r_list: if r_list:
res = list(res) res = list(res)
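A minimal usage sketch of the path-based helpers above, assuming a configured AIL environment (AIL_BIN set, core config present); the item id is hypothetical and only illustrates the source/YYYY/MM/DD/filename layout:

import os
import sys

sys.path.append(os.environ['AIL_BIN'])
from lib import item_basic

item_id = 'submitted/2022/10/25/submitted_2b9e1a.gz'            # hypothetical item id
print(item_basic.get_item_date(item_id))                        # 20221025
print(item_basic.get_item_date(item_id, add_separator=True))    # 2022/10/25
print(item_basic.get_basename(item_id))                         # submitted_2b9e1a.gz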

View file

@ -52,9 +52,9 @@ class CryptoCurrency(AbstractSubtypeObject):
def get_link(self, flask_context=False): def get_link(self, flask_context=False):
if flask_context: if flask_context:
url = url_for('correlation.show_correlation', object_type=self.type, type_id=self.subtype, correlation_id=self.id) url = url_for('correlation.show_correlation', type=self.type, subtype=self.subtype, id=self.id)
else: else:
url = f'{baseurl}/correlation/show_correlation?object_type={self.type}&type_id={self.subtype}&correlation_id={self.id}' url = f'{baseurl}/correlation/show?type={self.type}&subtype={self.subtype}&id={self.id}'
return url return url
def get_svg_icon(self): def get_svg_icon(self):
@ -89,7 +89,11 @@ class CryptoCurrency(AbstractSubtypeObject):
return obj return obj
def get_meta(self, options=set()): def get_meta(self, options=set()):
return self._get_meta() meta = self._get_meta()
meta['id'] = self.id
meta['subtype'] = self.subtype
meta['tags'] = self.get_tags()
return meta

bin/lib/objects/Cves.py (new executable file, 99 lines added)
View file

@ -0,0 +1,99 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
from flask import url_for
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.ConfigLoader import ConfigLoader
from lib.objects.abstract_daterange_object import AbstractDaterangeObject
from packages import Date
config_loader = ConfigLoader()
r_objects = config_loader.get_db_conn("Kvrocks_Objects")
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
config_loader = None
################################################################################
################################################################################
################################################################################
# # TODO: COMPLETE CLASS
class Cve(AbstractDaterangeObject):
"""
AIL Cve Object.
"""
def __init__(self, id):
super(Cve, self).__init__('cve', id)
# def get_ail_2_ail_payload(self):
# payload = {'raw': self.get_gzip_content(b64=True),
# 'compress': 'gzip'}
# return payload
# # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
def delete(self):
# # TODO:
pass
def get_link(self, flask_context=False):
if flask_context:
url = url_for('correlation.show_correlation', type=self.type, id=self.id)
else:
url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}'
return url
# TODO # CHANGE COLOR
def get_svg_icon(self):
return {'style': 'fas', 'icon': '\uf188', 'color': '#1E88E5', 'radius': 5}
# TODO # TODO # TODO # TODO # TODO # TODO # TODO # TODO
def get_misp_object(self):
pass
def get_meta(self, options=set()):
meta = self._get_meta(options=options)
meta['id'] = self.id
meta['subtype'] = self.subtype
meta['tags'] = self.get_tags()
return meta
def add(self, date, item_id):
self._add(date, item_id)
# TODO # TODO # TODO # TODO # TODO # TODO # TODO # TODO # TODO # TODO
def get_all_cves():
cves = []
return cves
def get_cves_by_date(date):
# return r_objects.zrange(f'cve:date:{date}', 0, -1)
return set(r_objects.hkeys(f'cve:date:{date}'))
def get_cves_by_daterange(date_from, date_to):
cves = set()
for date in Date.substract_date(date_from, date_to):
cves |= get_cves_by_date(date)
return cves
def get_cves_meta(cves_id, options=set()):
dict_cve = {}
for cve_id in cves_id:
cve = Cve(cve_id)
dict_cve[cve_id] = cve.get_meta(options=options)
return dict_cve
def api_get_cves_meta_by_daterange(date_from, date_to):
date = Date.sanitise_date_range(date_from, date_to)
return get_cves_meta(get_cves_by_daterange(date['date_from'], date['date_to']), options=['sparkline'])
# if __name__ == '__main__':
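A minimal usage sketch of the new Cve object, assuming AIL_BIN is set, the core config is present and the Kvrocks_Objects backend is reachable; the CVE id and item id below are hypothetical:

import os
import sys

sys.path.append(os.environ['AIL_BIN'])
from lib.objects.Cves import Cve, get_cves_meta

cve = Cve('CVE-2022-1234')                                       # hypothetical CVE id
cve.add('20221025', 'submitted/2022/10/25/submitted_2b9e1a.gz')  # hypothetical item id
print(cve.get_meta(options={'sparkline'}))
print(get_cves_meta(['CVE-2022-1234'], options={'sparkline'}))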

View file

@ -69,9 +69,9 @@ class Decoded(AbstractObject):
def get_link(self, flask_context=False): def get_link(self, flask_context=False):
if flask_context: if flask_context:
url = url_for('correlation.show_correlation', object_type="decoded", correlation_id=self.id) url = url_for('correlation.show_correlation', type="decoded", id=self.id)
else: else:
url = f'{baseurl}/correlation/show_correlation?object_type={self.type}&correlation_id={self.id}' url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}'
return url return url
def get_svg_icon(self): def get_svg_icon(self):
@ -90,7 +90,7 @@ class Decoded(AbstractObject):
return {'style': 'fas', 'icon': icon, 'color': '#88CCEE', 'radius':5} return {'style': 'fas', 'icon': icon, 'color': '#88CCEE', 'radius':5}
''' '''
Return the estimed type of a given decoded item. Return the estimated type of a given decoded item.
:param sha1_string: sha1_string :param sha1_string: sha1_string
''' '''
@ -170,8 +170,11 @@ class Decoded(AbstractObject):
if date > last_seen: if date > last_seen:
self.set_last_seen(date) self.set_last_seen(date)
def get_meta(self): def get_meta(self, options=set()):
pass meta = {'id': self.id,
'subtype': self.subtype,
'tags': self.get_tags()}
return meta
def get_meta_vt(self): def get_meta_vt(self):
meta = {} meta = {}
@ -209,7 +212,7 @@ class Decoded(AbstractObject):
def is_seen_this_day(self, date): def is_seen_this_day(self, date):
for decoder in get_decoders_names(): for decoder in get_decoders_names():
if r_metadata.zscore(f'{decoder_name}_date:{date}', self.id): if r_metadata.zscore(f'{decoder}_date:{date}', self.id):
return True return True
return False return False
@ -324,6 +327,9 @@ class Decoded(AbstractObject):
####################################################################################### #######################################################################################
####################################################################################### #######################################################################################
def is_vt_enabled(self):
return VT_ENABLED
def set_vt_report(self, report): def set_vt_report(self, report):
r_metadata.hset(f'metadata_hash:{self.id}', 'vt_report', report) r_metadata.hset(f'metadata_hash:{self.id}', 'vt_report', report)
@ -354,7 +360,6 @@ class Decoded(AbstractObject):
print(report) print(report)
return report return report
elif response.status_code == 403: elif response.status_code == 403:
Flask_config.vt_enabled = False
return 'Virustotal key is incorrect (e.g. for public API not for virustotal intelligence), authentication failed' return 'Virustotal key is incorrect (e.g. for public API not for virustotal intelligence), authentication failed'
elif response.status_code == 204: elif response.status_code == 204:
return 'Rate Limited' return 'Rate Limited'

View file

@ -4,18 +4,31 @@
import os import os
import sys import sys
import time import time
import zipfile
from datetime import datetime
from flask import url_for from flask import url_for
from io import BytesIO
from pymisp import MISPObject
sys.path.append(os.environ['AIL_BIN']) sys.path.append(os.environ['AIL_BIN'])
from lib.ConfigLoader import ConfigLoader ##################################
# Import Project packages
##################################
from lib import ConfigLoader
from lib.objects.abstract_object import AbstractObject from lib.objects.abstract_object import AbstractObject
from lib.item_basic import get_item_children, get_item_date, get_item_url from lib.item_basic import get_item_children, get_item_date, get_item_url, get_item_har
from lib import data_retention_engine from lib import data_retention_engine
config_loader = ConfigLoader() from packages import Date
r_onion = config_loader.get_redis_conn("ARDB_Onion")
config_loader = ConfigLoader.ConfigLoader()
r_crawler = config_loader.get_db_conn("Kvrocks_Crawler")
r_metadata = config_loader.get_redis_conn("ARDB_Metadata") ######################################
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
config_loader = None config_loader = None
@ -42,36 +55,37 @@ class Domain(AbstractObject):
if str(self.id).endswith('.onion'): if str(self.id).endswith('.onion'):
return 'onion' return 'onion'
else: else:
return 'regular' return 'web'
def exists(self): def exists(self):
return r_onion.exists(f'{self.domain_type}_metadata:{self.id}') return r_crawler.exists(f'domain:meta:{self.id}')
def get_first_seen(self, r_int=False, separator=True): def get_first_seen(self, r_int=False, separator=True):
first_seen = r_onion.hget(f'{self.domain_type}_metadata:{self.id}', 'first_seen') first_seen = r_crawler.hget(f'domain:meta:{self.id}', 'first_seen')
if first_seen: if first_seen:
if separator: if r_int:
first_seen = f'{first_seen[0:4]}/{first_seen[4:6]}/{first_seen[6:8]}'
elif r_int==True:
first_seen = int(first_seen) first_seen = int(first_seen)
elif separator:
first_seen = f'{first_seen[0:4]}/{first_seen[4:6]}/{first_seen[6:8]}'
return first_seen return first_seen
def get_last_check(self, r_int=False, separator=True): def get_last_check(self, r_int=False, separator=True):
last_check = r_onion.hget(f'{self.domain_type}_metadata:{self.id}', 'last_check') last_check = r_crawler.hget(f'domain:meta:{self.id}', 'last_check')
if last_check is not None: if last_check is not None:
if separator: if r_int:
last_check = f'{last_check[0:4]}/{last_check[4:6]}/{last_check[6:8]}'
elif r_format=="int":
last_check = int(last_check) last_check = int(last_check)
elif separator:
last_check = f'{last_check[0:4]}/{last_check[4:6]}/{last_check[6:8]}'
return last_check return last_check
def _set_first_seen(self, date): def _set_first_seen(self, date):
r_onion.hset(f'{self.domain_type}_metadata:{self.id}', 'first_seen', date) r_crawler.hset(f'domain:meta:{self.id}', 'first_seen', date)
def _set_last_check(self, date): def _set_last_check(self, date):
r_onion.hset(f'{self.domain_type}_metadata:{self.id}', 'last_check', date) r_crawler.hset(f'domain:meta:{self.id}', 'last_check', date)
def update_daterange(self, date): def update_daterange(self, date):
date = int(date)
first_seen = self.get_first_seen(r_int=True) first_seen = self.get_first_seen(r_int=True)
last_check = self.get_last_check(r_int=True) last_check = self.get_last_check(r_int=True)
if not first_seen: if not first_seen:
@ -82,65 +96,101 @@ class Domain(AbstractObject):
elif int(last_check) < date: elif int(last_check) < date:
self._set_last_check(date) self._set_last_check(date)
def get_last_origin(self): def get_last_origin(self, obj=False):
return r_onion.hget(f'{self.domain_type}_metadata:{self.id}', 'paste_parent') origin = {'item': r_crawler.hget(f'domain:meta:{self.id}', 'last_origin')}
if obj and origin['item']:
if origin['item'] != 'manual' and origin['item'] != 'auto':
item_id = origin['item']
origin['domain'] = r_metadata.hget(f'paste_metadata:{item_id}', 'domain')
origin['url'] = r_metadata.hget(f'paste_metadata:{item_id}', 'url')
return origin
def set_last_origin(self, origin_id): def set_last_origin(self, origin_id):
r_onion.hset(f'{self.domain_type}_metadata:{self.id}', 'paste_parent', origin_id) r_crawler.hset(f'domain:meta:{self.id}', 'last_origin', origin_id)
def is_up(self, ports=[]): def is_up(self):
if not ports: res = r_crawler.zrevrange(f'domain:history:{self.id}', 0, 0, withscores=True)
ports = self.get_ports() if res:
for port in ports: item_core, epoch = res[0]
res = r_onion.zrevrange(f'crawler_history_{self.domain_type}:{self.id}:{port}', 0, 0, withscores=True) try:
if res: int(item_core)
item_core, epoch = res[0] except ValueError:
try: return True
epoch = int(item_core)
except:
print('True')
return True
print('False')
return False return False
def was_up(self): def was_up(self):
return r_onion.hexists(f'{self.domain_type}_metadata:{self.id}', 'ports') return r_crawler.exists(f'domain:history:{self.id}')
def is_up_by_month(self, date_month):
# FIXME DIRTY PATCH
if r_crawler.exists(f'month_{self.domain_type}_up:{date_month}'):
return r_crawler.sismember(f'month_{self.domain_type}_up:{date_month}', self.get_id())
else:
return False
def is_up_this_month(self):
date_month = datetime.now().strftime("%Y%m")
return self.is_up_by_month(date_month)
def is_down_by_day(self, date):
# FIXME DIRTY PATCH
if r_crawler.exists(f'{self.domain_type}_down:{date}'):
return r_crawler.sismember(f'{self.domain_type}_down:{date}', self.id)
else:
return False
def is_down_today(self):
date = datetime.now().strftime("%Y%m%d")
return self.is_down_by_day(date)
def is_up_by_epoch(self, epoch):
history = r_crawler.zrevrangebyscore(f'domain:history:{self.id}', int(epoch), int(epoch))
if not history:
return False
else:
history = history[0]
try:
int(history)
return False
except ValueError:
return True
def get_ports(self, r_set=False): def get_ports(self, r_set=False):
l_ports = r_onion.hget(f'{self.domain_type}_metadata:{self.id}', 'ports') l_ports = r_crawler.hget(f'domain:meta:{self.id}', 'ports')
if l_ports: if l_ports:
l_ports = l_ports.split(";") l_ports = l_ports.split(";")
if r_set: else:
return set(l_ports) l_ports = []
else: if r_set:
return l_ports return set(l_ports)
return [] else:
return l_ports
def _set_ports(self, ports): def _set_ports(self, ports):
ports = ';'.join(ports) ports = ';'.join(str(p) for p in ports)
r_onion.hset(f'{self.domain_type}_metadata:{self.id}', 'ports', ports) r_crawler.hset(f'domain:meta:{self.id}', 'ports', ports)
def add_ports(self, port): def add_ports(self, port):
ports = self.get_ports(r_set=True) ports = self.get_ports(r_set=True)
ports.add(port) ports.add(port)
self._set_ports(ports) self._set_ports(ports)
def get_history_by_port(self, port, status=False, root=False): def get_history(self, status=False, root=False):
''' """
Return . Return .
:return: :return:
:rtype: list of tuple (item_core, epoch) :rtype: list of tuple (item_core, epoch)
''' """
history_tuple = r_onion.zrange(f'crawler_history_{self.domain_type}:{self.id}:{port}', 0, -1, withscores=True) history_tuple = r_crawler.zrange(f'domain:history:{self.id}', 0, -1, withscores=True)
history = [] history = []
for root_id, epoch in history_tuple: for root_id, epoch in history_tuple:
dict_history = {} dict_history = {}
epoch = int(epoch) # force int epoch = int(epoch) # force int
dict_history["epoch"] = epoch dict_history["epoch"] = epoch
dict_history["date"] = time.strftime('%Y/%m/%d - %H:%M.%S', time.gmtime(epoch_val)) dict_history["date"] = time.strftime('%Y/%m/%d - %H:%M.%S', time.gmtime(epoch))
try: try:
int(root_item) int(root_id)
if status: if status:
dict_history['status'] = False dict_history['status'] = False
except ValueError: except ValueError:
@ -152,30 +202,31 @@ class Domain(AbstractObject):
return history return history
def get_languages(self): def get_languages(self):
return r_onion.smembers(f'domain:language:{self.id}') return r_crawler.smembers(f'domain:language:{self.id}')
def get_meta_keys(self): def get_meta_keys(self):
return ['type', 'first_seen', 'last_check', 'last_origin', 'ports', 'status', 'tags', 'languages'] return ['type', 'first_seen', 'last_check', 'last_origin', 'ports', 'status', 'tags', 'languages']
# options: set of optional meta fields # options: set of optional meta fields
def get_meta(self, options=set()): def get_meta(self, options=set()):
meta = {} meta = {'type': self.domain_type,
meta['type'] = self.domain_type 'id': self.id,
meta['first_seen'] = self.get_first_seen() 'domain': self.id, # TODO Remove me -> Fix templates
meta['last_check'] = self.get_last_check() 'first_seen': self.get_first_seen(),
meta['tags'] = self.get_tags(r_list=True) 'last_check': self.get_last_check(),
meta['ports'] = self.get_ports() 'tags': self.get_tags(r_list=True),
meta['status'] = self.is_up(ports=meta['ports']) 'status': self.is_up()
}
# meta['ports'] = self.get_ports()
if 'last_origin' in options: if 'last_origin' in options:
meta['last_origin'] = self.get_last_origin() meta['last_origin'] = self.get_last_origin(obj=True)
#meta['is_tags_safe'] = ################################## # meta['is_tags_safe'] = ##################################
if 'languages' in options: if 'languages' in options:
meta['languages'] = self.get_languages() meta['languages'] = self.get_languages()
#meta['screenshot'] = # meta['screenshot'] =
return meta return meta
# # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\ # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
def delete(self): def delete(self):
# # TODO: # # TODO:
@ -196,12 +247,12 @@ class Domain(AbstractObject):
else: else:
style = 'fab' style = 'fab'
icon = '\uf13b' icon = '\uf13b'
return {'style': style, 'icon': icon, 'color':color, 'radius':5} return {'style': style, 'icon': icon, 'color': color, 'radius': 5}
def is_crawled_item(self, item_id): def is_crawled_item(self, item_id):
domain_lenght = len(self.id) domain_length = len(self.id)
if len(item_id) > (domain_lenght+48): if len(item_id) > (domain_length+48):
if item_id[-36-domain_lenght:-36] == self.id: if item_id[-36-domain_length:-36] == self.id:
return True return True
return False return False
@ -215,169 +266,231 @@ class Domain(AbstractObject):
for item_id in get_item_children(root_id): for item_id in get_item_children(root_id):
if self.is_crawled_item(item_id): if self.is_crawled_item(item_id):
crawled_items.append(item_id) crawled_items.append(item_id)
crawled_items.extend(self.get_crawled_items_children(self.id, item_id)) crawled_items.extend(self.get_crawled_items_children(item_id))
return crawled_items return crawled_items
def get_all_urls(self, date=False): ## parameters to add first_seen/last_seen ?????????????????????????????? def get_last_item_root(self):
root_item = r_crawler.zrevrange(f'domain:history:{self.id}', 0, 0, withscores=True)
if not root_item:
return None
root_item = root_item[0][0]
try:
int(root_item)
return None
except ValueError:
pass
return root_item
def get_item_root_by_epoch(self, epoch):
root_item = r_crawler.zrevrangebyscore(f'domain:history:{self.id}', int(epoch), int(epoch), withscores=True)
if not root_item:
return None
root_item = root_item[0][0]
try:
int(root_item)
return None
except ValueError:
pass
return root_item
def get_crawled_items_by_epoch(self, epoch=None):
if epoch:
root_item = self.get_item_root_by_epoch(epoch)
else:
root_item = self.get_last_item_root()
if root_item:
return self.get_crawled_items(root_item)
# TODO FIXME
def get_all_urls(self, date=False, epoch=None):
if date: if date:
urls = {} urls = {}
else: else:
urls = set() urls = set()
for port in self.get_ports():
for history in self.get_history_by_port(port, root=True): items = self.get_crawled_items_by_epoch(epoch=epoch)
if history.get('root'): if items:
for item_id in self.get_crawled_items(history.get('root')): for item_id in items:
url = get_item_url(item_id) url = get_item_url(item_id)
if url: if url:
if date: if date:
item_date = int(get_item_date(item_id)) item_date = int(get_item_date(item_id))
if url not in urls: if url not in urls:
urls[url] = {'first_seen': item_date,'last_seen': item_date} urls[url] = {'first_seen': item_date, 'last_seen': item_date}
else: # update first_seen / last_seen else: # update first_seen / last_seen
if item_date < urls[url]['first_seen']: if item_date < urls[url]['first_seen']:
all_url[url]['first_seen'] = item_date urls[url]['first_seen'] = item_date
if item_date > urls[url]['last_seen']: if item_date > urls[url]['last_seen']:
all_url[url]['last_seen'] = item_date urls[url]['last_seen'] = item_date
else: else:
urls.add(url) urls.add(url)
return urls return urls
def get_misp_object(self): def get_misp_object(self, epoch=None):
# create domain-ip obj # create domain-ip obj
obj_attrs = [] obj_attrs = []
obj = MISPObject('domain-crawled', standalone=True) obj = MISPObject('domain-crawled', standalone=True)
obj.first_seen = self.get_first_seen() obj.first_seen = self.get_first_seen()
obj.last_seen = self.get_last_check() obj.last_seen = self.get_last_check()
obj_attrs.append( obj.add_attribute('domain', value=self.id) ) obj_attrs.append(obj.add_attribute('domain', value=self.id))
urls = self.get_all_urls(date=True) urls = self.get_all_urls(date=True, epoch=epoch)
for url in urls: for url in urls:
attribute = obj.add_attribute('url', value=url) attribute = obj.add_attribute('url', value=url)
attribute.first_seen = str(urls[url]['first_seen']) attribute.first_seen = str(urls[url]['first_seen'])
attribute.last_seen = str(urls[url]['last_seen']) attribute.last_seen = str(urls[url]['last_seen'])
obj_attrs.append( attribute ) obj_attrs.append(attribute)
for obj_attr in obj_attrs: for obj_attr in obj_attrs:
for tag in self.get_tags(): for tag in self.get_tags():
obj_attr.add_tag(tag) obj_attr.add_tag(tag)
return obj return obj
# TODO ADD MISP Event Export
# TODO DOWN DOMAIN
def get_download_zip(self, epoch=None):
hars_dir = ConfigLoader.get_hars_dir()
items_dir = ConfigLoader.get_items_dir()
screenshots_dir = ConfigLoader.get_screenshots_dir()
items = self.get_crawled_items_by_epoch(epoch=epoch)
if not items:
return None
map_file = 'ITEM ID : URL'
# zip buffer
zip_buffer = BytesIO()
with zipfile.ZipFile(zip_buffer, "a") as zf:
for item_id in items:
url = get_item_url(item_id)
basename = os.path.basename(item_id)
# Item
_write_in_zip_buffer(zf, os.path.join(items_dir, item_id), f'{basename}.gz')
map_file = map_file + f'\n{item_id} : {url}'
# HAR
har = get_item_har(item_id)
if har:
print(har)
_write_in_zip_buffer(zf, os.path.join(hars_dir, har), f'{basename}.json')
# Screenshot
screenshot = self._get_external_correlation('item', '', item_id, 'screenshot')
if screenshot:
screenshot = screenshot['screenshot'].pop()[1:]
screenshot = os.path.join(screenshot[0:2], screenshot[2:4], screenshot[4:6], screenshot[6:8],
screenshot[8:10], screenshot[10:12], screenshot[12:])
_write_in_zip_buffer(zf, os.path.join(screenshots_dir, f'{screenshot}.png'), f'{basename}.png')
zf.writestr('_URL_MAP_', BytesIO(map_file.encode()).getvalue())
misp_object = self.get_misp_object().to_json().encode()
zf.writestr('misp.json', BytesIO(misp_object).getvalue())
zip_buffer.seek(0)
return zip_buffer
def add_language(self, language): def add_language(self, language):
r_onion.sadd('all_domains_languages', language) r_crawler.sadd('all_domains_languages', language)
r_onion.sadd(f'all_domains_languages:{self.domain_type}', language) r_crawler.sadd(f'all_domains_languages:{self.domain_type}', language)
r_onion.sadd(f'language:domains:{self.domain_type}:{language}', self.id) r_crawler.sadd(f'language:domains:{self.domain_type}:{language}', self.id)
r_onion.sadd(f'domain:language:{self.id}', language) r_crawler.sadd(f'domain:language:{self.id}', language)
############################################################################ ############################################################################
############################################################################ ############################################################################
def create(self, first_seen, last_check, ports, status, tags, languages): def create(self, first_seen, last_check, status, tags, languages):
r_onion.hset(f'{self.domain_type}_metadata:{self.id}', 'first_seen', first_seen) r_crawler.hset(f'domain:meta:{self.id}', 'first_seen', first_seen)
r_onion.hset(f'{self.domain_type}_metadata:{self.id}', 'last_check', last_check) r_crawler.hset(f'domain:meta:{self.id}', 'last_check', last_check)
for language in languages: for language in languages:
self.add_language(language) self.add_language(language)
#### CRAWLER ####
# add root_item to history # add root_item to history
# if domain down -> root_item = epoch # if domain down -> root_item = epoch
def _add_history_root_item(self, root_item, epoch, port): def _add_history_root_item(self, root_item, epoch):
# Create/Update crawler history # Create/Update crawler history
r_onion.zadd(f'crawler_history_{self.domain_type}:{self.id}:{port}', epoch, int(root_item)) r_crawler.zadd(f'domain:history:{self.id}', {root_item: epoch})
# if domain down -> root_item = epoch # if domain down -> root_item = epoch
def add_history(self, epoch, port, root_item=None, date=None): def add_history(self, epoch, root_item=None, date=None):
if not date: if not date:
date = time.strftime('%Y%m%d', time.gmtime(epoch)) date = time.strftime('%Y%m%d', time.gmtime(epoch))
try: try:
int(root_item) root_item = int(root_item)
except ValueError: status = False
root_item = None except (ValueError, TypeError):
status = True
data_retention_engine.update_object_date('domain', self.domain_type, date) data_retention_engine.update_object_date('domain', self.domain_type, date)
update_first_object_date(date, self.domain_type)
update_last_object_date(date, self.domain_type)
# UP # UP
if root_item: if status:
r_onion.srem(f'full_{self.domain_type}_down', self.id) r_crawler.srem(f'full_{self.domain_type}_down', self.id)
r_onion.sadd(f'full_{self.domain_type}_up', self.id) r_crawler.sadd(f'full_{self.domain_type}_up', self.id)
r_onion.sadd(f'{self.domain_type}_up:{date}', self.id) # # TODO: -> store first day r_crawler.sadd(f'{self.domain_type}_up:{date}', self.id) # # TODO: -> store first day
r_onion.sadd(f'month_{self.domain_type}_up:{date[0:6]}', self.id) # # TODO: -> store first month r_crawler.sadd(f'month_{self.domain_type}_up:{date[0:6]}', self.id) # # TODO: -> store first month
self._add_history_root_item(root_item, epoch, port) self._add_history_root_item(root_item, epoch)
else: else:
if port: r_crawler.sadd(f'{self.domain_type}_down:{date}', self.id)
r_onion.sadd(f'{self.domain_type}_down:{date}', self.id) # # TODO: -> store first month if self.was_up():
self._add_history_root_item(epoch, epoch, port) self._add_history_root_item(epoch, epoch)
else: else:
r_onion.sadd(f'{self.domain_type}_down:{date}', self.id) r_crawler.sadd(f'full_{self.domain_type}_down', self.id)
if not self.was_up():
r_onion.sadd(f'full_{self.domain_type}_down', self.id)
def add_crawled_item(self, url, port, item_id, item_father): # TODO RENAME PASTE_METADATA
def add_crawled_item(self, url, item_id, item_father):
r_metadata.hset(f'paste_metadata:{item_id}', 'father', item_father) r_metadata.hset(f'paste_metadata:{item_id}', 'father', item_father)
r_metadata.hset(f'paste_metadata:{item_id}', 'domain', f'{self.id}:{port}') r_metadata.hset(f'paste_metadata:{item_id}', 'domain', self.id) # FIXME REMOVE ME -> extract for real link ?????????
r_metadata.hset(f'paste_metadata:{item_id}', 'real_link', url) r_metadata.hset(f'paste_metadata:{item_id}', 'real_link', url)
# add this item_id to his father # add this item_id to his father
r_metadata.sadd(f'paste_children:{item_father}', item_id) r_metadata.sadd(f'paste_children:{item_father}', item_id)
##-- CRAWLER --##
############################################################################
# In memory zipfile
def _write_in_zip_buffer(zf, path, filename):
with open(path, "rb") as f:
content = f.read()
zf.writestr( filename, BytesIO(content).getvalue())
############################################################################ ############################################################################
############################################################################
def get_all_domains_types(): def get_all_domains_types():
return ['onion', 'regular'] # i2p return ['onion', 'web'] # i2p
def get_all_domains_languages(): def get_all_domains_languages():
return r_onion.smembers('all_domains_languages') return r_crawler.smembers('all_domains_languages')
def get_domains_up_by_type(domain_type): def get_domains_up_by_type(domain_type):
return r_onion.smembers(f'full_{domain_type}_up') return r_crawler.smembers(f'full_{domain_type}_up')
def get_domains_down_by_type(domain_type): def get_domains_down_by_type(domain_type):
return r_onion.smembers(f'full_{domain_type}_down') return r_crawler.smembers(f'full_{domain_type}_down')
def get_first_object_date(subtype, field=''): def get_domains_up_by_date(date, domain_type):
first_date = r_onion.zscore('objs:first_date', f'domain:{subtype}:{field}') return r_crawler.smembers(f'{domain_type}_up:{date}')
if not first_date:
first_date = 99999999
return int(first_date)
def get_last_object_date(subtype, field=''): def get_domains_down_by_date(date, domain_type):
last_date = r_onion.zscore('objs:last_date', f'domain:{subtype}:{field}') return r_crawler.smembers(f'{domain_type}_down:{date}')
if not last_date:
last_date = 0
return int(last_date)
def _set_first_object_date(date, subtype, field=''): def get_domains_by_daterange(date_from, date_to, domain_type, up=True, down=False):
return r_onion.zadd('objs:first_date', f'domain:{subtype}:{field}', date) date_domains = {}
for date in Date.substract_date(date_from, date_to):
def _set_last_object_date(date, subtype, field=''): domains = []
return r_onion.zadd('objs:last_date', f'domain:{subtype}:{field}', date) if up:
domains.extend(get_domains_up_by_date(date, domain_type))
def update_first_object_date(date, subtype, field=''): if down:
first_date = get_first_object_date(subtype, field=field) domains.extend(get_domains_down_by_date(date, domain_type))
if int(date) < first_date: if domains:
_set_first_object_date(date, subtype, field=field) date_domains[date] = list(domains)
return date return date_domains
else:
return first_date
def update_last_object_date(date, subtype, field=''):
last_date = get_last_object_date(subtype, field=field)
if int(date) > last_date:
_set_last_object_date(date, subtype, field=field)
return date
else:
return last_date
def get_domains_meta(domains):
metas = []
for domain in domains:
dom = Domain(domain)
metas.append(dom.get_meta())
return metas
################################################################################ ################################################################################
################################################################################ ################################################################################
#if __name__ == '__main__': if __name__ == '__main__':
dom = Domain('')
dom.get_download_zip()
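The history refactor above replaces the per-port crawler_history_* keys with a single domain:history:<id> sorted set: the member is the root item id when the crawl succeeded and the epoch itself when the domain was down, which is why is_up() and is_up_by_epoch() simply test whether the member parses as an integer. A self-contained sketch of that encoding, assuming a local Redis/Kvrocks instance and using made-up ids:

import time
import redis

r = redis.Redis(host='localhost', port=6379, decode_responses=True)  # illustrative connection
epoch = int(time.time())

# crawl succeeded: member = root item id, score = crawl epoch
r.zadd('domain:history:example2abcdef.onion', {'crawled/2022/10/25/root-item-id': epoch})
# crawl failed one hour later: member = the epoch itself
r.zadd('domain:history:example2abcdef.onion', {str(epoch + 3600): epoch + 3600})

member, last_epoch = r.zrevrange('domain:history:example2abcdef.onion', 0, 0, withscores=True)[0]
try:
    int(member)
    print('down at', int(last_epoch))
except ValueError:
    print('up, last root item:', member)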

View file

@ -3,10 +3,10 @@
import base64 import base64
import gzip import gzip
import magic
import os import os
import re import re
import sys import sys
import redis
import cld3 import cld3
import html2text import html2text
@ -233,8 +233,9 @@ class Item(AbstractObject):
return self.id[19:-36] return self.id[19:-36]
def get_screenshot(self): def get_screenshot(self):
s = r_serv_metadata.hget(f'paste_metadata:{self.id}', 'screenshot') s = self.get_correlation('screenshot')
if s: if s:
s = s['screenshot'].pop()[1:]
return os.path.join(s[0:2], s[2:4], s[4:6], s[6:8], s[8:10], s[10:12], s[12:]) return os.path.join(s[0:2], s[2:4], s[4:6], s[6:8], s[8:10], s[10:12], s[12:])
def get_har(self): def get_har(self):
@ -315,6 +316,11 @@ class Item(AbstractObject):
all_languages.append(lang) all_languages.append(lang)
return all_languages return all_languages
def get_mimetype(self, content=None):
if not content:
content = self.get_content()
return magic.from_buffer(content, mime=True)
############################################################################ ############################################################################
############################################################################ ############################################################################

View file

@ -41,14 +41,18 @@ class Pgp(AbstractSubtypeObject):
pass pass
# # TODO: # # TODO:
def get_meta(self): def get_meta(self, options=set()):
return None meta = self._get_meta()
meta['id'] = self.id
meta['subtype'] = self.subtype
meta['tags'] = self.get_tags()
return meta
def get_link(self, flask_context=False): def get_link(self, flask_context=False):
if flask_context: if flask_context:
url = url_for('correlation.show_correlation', object_type=self.type, type_id=self.subtype, correlation_id=self.id) url = url_for('correlation.show_correlation', type=self.type, subtype=self.subtype, id=self.id)
else: else:
url = f'{baseurl}/correlation/show_correlation?object_type={self.type}&type_id={self.subtype}&correlation_id={self.id}' url = f'{baseurl}/correlation/show?type={self.type}&subtype={self.subtype}&id={self.id}'
return url return url
def get_svg_icon(self): def get_svg_icon(self):

View file

@ -1,14 +1,18 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
import base64
import os import os
import sys import sys
from hashlib import sha256
from io import BytesIO from io import BytesIO
from flask import url_for from flask import url_for
sys.path.append(os.environ['AIL_BIN']) sys.path.append(os.environ['AIL_BIN'])
#from lib import Tag ##################################
# Import Project packages
##################################
from lib.ConfigLoader import ConfigLoader from lib.ConfigLoader import ConfigLoader
from lib.objects.abstract_object import AbstractObject from lib.objects.abstract_object import AbstractObject
@ -17,14 +21,15 @@ r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
SCREENSHOT_FOLDER = config_loader.get_files_directory('screenshot') SCREENSHOT_FOLDER = config_loader.get_files_directory('screenshot')
config_loader = None config_loader = None
class Screenshot(AbstractObject): class Screenshot(AbstractObject):
""" """
AIL Screenshot Object. (strings) AIL Screenshot Object. (strings)
""" """
# ID = SHA256 # ID = SHA256
def __init__(self, id): def __init__(self, screenshot_id):
super(Screenshot, self).__init__('screenshot', id) super(Screenshot, self).__init__('screenshot', screenshot_id)
# def get_ail_2_ail_payload(self): # def get_ail_2_ail_payload(self):
# payload = {'raw': self.get_gzip_content(b64=True), # payload = {'raw': self.get_gzip_content(b64=True),
@ -41,13 +46,13 @@ class Screenshot(AbstractObject):
def get_link(self, flask_context=False): def get_link(self, flask_context=False):
if flask_context: if flask_context:
url = url_for('correlation.show_correlation', object_type=self.type, correlation_id=self.id) url = url_for('correlation.show_correlation', type=self.type, id=self.id)
else: else:
url = f'{baseurl}/correlation/show_correlation?object_type={self.type}&correlation_id={self.id}' url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}'
return url return url
def get_svg_icon(self): def get_svg_icon(self):
return {'style': 'fas', 'icon': '\uf03e', 'color': '#E1F5DF', 'radius':5} return {'style': 'fas', 'icon': '\uf03e', 'color': '#E1F5DF', 'radius': 5}
def get_rel_path(self, add_extension=False): def get_rel_path(self, add_extension=False):
rel_path = os.path.join(self.id[0:2], self.id[2:4], self.id[4:6], self.id[6:8], self.id[8:10], self.id[10:12], self.id[12:]) rel_path = os.path.join(self.id[0:2], self.id[2:4], self.id[4:6], self.id[6:8], self.id[8:10], self.id[10:12], self.id[12:])
@ -77,12 +82,11 @@ class Screenshot(AbstractObject):
return obj return obj
def get_meta(self, options=set()): def get_meta(self, options=set()):
meta = {} meta = {'id': self.id}
meta['id'] = self.id meta['img'] = get_screenshot_rel_path(self.id) ######### # TODO: Rename ME ??????
meta['img'] = get_screenshot_rel_path(self.id) ######### # TODO: Rename ME ??????
meta['tags'] = self.get_tags(r_list=True) meta['tags'] = self.get_tags(r_list=True)
# TODO: ADD IN ABSTRACT CLASS # TODO: ADD IN ABSTRACT CLASS
#meta['is_tags_safe'] = Tag.is_tags_safe(metadata_dict['tags']) ################## # TODO: ADD IN ABSZTRACT CLASS #meta['is_tags_safe'] = Tag.is_tags_safe(metadata_dict['tags']) ################## # TODO: ADD IN ABSTRACT CLASS
return meta return meta
def get_screenshot_dir(): def get_screenshot_dir():
@ -90,7 +94,7 @@ def get_screenshot_dir():
# get screenshot relative path # get screenshot relative path
def get_screenshot_rel_path(sha256_str, add_extension=False): def get_screenshot_rel_path(sha256_str, add_extension=False):
screenshot_path = os.path.join(sha256_str[0:2], sha256_str[2:4], sha256_str[4:6], sha256_str[6:8], sha256_str[8:10], sha256_str[10:12], sha256_str[12:]) screenshot_path = os.path.join(sha256_str[0:2], sha256_str[2:4], sha256_str[4:6], sha256_str[6:8], sha256_str[8:10], sha256_str[10:12], sha256_str[12:])
if add_extension: if add_extension:
screenshot_path = f'{screenshot_path}.png' screenshot_path = f'{screenshot_path}.png'
return screenshot_path return screenshot_path
@ -106,5 +110,22 @@ def get_all_screenshots():
screenshots.append(screenshot_id) screenshots.append(screenshot_id)
return screenshots return screenshots
# FIXME STR SIZE LIMIT
def create_screenshot(content, size_limit=5000000, b64=True, force=False):
size = (len(content)*3) / 4
if size <= size_limit or size_limit < 0 or force:
if b64:
content = base64.standard_b64decode(content.encode())
screenshot_id = sha256(content).hexdigest()
screenshot = Screenshot(screenshot_id)
if not screenshot.exists():
filepath = screenshot.get_filepath()
dirname = os.path.dirname(filepath)
if not os.path.exists(dirname):
os.makedirs(dirname)
with open(filepath, 'wb') as f:
f.write(content)
return screenshot
return None
#if __name__ == '__main__': #if __name__ == '__main__':
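A minimal usage sketch of the new create_screenshot() helper, assuming AIL_BIN is set, the configured screenshot directory is writable, and that the class lives in lib/objects/Screenshots.py (the import path is an assumption); the PNG path is hypothetical:

import base64
import os
import sys

sys.path.append(os.environ['AIL_BIN'])
from lib.objects.Screenshots import create_screenshot   # assumed module path

with open('/tmp/page.png', 'rb') as f:                   # hypothetical screenshot from a crawl
    content_b64 = base64.standard_b64encode(f.read()).decode()

screenshot = create_screenshot(content_b64)              # returns None if the size limit is exceeded
if screenshot:
    print(screenshot.id, screenshot.get_rel_path(add_extension=True))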

View file

@ -10,12 +10,14 @@ from pymisp import MISPObject
# sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) # sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) sys.path.append(os.environ['AIL_BIN'])
import ConfigLoader ##################################
# Import Project packages
##################################
from lib.ConfigLoader import ConfigLoader
from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id
config_loader = ConfigLoader.ConfigLoader() config_loader = ConfigLoader()
config_loader = None config_loader = None
@ -44,9 +46,9 @@ class Username(AbstractSubtypeObject):
def get_link(self, flask_context=False): def get_link(self, flask_context=False):
if flask_context: if flask_context:
url = url_for('correlation.show_correlation', object_type=self.type, type_id=self.subtype, correlation_id=self.id) url = url_for('correlation.show_correlation', type=self.type, subtype=self.subtype, id=self.id)
else: else:
url = f'{baseurl}/correlation/show_correlation?object_type={self.type}&type_id={self.subtype}&correlation_id={self.id}' url = f'{baseurl}/correlation/show?type={self.type}&subtype={self.subtype}&id={self.id}'
return url return url
def get_svg_icon(self): def get_svg_icon(self):
@ -61,6 +63,13 @@ class Username(AbstractSubtypeObject):
icon = '\uf007' icon = '\uf007'
return {'style': style, 'icon': icon, 'color': '#4dffff', 'radius':5} return {'style': style, 'icon': icon, 'color': '#4dffff', 'radius':5}
def get_meta(self, options=set()):
meta = self._get_meta()
meta['id'] = self.id
meta['subtype'] = self.subtype
meta['tags'] = self.get_tags()
return meta
def get_misp_object(self): def get_misp_object(self):
obj_attrs = [] obj_attrs = []
if self.subtype == 'telegram': if self.subtype == 'telegram':

View file

@ -0,0 +1,139 @@
# -*-coding:UTF-8 -*
"""
Base Class for AIL Objects
"""
##################################
# Import External packages
##################################
import os
import sys
from abc import abstractmethod, ABC
#from flask import url_for
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.objects.abstract_object import AbstractObject
from lib.ConfigLoader import ConfigLoader
from lib.item_basic import is_crawled, get_item_domain
from packages import Date
# LOAD CONFIG
config_loader = ConfigLoader()
# r_metadata = config_loader.get_redis_conn("ARDB_Metadata")
r_object = config_loader.get_db_conn("Kvrocks_Objects")
config_loader = None
class AbstractDaterangeObject(AbstractObject, ABC):
"""
Abstract Daterange Object
"""
def __init__(self, obj_type, id):
""" Abstract for all the AIL object
:param obj_type: object type (item, ...)
:param id: Object ID
"""
super().__init__(obj_type, id)
def exists(self):
return r_object.exists(f'{self.type}:meta:{self.id}')
def get_first_seen(self, r_int=False):
first_seen = r_object.hget(f'{self.type}:meta:{self.id}', 'first_seen')
if r_int:
if first_seen:
return int(first_seen)
else:
return 99999999
else:
return first_seen
def get_last_seen(self, r_int=False):
last_seen = r_object.hget(f'{self.type}:meta:{self.id}', 'last_seen')
if r_int:
if last_seen:
return int(last_seen)
else:
return 0
else:
return last_seen
def get_nb_seen(self):
return r_object.hget(f'{self.type}:meta:{self.id}', 'nb')
def get_nb_seen_by_date(self, date):
nb = r_object.hget(f'{self.type}:date:{date}', self.id)
if nb is None:
return 0
else:
return int(nb)
def _get_meta(self, options=[]):
meta_dict = {'first_seen': self.get_first_seen(),
'last_seen': self.get_last_seen(),
'nb_seen': self.get_nb_seen()}
if 'sparkline' in options:
meta_dict['sparkline'] = self.get_sparkline()
return meta_dict
def set_first_seen(self, first_seen):
r_object.hset(f'{self.type}:meta:{self.id}', 'first_seen', first_seen)
def set_last_seen(self, last_seen):
r_object.hset(f'{self.type}:meta:{self.id}', 'last_seen', last_seen)
def update_daterange(self, date):
date = int(date)
# obj doesn't exist
if not self.exists():
self.set_first_seen(date)
self.set_last_seen(date)
else:
first_seen = self.get_first_seen(r_int=True)
last_seen = self.get_last_seen(r_int=True)
if date < first_seen:
self.set_first_seen(date)
if date > last_seen:
self.set_last_seen(date)
def get_sparkline(self):
sparkline = []
for date in Date.get_previous_date_list(6):
sparkline.append(self.get_nb_seen_by_date(date))
return sparkline
def _add(self, date, item_id):
if not self.exists():
self.set_first_seen(date)
self.set_last_seen(date)
r_object.sadd(f'{self.type}:all', self.id)
else:
self.update_daterange(date)
# NB Object seen by day
r_object.hincrby(f'{self.type}:date:{date}', self.id, 1)
# NB Object seen
r_object.hincrby(f'{self.type}:meta:{self.id}', 'nb', 1)
# Correlations
self.add_correlation('item', '', item_id)
if is_crawled(item_id): # Domain
domain = get_item_domain(item_id)
self.add_correlation('domain', '', domain)
# TODO:ADD objects + Stats
def _create(self, first_seen, last_seen):
self.set_first_seen(first_seen)
self.set_last_seen(last_seen)
r_object.sadd(f'{self.type}:all', self.id)
# TODO
def _delete(self):
pass
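The class above keeps everything under three kinds of keys: <type>:all (set of ids), <type>:meta:<id> (hash holding first_seen, last_seen and nb) and <type>:date:<YYYYMMDD> (hash of per-day counters read by get_sparkline()). A small sketch of roughly what _add() produces for a first sighting, using plain redis calls against a local instance and a hypothetical CVE id:

import redis

r = redis.Redis(host='localhost', port=6379, decode_responses=True)  # illustrative connection

# roughly what Cve('CVE-2022-1234')._add('20221025', item_id) writes on a first sighting
r.sadd('cve:all', 'CVE-2022-1234')
r.hset('cve:meta:CVE-2022-1234', mapping={'first_seen': 20221025, 'last_seen': 20221025})
r.hincrby('cve:date:20221025', 'CVE-2022-1234', 1)   # per-day counter used by get_sparkline()
r.hincrby('cve:meta:CVE-2022-1234', 'nb', 1)         # global counter returned by get_nb_seen()

print(r.hgetall('cve:meta:CVE-2022-1234'))
print(r.hget('cve:date:20221025', 'CVE-2022-1234'))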

View file

@ -144,7 +144,7 @@ class AbstractObject(ABC):
pass pass
@abstractmethod @abstractmethod
def get_meta(self): def get_meta(self, options=set()):
""" """
get Object metadata get Object metadata
""" """
@ -165,6 +165,18 @@ class AbstractObject(ABC):
def get_misp_object(self): def get_misp_object(self):
pass pass
def _get_external_correlation(self, req_type, req_subtype, req_id, obj_type):
"""
Get object correlation
"""
return get_correlations(req_type, req_subtype, req_id, filter_types=[obj_type])
def get_correlation(self, obj_type):
"""
Get object correlation
"""
return get_correlations(self.type, self.subtype, self.id, filter_types=[obj_type])
def get_correlations(self): def get_correlations(self):
""" """
Get object correlations Get object correlations

View file

@ -20,6 +20,8 @@ from lib.objects.abstract_object import AbstractObject
from lib.ConfigLoader import ConfigLoader from lib.ConfigLoader import ConfigLoader
from lib.item_basic import is_crawled, get_item_domain from lib.item_basic import is_crawled, get_item_domain
from packages import Date
# LOAD CONFIG # LOAD CONFIG
config_loader = ConfigLoader() config_loader = ConfigLoader()
r_metadata = config_loader.get_redis_conn("ARDB_Metadata") r_metadata = config_loader.get_redis_conn("ARDB_Metadata")
@ -115,6 +117,11 @@ class AbstractSubtypeObject(AbstractObject):
if date > last_seen: if date > last_seen:
self.set_last_seen(date) self.set_last_seen(date)
def get_sparkline(self):
sparkline = []
for date in Date.get_previous_date_list(6):
sparkline.append(self.get_nb_seen_by_date(date))
return sparkline
# #
# HANDLE Others objects ???? # HANDLE Others objects ????
# #

View file

@ -12,11 +12,15 @@ from flask import url_for
sys.path.append(os.environ['AIL_BIN']) sys.path.append(os.environ['AIL_BIN'])
################################## ##################################
# Import Project packages # Import Project packages
##################################
from lib.ConfigLoader import ConfigLoader from lib.ConfigLoader import ConfigLoader
from lib.ail_core import get_all_objects from lib.ail_core import get_all_objects
from lib import correlations_engine from lib import correlations_engine
from lib import btc_ail
from lib import Tag
from lib.objects.CryptoCurrencies import CryptoCurrency from lib.objects.CryptoCurrencies import CryptoCurrency
from lib.objects.Cves import Cve
from lib.objects.Decodeds import Decoded from lib.objects.Decodeds import Decoded
from lib.objects.Domains import Domain from lib.objects.Domains import Domain
from lib.objects.Items import Item from lib.objects.Items import Item
@ -39,12 +43,11 @@ def is_valid_object_type(obj_type):
def sanitize_objs_types(objs): def sanitize_objs_types(objs):
l_types = [] l_types = []
print('sanitize')
print(objs)
print(get_all_objects())
for obj in objs: for obj in objs:
if is_valid_object_type(obj): if is_valid_object_type(obj):
l_types.append(obj) l_types.append(obj)
if not l_types:
l_types = get_all_objects()
return l_types return l_types
def get_object(obj_type, subtype, id): def get_object(obj_type, subtype, id):
@ -54,6 +57,8 @@ def get_object(obj_type, subtype, id):
return Domain(id) return Domain(id)
elif obj_type == 'decoded': elif obj_type == 'decoded':
return Decoded(id) return Decoded(id)
elif obj_type == 'cve':
return Cve(id)
elif obj_type == 'screenshot': elif obj_type == 'screenshot':
return Screenshot(id) return Screenshot(id)
elif obj_type == 'cryptocurrency': elif obj_type == 'cryptocurrency':
@ -63,23 +68,48 @@ def get_object(obj_type, subtype, id):
elif obj_type == 'username': elif obj_type == 'username':
return Username(id, subtype) return Username(id, subtype)
def exists_obj(obj_type, subtype, id): def exists_obj(obj_type, subtype, obj_id):
object = get_object(obj_type, subtype, id) obj = get_object(obj_type, subtype, obj_id)
return object.exists() if obj:
return obj.exists()
else:
return False
def get_object_link(obj_type, subtype, id, flask_context=False): def get_object_link(obj_type, subtype, id, flask_context=False):
object = get_object(obj_type, subtype, id) obj = get_object(obj_type, subtype, id)
return object.get_link(flask_context=flask_context) return obj.get_link(flask_context=flask_context)
def get_object_svg(obj_type, subtype, id): def get_object_svg(obj_type, subtype, id):
object = get_object(obj_type, subtype, id) obj = get_object(obj_type, subtype, id)
return object.get_svg_icon() return obj.get_svg_icon()
def get_object_meta(obj_type, subtype, id, flask_context=False): def get_object_meta(obj_type, subtype, id, options=[], flask_context=False):
object = get_object(obj_type, subtype, id) obj = get_object(obj_type, subtype, id)
meta = object.get_meta() meta = obj.get_meta(options=options)
meta['icon'] = object.get_svg_icon() meta['icon'] = obj.get_svg_icon()
meta['link'] = object.get_link(flask_context=flask_context) meta['link'] = obj.get_link(flask_context=flask_context)
return meta
def get_objects_meta(objs, options=[], flask_context=False):
metas = []
for obj_dict in objs:
metas.append(get_object_meta(obj_dict['type'], obj_dict['subtype'], obj_dict['id'], options=options, flask_context=flask_context))
return metas
def get_object_card_meta(obj_type, subtype, id, related_btc=False):
obj = get_object(obj_type, subtype, id)
meta = obj.get_meta()
meta['icon'] = obj.get_svg_icon()
if subtype or obj_type == 'cve':
meta['sparkline'] = obj.get_sparkline()
if subtype == 'bitcoin' and related_btc:
meta["related_btc"] = btc_ail.get_bitcoin_info(obj.id)
if obj.get_type() == 'decoded':
meta["vt"] = obj.get_meta_vt()
meta["vt"]["status"] = obj.is_vt_enabled()
# TAGS MODAL
if obj.get_type() == 'screenshot' or obj.get_type() == 'decoded':
meta["add_tags_modal"] = Tag.get_modal_add_tags(obj.id, object_type=obj.get_type())
return meta return meta
def get_ui_obj_tag_table_keys(obj_type): def get_ui_obj_tag_table_keys(obj_type):
@ -203,7 +233,6 @@ def create_correlation_graph_nodes(nodes_set, obj_str_id, flask_context=True):
dict_node['style']['node_radius'] = dict_node['style']['radius'] dict_node['style']['node_radius'] = dict_node['style']['radius']
# # TODO: # FIXME: in UI # # TODO: # FIXME: in UI
dict_node['style']
dict_node['text'] = obj_id dict_node['text'] = obj_id
if node_id == obj_str_id: if node_id == obj_str_id:
dict_node["style"]["node_color"] = 'orange' dict_node["style"]["node_color"] = 'orange'

View file

@ -36,17 +36,19 @@ def _regex_findall(redis_key, regex, item_content, r_set):
all_items = re.findall(regex, item_content) all_items = re.findall(regex, item_content)
if r_set: if r_set:
if len(all_items) > 1: if len(all_items) > 1:
r_serv_cache.sadd(redis_key, *all_items) for item in all_items:
r_serv_cache.sadd(redis_key, str(item))
r_serv_cache.expire(redis_key, 360) r_serv_cache.expire(redis_key, 360)
elif all_items: elif all_items:
r_serv_cache.sadd(redis_key, all_items[0]) r_serv_cache.sadd(redis_key, str(all_items[0]))
r_serv_cache.expire(redis_key, 360) r_serv_cache.expire(redis_key, 360)
else: else:
if len(all_items) > 1: if len(all_items) > 1:
r_serv_cache.lpush(redis_key, *all_items) for item in all_items:
r_serv_cache.lpush(redis_key, str(item))
r_serv_cache.expire(redis_key, 360) r_serv_cache.expire(redis_key, 360)
elif all_items: elif all_items:
r_serv_cache.lpush(redis_key, all_items[0]) r_serv_cache.lpush(redis_key, str(all_items[0]))
r_serv_cache.expire(redis_key, 360) r_serv_cache.expire(redis_key, 360)
def regex_findall(module_name, redis_key, regex, item_id, item_content, max_time=30, r_set=True): def regex_findall(module_name, redis_key, regex, item_id, item_content, max_time=30, r_set=True):
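The per-item str() conversion above matters because re.findall() returns tuples as soon as the pattern has more than one capture group, and redis-py cannot store tuples directly. A small self-contained sketch, assuming a local Redis instance:

import re
import redis

r = redis.Redis(host='localhost', port=6379, decode_responses=True)  # illustrative connection

matches = re.findall(r'(\w+)@(\w+)\.com', 'a@b.com c@d.com')
print(matches)                           # [('a', 'b'), ('c', 'd')] -> tuples, not strings
for match in matches:
    r.sadd('demo:matches', str(match))   # store the string representation of each tuple
r.expire('demo:matches', 360)
print(r.smembers('demo:matches'))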

View file

@ -11,16 +11,16 @@ Search for API keys on an item content.
""" """
import re
import os import os
import re
import sys import sys
sys.path.append(os.path.join(os.environ['AIL_BIN'])) sys.path.append(os.environ['AIL_BIN'])
##################################
# project packages # Import Project packages
##################################
from modules.abstract_module import AbstractModule from modules.abstract_module import AbstractModule
from packages.Item import Item from lib.objects.Items import Item
from lib import regex_helper
class ApiKey(AbstractModule): class ApiKey(AbstractModule):
"""ApiKey module for AIL framework""" """ApiKey module for AIL framework"""
@ -28,13 +28,11 @@ class ApiKey(AbstractModule):
def __init__(self): def __init__(self):
super(ApiKey, self).__init__() super(ApiKey, self).__init__()
self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)
# # TODO: ENUM or dict # # TODO: ENUM or dict
# TODO improve REGEX # TODO improve REGEX
#r'(?<![A-Z0-9])=[A-Z0-9]{20}(?![A-Z0-9])' # r'(?<![A-Z0-9])=[A-Z0-9]{20}(?![A-Z0-9])'
#r'(?<!=[A-Za-z0-9+])=[A-Za-z0-9+]{40}(?![A-Za-z0-9+])' # r'(?<!=[A-Za-z0-9+])=[A-Za-z0-9+]{40}(?![A-Za-z0-9+])'
self.re_aws_access_key = r'AKIA[0-9A-Z]{16}' self.re_aws_access_key = r'AKIA[0-9A-Z]{16}'
self.re_aws_secret_key = r'[0-9a-zA-Z/+]{40}' self.re_aws_secret_key = r'[0-9a-zA-Z/+]{40}'
re.compile(self.re_aws_access_key) re.compile(self.re_aws_access_key)
@ -48,15 +46,14 @@ class ApiKey(AbstractModule):
self.redis_logger.info(f"Module {self.module_name} initialized") self.redis_logger.info(f"Module {self.module_name} initialized")
def compute(self, message, r_result=False): def compute(self, message, r_result=False):
id, score = message.split() item_id, score = message.split()
item = Item(id) item = Item(item_id)
item_content = item.get_content() item_content = item.get_content()
google_api_key = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.re_google_api_key, item.get_id(), item_content) google_api_key = self.regex_findall(self.re_google_api_key, item.get_id(), item_content)
aws_access_key = self.regex_findall(self.re_aws_access_key, item.get_id(), item_content)
aws_access_key = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.re_aws_access_key, item.get_id(), item_content)
if aws_access_key: if aws_access_key:
aws_secret_key = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.re_aws_secret_key, item.get_id(), item_content) aws_secret_key = self.regex_findall(self.re_aws_secret_key, item.get_id(), item_content)
if aws_access_key or google_api_key: if aws_access_key or google_api_key:
to_print = f'ApiKey;{item.get_source()};{item.get_date()};{item.get_basename()};' to_print = f'ApiKey;{item.get_source()};{item.get_date()};{item.get_basename()};'
@ -68,7 +65,7 @@ class ApiKey(AbstractModule):
msg = f'infoleak:automatic-detection="google-api-key";{item.get_id()}' msg = f'infoleak:automatic-detection="google-api-key";{item.get_id()}'
self.send_message_to_queue(msg, 'Tags') self.send_message_to_queue(msg, 'Tags')
# # TODO: # FIXME: AWS regex/validate/sanityze KEY + SECRET KEY # # TODO: # FIXME: AWS regex/validate/sanitize KEY + SECRET KEY
if aws_access_key: if aws_access_key:
print(f'found AWS key: {to_print}') print(f'found AWS key: {to_print}')
self.redis_logger.warning(f'{to_print}Checked {len(aws_access_key)} found AWS Key;{item.get_id()}') self.redis_logger.warning(f'{to_print}Checked {len(aws_access_key)} found AWS Key;{item.get_id()}')
@ -87,7 +84,8 @@ class ApiKey(AbstractModule):
self.send_message_to_queue(item.get_id(), 'Duplicate') self.send_message_to_queue(item.get_id(), 'Duplicate')
if r_result: if r_result:
return (google_api_key, aws_access_key, aws_secret_key) return google_api_key, aws_access_key, aws_secret_key
if __name__ == "__main__": if __name__ == "__main__":
module = ApiKey() module = ApiKey()

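The modules in this commit now call self.regex_findall(regex, item_id, content) instead of passing a module name and cache key to lib.regex_helper at every call site. A rough sketch of what such a wrapper could look like, assuming it simply delegates to the existing helper; the real implementation in modules/abstract_module.py is not shown in this diff and may differ.

import os
import sys
sys.path.append(os.environ['AIL_BIN'])
from lib import regex_helper

class RegexHelperMixin:
    """Possible shape of the helper the migrated modules now inherit (assumption)."""

    def __init__(self, module_name, max_execution_time=30):
        self.module_name = module_name
        # one cache key per module instead of one per call site
        self.redis_cache_key = regex_helper.generate_redis_cache_key(module_name)
        self.max_execution_time = max_execution_time

    def regex_findall(self, regex, item_id, content, r_set=True):
        # same signature the migrated modules use above
        return regex_helper.regex_findall(self.module_name, self.redis_cache_key,
                                          regex, item_id, content,
                                          max_time=self.max_execution_time, r_set=r_set)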

@ -43,7 +43,7 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages # Import Project packages
################################## ##################################
from modules.abstract_module import AbstractModule from modules.abstract_module import AbstractModule
from packages.Item import Item from lib.objects.Items import Item
class Categ(AbstractModule): class Categ(AbstractModule):


@ -30,7 +30,6 @@ import os
import sys import sys
import time import time
import re import re
import redis
from datetime import datetime from datetime import datetime
from pyfaup.faup import Faup from pyfaup.faup import Faup
@ -39,9 +38,8 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages # Import Project packages
################################## ##################################
from modules.abstract_module import AbstractModule from modules.abstract_module import AbstractModule
from packages.Item import Item from lib.objects.Items import Item
from lib import ConfigLoader from lib import ConfigLoader
from lib import regex_helper
from lib import Statistics from lib import Statistics
@ -60,21 +58,18 @@ class Credential(AbstractModule):
REDIS_KEY_ALL_PATH_SET_REV = 'AllPathRev' REDIS_KEY_ALL_PATH_SET_REV = 'AllPathRev'
REDIS_KEY_MAP_CRED_TO_PATH = 'CredToPathMapping' REDIS_KEY_MAP_CRED_TO_PATH = 'CredToPathMapping'
def __init__(self): def __init__(self):
super(Credential, self).__init__() super(Credential, self).__init__()
self.faup = Faup() self.faup = Faup()
self.regex_web = "((?:https?:\/\/)[\.-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)" self.regex_web = r"((?:https?:\/\/)[\.-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)"
self.regex_cred = "[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+" self.regex_cred = r"[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+"
self.regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:" self.regex_site_for_stats = r"@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:"
self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)
# Database # Database
config_loader = ConfigLoader.ConfigLoader() config_loader = ConfigLoader.ConfigLoader()
#self.server_cred = config_loader.get_redis_conn("ARDB_TermCred") # self.server_cred = config_loader.get_redis_conn("ARDB_TermCred")
self.server_statistics = config_loader.get_redis_conn("ARDB_Statistics") self.server_statistics = config_loader.get_redis_conn("ARDB_Statistics")
# Config values # Config values
@ -83,29 +78,27 @@ class Credential(AbstractModule):
self.max_execution_time = 30 self.max_execution_time = 30
# Waiting time in secondes between to message proccessed # Waiting time in seconds between to message processed
self.pending_seconds = 10 self.pending_seconds = 10
# Send module state to logs # Send module state to logs
self.redis_logger.info(f"Module {self.module_name} initialized") self.redis_logger.info(f"Module {self.module_name} initialized")
def compute(self, message): def compute(self, message):
id, count = message.split() item_id, count = message.split()
item = Item(id) item = Item(item_id)
item_content = item.get_content() item_content = item.get_content()
# TODO: USE SETS # TODO: USE SETS
# Extract all credentials # Extract all credentials
all_credentials = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_cred, item.get_id(), item_content, max_time=self.max_execution_time) all_credentials = self.regex_findall(self.regex_cred, item.get_id(), item_content)
if all_credentials: if all_credentials:
nb_cred = len(all_credentials) nb_cred = len(all_credentials)
message = f'Checked {nb_cred} credentials found.' message = f'Checked {nb_cred} credentials found.'
all_sites = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_web, item.get_id(), item_content, max_time=self.max_execution_time) all_sites = self.regex_findall(self.regex_web, item.get_id(), item_content)
if all_sites: if all_sites:
discovered_sites = ', '.join(all_sites) discovered_sites = ', '.join(all_sites)
message += f' Related websites: {discovered_sites}' message += f' Related websites: {discovered_sites}'
@ -114,7 +107,7 @@ class Credential(AbstractModule):
to_print = f'Credential;{item.get_source()};{item.get_date()};{item.get_basename()};{message};{item.get_id()}' to_print = f'Credential;{item.get_source()};{item.get_date()};{item.get_basename()};{message};{item.get_id()}'
#num of creds above tresh, publish an alert # num of creds above threshold, publish an alert
if nb_cred > self.criticalNumberToAlert: if nb_cred > self.criticalNumberToAlert:
print(f"========> Found more than 10 credentials in this file : {item.get_id()}") print(f"========> Found more than 10 credentials in this file : {item.get_id()}")
self.redis_logger.warning(to_print) self.redis_logger.warning(to_print)
@ -122,11 +115,11 @@ class Credential(AbstractModule):
msg = f'infoleak:automatic-detection="credential";{item.get_id()}' msg = f'infoleak:automatic-detection="credential";{item.get_id()}'
self.send_message_to_queue(msg, 'Tags') self.send_message_to_queue(msg, 'Tags')
site_occurence = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_site_for_stats, item.get_id(), item_content, max_time=self.max_execution_time, r_set=False) site_occurrence = self.regex_findall(self.regex_site_for_stats, item.get_id(), item_content)
creds_sites = {} creds_sites = {}
for site in site_occurence: for site in site_occurrence:
site_domain = site[1:-1].lower() site_domain = site[1:-1].lower()
if site_domain in creds_sites.keys(): if site_domain in creds_sites.keys():
creds_sites[site_domain] += 1 creds_sites[site_domain] += 1
@ -136,7 +129,7 @@ class Credential(AbstractModule):
for url in all_sites: for url in all_sites:
self.faup.decode(url) self.faup.decode(url)
domain = self.faup.get()['domain'] domain = self.faup.get()['domain']
## TODO: # FIXME: remove me, check faup versionb # # TODO: # FIXME: remove me, check faup versionb
try: try:
domain = domain.decode() domain = domain.decode()
except: except:
@ -159,10 +152,10 @@ class Credential(AbstractModule):
date = datetime.now().strftime("%Y%m") date = datetime.now().strftime("%Y%m")
nb_tlds = {} nb_tlds = {}
for cred in all_credentials: for cred in all_credentials:
maildomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", cred.lower())[0] maildomains = re.findall(r"@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", cred.lower())[0]
self.faup.decode(maildomains) self.faup.decode(maildomains)
tld = self.faup.get()['tld'] tld = self.faup.get()['tld']
## TODO: # FIXME: remove me # # TODO: # FIXME: remove me
try: try:
tld = tld.decode() tld = tld.decode()
except: except:

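A quick standalone check of the raw-string credential regexes introduced above; the sample e-mail/password line and site are fabricated for illustration.

import re

regex_cred = r"[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+"
regex_web = r"((?:https?:\/\/)[\.-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)"

sample = "leak from https://example.com -> john.doe@example.com:Password_123"
print(re.findall(regex_cred, sample))   # ['john.doe@example.com:Password_123']
print(re.findall(regex_web, sample))    # ['https://example.com']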

@ -17,14 +17,13 @@ It apply credit card regexes on item content and warn if a valid card number is
import os import os
import re import re
import sys import sys
import time
sys.path.append(os.environ['AIL_BIN']) sys.path.append(os.environ['AIL_BIN'])
################################## ##################################
# Import Project packages # Import Project packages
################################## ##################################
from modules.abstract_module import AbstractModule from modules.abstract_module import AbstractModule
from packages.Item import Item from lib.objects.Items import Item
from packages import lib_refine from packages import lib_refine
class CreditCards(AbstractModule): class CreditCards(AbstractModule):
@ -53,15 +52,14 @@ class CreditCards(AbstractModule):
# Send module state to logs # Send module state to logs
self.redis_logger.info(f"Module {self.module_name} initialized") self.redis_logger.info(f"Module {self.module_name} initialized")
def compute(self, message, r_result=False): def compute(self, message, r_result=False):
id, score = message.split() item_id, score = message.split()
item = Item(id) item = Item(item_id)
content = item.get_content() content = item.get_content()
all_cards = re.findall(self.regex, content) all_cards = re.findall(self.regex, content)
if len(all_cards) > 0: if len(all_cards) > 0:
#self.redis_logger.debug(f'All matching {all_cards}') # self.redis_logger.debug(f'All matching {all_cards}')
creditcard_set = set([]) creditcard_set = set([])
for card in all_cards: for card in all_cards:
@ -70,9 +68,9 @@ class CreditCards(AbstractModule):
self.redis_logger.debug(f'{clean_card} is valid') self.redis_logger.debug(f'{clean_card} is valid')
creditcard_set.add(clean_card) creditcard_set.add(clean_card)
#pprint.pprint(creditcard_set) # pprint.pprint(creditcard_set)
to_print = f'CreditCard;{item.get_source()};{item.get_date()};{item.get_basename()};' to_print = f'CreditCard;{item.get_source()};{item.get_date()};{item.get_basename()};'
if (len(creditcard_set) > 0): if len(creditcard_set) > 0:
self.redis_logger.warning(f'{to_print}Checked {len(creditcard_set)} valid number(s);{item.get_id()}') self.redis_logger.warning(f'{to_print}Checked {len(creditcard_set)} valid number(s);{item.get_id()}')
msg = f'infoleak:automatic-detection="credit-card";{item.get_id()}' msg = f'infoleak:automatic-detection="credit-card";{item.get_id()}'
@ -83,7 +81,7 @@ class CreditCards(AbstractModule):
else: else:
self.redis_logger.info(f'{to_print}CreditCard related;{item.get_id()}') self.redis_logger.info(f'{to_print}CreditCard related;{item.get_id()}')
if __name__ == '__main__':
if __name__ == '__main__':
module = CreditCards() module = CreditCards()
module.run() module.run()

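lib_refine, which validates the extracted card candidates, is not part of this diff; as a rough stand-in, the usual Luhn checksum test looks like this (the test number is the well-known 4111... sample, not a real card).

def luhn_is_valid(card_number: str) -> bool:
    digits = [int(c) for c in card_number if c.isdigit()]
    checksum = 0
    # double every second digit from the right, subtract 9 when the result exceeds 9
    for i, d in enumerate(reversed(digits)):
        if i % 2 == 1:
            d = d * 2
            if d > 9:
                d -= 9
        checksum += d
    return len(digits) >= 12 and checksum % 10 == 0

print(luhn_is_valid('4111 1111 1111 1111'))  # True (well-known test number)
print(luhn_is_valid('4111 1111 1111 1112'))  # False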

@ -22,6 +22,7 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages # Import Project packages
################################## ##################################
from modules.abstract_module import AbstractModule from modules.abstract_module import AbstractModule
from lib.objects import Cves
from lib.objects.Items import Item from lib.objects.Items import Item
@ -36,13 +37,12 @@ class Cve(AbstractModule):
# regex to find CVE # regex to find CVE
self.reg_cve = re.compile(r'CVE-[1-2]\d{1,4}-\d{1,5}') self.reg_cve = re.compile(r'CVE-[1-2]\d{1,4}-\d{1,5}')
# Waiting time in secondes between to message proccessed # Waiting time in seconds between to message processed
self.pending_seconds = 1 self.pending_seconds = 1
# Send module state to logs # Send module state to logs
self.redis_logger.info(f'Module {self.module_name} initialized') self.redis_logger.info(f'Module {self.module_name} initialized')
def compute(self, message): def compute(self, message):
item_id, count = message.split() item_id, count = message.split()
@ -51,17 +51,23 @@ class Cve(AbstractModule):
cves = self.regex_findall(self.reg_cve, item_id, item.get_content()) cves = self.regex_findall(self.reg_cve, item_id, item.get_content())
if cves: if cves:
print(cves)
date = item.get_date()
for cve_id in cves:
cve = Cves.Cve(cve_id)
cve.add(date, item_id)
warning = f'{item_id} contains CVEs {cves}' warning = f'{item_id} contains CVEs {cves}'
print(warning) print(warning)
self.redis_logger.warning(warning) self.redis_logger.warning(warning)
msg = f'infoleak:automatic-detection="cve";{item_id}' msg = f'infoleak:automatic-detection="cve";{item_id}'
# Send to Tags Queue # Send to Tags Queue
self.send_message_to_queue(msg, 'Tags') self.send_message_to_queue(msg, 'Tags')
if __name__ == '__main__': if __name__ == '__main__':
module = Cve() module = Cve()
module.run() # module.run()
module.compute('crawled/2022/09/15/circl.lu1e4f9721-06dc-404f-aabf-3c3bd0b533bd 9')

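A condensed sketch of the new correlation flow added above: every CVE id matched in an item is wrapped in a Cves.Cve object and linked to the item for that date. The item id and date below are placeholders; cve.add(date, item_id) is used exactly as in the hunk above.

import os
import re
import sys
sys.path.append(os.environ['AIL_BIN'])
from lib.objects import Cves

reg_cve = re.compile(r'CVE-[1-2]\d{1,4}-\d{1,5}')

def correlate_cves(item_id, date, content):
    for cve_id in set(reg_cve.findall(content)):
        cve = Cves.Cve(cve_id)
        cve.add(date, item_id)   # add the CVE <-> item correlation, as in the module above
        print(f'{item_id} -> {cve_id}')

correlate_cves('submitted/2022/10/25/example.gz', '20221025', 'patched in CVE-2022-1234')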

@ -65,49 +65,45 @@ class Decoder(AbstractModule):
#hexStr = ''.join( hex_string.split(" ") ) #hexStr = ''.join( hex_string.split(" ") )
return bytes(bytearray([int(hexStr[i:i+2], 16) for i in range(0, len(hexStr), 2)])) return bytes(bytearray([int(hexStr[i:i+2], 16) for i in range(0, len(hexStr), 2)]))
# TODO to lambda expr # TODO to lambda expr
def binary_decoder(self, binary_string): def binary_decoder(self, binary_string):
return bytes(bytearray([int(binary_string[i:i+8], 2) for i in range(0, len(binary_string), 8)])) return bytes(bytearray([int(binary_string[i:i+8], 2) for i in range(0, len(binary_string), 8)]))
# TODO to lambda expr # TODO to lambda expr
def base64_decoder(self, base64_string): def base64_decoder(self, base64_string):
return base64.b64decode(base64_string) return base64.b64decode(base64_string)
def __init__(self): def __init__(self):
super(Decoder, self).__init__() super(Decoder, self).__init__()
regex_binary = '[0-1]{40,}' regex_binary = r'[0-1]{40,}'
#regex_hex = '(0[xX])?[A-Fa-f0-9]{40,}' # regex_hex = r'(0[xX])?[A-Fa-f0-9]{40,}'
regex_hex = '[A-Fa-f0-9]{40,}' regex_hex = r'[A-Fa-f0-9]{40,}'
regex_base64 = '(?:[A-Za-z0-9+/]{4}){2,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)' regex_base64 = r'(?:[A-Za-z0-9+/]{4}){2,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)'
cmp_regex_binary = re.compile(regex_binary) cmp_regex_binary = re.compile(regex_binary)
cmp_regex_hex = re.compile(regex_hex) cmp_regex_hex = re.compile(regex_hex)
cmp_regex_base64 = re.compile(regex_base64) cmp_regex_base64 = re.compile(regex_base64)
# map decoder function # map decoder function
self.decoder_function = {'binary':self.binary_decoder,'hexadecimal':self.hex_decoder, 'base64':self.base64_decoder} self.decoder_function = {'binary': self.binary_decoder, 'hexadecimal': self.hex_decoder, 'base64': self.base64_decoder}
# list all decoder with regex, # list all decoder with regex,
decoder_binary = {'name': 'binary', 'regex': cmp_regex_binary, 'encoded_min_size': 300, 'max_execution_time': binary_max_execution_time} decoder_binary = {'name': 'binary', 'regex': cmp_regex_binary, 'encoded_min_size': 300, 'max_execution_time': binary_max_execution_time}
decoder_hexadecimal = {'name': 'hexadecimal', 'regex': cmp_regex_hex, 'encoded_min_size': 300, 'max_execution_time': hex_max_execution_time} decoder_hexadecimal = {'name': 'hexadecimal', 'regex': cmp_regex_hex, 'encoded_min_size': 300, 'max_execution_time': hex_max_execution_time}
decoder_base64 = {'name': 'base64', 'regex': cmp_regex_base64, 'encoded_min_size': 40, 'max_execution_time': base64_max_execution_time} decoder_base64 = {'name': 'base64', 'regex': cmp_regex_base64, 'encoded_min_size': 40, 'max_execution_time': base64_max_execution_time}
self.decoder_order = [ decoder_base64, decoder_binary, decoder_hexadecimal, decoder_base64] self.decoder_order = [decoder_base64, decoder_binary, decoder_hexadecimal, decoder_base64]
for decoder in self.decoder_order: for decoder in self.decoder_order:
serv_metadata.sadd('all_decoder', decoder['name']) serv_metadata.sadd('all_decoder', decoder['name'])
# Waiting time in secondes between to message proccessed # Waiting time in seconds between to message processed
self.pending_seconds = 1 self.pending_seconds = 1
# Send module state to logs # Send module state to logs
self.redis_logger.info(f'Module {self.module_name} initialized') self.redis_logger.info(f'Module {self.module_name} initialized')
def compute(self, message): def compute(self, message):
item = Item(message) item = Item(message)
@ -128,10 +124,9 @@ class Decoder(AbstractModule):
else: else:
signal.alarm(0) signal.alarm(0)
if(len(encoded_list) > 0): if len(encoded_list) > 0:
content = self.decode_string(content, item.id, date, encoded_list, decoder['name'], decoder['encoded_min_size']) content = self.decode_string(content, item.id, date, encoded_list, decoder['name'], decoder['encoded_min_size'])
def decode_string(self, content, item_id, date, encoded_list, decoder_name, encoded_min_size): def decode_string(self, content, item_id, date, encoded_list, decoder_name, encoded_min_size):
find = False find = False
for encoded in encoded_list: for encoded in encoded_list:
@ -153,12 +148,12 @@ class Decoder(AbstractModule):
save_item_relationship(sha1_string, item_id) ################################ save_item_relationship(sha1_string, item_id) ################################
#remove encoded from item content # remove encoded from item content
content = content.replace(encoded, '', 1) content = content.replace(encoded, '', 1)
self.redis_logger.debug(f'{item_id} : {decoder_name} - {mimetype}') self.redis_logger.debug(f'{item_id} : {decoder_name} - {mimetype}')
print(f'{item_id} : {decoder_name} - {mimetype}') print(f'{item_id} : {decoder_name} - {mimetype}')
if(find): if find:
self.redis_logger.info(f'{decoder_name} decoded') self.redis_logger.info(f'{decoder_name} decoded')
print(f'{decoder_name} decoded') print(f'{decoder_name} decoded')
@ -169,6 +164,7 @@ class Decoder(AbstractModule):
# perf: remove encoded from item content # perf: remove encoded from item content
return content return content
if __name__ == '__main__': if __name__ == '__main__':
# # TODO: TEST ME # # TODO: TEST ME

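The three decoder helpers above are easy to exercise in isolation; a self-contained example with fabricated sample strings.

import base64

def hex_decoder(hex_string):
    return bytes(bytearray([int(hex_string[i:i+2], 16) for i in range(0, len(hex_string), 2)]))

def binary_decoder(binary_string):
    return bytes(bytearray([int(binary_string[i:i+8], 2) for i in range(0, len(binary_string), 8)]))

def base64_decoder(base64_string):
    return base64.b64decode(base64_string)

decoder_function = {'binary': binary_decoder, 'hexadecimal': hex_decoder, 'base64': base64_decoder}

print(decoder_function['base64']('QUlMIGZyYW1ld29yaw=='))       # b'AIL framework'
print(decoder_function['hexadecimal']('41494c'))                 # b'AIL'
print(decoder_function['binary']('010000010100100101001100'))    # b'AIL'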

@ -15,7 +15,6 @@ the out output of the Global module.
################################## ##################################
import os import os
import sys import sys
import time
import DomainClassifier.domainclassifier import DomainClassifier.domainclassifier
sys.path.append(os.environ['AIL_BIN']) sys.path.append(os.environ['AIL_BIN'])
@ -23,11 +22,8 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages # Import Project packages
################################## ##################################
from modules.abstract_module import AbstractModule from modules.abstract_module import AbstractModule
from packages.Item import Item from lib.objects.Items import Item
from lib import d4
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
import d4
import item_basic
class DomClassifier(AbstractModule): class DomClassifier(AbstractModule):
@ -38,7 +34,7 @@ class DomClassifier(AbstractModule):
def __init__(self): def __init__(self):
super(DomClassifier, self).__init__() super(DomClassifier, self).__init__()
# Waiting time in secondes between to message proccessed # Waiting time in seconds between to message processed
self.pending_seconds = 1 self.pending_seconds = 1
addr_dns = self.process.config.get("DomClassifier", "dns") addr_dns = self.process.config.get("DomClassifier", "dns")
@ -51,11 +47,10 @@ class DomClassifier(AbstractModule):
# Send module state to logs # Send module state to logs
self.redis_logger.info(f"Module: {self.module_name} Launched") self.redis_logger.info(f"Module: {self.module_name} Launched")
def compute(self, message, r_result=False): def compute(self, message, r_result=False):
host, id = message.split() host, item_id = message.split()
item = Item(id) item = Item(item_id)
item_basename = item.get_basename() item_basename = item.get_basename()
item_date = item.get_date() item_date = item.get_date()
item_source = item.get_source() item_source = item.get_source()
@ -64,7 +59,7 @@ class DomClassifier(AbstractModule):
self.c.text(rawtext=host) self.c.text(rawtext=host)
print(self.c.domain) print(self.c.domain)
self.c.validdomain(passive_dns=True, extended=False) self.c.validdomain(passive_dns=True, extended=False)
#self.redis_logger.debug(self.c.vdomain) # self.redis_logger.debug(self.c.vdomain)
print(self.c.vdomain) print(self.c.vdomain)
print() print()


@ -12,14 +12,12 @@ Its input comes from other modules, namely:
Perform comparisions with ssdeep and tlsh Perform comparisions with ssdeep and tlsh
""" """
import redis
import os import os
import sys import sys
import time import time
#from datetime import datetime, timedelta # from datetime import datetime, timedelta
import datetime import datetime
sys.path.append(os.environ['AIL_BIN']) sys.path.append(os.environ['AIL_BIN'])
@ -51,7 +49,6 @@ class Duplicates(AbstractModule):
self.redis_logger.info(f"Module: {self.module_name} Launched") self.redis_logger.info(f"Module: {self.module_name} Launched")
def compute(self, message): def compute(self, message):
# IOError: "CRC Checksum Failed on : {id}" # IOError: "CRC Checksum Failed on : {id}"
@ -72,7 +69,7 @@ class Duplicates(AbstractModule):
self.algos['ssdeep']['hash'] = Duplicate.get_ssdeep_hash(content) self.algos['ssdeep']['hash'] = Duplicate.get_ssdeep_hash(content)
self.algos['tlsh']['hash'] = Duplicate.get_tlsh_hash(content) self.algos['tlsh']['hash'] = Duplicate.get_tlsh_hash(content)
# TODO: Handle coputed duplicates # TODO: Handle computed duplicates
nb_duplicates = 0 nb_duplicates = 0
@ -99,7 +96,7 @@ class Duplicates(AbstractModule):
y = time.time() y = time.time()
print(f'{item.get_id()} Processed in {y-x} sec') print(f'{item.get_id()} Processed in {y-x} sec')
#self.redis_logger.debug('{}Processed in {} sec'.format(to_print, y-x)) # self.redis_logger.debug('{}Processed in {} sec'.format(to_print, y-x))
if __name__ == "__main__": if __name__ == "__main__":


@ -31,7 +31,6 @@ import os
import sys import sys
import time import time
import datetime import datetime
import redis
from hashlib import md5 from hashlib import md5
from uuid import uuid4 from uuid import uuid4
@ -57,19 +56,18 @@ class Global(AbstractModule):
self.processed_item = 0 self.processed_item = 0
self.time_last_stats = time.time() self.time_last_stats = time.time()
# Get and sanityze ITEM DIRECTORY # Get and sanitize ITEM DIRECTORY
# # TODO: rename PASTE => ITEM # # TODO: rename PASTE => ITEM
self.PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], self.process.config.get("Directories", "pastes")) self.PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], self.process.config.get("Directories", "pastes"))
self.PASTES_FOLDERS = self.PASTES_FOLDER + '/' self.PASTES_FOLDERS = self.PASTES_FOLDER + '/'
self.PASTES_FOLDERS = os.path.join(os.path.realpath(self.PASTES_FOLDERS), '') self.PASTES_FOLDERS = os.path.join(os.path.realpath(self.PASTES_FOLDERS), '')
# Waiting time in secondes between to message proccessed # Waiting time in seconds between to message processed
self.pending_seconds = 0.5 self.pending_seconds = 0.5
# Send module state to logs # Send module state to logs
self.redis_logger.info(f"Module {self.module_name} initialized") self.redis_logger.info(f"Module {self.module_name} initialized")
def computeNone(self): def computeNone(self):
difftime = time.time() - self.time_last_stats difftime = time.time() - self.time_last_stats
if int(difftime) > 30: if int(difftime) > 30:
@ -80,7 +78,6 @@ class Global(AbstractModule):
self.time_last_stats = time.time() self.time_last_stats = time.time()
self.processed_item = 0 self.processed_item = 0
def compute(self, message, r_result=False): def compute(self, message, r_result=False):
# Recovering the streamed message informations # Recovering the streamed message informations
splitted = message.split() splitted = message.split()
@ -129,7 +126,8 @@ class Global(AbstractModule):
item_id = item_id.replace(self.PASTES_FOLDERS, '', 1) item_id = item_id.replace(self.PASTES_FOLDERS, '', 1)
self.send_message_to_queue(item_id) self.send_message_to_queue(item_id)
self.processed_item+=1 self.processed_item += 1
print(item_id)
if r_result: if r_result:
return item_id return item_id
@ -137,7 +135,6 @@ class Global(AbstractModule):
self.redis_logger.debug(f"Empty Item: {message} not processed") self.redis_logger.debug(f"Empty Item: {message} not processed")
print(f"Empty Item: {message} not processed") print(f"Empty Item: {message} not processed")
def check_filename(self, filename, new_file_content): def check_filename(self, filename, new_file_content):
""" """
Check if file is not a duplicated file Check if file is not a duplicated file
@ -181,10 +178,8 @@ class Global(AbstractModule):
# File not unzipped # File not unzipped
filename = None filename = None
return filename return filename
def gunzip_file(self, filename): def gunzip_file(self, filename):
""" """
Unzip a file Unzip a file
@ -224,7 +219,6 @@ class Global(AbstractModule):
return gunzipped_bytes_obj return gunzipped_bytes_obj
def rreplace(self, s, old, new, occurrence): def rreplace(self, s, old, new, occurrence):
li = s.rsplit(old, occurrence) li = s.rsplit(old, occurrence)
return new.join(li) return new.join(li)


@ -17,7 +17,6 @@ It is looking for Hosts
import os import os
import re import re
import sys import sys
import time
sys.path.append(os.environ['AIL_BIN']) sys.path.append(os.environ['AIL_BIN'])
################################## ##################################
@ -25,9 +24,7 @@ sys.path.append(os.environ['AIL_BIN'])
################################## ##################################
from modules.abstract_module import AbstractModule from modules.abstract_module import AbstractModule
from lib.ConfigLoader import ConfigLoader from lib.ConfigLoader import ConfigLoader
from lib import regex_helper from lib.objects.Items import Item
#from lib.objects.Items import Item
from packages.Item import Item
class Hosts(AbstractModule): class Hosts(AbstractModule):
""" """
@ -40,12 +37,10 @@ class Hosts(AbstractModule):
config_loader = ConfigLoader() config_loader = ConfigLoader()
self.r_cache = config_loader.get_redis_conn("Redis_Cache") self.r_cache = config_loader.get_redis_conn("Redis_Cache")
self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)
# regex timeout # regex timeout
self.regex_timeout = 30 self.regex_timeout = 30
# Waiting time in secondes between to message proccessed # Waiting time in seconds between to message processed
self.pending_seconds = 1 self.pending_seconds = 1
self.host_regex = r'\b([a-zA-Z\d-]{,63}(?:\.[a-zA-Z\d-]{,63})+)\b' self.host_regex = r'\b([a-zA-Z\d-]{,63}(?:\.[a-zA-Z\d-]{,63})+)\b'
@ -53,7 +48,6 @@ class Hosts(AbstractModule):
self.redis_logger.info(f"Module: {self.module_name} Launched") self.redis_logger.info(f"Module: {self.module_name} Launched")
def compute(self, message): def compute(self, message):
item = Item(message) item = Item(message)
@ -61,18 +55,16 @@ class Hosts(AbstractModule):
# if mimetype.split('/')[0] == "text": # if mimetype.split('/')[0] == "text":
content = item.get_content() content = item.get_content()
hosts = self.regex_findall(self.host_regex, item.get_id(), content)
hosts = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.host_regex, item.get_id(), content)
if hosts: if hosts:
print(f'{len(hosts)} host {item.get_id()}') print(f'{len(hosts)} host {item.get_id()}')
for host in hosts: for host in hosts:
#print(host) # print(host)
msg = f'{host} {item.get_id()}' msg = f'{host} {item.get_id()}'
self.send_message_to_queue(msg, 'Host') self.send_message_to_queue(msg, 'Host')
if __name__ == '__main__': if __name__ == '__main__':
module = Hosts() module = Hosts()

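A quick check of the host regex used above on a toy string (expected matches shown as a comment).

import re

host_regex = r'\b([a-zA-Z\d-]{,63}(?:\.[a-zA-Z\d-]{,63})+)\b'
sample = 'reach us on support.example.com or 10.0.0.1'
print(re.findall(host_regex, sample))   # ['support.example.com', '10.0.0.1']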

@ -34,7 +34,7 @@ class Iban(AbstractModule):
""" """
_LETTERS_IBAN = chain(enumerate(string.digits + string.ascii_uppercase), _LETTERS_IBAN = chain(enumerate(string.digits + string.ascii_uppercase),
enumerate(string.ascii_lowercase, 10)) enumerate(string.ascii_lowercase, 10))
LETTERS_IBAN = {ord(d): str(i) for i, d in _LETTERS_IBAN} LETTERS_IBAN = {ord(d): str(i) for i, d in _LETTERS_IBAN}
def __init__(self): def __init__(self):
@ -44,7 +44,7 @@ class Iban(AbstractModule):
self.pending_seconds = 10 self.pending_seconds = 10
self.regex_timeout = 30 self.regex_timeout = 30
#iban_regex = re.compile(r'\b[A-Za-z]{2}[0-9]{2}(?:[ ]?[0-9]{4}){4}(?:[ ]?[0-9]{1,2})?\b') # iban_regex = re.compile(r'\b[A-Za-z]{2}[0-9]{2}(?:[ ]?[0-9]{4}){4}(?:[ ]?[0-9]{1,2})?\b')
self.iban_regex = re.compile(r'\b([A-Za-z]{2}[ \-]?[0-9]{2})(?=(?:[ \-]?[A-Za-z0-9]){9,30})((?:[ \-]?[A-Za-z0-9]{3,5}){2,6})([ \-]?[A-Za-z0-9]{1,3})\b') self.iban_regex = re.compile(r'\b([A-Za-z]{2}[ \-]?[0-9]{2})(?=(?:[ \-]?[A-Za-z0-9]){9,30})((?:[ \-]?[A-Za-z0-9]{3,5}){2,6})([ \-]?[A-Za-z0-9]{1,3})\b')
self.iban_regex_verify = re.compile(r'^([A-Z]{2})([0-9]{2})([A-Z0-9]{9,30})$') self.iban_regex_verify = re.compile(r'^([A-Z]{2})([0-9]{2})([A-Z0-9]{9,30})$')
@ -90,6 +90,7 @@ class Iban(AbstractModule):
msg = f'infoleak:automatic-detection="iban";{item_id}' msg = f'infoleak:automatic-detection="iban";{item_id}'
self.send_message_to_queue(msg, 'Tags') self.send_message_to_queue(msg, 'Tags')
if __name__ == '__main__': if __name__ == '__main__':
module = Iban() module = Iban()

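The module's validation code sits outside this hunk; the standard mod-97 check that the LETTERS_IBAN table above is built for looks roughly like this (the IBAN is the usual published example, not a real account).

import string
from itertools import chain

_LETTERS_IBAN = chain(enumerate(string.digits + string.ascii_uppercase),
                      enumerate(string.ascii_lowercase, 10))
LETTERS_IBAN = {ord(d): str(i) for i, d in _LETTERS_IBAN}

def is_valid_iban(iban: str) -> bool:
    iban = iban.replace(' ', '').replace('-', '')
    # rotate country code + check digits to the end, map letters to numbers, check mod 97
    rotated = iban[4:] + iban[:4]
    return int(rotated.translate(LETTERS_IBAN)) % 97 == 1

print(is_valid_iban('DE89 3704 0044 0532 0130 00'))  # True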

@ -26,7 +26,7 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages # Import Project packages
################################## ##################################
from modules.abstract_module import AbstractModule from modules.abstract_module import AbstractModule
from packages.Item import Item from lib.objects.Items import Item
class Indexer(AbstractModule): class Indexer(AbstractModule):
@ -57,9 +57,7 @@ class Indexer(AbstractModule):
self.ix = None self.ix = None
if self.indexertype == "whoosh": if self.indexertype == "whoosh":
-            self.schema = Schema(title=TEXT(stored=True), path=ID(stored=True,
-                                                                  unique=True),
-                                 content=TEXT)
+            self.schema = Schema(title=TEXT(stored=True), path=ID(stored=True, unique=True), content=TEXT)
if not os.path.exists(self.baseindexpath): if not os.path.exists(self.baseindexpath):
os.mkdir(self.baseindexpath) os.mkdir(self.baseindexpath)
@ -96,7 +94,6 @@ class Indexer(AbstractModule):
self.last_refresh = time_now self.last_refresh = time_now
def compute(self, message): def compute(self, message):
docpath = message.split(" ", -1)[-1] docpath = message.split(" ", -1)[-1]
@ -109,7 +106,7 @@ class Indexer(AbstractModule):
try: try:
# Avoid calculating the index's size at each message # Avoid calculating the index's size at each message
if(time.time() - self.last_refresh > self.TIME_WAIT): if time.time() - self.last_refresh > self.TIME_WAIT:
self.last_refresh = time.time() self.last_refresh = time.time()
if self.check_index_size() >= self.INDEX_SIZE_THRESHOLD*(1000*1000): if self.check_index_size() >= self.INDEX_SIZE_THRESHOLD*(1000*1000):
timestamp = int(time.time()) timestamp = int(time.time())
@ -145,10 +142,8 @@ class Indexer(AbstractModule):
cur_sum = 0 cur_sum = 0
for root, dirs, files in os.walk(the_index_name): for root, dirs, files in os.walk(the_index_name):
cur_sum += sum(getsize(join(root, name)) for name in files) cur_sum += sum(getsize(join(root, name)) for name in files)
return cur_sum return cur_sum
def move_index_into_old_index_folder(self): def move_index_into_old_index_folder(self):
for cur_file in os.listdir(self.baseindexpath): for cur_file in os.listdir(self.baseindexpath):
if not cur_file == "old_index": if not cur_file == "old_index":

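A minimal, throwaway illustration of the Whoosh schema kept above; the index directory and document values are placeholders.

import os
from whoosh.index import create_in
from whoosh.fields import Schema, TEXT, ID
from whoosh.qparser import QueryParser

schema = Schema(title=TEXT(stored=True), path=ID(stored=True, unique=True), content=TEXT)

os.makedirs('/tmp/ail_index_demo', exist_ok=True)
ix = create_in('/tmp/ail_index_demo', schema)
writer = ix.writer()
writer.add_document(title='demo', path='tests/2022/10/25/demo.gz', content='leaked credentials inside')
writer.commit()

with ix.searcher() as searcher:
    results = searcher.search(QueryParser('content', ix.schema).parse('credentials'))
    print([hit['path'] for hit in results])   # ['tests/2022/10/25/demo.gz']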

@ -17,7 +17,6 @@ RSA private key, certificate messages
################################## ##################################
import os import os
import sys import sys
import time
from enum import Enum from enum import Enum
sys.path.append(os.environ['AIL_BIN']) sys.path.append(os.environ['AIL_BIN'])
@ -25,7 +24,7 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages # Import Project packages
################################## ##################################
from modules.abstract_module import AbstractModule from modules.abstract_module import AbstractModule
from packages.Item import Item from lib.objects.Items import Item
class KeyEnum(Enum): class KeyEnum(Enum):
@ -53,10 +52,9 @@ class Keys(AbstractModule):
def __init__(self): def __init__(self):
super(Keys, self).__init__() super(Keys, self).__init__()
# Waiting time in secondes between to message proccessed # Waiting time in seconds between to message processed
self.pending_seconds = 1 self.pending_seconds = 1
def compute(self, message): def compute(self, message):
item = Item(message) item = Item(message)
content = item.get_content() content = item.get_content()
@ -169,11 +167,12 @@ class Keys(AbstractModule):
if get_pgp_content: if get_pgp_content:
self.send_message_to_queue(item.get_id(), 'PgpDump') self.send_message_to_queue(item.get_id(), 'PgpDump')
if find : # if find :
#Send to duplicate # # Send to duplicate
self.send_message_to_queue(item.get_id(), 'Duplicate') # self.send_message_to_queue(item.get_id(), 'Duplicate')
self.redis_logger.debug(f'{item.get_id()} has key(s)') # self.redis_logger.debug(f'{item.get_id()} has key(s)')
print(f'{item.get_id()} has key(s)') # print(f'{item.get_id()} has key(s)')
if __name__ == '__main__': if __name__ == '__main__':


@ -11,7 +11,7 @@ sys.path.append(os.environ['AIL_BIN'])
from modules.abstract_module import AbstractModule from modules.abstract_module import AbstractModule
from lib.objects.Domains import Domain from lib.objects.Domains import Domain
from lib.objects.Items import Item from lib.objects.Items import Item
#from lib.ConfigLoader import ConfigLoader # from lib.ConfigLoader import ConfigLoader
class Languages(AbstractModule): class Languages(AbstractModule):
""" """
@ -31,6 +31,7 @@ class Languages(AbstractModule):
for lang in item.get_languages(min_probability=0.8): for lang in item.get_languages(min_probability=0.8):
domain.add_language(lang.language) domain.add_language(lang.language)
if __name__ == '__main__': if __name__ == '__main__':
module = Languages() module = Languages()
module.run() module.run()


@ -13,12 +13,12 @@ It tries to identify SQL Injections with libinjection.
import os import os
import sys import sys
import redis
import urllib.request import urllib.request
import pylibinjection import pylibinjection
from datetime import datetime from datetime import datetime
from pyfaup.faup import Faup from pyfaup.faup import Faup
from urllib.parse import unquote
sys.path.append(os.environ['AIL_BIN']) sys.path.append(os.environ['AIL_BIN'])
@ -27,7 +27,7 @@ sys.path.append(os.environ['AIL_BIN'])
################################## ##################################
from modules.abstract_module import AbstractModule from modules.abstract_module import AbstractModule
from lib.ConfigLoader import ConfigLoader from lib.ConfigLoader import ConfigLoader
from packages.Item import Item from lib.objects.Items import Item
class LibInjection(AbstractModule): class LibInjection(AbstractModule):
"""docstring for LibInjection module.""" """docstring for LibInjection module."""
@ -43,38 +43,38 @@ class LibInjection(AbstractModule):
self.redis_logger.info(f"Module: {self.module_name} Launched") self.redis_logger.info(f"Module: {self.module_name} Launched")
def compute(self, message): def compute(self, message):
url, id = message.split() url, item_id = message.split()
self.faup.decode(url) self.faup.decode(url)
url_parsed = self.faup.get() url_parsed = self.faup.get()
## TODO: # FIXME: remove me # # TODO: # FIXME: remove me
try: try:
resource_path = url_parsed['resource_path'].encode() resource_path = url_parsed['resource_path'].encode()
except: except:
resource_path = url_parsed['resource_path'] resource_path = url_parsed['resource_path']
## TODO: # FIXME: remove me # # TODO: # FIXME: remove me
try: try:
query_string = url_parsed['query_string'].encode() query_string = url_parsed['query_string'].encode()
except: except:
query_string = url_parsed['query_string'] query_string = url_parsed['query_string']
result_path = {'sqli' : False} result_path = {'sqli': False}
result_query = {'sqli' : False} result_query = {'sqli': False}
if resource_path is not None: if resource_path is not None:
result_path = pylibinjection.detect_sqli(resource_path) result_path = pylibinjection.detect_sqli(resource_path)
#print(f'path is sqli : {result_path}') # print(f'path is sqli : {result_path}')
if query_string is not None: if query_string is not None:
result_query = pylibinjection.detect_sqli(query_string) result_query = pylibinjection.detect_sqli(query_string)
#print(f'query is sqli : {result_query}') # print(f'query is sqli : {result_query}')
if result_path['sqli'] is True or result_query['sqli'] is True: if result_path['sqli'] is True or result_query['sqli'] is True:
item = Item(id) item = Item(item_id)
item_id = item.get_id() item_id = item.get_id()
print(f"Detected (libinjection) SQL in URL: {item_id}") print(f"Detected (libinjection) SQL in URL: {item_id}")
print(urllib.request.unquote(url)) print(unquote(url))
to_print = f'LibInjection;{item.get_source()};{item.get_date()};{item.get_basename()};Detected SQL in URL;{item_id}' to_print = f'LibInjection;{item.get_source()};{item.get_date()};{item.get_basename()};Detected SQL in URL;{item_id}'
self.redis_logger.warning(to_print) self.redis_logger.warning(to_print)
@ -86,8 +86,8 @@ class LibInjection(AbstractModule):
msg = f'infoleak:automatic-detection="sql-injection";{item_id}' msg = f'infoleak:automatic-detection="sql-injection";{item_id}'
self.send_message_to_queue(msg, 'Tags') self.send_message_to_queue(msg, 'Tags')
#statistics # statistics
## TODO: # FIXME: remove me # # TODO: # FIXME: remove me
try: try:
tld = url_parsed['tld'].decode() tld = url_parsed['tld'].decode()
except: except:
@ -96,7 +96,7 @@ class LibInjection(AbstractModule):
date = datetime.now().strftime("%Y%m") date = datetime.now().strftime("%Y%m")
self.server_statistics.hincrby(f'SQLInjection_by_tld:{date}', tld, 1) self.server_statistics.hincrby(f'SQLInjection_by_tld:{date}', tld, 1)
if __name__ == "__main__":
if __name__ == "__main__":
module = LibInjection() module = LibInjection()
module.run() module.run()

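A standalone illustration of the libinjection check performed above; the URL and payload are fabricated, and detect_sqli is called on bytes exactly as the module does.

import pylibinjection
from urllib.parse import unquote

url = 'http://example.com/index.php?id=1%27%20OR%20%271%27=%271'
query_string = unquote(url.split('?', 1)[1]).encode()

result = pylibinjection.detect_sqli(query_string)
print(result['sqli'])   # expected to flag the classic ' OR '1'='1 payload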

@ -13,9 +13,7 @@ It apply mail regexes on item content and warn if above a threshold.
import os import os
import re import re
import redis
import sys import sys
import time
import datetime import datetime
import dns.resolver import dns.resolver
@ -52,7 +50,7 @@ class Mail(AbstractModule):
self.mail_threshold = 10 self.mail_threshold = 10
self.regex_timeout = 30 self.regex_timeout = 30
self.email_regex = "[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}" self.email_regex = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}"
re.compile(self.email_regex) re.compile(self.email_regex)
def is_mxdomain_in_cache(self, mxdomain): def is_mxdomain_in_cache(self, mxdomain):
@ -64,8 +62,8 @@ class Mail(AbstractModule):
def check_mx_record(self, set_mxdomains): def check_mx_record(self, set_mxdomains):
"""Check if emails MX domains are responding. """Check if emails MX domains are responding.
:param adress_set: -- (set) This is a set of emails domains :param set_mxdomains: -- (set) This is a set of emails domains
:return: (int) Number of adress with a responding and valid MX domains :return: (int) Number of address with a responding and valid MX domains
""" """
resolver = dns.resolver.Resolver() resolver = dns.resolver.Resolver()
@ -107,7 +105,7 @@ class Mail(AbstractModule):
self.redis_logger.debug('SyntaxError: EmptyLabel') self.redis_logger.debug('SyntaxError: EmptyLabel')
print('SyntaxError: EmptyLabel') print('SyntaxError: EmptyLabel')
except dns.resolver.NXDOMAIN: except dns.resolver.NXDOMAIN:
#save_mxdomain_in_cache(mxdomain) # save_mxdomain_in_cache(mxdomain)
self.redis_logger.debug('The query name does not exist.') self.redis_logger.debug('The query name does not exist.')
print('The query name does not exist.') print('The query name does not exist.')
except dns.name.LabelTooLong: except dns.name.LabelTooLong:
@ -115,12 +113,12 @@ class Mail(AbstractModule):
print('The Label is too long') print('The Label is too long')
except dns.exception.Timeout: except dns.exception.Timeout:
print('dns timeout') print('dns timeout')
#save_mxdomain_in_cache(mxdomain) # save_mxdomain_in_cache(mxdomain)
except Exception as e: except Exception as e:
print(e) print(e)
return valid_mxdomain return valid_mxdomain
# # TODO: sanityze mails # # TODO: sanitize mails
def compute(self, message): def compute(self, message):
item_id, score = message.split() item_id, score = message.split()
item = Item(item_id) item = Item(item_id)
@ -134,7 +132,7 @@ class Mail(AbstractModule):
mxdomains_email[mxdomain] = set() mxdomains_email[mxdomain] = set()
mxdomains_email[mxdomain].add(mail) mxdomains_email[mxdomain].add(mail)
## TODO: add MAIL trackers # # TODO: add MAIL trackers
valid_mx = self.check_mx_record(mxdomains_email.keys()) valid_mx = self.check_mx_record(mxdomains_email.keys())
print(f'valid_mx: {valid_mx}') print(f'valid_mx: {valid_mx}')
@ -144,7 +142,7 @@ class Mail(AbstractModule):
nb_mails = len(mxdomains_email[domain_mx]) nb_mails = len(mxdomains_email[domain_mx])
num_valid_email += nb_mails num_valid_email += nb_mails
# Create doamin_mail stats # Create domain_mail stats
msg = f'mail;{nb_mails};{domain_mx};{item_date}' msg = f'mail;{nb_mails};{domain_mx};{item_date}'
self.send_message_to_queue(msg, 'ModuleStats') self.send_message_to_queue(msg, 'ModuleStats')
@ -159,8 +157,8 @@ class Mail(AbstractModule):
for tld in mx_tlds: for tld in mx_tlds:
Statistics.add_module_tld_stats_by_date('mail', item_date, tld, mx_tlds[tld]) Statistics.add_module_tld_stats_by_date('mail', item_date, tld, mx_tlds[tld])
msg = f'Mails;{item.get_source()};{item_date};{item.get_basename()};Checked {num_valid_email} e-mail(s);{item_id}'
if num_valid_email > self.mail_threshold: if num_valid_email > self.mail_threshold:
msg = f'Mails;{item.get_source()};{item_date};{item.get_basename()};Checked {num_valid_email} e-mail(s);{item_id}'
print(f'{item_id} Checked {num_valid_email} e-mail(s)') print(f'{item_id} Checked {num_valid_email} e-mail(s)')
self.redis_logger.warning(msg) self.redis_logger.warning(msg)
# Tags # Tags
@ -170,8 +168,6 @@ class Mail(AbstractModule):
self.redis_logger.info(msg) self.redis_logger.info(msg)
if __name__ == '__main__': if __name__ == '__main__':
module = Mail() module = Mail()
#module.compute('tests/2021/01/01/mails.gz 50')
module.run() module.run()

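A reduced sketch of the MX lookup that check_mx_record() performs, assuming dnspython >= 2.0 (resolver.resolve); the resolver address and domains are placeholders.

import dns.exception
import dns.resolver

def count_valid_mx(mxdomains, dns_server='8.8.8.8'):
    resolver = dns.resolver.Resolver()
    resolver.nameservers = [dns_server]
    resolver.timeout = 5
    resolver.lifetime = 5
    valid = 0
    for mxdomain in mxdomains:
        try:
            # any MX answer means mail for this domain can at least be routed somewhere
            if resolver.resolve(mxdomain, 'MX'):
                valid += 1
        except (dns.resolver.NXDOMAIN, dns.resolver.NoAnswer, dns.exception.Timeout):
            continue
    return valid

print(count_valid_mx({'gmail.com', 'doesnotexist.invalid'}))   # likely 1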

@ -25,12 +25,11 @@ class ModuleStats(AbstractModule):
Module Statistics module for AIL framework Module Statistics module for AIL framework
""" """
def __init__(self): def __init__(self):
super(ModuleStats, self).__init__() super(ModuleStats, self).__init__()
# Waiting time in secondes between to message proccessed # Waiting time in seconds between to message processed
self.pending_seconds = 20 self.pending_seconds = 20
def compute(self, message): def compute(self, message):
@ -38,9 +37,10 @@ class ModuleStats(AbstractModule):
# MODULE STATS # MODULE STATS
if len(message.split(';')) > 1: if len(message.split(';')) > 1:
module_name, num, keyword, date = message.split(';') module_name, num, keyword, date = message.split(';')
Statisticsupdate_module_stats(module_name, num, keyword, date) Statistics.update_module_stats(module_name, num, keyword, date)
# ITEM STATS # ITEM STATS
else: else:
item_id = message
item = Item(item_id) item = Item(item_id)
source = item.get_source() source = item.get_source()
date = item.get_date() date = item.get_date()


@ -13,8 +13,6 @@ Requirements
*Need running Redis instances. (Redis) *Need running Redis instances. (Redis)
""" """
import time
import datetime
import os import os
import sys import sys
import re import re
@ -25,68 +23,8 @@ sys.path.append(os.environ['AIL_BIN'])
################################## ##################################
from modules.abstract_module import AbstractModule from modules.abstract_module import AbstractModule
from lib.ConfigLoader import ConfigLoader from lib.ConfigLoader import ConfigLoader
from lib.objects.Items import Item
from lib import crawlers from lib import crawlers
from lib import regex_helper
from packages.Item import Item
## Manually fetch first page if crawler is disabled
# import base64
# import subprocess
#
# torclient_host = '127.0.0.1'
# torclient_port = 9050
#
# def fetch(p, r_cache, urls, domains):
# now = datetime.datetime.now()
# path = os.path.join('onions', str(now.year).zfill(4),
# str(now.month).zfill(2),
# str(now.day).zfill(2),
# str(int(time.mktime(now.utctimetuple()))))
# failed = []
# downloaded = []
# print('{} Urls to fetch'.format(len(urls)))
# for url, domain in zip(urls, domains):
# if r_cache.exists(url) or url in failed:
# continue
# to_fetch = base64.standard_b64encode(url.encode('utf8'))
# print('fetching url: {}'.format(to_fetch))
# process = subprocess.Popen(["python", './tor_fetcher.py', to_fetch],
# stdout=subprocess.PIPE)
# while process.poll() is None:
# time.sleep(1)
#
# if process.returncode == 0:
# r_cache.setbit(url, 0, 1)
# r_cache.expire(url, 360000)
# downloaded.append(url)
# print('downloaded : {}'.format(downloaded))
# '''tempfile = process.stdout.read().strip()
# tempfile = tempfile.decode('utf8')
# #with open(tempfile, 'r') as f:
# filename = path + domain + '.gz'
# fetched = f.read()
# content = base64.standard_b64decode(fetched)
# save_path = os.path.join(os.environ['AIL_HOME'],
# p.config.get("Directories", "pastes"),
# filename)
# dirname = os.path.dirname(save_path)
# if not os.path.exists(dirname):
# os.makedirs(dirname)
# with open(save_path, 'w') as ff:
# ff.write(content)
# p.populate_set_out(save_path, 'Global')
# p.populate_set_out(url, 'ValidOnion')
# p.populate_set_out(fetched, 'FetchedOnion')'''
# yield url
# #os.unlink(tempfile)
# else:
# r_cache.setbit(url, 0, 0)
# r_cache.expire(url, 3600)
# failed.append(url)
# print('Failed at downloading', url)
# print(process.stdout.read())
# print('Failed:', len(failed), 'Downloaded:', len(downloaded))
class Onion(AbstractModule): class Onion(AbstractModule):
"""docstring for Onion module.""" """docstring for Onion module."""
@ -103,68 +41,63 @@ class Onion(AbstractModule):
self.regex_timeout = 30 self.regex_timeout = 30
self.faup = crawlers.get_faup() self.faup = crawlers.get_faup()
self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)
# activate_crawler = p.config.get("Crawler", "activate_crawler") # activate_crawler = p.config.get("Crawler", "activate_crawler")
self.url_regex = "((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.onion)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)" self.onion_regex = r"((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.onion)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
self.i2p_regex = "((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.i2p)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)" # self.i2p_regex = r"((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.i2p)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
re.compile(self.url_regex) re.compile(self.onion_regex)
re.compile(self.i2p_regex) # re.compile(self.i2p_regex)
self.redis_logger.info(f"Module: {self.module_name} Launched") self.redis_logger.info(f"Module: {self.module_name} Launched")
# TEMP var: SAVE I2P Domain (future I2P crawler) # TEMP var: SAVE I2P Domain (future I2P crawler)
self.save_i2p = config_loader.get_config_boolean("Onion", "save_i2p") # self.save_i2p = config_loader.get_config_boolean("Onion", "save_i2p")
def compute(self, message): def compute(self, message):
-        # list of tuples: (url, subdomains, domain)
-        urls_to_crawl = []
-        id, score = message.split()
-        item = Item(id)
+        onion_urls = []
+        domains = []
+        item_id, score = message.split()
+        item = Item(item_id)
         item_content = item.get_content()
         # max execution time on regex
-        res = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.url_regex, item.get_id(), item_content)
+        res = self.regex_findall(self.onion_regex, item.get_id(), item_content)
         for x in res:
             # String to tuple
             x = x[2:-2].replace(" '", "").split("',")
             url = x[0]
-            subdomain = x[4].lower()
-            self.faup.decode(url)
-            url_unpack = self.faup.get()
-            try: ## TODO: # FIXME: check faup version
-                domain = url_unpack['domain'].decode().lower()
-            except Exception as e:
-                domain = url_unpack['domain'].lower()
+            print(url)
+            # TODO Crawl subdomain
+            url_unpack = crawlers.unpack_url(url)
+            domain = url_unpack['domain']
             if crawlers.is_valid_onion_domain(domain):
-                urls_to_crawl.append((url, subdomain, domain))
+                domains.append(domain)
+                onion_urls.append(url)

-        to_print = f'Onion;{item.get_source()};{item.get_date()};{item.get_basename()};'
-        if not urls_to_crawl:
-            self.redis_logger.info(f'{to_print}Onion related;{item.get_id()}')
-            return
+        if onion_urls:
+            if crawlers.is_crawler_activated():
+                for domain in domains:# TODO LOAD DEFAULT SCREENSHOT + HAR
+                    task_uuid = crawlers.add_crawler_task(domain, parent=item.get_id())
+                    if task_uuid:
+                        print(f'{domain} added to crawler queue: {task_uuid}')
+            else:
+                to_print = f'Onion;{item.get_source()};{item.get_date()};{item.get_basename()};'
+                print(f'{to_print}Detected {len(domains)} .onion(s);{item.get_id()}')
+                self.redis_logger.warning(f'{to_print}Detected {len(domains)} .onion(s);{item.get_id()}')

         # TAG Item
         msg = f'infoleak:automatic-detection="onion";{item.get_id()}'
         self.send_message_to_queue(msg, 'Tags')

-        if crawlers.is_crawler_activated():
-            for to_crawl in urls_to_crawl:
-                print(f'{to_crawl[2]} added to crawler queue: {to_crawl[0]}')
-                crawlers.add_item_to_discovery_queue('onion', to_crawl[2], to_crawl[1], to_crawl[0], item.get_id())
-        else:
-            print(f'{to_print}Detected {len(urls_to_crawl)} .onion(s);{item.get_id()}')
-            self.redis_logger.warning(f'{to_print}Detected {len(urls_to_crawl)} .onion(s);{item.get_id()}')
-        # keep manual fetcher ????
-        ## Manually fetch first page if crawler is disabled
-        # for url in fetch(p, r_cache, urls, domains_list):
-        #     publisher.info('{}Checked {};{}'.format(to_print, url, PST.p_rel_path))

 if __name__ == "__main__":
     module = Onion()
+    # module.compute('submitted/2022/10/10/submitted_705d1d92-7e9a-4a44-8c21-ccd167bfb7db.gz 9')
     module.run()
+# 5ajw6aqf3ep7sijnscdzw77t7xq4xjpsy335yb2wiwgouo7yfxtjlmid.onion to debian.org

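The crawler-side calls used above (crawlers.unpack_url, is_valid_onion_domain, is_crawler_activated, add_crawler_task) can be strung together outside the module like this; the onion address is a made-up, syntactically shaped placeholder and the parent item id is illustrative.

import os
import sys
sys.path.append(os.environ['AIL_BIN'])
from lib import crawlers

url = 'http://' + 'a' * 56 + '.onion/login'          # placeholder v3-shaped address
url_unpack = crawlers.unpack_url(url)
domain = url_unpack['domain']

if crawlers.is_valid_onion_domain(domain) and crawlers.is_crawler_activated():
    # queue a crawl task for the (lacus-backed) crawler, as the Onion module now does
    task_uuid = crawlers.add_crawler_task(domain, parent='submitted/2022/10/25/example.gz')
    print(f'{domain} added to crawler queue: {task_uuid}')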

@ -17,7 +17,6 @@ It apply phone number regexes on item content and warn if above a threshold.
import os import os
import re import re
import sys import sys
import time
import phonenumbers import phonenumbers
sys.path.append(os.environ['AIL_BIN']) sys.path.append(os.environ['AIL_BIN'])
@ -25,7 +24,7 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages # Import Project packages
################################## ##################################
from modules.abstract_module import AbstractModule from modules.abstract_module import AbstractModule
from packages.Item import Item from lib.objects.Items import Item
# # TODO: # FIXME: improve regex / filter false positives # # TODO: # FIXME: improve regex / filter false positives
class Phone(AbstractModule): class Phone(AbstractModule):
@ -37,14 +36,12 @@ class Phone(AbstractModule):
# reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})') # reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})')
REG_PHONE = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\(?\d{2,4}\)?){3,4})') REG_PHONE = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\(?\d{2,4}\)?){3,4})')
def __init__(self): def __init__(self):
super(Phone, self).__init__() super(Phone, self).__init__()
# Waiting time in secondes between to message proccessed # Waiting time in seconds between to message processed
self.pending_seconds = 1 self.pending_seconds = 1
def compute(self, message): def compute(self, message):
item = Item(message) item = Item(message)
content = item.get_content() content = item.get_content()
@ -79,6 +76,5 @@ class Phone(AbstractModule):
if __name__ == '__main__': if __name__ == '__main__':
module = Phone() module = Phone()
module.run() module.run()


@ -14,11 +14,11 @@ It test different possibility to makes some sqlInjection.
import os import os
import sys import sys
import re import re
import redis
import urllib.request import urllib.request
from datetime import datetime from datetime import datetime
from pyfaup.faup import Faup from pyfaup.faup import Faup
from urllib.parse import unquote
sys.path.append(os.environ['AIL_BIN']) sys.path.append(os.environ['AIL_BIN'])
################################## ##################################
@ -26,7 +26,7 @@ sys.path.append(os.environ['AIL_BIN'])
################################## ##################################
from modules.abstract_module import AbstractModule from modules.abstract_module import AbstractModule
from lib.ConfigLoader import ConfigLoader from lib.ConfigLoader import ConfigLoader
from packages.Item import Item from lib.objects.Items import Item
class SQLInjectionDetection(AbstractModule): class SQLInjectionDetection(AbstractModule):
"""docstring for SQLInjectionDetection module.""" """docstring for SQLInjectionDetection module."""
@ -46,13 +46,13 @@ class SQLInjectionDetection(AbstractModule):
self.redis_logger.info(f"Module: {self.module_name} Launched") self.redis_logger.info(f"Module: {self.module_name} Launched")
def compute(self, message): def compute(self, message):
url, id = message.split() url, item_id = message.split()
if self.is_sql_injection(url): if self.is_sql_injection(url):
self.faup.decode(url) self.faup.decode(url)
url_parsed = self.faup.get() url_parsed = self.faup.get()
item = Item(id) item = Item(item_id)
item_id = item.get_id() item_id = item.get_id()
print(f"Detected SQL in URL: {item_id}") print(f"Detected SQL in URL: {item_id}")
print(urllib.request.unquote(url)) print(urllib.request.unquote(url))
@ -69,7 +69,7 @@ class SQLInjectionDetection(AbstractModule):
# statistics # statistics
tld = url_parsed['tld'] tld = url_parsed['tld']
if tld is not None: if tld is not None:
## TODO: # FIXME: remove me # # TODO: # FIXME: remove me
try: try:
tld = tld.decode() tld = tld.decode()
except: except:
@ -77,15 +77,13 @@ class SQLInjectionDetection(AbstractModule):
date = datetime.now().strftime("%Y%m") date = datetime.now().strftime("%Y%m")
self.server_statistics.hincrby(f'SQLInjection_by_tld:{date}', tld, 1) self.server_statistics.hincrby(f'SQLInjection_by_tld:{date}', tld, 1)
# Try to detect if the url passed might be an sql injection by appliying the regex # Try to detect if the url passed might be an sql injection by applying the regex
# defined above on it. # defined above on it.
def is_sql_injection(self, url_parsed): def is_sql_injection(self, url_parsed):
line = urllib.request.unquote(url_parsed) line = unquote(url_parsed)
return re.search(SQLInjectionDetection.SQLI_REGEX, line, re.I) is not None return re.search(SQLInjectionDetection.SQLI_REGEX, line, re.I) is not None
if __name__ == "__main__": if __name__ == "__main__":
module = SQLInjectionDetection() module = SQLInjectionDetection()
module.run() module.run()


@ -34,9 +34,8 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages # Import Project packages
################################## ##################################
from modules.abstract_module import AbstractModule from modules.abstract_module import AbstractModule
from packages import Paste from lib.objects.Items import Item
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) from lib import ConfigLoader
import ConfigLoader
class TimeoutException(Exception): class TimeoutException(Exception):
@ -53,12 +52,10 @@ class SentimentAnalysis(AbstractModule):
SentimentAnalysis module for AIL framework SentimentAnalysis module for AIL framework
""" """
# Config Variables # Config Variables
accepted_Mime_type = ['text/plain'] accepted_Mime_type = ['text/plain']
line_max_length_threshold = 1000 line_max_length_threshold = 1000
def __init__(self): def __init__(self):
super(SentimentAnalysis, self).__init__() super(SentimentAnalysis, self).__init__()
@ -75,7 +72,6 @@ class SentimentAnalysis(AbstractModule):
# Send module state to logs # Send module state to logs
self.redis_logger.info(f"Module {self.module_name} initialized") self.redis_logger.info(f"Module {self.module_name} initialized")
def compute(self, message): def compute(self, message):
# Max time to compute one entry # Max time to compute one entry
signal.alarm(60) signal.alarm(60)
@ -87,16 +83,31 @@ class SentimentAnalysis(AbstractModule):
else: else:
signal.alarm(0) signal.alarm(0)
def get_p_content_with_removed_lines(self, threshold, item_content):
num_line_removed = 0
line_length_threshold = threshold
string_content = ""
f = item_content
for line_id, line in enumerate(f):
length = len(line)
if length < line_length_threshold:
string_content += line
else:
num_line_removed += 1
return num_line_removed, string_content
def analyse(self, message): def analyse(self, message):
paste = Paste.Paste(message) item = Item(message)
# get content with removed line + number of them # get content with removed line + number of them
num_line_removed, p_content = paste.get_p_content_with_removed_lines(SentimentAnalysis.line_max_length_threshold) num_line_removed, p_content = self.get_p_content_with_removed_lines(SentimentAnalysis.line_max_length_threshold,
provider = paste.p_source item.get_content())
p_date = str(paste._get_p_date()) provider = item.get_source()
p_MimeType = paste._get_p_encoding() p_date = item.get_date()
p_MimeType = item.get_mimetype()
# Perform further analysis # Perform further analysis
if p_MimeType == "text/plain": if p_MimeType == "text/plain":
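The Paste helper removed above is re-implemented as a module method; here is a standalone sketch of the same line-filtering idea, iterating explicit lines via splitlines() rather than the raw content object (an assumption, not the module's exact code).

def get_content_with_removed_lines(threshold, content):
    # Drop lines longer than `threshold` characters; return how many were dropped.
    num_removed = 0
    kept = []
    for line in content.splitlines(keepends=True):
        if len(line) < threshold:
            kept.append(line)
        else:
            num_removed += 1
    return num_removed, ''.join(kept)

if __name__ == '__main__':
    text = "short line\n" + "x" * 2000 + "\nanother short line\n"
    removed, cleaned = get_content_with_removed_lines(1000, text)
    print(removed)
    print(cleaned)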

View file

@ -20,7 +20,7 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages # Import Project packages
################################## ##################################
from modules.abstract_module import AbstractModule from modules.abstract_module import AbstractModule
from packages.Item import Item from lib.objects.Items import Item
from lib import Tag from lib import Tag
@ -32,13 +32,12 @@ class Tags(AbstractModule):
def __init__(self): def __init__(self):
super(Tags, self).__init__() super(Tags, self).__init__()
# Waiting time in secondes between to message proccessed # Waiting time in seconds between two processed messages
self.pending_seconds = 10 self.pending_seconds = 10
# Send module state to logs # Send module state to logs
self.redis_logger.info(f'Module {self.module_name} initialized') self.redis_logger.info(f'Module {self.module_name} initialized')
def compute(self, message): def compute(self, message):
# Extract item ID and tag from message # Extract item ID and tag from message
mess_split = message.split(';') mess_split = message.split(';')
@ -62,6 +61,5 @@ class Tags(AbstractModule):
if __name__ == '__main__': if __name__ == '__main__':
module = Tags() module = Tags()
module.run() module.run()

View file

@ -16,7 +16,7 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages # Import Project packages
################################## ##################################
from modules.abstract_module import AbstractModule from modules.abstract_module import AbstractModule
from packages.Item import Item from lib.objects.Items import Item
from lib import regex_helper from lib import regex_helper
from lib import telegram from lib import telegram
@ -78,7 +78,7 @@ class Telegram(AbstractModule):
# CREATE TAG # CREATE TAG
if invite_code_found: if invite_code_found:
#tags # tags
msg = f'infoleak:automatic-detection="telegram-invite-hash";{item.get_id()}' msg = f'infoleak:automatic-detection="telegram-invite-hash";{item.get_id()}'
self.send_message_to_queue(msg, 'Tags') self.send_message_to_queue(msg, 'Tags')

View file

@ -13,7 +13,6 @@ This module extract URLs from an item and send them to others modules.
# Import External packages # Import External packages
################################## ##################################
import os import os
import re
import sys import sys
from pyfaup.faup import Faup from pyfaup.faup import Faup
@ -23,8 +22,7 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages # Import Project packages
################################## ##################################
from modules.abstract_module import AbstractModule from modules.abstract_module import AbstractModule
from packages.Item import Item from lib.objects.Items import Item
from lib import regex_helper
# # TODO: Faup packages: Add new binding: Check TLD # # TODO: Faup packages: Add new binding: Check TLD
@ -40,7 +38,6 @@ class Urls(AbstractModule):
super(Urls, self).__init__() super(Urls, self).__init__()
self.faup = Faup() self.faup = Faup()
self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)
# Protocol file path # Protocol file path
protocolsfile_path = os.path.join(os.environ['AIL_HOME'], protocolsfile_path = os.path.join(os.environ['AIL_HOME'],
@ -58,21 +55,26 @@ class Urls(AbstractModule):
# Send module state to logs # Send module state to logs
self.redis_logger.info(f"Module {self.module_name} initialized") self.redis_logger.info(f"Module {self.module_name} initialized")
def compute(self, message): def compute(self, message):
""" """
Search for Web links from given message Search for Web links from given message
""" """
# Extract item # Extract item
id, score = message.split() item_id, score = message.split()
item = Item(id) item = Item(item_id)
item_content = item.get_content() item_content = item.get_content()
l_urls = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.url_regex, item.get_id(), item_content) # TODO Handle invalid URL
l_urls = self.regex_findall(self.url_regex, item.get_id(), item_content)
for url in l_urls: for url in l_urls:
self.faup.decode(url) self.faup.decode(url)
unpack_url = self.faup.get() url_decoded = self.faup.get()
# decode URL
try:
url = url_decoded['url'].decode()
except AttributeError:
url = url_decoded['url']
to_send = f"{url} {item.get_id()}" to_send = f"{url} {item.get_id()}"
print(to_send) print(to_send)
@ -83,7 +85,7 @@ class Urls(AbstractModule):
to_print = f'Urls;{item.get_source()};{item.get_date()};{item.get_basename()};' to_print = f'Urls;{item.get_source()};{item.get_date()};{item.get_basename()};'
self.redis_logger.info(f'{to_print}Detected {len(l_urls)} URL;{item.get_id()}') self.redis_logger.info(f'{to_print}Detected {len(l_urls)} URL;{item.get_id()}')
if __name__ == '__main__':
if __name__ == '__main__':
module = Urls() module = Urls()
module.run() module.run()
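A short sketch of the bytes-vs-str guard added around faup.get() in the hunk above, assuming pyfaup is installed (it is already an AIL dependency); the URL is only an example.

from pyfaup.faup import Faup

faup = Faup()
faup.decode('https://www.Example.com/path?q=1')
url_decoded = faup.get()
try:
    # older pyfaup bindings return bytes
    url = url_decoded['url'].decode()
except AttributeError:
    # newer bindings already return str
    url = url_decoded['url']
print(url)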

View file

@ -11,9 +11,8 @@ This module spots zerobins-like services for further processing
################################## ##################################
import os import os
import sys import sys
import time
import pdb
import re import re
sys.path.append(os.environ['AIL_BIN']) sys.path.append(os.environ['AIL_BIN'])
################################## ##################################
# Import Project packages # Import Project packages
@ -42,33 +41,31 @@ class Zerobins(AbstractModule):
# Send module state to logs # Send module state to logs
self.redis_logger.info(f'Module {self.module_name} initialized') self.redis_logger.info(f'Module {self.module_name} initialized')
def computeNone(self): def computeNone(self):
""" """
Compute when no message in queue Compute when no message in queue
""" """
self.redis_logger.debug("No message in queue") self.redis_logger.debug("No message in queue")
def compute(self, message): def compute(self, message):
"""regex_helper.regex_findall(self.module_name, self.redis_cache_key """
Compute a message in queue Compute a message in queue
""" """
print(message) url, item_id = message.split()
url, id = message.split()
# Extract zerobins addresses # Extract zerobins addresses
matching_binz = self.regex_findall(self.regex, id, url) matching_binz = self.regex_findall(self.regex, item_id, url)
if len(matching_binz) > 0: if len(matching_binz) > 0:
for bin in matching_binz: for bin_url in matching_binz:
print("send {} to crawler".format(bin)) print(f'send {bin_url} to crawler')
crawlers.create_crawler_task(bin, screenshot=False, har=False, depth_limit=1, max_pages=1, auto_crawler=False, crawler_delta=3600, crawler_type=None, cookiejar_uuid=None, user_agent=None) crawlers.add_crawler_task(bin_url, depth=0, har=False, screenshot=False, proxy='force_tor',
parent='manual', priority=10)
self.redis_logger.debug("Compute message in queue") self.redis_logger.debug("Compute message in queue")
# TODO TEST ME
if __name__ == '__main__': if __name__ == '__main__':
module = Zerobins() module = Zerobins()
module.run() module.run()
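For reference, a sketch of how a matched paste-bin URL would be queued through the new lacus-backed helper, using the add_crawler_task signature shown in this hunk; it only runs inside an AIL checkout with AIL_BIN set, and the onion URL is made up.

import os
import sys

sys.path.append(os.environ['AIL_BIN'])
from lib import crawlers  # AIL crawler helper, lacus-backed in this commit

# Queue a single page: no HAR, no screenshot, forced through Tor, priority 10
crawlers.add_crawler_task('http://examplezerobin.onion/paste/abc',
                          depth=0, har=False, screenshot=False,
                          proxy='force_tor', parent='manual', priority=10)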

View file

@ -59,6 +59,9 @@ class AbstractModule(ABC):
# Setup the I/O queues # Setup the I/O queues
self.process = Process(self.queue_name) self.process = Process(self.queue_name)
# Debug Mode
self.debug = False
def get_message(self): def get_message(self):
""" """
Get message from the Redis Queue (QueueIn) Get message from the Redis Queue (QueueIn)
@ -104,6 +107,8 @@ class AbstractModule(ABC):
# Module processing with the message from the queue # Module processing with the message from the queue
self.compute(message) self.compute(message)
except Exception as err: except Exception as err:
if self.debug:
raise err
trace = traceback.format_tb(err.__traceback__) trace = traceback.format_tb(err.__traceback__)
trace = ''.join(trace) trace = ''.join(trace)
self.redis_logger.critical(f"Error in module {self.module_name}: {err}") self.redis_logger.critical(f"Error in module {self.module_name}: {err}")
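A stripped-down sketch of the new debug behaviour: when the flag is set, the exception is re-raised instead of being swallowed and logged. Class and method names here are illustrative, not the real AbstractModule.

import traceback

class MiniModule:
    def __init__(self):
        self.debug = False  # set True (e.g. in tests) to surface exceptions immediately

    def compute(self, message):
        raise ValueError(f'bad message: {message}')

    def process(self, message):
        try:
            self.compute(message)
        except Exception as err:
            if self.debug:
                raise err           # fail fast when debugging
            # stand-in for redis_logger.critical + traceback dump
            print(f'Error in module MiniModule: {err}')
            print(''.join(traceback.format_tb(err.__traceback__)))

if __name__ == '__main__':
    m = MiniModule()
    m.process('hello')     # error is logged and swallowed
    m.debug = True
    # m.process('hello')   # would now raise ValueError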

View file

@ -16,7 +16,6 @@ import os
import sys import sys
import gzip import gzip
import io import io
import redis
import base64 import base64
import datetime import datetime
import time import time
@ -51,6 +50,7 @@ class SubmitPaste(AbstractModule):
""" """
super(SubmitPaste, self).__init__(queue_name='submit_paste') super(SubmitPaste, self).__init__(queue_name='submit_paste')
# TODO KVROCKS
self.r_serv_db = ConfigLoader.ConfigLoader().get_redis_conn("ARDB_DB") self.r_serv_db = ConfigLoader.ConfigLoader().get_redis_conn("ARDB_DB")
self.r_serv_log_submit = ConfigLoader.ConfigLoader().get_redis_conn("Redis_Log_submit") self.r_serv_log_submit = ConfigLoader.ConfigLoader().get_redis_conn("Redis_Log_submit")
self.r_serv_tags = ConfigLoader.ConfigLoader().get_redis_conn("ARDB_Tags") self.r_serv_tags = ConfigLoader.ConfigLoader().get_redis_conn("ARDB_Tags")
@ -61,7 +61,6 @@ class SubmitPaste(AbstractModule):
self.PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], ConfigLoader.ConfigLoader().get_config_str("Directories", "pastes")) + '/' self.PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], ConfigLoader.ConfigLoader().get_config_str("Directories", "pastes")) + '/'
def compute(self, uuid): def compute(self, uuid):
""" """
Main method of the Module to implement Main method of the Module to implement
@ -129,7 +128,6 @@ class SubmitPaste(AbstractModule):
self.redis_logger.debug(f'{self.module_name}, waiting for new message, Idling {self.pending_seconds}s') self.redis_logger.debug(f'{self.module_name}, waiting for new message, Idling {self.pending_seconds}s')
time.sleep(self.pending_seconds) time.sleep(self.pending_seconds)
def _manage_text(self, uuid, paste_content, ltags, ltagsgalaxies, source): def _manage_text(self, uuid, paste_content, ltags, ltagsgalaxies, source):
""" """
Create a paste for given text Create a paste for given text
@ -141,7 +139,6 @@ class SubmitPaste(AbstractModule):
else: else:
self.abord_file_submission(uuid, f'Text size is over {SubmitPaste.TEXT_MAX_SIZE} bytes') self.abord_file_submission(uuid, f'Text size is over {SubmitPaste.TEXT_MAX_SIZE} bytes')
def _manage_file(self, uuid, file_full_path, ltags, ltagsgalaxies, source): def _manage_file(self, uuid, file_full_path, ltags, ltagsgalaxies, source):
""" """
Create a paste for given file Create a paste for given file
@ -230,7 +227,6 @@ class SubmitPaste(AbstractModule):
else: else:
self.abord_file_submission(uuid, "Server Error, the archive can't be found") self.abord_file_submission(uuid, "Server Error, the archive can't be found")
def _is_compressed_type(self, file_type): def _is_compressed_type(self, file_type):
""" """
Check if file type is in the list of compressed file extensions format Check if file type is in the list of compressed file extensions format
@ -239,7 +235,6 @@ class SubmitPaste(AbstractModule):
return file_type in compressed_type return file_type in compressed_type
def remove_submit_uuid(self, uuid): def remove_submit_uuid(self, uuid):
# save temp value on disk # save temp value on disk
self.r_serv_db.delete(f'{uuid}:ltags') self.r_serv_db.delete(f'{uuid}:ltags')
@ -262,7 +257,6 @@ class SubmitPaste(AbstractModule):
self.redis_logger.debug(f'{uuid} all file submitted') self.redis_logger.debug(f'{uuid} all file submitted')
print(f'{uuid} all file submitted') print(f'{uuid} all file submitted')
def create_paste(self, uuid, paste_content, ltags, ltagsgalaxies, name, source=None): def create_paste(self, uuid, paste_content, ltags, ltagsgalaxies, name, source=None):
# # TODO: Use Item create # # TODO: Use Item create
@ -272,8 +266,8 @@ class SubmitPaste(AbstractModule):
source = source if source else 'submitted' source = source if source else 'submitted'
save_path = source + '/' + now.strftime("%Y") + '/' + now.strftime("%m") + '/' + now.strftime("%d") + '/submitted_' + name + '.gz' save_path = source + '/' + now.strftime("%Y") + '/' + now.strftime("%m") + '/' + now.strftime("%d") + '/submitted_' + name + '.gz'
full_path = filename = os.path.join(os.environ['AIL_HOME'], full_path = os.path.join(os.environ['AIL_HOME'],
self.process.config.get("Directories", "pastes"), save_path) self.process.config.get("Directories", "pastes"), save_path)
self.redis_logger.debug(f'file path of the paste {full_path}') self.redis_logger.debug(f'file path of the paste {full_path}')
@ -281,7 +275,7 @@ class SubmitPaste(AbstractModule):
# file not exists in AIL paste directory # file not exists in AIL paste directory
self.redis_logger.debug(f"new paste {paste_content}") self.redis_logger.debug(f"new paste {paste_content}")
gzip64encoded = self._compress_encode_content(paste_content) gzip64encoded = self._compress_encode_content(paste_content, uuid)
if gzip64encoded: if gzip64encoded:
@ -321,36 +315,30 @@ class SubmitPaste(AbstractModule):
return result return result
def _compress_encode_content(self, content, uuid):
def _compress_encode_content(self, content):
gzip64encoded = None gzip64encoded = None
try: try:
gzipencoded = gzip.compress(content) gzipencoded = gzip.compress(content)
gzip64encoded = base64.standard_b64encode(gzipencoded).decode() gzip64encoded = base64.standard_b64encode(gzipencoded).decode()
except: except:
self.abord_file_submission(uuid, "file error") self.abord_file_submission(uuid, "file error")
return gzip64encoded return gzip64encoded
def addError(self, uuid, errorMessage): def addError(self, uuid, errorMessage):
self.redis_logger.debug(errorMessage) self.redis_logger.debug(errorMessage)
print(errorMessage) print(errorMessage)
error = self.r_serv_log_submit.get(f'{uuid}:error') error = self.r_serv_log_submit.get(f'{uuid}:error')
if error != None: if error is not None:
self.r_serv_log_submit.set(f'{uuid}:error', error + '<br></br>' + errorMessage) self.r_serv_log_submit.set(f'{uuid}:error', error + '<br></br>' + errorMessage)
self.r_serv_log_submit.incr(f'{uuid}:nb_end') self.r_serv_log_submit.incr(f'{uuid}:nb_end')
def abord_file_submission(self, uuid, errorMessage): def abord_file_submission(self, uuid, errorMessage):
self.redis_logger.debug(f'abord {uuid}, {errorMessage}') self.redis_logger.debug(f'abord {uuid}, {errorMessage}')
self.addError(uuid, errorMessage) self.addError(uuid, errorMessage)
self.r_serv_log_submit.set(f'{uuid}:end', 1) self.r_serv_log_submit.set(f'{uuid}:end', 1)
curr_date = datetime.date.today() curr_date = datetime.date.today()
self.serv_statistics.hincrby(curr_date.strftime("%Y%m%d"),'submit_abord', 1) self.serv_statistics.hincrby(curr_date.strftime("%Y%m%d"), 'submit_abord', 1)
self.remove_submit_uuid(uuid) self.remove_submit_uuid(uuid)
# # TODO: use Item function # # TODO: use Item function
@ -358,14 +346,13 @@ class SubmitPaste(AbstractModule):
l_directory = item_filename.split('/') l_directory = item_filename.split('/')
return f'{l_directory[-4]}{l_directory[-3]}{l_directory[-2]}' return f'{l_directory[-4]}{l_directory[-3]}{l_directory[-2]}'
def verify_extention_filename(self, filename): def verify_extention_filename(self, filename):
if not '.' in filename: if not '.' in filename:
return True return True
else: else:
file_type = filename.rsplit('.', 1)[1] file_type = filename.rsplit('.', 1)[1]
#txt file # txt file
if file_type in SubmitPaste.ALLOWED_EXTENSIONS: if file_type in SubmitPaste.ALLOWED_EXTENSIONS:
return True return True
else: else:
@ -373,6 +360,5 @@ class SubmitPaste(AbstractModule):
if __name__ == '__main__': if __name__ == '__main__':
module = SubmitPaste() module = SubmitPaste()
module.run() module.run()
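A minimal sketch of what _compress_encode_content does with its content argument (gzip, then base64), using only the standard library; the uuid/error-handling path is omitted.

import base64
import gzip

def compress_encode_content(content: bytes) -> str:
    # gzip then base64, as create_paste expects a gzip64encoded payload
    return base64.standard_b64encode(gzip.compress(content)).decode()

if __name__ == '__main__':
    blob = compress_encode_content(b'submitted paste content')
    print(blob)
    # round-trip check
    print(gzip.decompress(base64.standard_b64decode(blob)))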

View file

@ -153,6 +153,9 @@ def sanitise_date_range(date_from, date_to, separator='', date_type='str'):
date_from = date_to date_from = date_to
elif not date_to and date_from: elif not date_to and date_from:
date_to = date_from date_to = date_from
elif not date_to and not date_from:
date = datetime.date.today().strftime("%Y%m%d")
return {"date_from": date, "date_to": date}
if date_type=='str': if date_type=='str':
# remove separators # remove separators
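A sketch of just the new defaulting branch: when neither bound is supplied, both fall back to today's date in YYYYMMDD form (the function name here is illustrative).

import datetime

def default_date_range(date_from=None, date_to=None):
    # Mirror the sanitise_date_range defaults: a missing bound copies the other;
    # if both are missing, fall back to today (YYYYMMDD).
    if not date_from and date_to:
        date_from = date_to
    elif not date_to and date_from:
        date_to = date_from
    elif not date_to and not date_from:
        date_from = date_to = datetime.date.today().strftime("%Y%m%d")
    return {"date_from": date_from, "date_to": date_to}

if __name__ == '__main__':
    print(default_date_range())
    print(default_date_range(date_from='20221001'))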

View file

@ -320,39 +320,6 @@ class HiddenServices(object):
har_path = os.path.join(self.screenshot_directory, item_path) + '.json' har_path = os.path.join(self.screenshot_directory, item_path) + '.json'
return har_path return har_path
def create_domain_basic_archive(self, l_pastes):
all_har = self.get_all_har(l_pastes, filename=True)
all_screenshot = self.get_all_domain_screenshot(l_pastes, filename=True)
all_items = self.get_all_item_full_path(l_pastes, filename=True)
# try:
# zip buffer
zip_buffer = BytesIO()
with zipfile.ZipFile(zip_buffer, "a") as zf:
#print(all_har)
self.write_in_zip_buffer(zf, all_har)
self.write_in_zip_buffer(zf, all_screenshot)
self.write_in_zip_buffer(zf, all_items)
# write map url
map_file_content = self.get_metadata_file(l_pastes).encode()
zf.writestr( '_URL_MAP_', BytesIO(map_file_content).getvalue())
zip_buffer.seek(0)
return zip_buffer
# except Exception as e:
# print(e)
# return 'Server Error'
def write_in_zip_buffer(self, zf, list_file):
for file_path, file_name in list_file:
with open(file_path, "rb") as f:
har_content = f.read()
zf.writestr( file_name, BytesIO(har_content).getvalue())
def get_metadata_file(self, list_items): def get_metadata_file(self, list_items):
file_content = '' file_content = ''

View file

@ -20,7 +20,7 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages # Import Project packages
################################## ##################################
from modules.abstract_module import AbstractModule from modules.abstract_module import AbstractModule
# from lib.objects.Items import Item
class Template(AbstractModule): class Template(AbstractModule):
""" """
@ -36,19 +36,20 @@ class Template(AbstractModule):
# Send module state to logs # Send module state to logs
self.redis_logger.info(f'Module {self.module_name} initialized') self.redis_logger.info(f'Module {self.module_name} initialized')
def computeNone(self): def computeNone(self):
""" """
Compute when no message in queue Do something when there is no message in the queue
""" """
self.redis_logger.debug("No message in queue") self.redis_logger.debug("No message in queue")
def compute(self, message): def compute(self, message):
""" """
Compute a message in queue Compute a message in queue / process the message (item_id, ...)
""" """
self.redis_logger.debug("Compute message in queue") self.redis_logger.debug("Compute message in queue")
# # if message is an item_id:
# item = Item(message)
# content = item.get_content()
if __name__ == '__main__': if __name__ == '__main__':

View file

@ -1,71 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import socks
import socket
import urllib.request
import io
import gzip
import base64
import sys
import tempfile
# Max size in Mb
max_size = 5
def create_connection(address, timeout=None, source_address=None):
sock = socks.socksocket()
sock.connect(address)
return sock
def get_page(url, torclient_host='127.0.0.1', torclient_port=9050):
request = urllib.request.Request(url)
# UA of the Tor browser bundle
request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; rv:24.0) Gecko/20100101 Firefox/24.0')
return urllib.request.urlopen(request, timeout=5).read(max_size * 100000)
# FIXME: doesn't work at all
def makegzip64(s):
out = io.BytesIO()
with gzip.GzipFile(fileobj=out, mode='ab') as fo:
fo.write(base64.standard_b64encode(s))
return out.getvalue()
if __name__ == "__main__":
if len(sys.argv) != 2:
print('usage:', 'tor_fetcher.py', 'URL (base64 encoded)')
exit(1)
try:
url = base64.standard_b64decode(sys.argv[1]).decode('utf8')
print(url)
except:
print('unable to decode')
exit(1)
torclient_host = '127.0.0.1'
torclient_port = 9050
# Setup Proxy
socks.set_default_proxy(socks.SOCKS5, torclient_host, torclient_port, True)
socket.socket = socks.socksocket
socket.create_connection = create_connection
try:
page = get_page(url)
except:
print('unable to fetch')
exit(1)
to_write = makegzip64(page)
t, path = tempfile.mkstemp()
#with open(path, 'w') as f:
#f.write(to_write)
print(path)
exit(0)

View file

@ -1,328 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import uuid
import datetime
import redis
import json
import time
from hashlib import sha256
from scrapy.spidermiddlewares.httperror import HttpError
from twisted.internet.error import DNSLookupError
from twisted.internet.error import TimeoutError
from twisted.web._newclient import ResponseNeverReceived
from scrapy import Spider
from scrapy.linkextractors import LinkExtractor
from scrapy.crawler import CrawlerProcess, Crawler
from scrapy_splash import SplashRequest, SplashJsonResponse
sys.path.append(os.environ['AIL_BIN'])
from Helper import Process
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
import ConfigLoader
import Screenshot
import crawlers
script_cookie = """
function main(splash, args)
-- Default values
splash.js_enabled = true
splash.private_mode_enabled = true
splash.images_enabled = true
splash.webgl_enabled = true
splash.media_source_enabled = true
-- Force enable things
splash.plugins_enabled = true
splash.request_body_enabled = true
splash.response_body_enabled = true
splash.indexeddb_enabled = true
splash.html5_media_enabled = true
splash.http2_enabled = true
-- User Agent
splash:set_user_agent(args.user_agent)
-- User defined
splash.resource_timeout = args.resource_timeout
splash.timeout = args.timeout
-- Allow to pass cookies
splash:init_cookies(args.cookies)
-- Run
ok, reason = splash:go{args.url}
if not ok and not reason:find("http") then
return {
error = reason,
last_url = splash:url()
}
end
if reason == "http504" then
splash:set_result_status_code(504)
return ''
end
splash:wait{args.wait}
-- Page instrumentation
-- splash.scroll_position = {y=1000}
-- splash:wait{args.wait}
-- Response
return {
har = splash:har(),
html = splash:html(),
png = splash:png{render_all=true},
cookies = splash:get_cookies(),
last_url = splash:url(),
}
end
"""
class TorSplashCrawler():
def __init__(self, splash_url, crawler_options):
self.process = CrawlerProcess({'LOG_ENABLED': True})
self.crawler = Crawler(self.TorSplashSpider, {
'USER_AGENT': crawler_options['user_agent'], # /!\ overwritten by lua script
'SPLASH_URL': splash_url,
'ROBOTSTXT_OBEY': False,
'DOWNLOADER_MIDDLEWARES': {'scrapy_splash.SplashCookiesMiddleware': 723,
'scrapy_splash.SplashMiddleware': 725,
'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware': 810,
'scrapy_splash.SplashDeduplicateArgsMiddleware': 100,
},
'SPIDER_MIDDLEWARES': {'scrapy_splash.SplashDeduplicateArgsMiddleware': 100,},
'DUPEFILTER_CLASS': 'scrapy_splash.SplashAwareDupeFilter',
'HTTPERROR_ALLOW_ALL': True,
'RETRY_TIMES': 2,
'CLOSESPIDER_PAGECOUNT': crawler_options['closespider_pagecount'],
'DEPTH_LIMIT': crawler_options['depth_limit'],
'SPLASH_COOKIES_DEBUG': False
})
def crawl(self, splash_url, type, crawler_options, date, requested_mode, url, domain, port, cookies, original_item):
self.process.crawl(self.crawler, splash_url=splash_url, type=type, crawler_options=crawler_options, date=date, requested_mode=requested_mode, url=url, domain=domain, port=port, cookies=cookies, original_item=original_item)
self.process.start()
class TorSplashSpider(Spider):
name = 'TorSplashSpider'
def __init__(self, splash_url, type, crawler_options, date, requested_mode, url, domain, port, cookies, original_item, *args, **kwargs):
self.splash_url = splash_url
self.domain_type = type
self.requested_mode = requested_mode
self.original_item = original_item
self.root_key = None
self.start_urls = url
self.domains = [domain]
self.port = str(port)
date_str = '{}/{}/{}'.format(date['date_day'][0:4], date['date_day'][4:6], date['date_day'][6:8])
self.full_date = date['date_day']
self.date_month = date['date_month']
self.date_epoch = int(date['epoch'])
self.user_agent = crawler_options['user_agent']
self.png = crawler_options['png']
self.har = crawler_options['har']
self.cookies = cookies
config_section = 'Crawler'
self.p = Process(config_section)
self.item_dir = os.path.join(self.p.config.get("Directories", "crawled"), date_str )
config_loader = ConfigLoader.ConfigLoader()
self.har_dir = os.path.join(config_loader.get_files_directory('har') , date_str )
config_loader = None
self.r_serv_log_submit = redis.StrictRedis(
host=self.p.config.get("Redis_Log_submit", "host"),
port=self.p.config.getint("Redis_Log_submit", "port"),
db=self.p.config.getint("Redis_Log_submit", "db"),
decode_responses=True)
self.root_key = None
def build_request_arg(self, cookies):
return {'wait': 10,
'resource_timeout': 30, # /!\ Weird behaviour if timeout < resource_timeout /!\
'timeout': 30,
'user_agent': self.user_agent,
'cookies': cookies,
'lua_source': script_cookie
}
def start_requests(self):
l_cookies = self.build_request_arg(self.cookies)
yield SplashRequest(
self.start_urls,
self.parse,
errback=self.errback_catcher,
endpoint='execute',
meta={'father': self.original_item, 'current_url': self.start_urls},
args=l_cookies
)
# # TODO: remove duplicate and anchor
def parse(self,response):
#print(response.headers)
#print(response.status)
#print(response.meta)
#print(response.data) # # TODO: handle lua script error
#{'type': 'ScriptError', 'info': {'error': "'}' expected (to close '{' at line 47) near 'error_retry'",
#'message': '[string "..."]:53: \'}\' expected (to close \'{\' at line 47) near \'error_retry\'',
#'type': 'LUA_INIT_ERROR', 'source': '[string "..."]', 'line_number': 53},
#'error': 400, 'description': 'Error happened while executing Lua script'}
if response.status == 504:
# no response
#print('504 detected')
pass
# LUA ERROR # # TODO: logs errors
elif 'error' in response.data:
if(response.data['error'] == 'network99'):
## splash restart ##
error_retry = response.meta.get('error_retry', 0)
if error_retry < 3:
error_retry += 1
url = response.data['last_url']
father = response.meta['father']
self.logger.error('Splash, ResponseNeverReceived for %s, retry in 10s ...', url)
time.sleep(10)
if 'cookies' in response.data:
all_cookies = response.data['cookies'] # # TODO: use initial cookie ?????
else:
all_cookies = []
l_cookies = self.build_request_arg(all_cookies)
yield SplashRequest(
url,
self.parse,
errback=self.errback_catcher,
endpoint='execute',
dont_filter=True,
meta={'father': father, 'current_url': url, 'error_retry': error_retry},
args=l_cookies
)
else:
if self.requested_mode == 'test':
crawlers.save_test_ail_crawlers_result(False, 'Connection to proxy refused')
print('Connection to proxy refused')
elif response.data['error'] == 'network3':
if self.requested_mode == 'test':
crawlers.save_test_ail_crawlers_result(False, 'HostNotFoundError: the remote host name was not found (invalid hostname)')
print('HostNotFoundError: the remote host name was not found (invalid hostname)')
else:
if self.requested_mode == 'test':
crawlers.save_test_ail_crawlers_result(False, response.data['error'])
print(response.data['error'])
elif response.status != 200:
print('other response: {}'.format(response.status))
# detect connection to proxy refused
error_log = (json.loads(response.body.decode()))
print(error_log)
#elif crawlers.is_redirection(self.domains[0], response.data['last_url']):
# pass # ignore response
else:
## TEST MODE ##
if self.requested_mode == 'test':
if 'It works!' in response.data['html']:
crawlers.save_test_ail_crawlers_result(True, 'It works!')
else:
print('TEST ERROR')
crawlers.save_test_ail_crawlers_result(False, 'TEST ERROR')
return
## -- ##
item_id = crawlers.create_item_id(self.item_dir, self.domains[0])
self.save_crawled_item(item_id, response.data['html'])
crawlers.create_item_metadata(item_id, self.domains[0], response.data['last_url'], self.port, response.meta['father'])
if self.root_key is None:
self.root_key = item_id
crawlers.add_domain_root_item(item_id, self.domain_type, self.domains[0], self.date_epoch, self.port)
crawlers.create_domain_metadata(self.domain_type, self.domains[0], self.port, self.full_date, self.date_month)
if 'cookies' in response.data:
all_cookies = response.data['cookies']
else:
all_cookies = []
# SCREENSHOT
if 'png' in response.data and self.png:
sha256_string = Screenshot.save_crawled_screeshot(response.data['png'], 5000000, f_save=self.requested_mode)
if sha256_string:
Screenshot.save_item_relationship(sha256_string, item_id)
Screenshot.save_domain_relationship(sha256_string, self.domains[0])
# HAR
if 'har' in response.data and self.har:
crawlers.save_har(self.har_dir, item_id, response.data['har'])
le = LinkExtractor(allow_domains=self.domains, unique=True)
for link in le.extract_links(response):
l_cookies = self.build_request_arg(all_cookies)
yield SplashRequest(
link.url,
self.parse,
errback=self.errback_catcher,
endpoint='execute',
meta={'father': item_id, 'current_url': link.url},
args=l_cookies
)
def errback_catcher(self, failure):
# catch all errback failures,
self.logger.error(repr(failure))
if failure.check(ResponseNeverReceived):
## DEBUG ##
self.logger.error(failure.request)
if failure.value.response:
self.logger.error(failure.value.response)
## ----- ##
# Extract request metadata
url = failure.request.meta['current_url']
father = failure.request.meta['father']
l_cookies = self.build_request_arg(failure.request.meta['splash']['args']['cookies'])
# Check if Splash restarted
if not crawlers.is_splash_reachable(self.splash_url):
self.logger.error('Splash, ResponseNeverReceived for %s, retry in 30s ...', url)
time.sleep(30)
yield SplashRequest(
url,
self.parse,
errback=self.errback_catcher,
endpoint='execute',
meta={'father': father, 'current_url': url},
args=l_cookies
)
else:
self.logger.error(failure.type)
self.logger.error(failure.getErrorMessage())
def save_crawled_item(self, item_id, item_content):
gzip64encoded = crawlers.save_crawled_item(item_id, item_content)
# Send item to queue
# send paste to Global
relay_message = "{0} {1}".format(item_id, gzip64encoded)
self.p.populate_set_out(relay_message, 'Mixer')
# increase nb of paste by feeder name
self.r_serv_log_submit.hincrby("mixer_cache:list_feeder", "crawler", 1)
# tag crawled paste
msg = 'infoleak:submission="crawler";{}'.format(item_id)
self.p.populate_set_out(msg, 'Tags')

View file

@ -1,80 +0,0 @@
#!/bin/bash
issplashed=`screen -ls | egrep '[0-9]+.Docker_Splash' | cut -d. -f1`
usage() { echo "Usage: sudo $0 [-f <config_absolute_path>] [-p <port_start>] [-n <number_of_splash_servers>]" 1>&2;
echo " -f: absolute path to splash docker proxy-profiles directory (used for proxy configuration)";
echo " -p: port number of the first splash server. This number is incremented for the other splash servers";
echo " -n: number of splash servers to start";
echo "";
echo " -options:";
echo " -u: max unbound in-memory cache (Mb, Restart Splash when full, default=3000 Mb)";
echo "";
echo "example:";
echo "sudo ./launch_splash_crawler.sh -f /home/my_user/AIL-framework/configs/docker/splash_onion/etc/splash/proxy-profiles/ -p 8050 -n 3";
exit 1;
}
while getopts ":p:f:n:u:" o; do
case "${o}" in
p)
p=${OPTARG}
;;
f)
f=${OPTARG}
;;
n)
n=${OPTARG}
;;
u)
u=${OPTARG}
;;
*)
usage
;;
esac
done
shift $((OPTIND-1))
if [ -z "${p}" ] || [ -z "${f}" ] || [ -z "${n}" ]; then
usage;
fi
RED="\\033[1;31m"
DEFAULT="\\033[0;39m"
GREEN="\\033[1;32m"
WHITE="\\033[0;02m"
if [ "$EUID" -ne 0 ]; then
echo -e $RED"\t* Please run as root or sudo.\n"$DEFAULT
exit 1
fi
if [ ! -d "${f}" ]; then
printf "$RED\n Error -f, proxy-profiles directory: $WHITE${f}$RED not found\n$DEFAULT Please check if you enter the correct path\n"
exit 1
fi
if [ ! -f "${f}default.ini" ]; then
printf "$RED\n Error -f, proxy configuration file:$WHITE default.ini$RED not found\n$DEFAULT Please check if you enter the correct path\n"
exit 1
fi
if [[ $issplashed ]]; then
echo -e $RED"\t* A screen is already launched, please kill it before creating another one."$DEFAULT
exit 1
fi
if [ -z "${u}" ]; then
u=3000;
fi
screen -dmS "Docker_Splash"
sleep 0.1
for ((i=0;i<=$((${n} - 1));i++)); do
port_number=$((${p} + $i))
screen -S "Docker_Splash" -X screen -t "docker_splash:$port_number" bash -c 'sudo docker run -d -p '$port_number':8050 --restart=always --cpus=1 --memory=2G -v '$f':/etc/splash/proxy-profiles/ --net="bridge" scrapinghub/splash --maxrss '$u'; read x'
sleep 0.1
printf "$GREEN Splash server launched on port $port_number$DEFAULT\n"
done

View file

@ -1,53 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import json
import redis
from TorSplashCrawler import TorSplashCrawler
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
import crawlers
if __name__ == '__main__':
if len(sys.argv) != 2:
print('usage:', 'tor_crawler.py', 'uuid')
exit(1)
config_loader = ConfigLoader.ConfigLoader()
redis_cache = config_loader.get_redis_conn("Redis_Cache")
config_loader = None
# get crawler config key
uuid = sys.argv[1]
# get configs
crawler_json = json.loads(redis_cache.get('crawler_request:{}'.format(uuid)))
splash_url = crawler_json['splash_url']
service_type = crawler_json['service_type']
url = crawler_json['url']
domain = crawler_json['domain']
port = crawler_json['port']
original_item = crawler_json['item']
crawler_options = crawler_json['crawler_options']
date = crawler_json['date']
requested_mode = crawler_json['requested']
if crawler_options['cookiejar_uuid']:
cookies = crawlers.load_crawler_cookies(crawler_options['cookiejar_uuid'], domain, crawler_type=service_type)
else:
cookies = []
redis_cache.delete('crawler_request:{}'.format(uuid))
try:
crawler = TorSplashCrawler(splash_url, crawler_options)
crawler.crawl(splash_url, service_type, crawler_options, date, requested_mode, url, domain, port, cookies, original_item)
except Exception as e:
print(e)
print(e, file=sys.stderr)

View file

@ -10,7 +10,6 @@ The Retro_Hunt trackers module
# Import External packages # Import External packages
################################## ##################################
import os import os
import re
import sys import sys
import time import time
import yara import yara
@ -20,15 +19,15 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages # Import Project packages
################################## ##################################
from modules.abstract_module import AbstractModule from modules.abstract_module import AbstractModule
from packages.Item import Item from lib.objects.Items import Item
from packages.Item import Date from packages import Date
from lib import Tracker from lib import Tracker
import NotificationHelper # # TODO: refractor import NotificationHelper # # TODO: refractor
class Retro_Hunt(AbstractModule): class Retro_Hunt(AbstractModule):
#mail_body_template = "AIL Framework,\nNew YARA match: {}\nitem id: {}\nurl: {}{}" # mail_body_template = "AIL Framework,\nNew YARA match: {}\nitem id: {}\nurl: {}{}"
""" """
Retro_Hunt module for AIL framework Retro_Hunt module for AIL framework
@ -39,9 +38,6 @@ class Retro_Hunt(AbstractModule):
self.full_item_url = self.process.config.get("Notifications", "ail_domain") + "/object/item?id=" self.full_item_url = self.process.config.get("Notifications", "ail_domain") + "/object/item?id="
self.refresh_deleta = 10
self.last_refresh = 0
# reset on each loop # reset on each loop
self.task_uuid = None self.task_uuid = None
self.date_from = 0 self.date_from = 0
@ -49,13 +45,12 @@ class Retro_Hunt(AbstractModule):
self.nb_src_done = 0 self.nb_src_done = 0
self.progress = 0 self.progress = 0
self.item = None self.item = None
self.tags = []
self.redis_logger.info(f"Module: {self.module_name} Launched") self.redis_logger.info(f"Module: {self.module_name} Launched")
# # TODO: send mails # # TODO: send mails
# # TODO: # start_time # # TODO: # start_time # end_time
# end_time
def compute(self, task_uuid): def compute(self, task_uuid):
self.redis_logger.warning(f'{self.module_name}, starting Retro hunt task {task_uuid}') self.redis_logger.warning(f'{self.module_name}, starting Retro hunt task {task_uuid}')
@ -75,7 +70,7 @@ class Retro_Hunt(AbstractModule):
self.tags = Tracker.get_retro_hunt_task_tags(task_uuid) self.tags = Tracker.get_retro_hunt_task_tags(task_uuid)
curr_date = Tracker.get_retro_hunt_task_current_date(task_uuid) curr_date = Tracker.get_retro_hunt_task_current_date(task_uuid)
self.nb_src_done = Tracker.get_retro_hunt_task_nb_src_done(task_uuid, sources=sources) self.nb_src_done = Tracker.get_retro_hunt_task_nb_src_done(task_uuid, sources=sources)
self.progress = self.update_progress(sources, curr_date) self.update_progress(sources, curr_date)
# iterate on date # iterate on date
filter_last = True filter_last = True
while int(curr_date) <= int(self.date_to): while int(curr_date) <= int(self.date_to):
@ -91,14 +86,15 @@ class Retro_Hunt(AbstractModule):
self.redis_logger.debug(f'{self.module_name}, Retro Hunt searching in directory {dir}') self.redis_logger.debug(f'{self.module_name}, Retro Hunt searching in directory {dir}')
l_obj = Tracker.get_items_to_analyze(dir) l_obj = Tracker.get_items_to_analyze(dir)
for id in l_obj: for id in l_obj:
#print(f'{dir} / {id}') # print(f'{dir} / {id}')
self.item = Item(id) self.item = Item(id)
# save current item in cache # save current item in cache
Tracker.set_cache_retro_hunt_task_id(task_uuid, id) Tracker.set_cache_retro_hunt_task_id(task_uuid, id)
self.redis_logger.debug(f'{self.module_name}, Retro Hunt rule {task_uuid}, searching item {id}') self.redis_logger.debug(f'{self.module_name}, Retro Hunt rule {task_uuid}, searching item {id}')
yara_match = rule.match(data=self.item.get_content(), callback=self.yara_rules_match, which_callbacks=yara.CALLBACK_MATCHES, timeout=timeout) yara_match = rule.match(data=self.item.get_content(), callback=self.yara_rules_match,
which_callbacks=yara.CALLBACK_MATCHES, timeout=timeout)
# save last item # save last item
if nb_id % 10 == 0: # # TODO: Add nb before save in DB if nb_id % 10 == 0: # # TODO: Add nb before save in DB
@ -110,7 +106,7 @@ class Retro_Hunt(AbstractModule):
self.update_progress(sources, curr_date) self.update_progress(sources, curr_date)
if Tracker.check_retro_hunt_pause(task_uuid): if Tracker.check_retro_hunt_pause(task_uuid):
Tracker.set_retro_hunt_last_analyzed(task_uuid, id) Tracker.set_retro_hunt_last_analyzed(task_uuid, id)
#self.update_progress(sources, curr_date, save_db=True) # self.update_progress(sources, curr_date, save_db=True)
Tracker.pause_retro_hunt_task(task_uuid) Tracker.pause_retro_hunt_task(task_uuid)
Tracker.clear_retro_hunt_task_cache(task_uuid) Tracker.clear_retro_hunt_task_cache(task_uuid)
return None return None
@ -142,7 +138,7 @@ class Retro_Hunt(AbstractModule):
def yara_rules_match(self, data): def yara_rules_match(self, data):
id = self.item.get_id() id = self.item.get_id()
#print(data) # print(data)
task_uuid = data['namespace'] task_uuid = data['namespace']
self.redis_logger.info(f'{self.module_name}, Retro hunt {task_uuid} match found: {id}') self.redis_logger.info(f'{self.module_name}, Retro hunt {task_uuid} match found: {id}')
@ -177,9 +173,9 @@ class Retro_Hunt(AbstractModule):
if task_uuid: if task_uuid:
# Module processing with the message from the queue # Module processing with the message from the queue
self.redis_logger.debug(task_uuid) self.redis_logger.debug(task_uuid)
#try: # try:
self.compute(task_uuid) self.compute(task_uuid)
#except Exception as err: # except Exception as err:
# self.redis_logger.error(f'Error in module {self.module_name}: {err}') # self.redis_logger.error(f'Error in module {self.module_name}: {err}')
# # Remove uuid ref # # Remove uuid ref
# self.remove_submit_uuid(uuid) # self.remove_submit_uuid(uuid)
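A self-contained sketch of the yara-python callback pattern used above by Retro_Hunt (and the trackers), with a toy rule rather than one of AIL's tracker rules.

import yara

RULE = 'rule demo { strings: $a = "password" condition: $a }'

def on_match(data):
    # data carries the rule name, namespace, matched strings, ...
    print('match:', data['rule'])
    return yara.CALLBACK_CONTINUE

rules = yara.compile(source=RULE)
rules.match(data='leaked password dump',
            callback=on_match,
            which_callbacks=yara.CALLBACK_MATCHES,
            timeout=60)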

View file

@ -9,7 +9,6 @@ It processes every item coming from the global module and test the regex
""" """
import os import os
import re
import sys import sys
import time import time
import requests import requests
@ -19,10 +18,9 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages # Import Project packages
################################## ##################################
from modules.abstract_module import AbstractModule from modules.abstract_module import AbstractModule
from packages.Item import Item from lib.objects.Items import Item
from packages import Term from packages import Term
from lib import Tracker from lib import Tracker
from lib import regex_helper
import NotificationHelper import NotificationHelper
@ -42,8 +40,6 @@ class Tracker_Regex(AbstractModule):
self.full_item_url = self.process.config.get("Notifications", "ail_domain") + "/object/item?id=" self.full_item_url = self.process.config.get("Notifications", "ail_domain") + "/object/item?id="
self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)
# refresh Tracked Regex # refresh Tracked Regex
self.dict_regex_tracked = Term.get_regex_tracked_words_dict() self.dict_regex_tracked = Term.get_regex_tracked_words_dict()
self.last_refresh = time.time() self.last_refresh = time.time()
@ -63,7 +59,7 @@ class Tracker_Regex(AbstractModule):
item_content = item.get_content() item_content = item.get_content()
for regex in self.dict_regex_tracked: for regex in self.dict_regex_tracked:
matched = regex_helper.regex_search(self.module_name, self.redis_cache_key, self.dict_regex_tracked[regex], item_id, item_content, max_time=self.max_execution_time) matched = self.regex_findall(self.dict_regex_tracked[regex], item_id, item_content)
if matched: if matched:
self.new_tracker_found(regex, 'regex', item) self.new_tracker_found(regex, 'regex', item)
@ -92,8 +88,8 @@ class Tracker_Regex(AbstractModule):
if mail_to_notify: if mail_to_notify:
mail_subject = Tracker.get_email_subject(tracker_uuid) mail_subject = Tracker.get_email_subject(tracker_uuid)
mail_body = Tracker_Regex.mail_body_template.format(tracker, item_id, self.full_item_url, item_id) mail_body = Tracker_Regex.mail_body_template.format(tracker, item_id, self.full_item_url, item_id)
for mail in mail_to_notify: for mail in mail_to_notify:
NotificationHelper.sendEmailNotification(mail, mail_subject, mail_body) NotificationHelper.sendEmailNotification(mail, mail_subject, mail_body)
# Webhook # Webhook
webhook_to_post = Term.get_term_webhook(tracker_uuid) webhook_to_post = Term.get_term_webhook(tracker_uuid)
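With regex_helper dropped, matching now goes through the base class's regex_findall; below is a plain-re sketch of the same loop over tracked patterns (toy trackers, no timeout protection).

import re

# Toy tracked regexes (tracker string -> pattern); real trackers come from the DB
dict_regex_tracked = {
    'ail-framework': r'ail[-_ ]?framework',
    'btc-tag': r'\bBTC-[0-9a-f]{8}\b',
}

def check_trackers(item_id, content):
    for tracker, pattern in dict_regex_tracked.items():
        if re.findall(pattern, content, re.I):
            print(f'tracker {tracker} matched in {item_id}')

check_trackers('tests/2022/10/25/demo.gz', 'New AIL Framework release, BTC-deadbeef')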

View file

@ -22,7 +22,7 @@ sys.path.append(os.environ['AIL_BIN'])
################################## ##################################
from modules.abstract_module import AbstractModule from modules.abstract_module import AbstractModule
import NotificationHelper import NotificationHelper
from packages.Item import Item from lib.objects.Items import Item
from packages import Term from packages import Term
from lib import Tracker from lib import Tracker
@ -96,7 +96,7 @@ class Tracker_Term(AbstractModule):
# Term.create_token_statistics(item_date, word, dict_words_freq[word]) # Term.create_token_statistics(item_date, word, dict_words_freq[word])
# check solo words # check solo words
####### # TODO: check if source needed ####### # ###### # TODO: check if source needed #######
for word in self.list_tracked_words: for word in self.list_tracked_words:
if word in dict_words_freq: if word in dict_words_freq:
self.new_term_found(word, 'word', item) self.new_term_found(word, 'word', item)
@ -136,10 +136,10 @@ class Tracker_Term(AbstractModule):
if mail_to_notify: if mail_to_notify:
mail_subject = Tracker.get_email_subject(term_uuid) mail_subject = Tracker.get_email_subject(term_uuid)
mail_body = Tracker_Term.mail_body_template.format(term, item_id, self.full_item_url, item_id) mail_body = Tracker_Term.mail_body_template.format(term, item_id, self.full_item_url, item_id)
for mail in mail_to_notify: for mail in mail_to_notify:
self.redis_logger.debug(f'Send Mail {mail_subject}') self.redis_logger.debug(f'Send Mail {mail_subject}')
print(f'S print(item_content)end Mail {mail_subject}') print(f'Send Mail {mail_subject}')
NotificationHelper.sendEmailNotification(mail, mail_subject, mail_body) NotificationHelper.sendEmailNotification(mail, mail_subject, mail_body)
# Webhook # Webhook
webhook_to_post = Term.get_term_webhook(term_uuid) webhook_to_post = Term.get_term_webhook(term_uuid)
@ -162,7 +162,6 @@ class Tracker_Term(AbstractModule):
self.redis_logger.error(f"Webhook request failed for {webhook_to_post}\nReason: Something went wrong") self.redis_logger.error(f"Webhook request failed for {webhook_to_post}\nReason: Something went wrong")
if __name__ == '__main__': if __name__ == '__main__':
module = Tracker_Term() module = Tracker_Term()
module.run() module.run()

View file

@ -8,7 +8,6 @@
# Import External packages # Import External packages
################################## ##################################
import os import os
import re
import sys import sys
import time import time
import yara import yara
@ -20,10 +19,10 @@ sys.path.append(os.environ['AIL_BIN'])
################################## ##################################
from modules.abstract_module import AbstractModule from modules.abstract_module import AbstractModule
from packages import Term from packages import Term
from packages.Item import Item from lib.objects.Items import Item
from lib import Tracker from lib import Tracker
import NotificationHelper # # TODO: refactor import NotificationHelper # # TODO: refactor
class Tracker_Yara(AbstractModule): class Tracker_Yara(AbstractModule):
@ -46,7 +45,6 @@ class Tracker_Yara(AbstractModule):
self.redis_logger.info(f"Module: {self.module_name} Launched") self.redis_logger.info(f"Module: {self.module_name} Launched")
def compute(self, item_id): def compute(self, item_id):
# refresh YARA list # refresh YARA list
if self.last_refresh < Tracker.get_tracker_last_updated_by_type('yara'): if self.last_refresh < Tracker.get_tracker_last_updated_by_type('yara'):
@ -58,7 +56,8 @@ class Tracker_Yara(AbstractModule):
self.item = Item(item_id) self.item = Item(item_id)
item_content = self.item.get_content() item_content = self.item.get_content()
try: try:
yara_match = self.rules.match(data=item_content, callback=self.yara_rules_match, which_callbacks=yara.CALLBACK_MATCHES, timeout=60) yara_match = self.rules.match(data=item_content, callback=self.yara_rules_match,
which_callbacks=yara.CALLBACK_MATCHES, timeout=60)
if yara_match: if yara_match:
self.redis_logger.info(f'{self.item.get_id()}: {yara_match}') self.redis_logger.info(f'{self.item.get_id()}: {yara_match}')
print(f'{self.item.get_id()}: {yara_match}') print(f'{self.item.get_id()}: {yara_match}')
@ -91,10 +90,10 @@ class Tracker_Yara(AbstractModule):
if mail_to_notify: if mail_to_notify:
mail_subject = Tracker.get_email_subject(tracker_uuid) mail_subject = Tracker.get_email_subject(tracker_uuid)
mail_body = Tracker_Yara.mail_body_template.format(data['rule'], item_id, self.full_item_url, item_id) mail_body = Tracker_Yara.mail_body_template.format(data['rule'], item_id, self.full_item_url, item_id)
for mail in mail_to_notify: for mail in mail_to_notify:
self.redis_logger.debug(f'Send Mail {mail_subject}') self.redis_logger.debug(f'Send Mail {mail_subject}')
print(f'Send Mail {mail_subject}') print(f'Send Mail {mail_subject}')
NotificationHelper.sendEmailNotification(mail, mail_subject, mail_body) NotificationHelper.sendEmailNotification(mail, mail_subject, mail_body)
# Webhook # Webhook
webhook_to_post = Term.get_term_webhook(tracker_uuid) webhook_to_post = Term.get_term_webhook(tracker_uuid)
@ -116,7 +115,6 @@ class Tracker_Yara(AbstractModule):
except: except:
self.redis_logger.error(f"Webhook request failed for {webhook_to_post}\nReason: Something went wrong") self.redis_logger.error(f"Webhook request failed for {webhook_to_post}\nReason: Something went wrong")
return yara.CALLBACK_CONTINUE return yara.CALLBACK_CONTINUE

View file

@ -262,14 +262,10 @@ db = 0
[Crawler] [Crawler]
activate_crawler = False activate_crawler = False
crawler_depth_limit = 1 default_depth_limit = 1
default_crawler_har = True default_har = True
default_crawler_png = True default_screenshot = True
default_crawler_closespider_pagecount = 50 onion_proxy = onion.foundation
default_crawler_user_agent = Mozilla/5.0 (Windows NT 10.0; rv:78.0) Gecko/20100101 Firefox/78.0
splash_url = http://127.0.0.1
splash_port = 8050-8052
domain_proxy = onion.foundation
[IP] [IP]
# list of comma-separated CIDR that you wish to be alerted for. e.g: # list of comma-separated CIDR that you wish to be alerted for. e.g:
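For reference, a sketch of reading the renamed [Crawler] keys through ConfigLoader as it is imported elsewhere in this commit; get_config_str is assumed to return the raw string values, so the depth limit is converted by hand.

import os
import sys

sys.path.append(os.environ['AIL_BIN'])
from lib.ConfigLoader import ConfigLoader  # same import as in app.py above

config_loader = ConfigLoader()
# Renamed keys: crawler_depth_limit -> default_depth_limit,
# default_crawler_har/png -> default_har/default_screenshot
depth_limit = int(config_loader.get_config_str('Crawler', 'default_depth_limit'))
default_har = config_loader.get_config_str('Crawler', 'default_har')
default_screenshot = config_loader.get_config_str('Crawler', 'default_screenshot')
print(depth_limit, default_har, default_screenshot)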

View file

@ -94,7 +94,7 @@ DEFAULT_HOME=$(pwd)
#### KVROCKS #### #### KVROCKS ####
test ! -d kvrocks/ && git clone https://github.com/apache/incubator-kvrocks.git kvrocks test ! -d kvrocks/ && git clone https://github.com/apache/incubator-kvrocks.git kvrocks
pushd kvrocks pushd kvrocks
./build.sh build ./x.py build
popd popd
DEFAULT_KVROCKS_DATA=$DEFAULT_HOME/DATA_KVROCKS DEFAULT_KVROCKS_DATA=$DEFAULT_HOME/DATA_KVROCKS

View file

@ -1,11 +1,12 @@
pyail pyail
pylacus
pymisp>=2.4.144 pymisp>=2.4.144
d4-pyclient>=0.1.6 d4-pyclient>=0.1.6
thehive4py thehive4py
# Core # Core
redis==2.10.6 redis==3.0.0
python-magic>0.4.15 python-magic>0.4.15
yara-python>4.0.2 yara-python>4.0.2
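The redis pin jumps from 2.10.6 to 3.0.0, which changed several command signatures; the sketch below shows the most visible one (zadd now takes a mapping) and assumes a local Redis on the default port. The key name is just an example.

import redis

r = redis.Redis(host='127.0.0.1', port=6379, db=0, decode_responses=True)

# redis-py 2.x: r.zadd('demo_queue', 'member', 10)
# redis-py 3.x: the score mapping is mandatory
r.zadd('demo_queue', {'member': 10})
print(r.zrange('demo_queue', 0, -1, withscores=True))
r.delete('demo_queue')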

View file

@ -40,6 +40,7 @@ class Test_Module_ApiKey(unittest.TestCase):
def setUp(self): def setUp(self):
self.module_obj = ApiKey() self.module_obj = ApiKey()
self.module_obj.debug = True
def test_module(self): def test_module(self):
item_id = 'tests/2021/01/01/api_keys.gz' item_id = 'tests/2021/01/01/api_keys.gz'
@ -56,6 +57,7 @@ class Test_Module_Categ(unittest.TestCase):
def setUp(self): def setUp(self):
self.module_obj = Categ() self.module_obj = Categ()
self.module_obj.debug = True
def test_module(self): def test_module(self):
item_id = 'tests/2021/01/01/categ.gz' item_id = 'tests/2021/01/01/categ.gz'
@ -69,14 +71,15 @@ class Test_Module_CreditCards(unittest.TestCase):
def setUp(self): def setUp(self):
self.module_obj = CreditCards() self.module_obj = CreditCards()
self.module_obj.debug = True
def test_module(self): def test_module(self):
item_id = 'tests/2021/01/01/credit_cards.gz 7' item_id = 'tests/2021/01/01/credit_cards.gz 7'
test_cards = ['341039324930797', # American Express test_cards = ['341039324930797', # American Express
'6011613905509166', # Discover Card '6011613905509166', # Discover Card
'3547151714018657', # Japan Credit Bureau (JCB) '3547151714018657', # Japan Credit Bureau (JCB)
'5492981206527330', # 16 digits MasterCard '5492981206527330', # 16 digits MasterCard
'4024007132849695', # '4532525919781' # 16-digit VISA, with separators '4024007132849695', # '4532525919781' # 16-digit VISA, with separators
] ]
result = self.module_obj.compute(item_id, r_result=True) result = self.module_obj.compute(item_id, r_result=True)
@ -86,6 +89,7 @@ class Test_Module_DomClassifier(unittest.TestCase):
def setUp(self): def setUp(self):
self.module_obj = DomClassifier() self.module_obj = DomClassifier()
self.module_obj.debug = True
def test_module(self): def test_module(self):
test_host = 'foo.be' test_host = 'foo.be'
@ -98,6 +102,7 @@ class Test_Module_Global(unittest.TestCase):
def setUp(self): def setUp(self):
self.module_obj = Global() self.module_obj = Global()
self.module_obj.debug = True
def test_module(self): def test_module(self):
# # TODO: delete item # # TODO: delete item
@ -138,6 +143,7 @@ class Test_Module_Keys(unittest.TestCase):
def setUp(self): def setUp(self):
self.module_obj = Keys() self.module_obj = Keys()
self.module_obj.debug = True
def test_module(self): def test_module(self):
item_id = 'tests/2021/01/01/keys.gz' item_id = 'tests/2021/01/01/keys.gz'
@ -148,6 +154,7 @@ class Test_Module_Onion(unittest.TestCase):
def setUp(self): def setUp(self):
self.module_obj = Onion() self.module_obj = Onion()
self.module_obj.debug = True
def test_module(self): def test_module(self):
item_id = 'tests/2021/01/01/onion.gz' item_id = 'tests/2021/01/01/onion.gz'
@ -157,7 +164,7 @@ class Test_Module_Onion(unittest.TestCase):
self.module_obj.compute(f'{item_id} 3') self.module_obj.compute(f'{item_id} 3')
if crawlers.is_crawler_activated(): if crawlers.is_crawler_activated():
## check domain queues # # check domain queues
# all domains queue # all domains queue
self.assertTrue(crawlers.is_domain_in_queue('onion', domain_1)) self.assertTrue(crawlers.is_domain_in_queue('onion', domain_1))
# all url/item queue # all url/item queue
@ -177,11 +184,13 @@ class Test_Module_Telegram(unittest.TestCase):
def setUp(self): def setUp(self):
self.module_obj = Telegram() self.module_obj = Telegram()
self.module_obj.debug = True
def test_module(self): def test_module(self):
item_id = 'tests/2021/01/01/keys.gz' item_id = 'tests/2021/01/01/keys.gz'
# # TODO: check results # # TODO: check results
result = self.module_obj.compute(item_id) result = self.module_obj.compute(item_id)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
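The tests above now set the new debug flag so module exceptions propagate instead of being logged; here is a minimal sketch of that pattern with a hypothetical module.

import unittest

class FakeModule:
    def __init__(self):
        self.debug = False

    def compute(self, message, r_result=False):
        if r_result:
            return message.split()
        return None

class TestFakeModule(unittest.TestCase):
    def setUp(self):
        self.module_obj = FakeModule()
        self.module_obj.debug = True  # surface exceptions in tests, as in the hunk above

    def test_module(self):
        result = self.module_obj.compute('tests/2021/01/01/demo.gz 7', r_result=True)
        self.assertEqual(result, ['tests/2021/01/01/demo.gz', '7'])

if __name__ == '__main__':
    unittest.main()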

View file

@ -12,10 +12,8 @@ Requirements:
""" """
import redis
import os import os
import time import time
from packages import Paste
from pubsublogger import publisher from pubsublogger import publisher
from Helper import Process from Helper import Process

View file

@ -37,8 +37,9 @@ def get_object_correlation_json(correlation_id, subtype, max_nodes):
object_type = 'cryptocurrency' object_type = 'cryptocurrency'
max_nodes = sanitise_nb_max_nodes(max_nodes) max_nodes = sanitise_nb_max_nodes(max_nodes)
# FIXME
# ALL correlations # ALL correlations
correlation_names = Correlate_object.sanitise_correlation_names('') #correlation_names = Correlate_object.sanitise_correlation_names('')
#correlation_objects = Correlate_object.sanitise_correlation_objects('') #correlation_objects = Correlate_object.sanitise_correlation_objects('')
correlation_objects = ['domain'] correlation_objects = ['domain']

View file

@ -29,12 +29,10 @@ sys.path.append(os.environ['AIL_BIN'])
################################## ##################################
# Import Project packages # Import Project packages
################################## ##################################
from lib.ConfigLoader import ConfigLoader
from lib.Users import User from lib.Users import User
from lib import Tag from lib import Tag
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
# Import config # Import config
import Flask_config import Flask_config
@ -50,14 +48,14 @@ from blueprints.hunters import hunters
from blueprints.old_endpoints import old_endpoints from blueprints.old_endpoints import old_endpoints
from blueprints.ail_2_ail_sync import ail_2_ail_sync from blueprints.ail_2_ail_sync import ail_2_ail_sync
from blueprints.settings_b import settings_b from blueprints.settings_b import settings_b
from blueprints.objects_cve import objects_cve
from blueprints.objects_decoded import objects_decoded from blueprints.objects_decoded import objects_decoded
from blueprints.objects_range import objects_range
Flask_dir = os.environ['AIL_FLASK'] Flask_dir = os.environ['AIL_FLASK']
# CONFIG # # CONFIG #
config_loader = ConfigLoader.ConfigLoader() config_loader = ConfigLoader()
baseUrl = config_loader.get_config_str("Flask", "baseurl") baseUrl = config_loader.get_config_str("Flask", "baseurl")
host = config_loader.get_config_str("Flask", "host") host = config_loader.get_config_str("Flask", "host")
baseUrl = baseUrl.replace('/', '') baseUrl = baseUrl.replace('/', '')
@ -111,8 +109,8 @@ app.register_blueprint(hunters, url_prefix=baseUrl)
app.register_blueprint(old_endpoints, url_prefix=baseUrl) app.register_blueprint(old_endpoints, url_prefix=baseUrl)
app.register_blueprint(ail_2_ail_sync, url_prefix=baseUrl) app.register_blueprint(ail_2_ail_sync, url_prefix=baseUrl)
app.register_blueprint(settings_b, url_prefix=baseUrl) app.register_blueprint(settings_b, url_prefix=baseUrl)
app.register_blueprint(objects_cve, url_prefix=baseUrl)
app.register_blueprint(objects_decoded, url_prefix=baseUrl) app.register_blueprint(objects_decoded, url_prefix=baseUrl)
app.register_blueprint(objects_range, url_prefix=baseUrl)
# ========= =========# # ========= =========#
# ========= Cookie name ======== # ========= Cookie name ========
@ -162,33 +160,32 @@ for root, dirs, files in os.walk(os.path.join(Flask_dir, 'modules')):
if name == 'Flask_config.py': if name == 'Flask_config.py':
continue continue
name = name.strip('.py') name = name.strip('.py')
#print('importing {}'.format(name))
importlib.import_module(name) importlib.import_module(name)
elif name == 'header_{}.html'.format(module_name): elif name == 'header_{}.html'.format(module_name):
with open(join(root, name), 'r') as f: with open(join(root, name), 'r') as f:
to_add_to_header_dico[module_name] = f.read() to_add_to_header_dico[module_name] = f.read()
#create header.html # create header.html
complete_header = "" complete_header = ""
with open(os.path.join(Flask_dir, 'templates', 'header_base.html'), 'r') as f: with open(os.path.join(Flask_dir, 'templates', 'header_base.html'), 'r') as f:
complete_header = f.read() complete_header = f.read()
modified_header = complete_header modified_header = complete_header
#Add the header in the supplied order # Add the header in the supplied order
for module_name, txt in list(to_add_to_header_dico.items()): for module_name, txt in list(to_add_to_header_dico.items()):
to_replace = '<!--{}-->'.format(module_name) to_replace = '<!--{}-->'.format(module_name)
if to_replace in complete_header: if to_replace in complete_header:
modified_header = modified_header.replace(to_replace, txt) modified_header = modified_header.replace(to_replace, txt)
del to_add_to_header_dico[module_name] del to_add_to_header_dico[module_name]
#Add the header for non-supplied order # Add the header for non-supplied order
to_add_to_header = [] to_add_to_header = []
for module_name, txt in to_add_to_header_dico.items(): for module_name, txt in to_add_to_header_dico.items():
to_add_to_header.append(txt) to_add_to_header.append(txt)
modified_header = modified_header.replace('<!--insert here-->', '\n'.join(to_add_to_header)) modified_header = modified_header.replace('<!--insert here-->', '\n'.join(to_add_to_header))
#Write the header.html file # Write the header.html file
with open(os.path.join(Flask_dir, 'templates', 'header.html'), 'w') as f: with open(os.path.join(Flask_dir, 'templates', 'header.html'), 'w') as f:
f.write(modified_header) f.write(modified_header)
@ -250,6 +247,7 @@ def page_not_found(e):
# avoid endpoint enumeration # avoid endpoint enumeration
return render_template('error/404.html'), 404 return render_template('error/404.html'), 404
# ========== INITIAL taxonomies ============ # ========== INITIAL taxonomies ============
default_taxonomies = ["infoleak", "gdpr", "fpf", "dark-web"] default_taxonomies = ["infoleak", "gdpr", "fpf", "dark-web"]

View file

@ -26,22 +26,6 @@ sys.path.append(os.environ['AIL_BIN'])
################################## ##################################
from lib.objects import ail_objects from lib.objects import ail_objects
################################################################################
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
import Correlate_object
import Domain
import Screenshot
import btc_ail
import Username
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
import Cryptocurrency
import Pgp
import Decoded
import Tag
bootstrap_label = Flask_config.bootstrap_label bootstrap_label = Flask_config.bootstrap_label
vt_enabled = Flask_config.vt_enabled vt_enabled = Flask_config.vt_enabled
@ -74,77 +58,15 @@ def sanitise_nb_max_nodes(nb_max_nodes):
nb_max_nodes = 300 nb_max_nodes = 300
return nb_max_nodes return nb_max_nodes
def sanitise_correlation_names(correlation_names):
'''
correlation_names ex = 'pgp,crypto'
'''
all_correlation_names = Correlate_object.get_all_correlation_names()
if correlation_names is None:
return all_correlation_names
else:
l_correlation_names = []
for correl in correlation_names.split(','):
if correl in all_correlation_names:
l_correlation_names.append(correl)
if l_correlation_names:
return l_correlation_names
else:
return all_correlation_names
def sanitise_correlation_objects(correlation_objects):
'''
correlation_objects ex = 'domain,decoded'
'''
all_correlation_objects = Correlate_object.get_all_correlation_objects()
if correlation_objects is None:
return all_correlation_objects
else:
l_correlation_objects = []
for correl in correlation_objects.split(','):
if correl in all_correlation_objects:
l_correlation_objects.append(correl)
if l_correlation_objects:
return l_correlation_objects
else:
return all_correlation_objects
def get_card_metadata(object_type, correlation_id, type_id=None, expand_card=False):
card_dict = {}
if object_type == 'cryptocurrency':
card_dict["sparkline"] = Cryptocurrency.cryptocurrency.get_list_nb_previous_correlation_object(type_id, correlation_id, 6)
card_dict["icon"] = Correlate_object.get_correlation_node_icon(object_type, type_id)
if type_id == 'bitcoin' and expand_card:
card_dict["related_btc"] = btc_ail.get_bitcoin_info(correlation_id)
elif object_type == 'pgp':
card_dict["sparkline"] = Pgp.pgp.get_list_nb_previous_correlation_object(type_id, correlation_id, 6)
card_dict["icon"] = Correlate_object.get_correlation_node_icon(object_type, type_id)
elif object_type == 'username':
card_dict["sparkline"] = Username.correlation.get_list_nb_previous_correlation_object(type_id, correlation_id, 6)
card_dict["icon"] = Correlate_object.get_correlation_node_icon(object_type, type_id)
elif object_type == 'decoded':
card_dict["sparkline"] = Decoded.get_list_nb_previous_hash(correlation_id, 6)
card_dict["icon"] = Correlate_object.get_correlation_node_icon(object_type, value=correlation_id)
card_dict["vt"] = Decoded.get_decoded_vt_report(correlation_id)
card_dict["vt"]["status"] = vt_enabled
card_dict["add_tags_modal"] = Tag.get_modal_add_tags(correlation_id, object_type='decoded')
elif object_type == 'domain':
card_dict["icon"] = Correlate_object.get_correlation_node_icon(object_type, value=correlation_id)
card_dict["tags"] = Domain.get_domain_tags(correlation_id)
elif object_type == 'screenshot':
card_dict["add_tags_modal"] = Tag.get_modal_add_tags(correlation_id, object_type='image')
elif object_type == 'paste':
card_dict["icon"] = Correlate_object.get_correlation_node_icon(object_type, value=correlation_id)
return card_dict
# ============= ROUTES ============== # ============= ROUTES ==============
@correlation.route('/correlation/show_correlation', methods=['GET', 'POST']) # GET + POST @correlation.route('/correlation/show', methods=['GET', 'POST']) # GET + POST
@login_required @login_required
@login_read_only @login_read_only
def show_correlation(): def show_correlation():
if request.method == 'POST': if request.method == 'POST':
object_type = request.form.get('object_type') object_type = request.form.get('obj_type')
type_id = request.form.get('type_id') subtype = request.form.get('subtype')
correlation_id = request.form.get('correlation_id') obj_id = request.form.get('obj_id')
max_nodes = request.form.get('max_nb_nodes_in') max_nodes = request.form.get('max_nb_nodes_in')
mode = request.form.get('mode') mode = request.form.get('mode')
if mode: if mode:
@ -153,73 +75,71 @@ def show_correlation():
mode = 'union' mode = 'union'
## get all selected correlations ## get all selected correlations
correlation_names = [] filter_types = []
correlation_objects = [] correl_option = request.form.get('CveCheck')
#correlation_names if correl_option:
filter_types.append('cve')
correl_option = request.form.get('CryptocurrencyCheck') correl_option = request.form.get('CryptocurrencyCheck')
if correl_option: if correl_option:
correlation_names.append('cryptocurrency') filter_types.append('cryptocurrency')
correl_option = request.form.get('PgpCheck') correl_option = request.form.get('PgpCheck')
if correl_option: if correl_option:
correlation_names.append('pgp') filter_types.append('pgp')
correl_option = request.form.get('UsernameCheck') correl_option = request.form.get('UsernameCheck')
if correl_option: if correl_option:
correlation_names.append('username') filter_types.append('username')
correl_option = request.form.get('DecodedCheck') correl_option = request.form.get('DecodedCheck')
if correl_option: if correl_option:
correlation_names.append('decoded') filter_types.append('decoded')
correl_option = request.form.get('ScreenshotCheck') correl_option = request.form.get('ScreenshotCheck')
if correl_option: if correl_option:
correlation_names.append('screenshot') filter_types.append('screenshot')
# correlation_objects # correlation_objects
correl_option = request.form.get('DomainCheck') correl_option = request.form.get('DomainCheck')
if correl_option: if correl_option:
correlation_objects.append('domain') filter_types.append('domain')
correl_option = request.form.get('PasteCheck') correl_option = request.form.get('ItemCheck')
if correl_option: if correl_option:
correlation_objects.append('item') filter_types.append('item')
# list as params # list as params
correlation_names = ",".join(correlation_names) filter_types = ",".join(filter_types)
correlation_objects = ",".join(correlation_objects)
# redirect to keep history and bookmark # redirect to keep history and bookmark
return redirect(url_for('correlation.show_correlation', object_type=object_type, type_id=type_id, correlation_id=correlation_id, mode=mode, return redirect(url_for('correlation.show_correlation', type=object_type, subtype=subtype, id=obj_id, mode=mode,
max_nodes=max_nodes, correlation_names=correlation_names, correlation_objects=correlation_objects)) max_nodes=max_nodes, filter=filter_types))
# request.method == 'GET' # request.method == 'GET'
else: else:
object_type = request.args.get('object_type') obj_type = request.args.get('type')
type_id = request.args.get('type_id') subtype = request.args.get('subtype', '')
correlation_id = request.args.get('correlation_id') obj_id = request.args.get('id')
max_nodes = sanitise_nb_max_nodes(request.args.get('max_nodes')) max_nodes = sanitise_nb_max_nodes(request.args.get('max_nodes'))
mode = sanitise_graph_mode(request.args.get('mode')) mode = sanitise_graph_mode(request.args.get('mode'))
expand_card = request.args.get('expand_card') related_btc = bool(request.args.get('expand_card', False))
correlation_names = ail_objects.sanitize_objs_types(request.args.get('correlation_names', '').split(',')) filter_types = ail_objects.sanitize_objs_types(request.args.get('filter', '').split(','))
correlation_objects = ail_objects.sanitize_objs_types(request.args.get('correlation_objects', '').split(','))
# # TODO: remove me, rename screenshot to image # # TODO: remove me, rename screenshot to image
if object_type == 'image': if obj_type == 'image':
object_type == 'screenshot' obj_type = 'screenshot'
# check if correlation_id exist # check if obj_id exist
if not Correlate_object.exist_object(object_type, correlation_id, type_id=type_id): if not ail_objects.exists_obj(obj_type, subtype, obj_id):
abort(404) # return 404 abort(404) # return 404
# oject exist # object exists
else: else:
dict_object = {"object_type": object_type, "correlation_id": correlation_id} dict_object = {"object_type": obj_type,
dict_object["max_nodes"] = max_nodes "correlation_id": obj_id,
dict_object["mode"] = mode "max_nodes": max_nodes, "mode": mode,
dict_object["correlation_names"] = correlation_names "filter": filter_types, "filter_str": ",".join(filter_types),
dict_object["correlation_names_str"] = ",".join(correlation_names) "metadata": ail_objects.get_object_meta(obj_type, subtype, obj_id, flask_context=True)
dict_object["correlation_objects"] = correlation_objects }
dict_object["correlation_objects_str"] = ",".join(correlation_objects) print(dict_object)
dict_object["metadata"] = Correlate_object.get_object_metadata(object_type, correlation_id, type_id=type_id) if subtype:
if type_id: dict_object["metadata"]['type_id'] = subtype
dict_object["metadata"]['type_id'] = type_id dict_object["metadata_card"] = ail_objects.get_object_card_meta(obj_type, subtype, obj_id, related_btc=related_btc)
dict_object["metadata_card"] = get_card_metadata(object_type, correlation_id, type_id=type_id, expand_card=expand_card)
return render_template("show_correlation.html", dict_object=dict_object, bootstrap_label=bootstrap_label) return render_template("show_correlation.html", dict_object=dict_object, bootstrap_label=bootstrap_label)
@correlation.route('/correlation/get/description') @correlation.route('/correlation/get/description')
@ -254,19 +174,17 @@ def get_description():
@login_required @login_required
@login_read_only @login_read_only
def graph_node_json(): def graph_node_json():
obj_id = request.args.get('correlation_id') #######################3 obj_id = request.args.get('id')
subtype = request.args.get('type_id') ####################### subtype = request.args.get('subtype')
obj_type = request.args.get('object_type') ####################### obj_type = request.args.get('type')
max_nodes = sanitise_nb_max_nodes(request.args.get('max_nodes')) max_nodes = sanitise_nb_max_nodes(request.args.get('max_nodes'))
correlation_names = ail_objects.sanitize_objs_types(request.args.get('correlation_names', '').split(',')) filter_types = ail_objects.sanitize_objs_types(request.args.get('filter', '').split(','))
correlation_objects = ail_objects.sanitize_objs_types(request.args.get('correlation_objects', '').split(','))
# # TODO: remove me, rename screenshot # # TODO: remove me, rename screenshot
if obj_type == 'image': if obj_type == 'image':
obj_type == 'screenshot' obj_type = 'screenshot'
filter_types = correlation_names + correlation_objects
json_graph = ail_objects.get_correlations_graph_node(obj_type, subtype, obj_id, filter_types=filter_types, max_nodes=max_nodes, level=2, flask_context=True) json_graph = ail_objects.get_correlations_graph_node(obj_type, subtype, obj_id, filter_types=filter_types, max_nodes=max_nodes, level=2, flask_context=True)
#json_graph = Correlate_object.get_graph_node_object_correlation(obj_type, obj_id, 'union', correlation_names, correlation_objects, requested_correl_type=subtype, max_nodes=max_nodes) #json_graph = Correlate_object.get_graph_node_object_correlation(obj_type, obj_id, 'union', correlation_names, correlation_objects, requested_correl_type=subtype, max_nodes=max_nodes)
return jsonify(json_graph) return jsonify(json_graph)
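The refactor above collapses the old correlation_names/correlation_objects parameters into a single filter list sanitised by ail_objects.sanitize_objs_types. A minimal client-side sketch of the new /correlation/show GET interface, assuming a local AIL instance and an already-authenticated session (login cookie omitted); host, port and object id are hypothetical:

import requests   # an authenticated Flask-Login session is required and not shown here

ail_url = 'https://127.0.0.1:7000'            # hypothetical local AIL instance
params = {
    'type': 'domain',                         # obj_type
    'subtype': '',                            # empty when the object has no subtype
    'id': 'example.onion',                    # hypothetical object id
    'mode': 'union',
    'max_nodes': 300,
    'filter': 'cve,cryptocurrency,decoded',   # single list replacing correlation_names/correlation_objects
}
r = requests.get(f'{ail_url}/correlation/show', params=params, verify=False)
print(r.status_code)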

View file

@ -6,11 +6,13 @@
''' '''
import os import os
import sys
import json import json
import random import random
import sys
import time
from datetime import datetime
from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response, make_response from flask import render_template, jsonify, request, Blueprint, redirect, url_for, Response, send_file, abort
from flask_login import login_required, current_user, login_user, logout_user from flask_login import login_required, current_user, login_user, logout_user
sys.path.append('modules') sys.path.append('modules')
@ -19,15 +21,6 @@ import Flask_config
# Import Role_Manager # Import Role_Manager
from Role_Manager import login_admin, login_analyst, login_read_only from Role_Manager import login_admin, login_analyst, login_read_only
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
import Tag
sys.path.append(os.environ['AIL_BIN']) sys.path.append(os.environ['AIL_BIN'])
################################## ##################################
@ -36,6 +29,10 @@ sys.path.append(os.environ['AIL_BIN'])
from lib import crawlers from lib import crawlers
from lib import Language from lib import Language
from lib.objects import Domains from lib.objects import Domains
from lib.objects.Items import Item
from lib import Tag
from packages import Date
from lib import Domain # # # # # # # # # # # # # # # # TODO: from lib import Domain # # # # # # # # # # # # # # # # TODO:
@ -50,9 +47,9 @@ crawler_splash = Blueprint('crawler_splash', __name__, template_folder=os.path.j
# ============ FUNCTIONS ============ # ============ FUNCTIONS ============
def api_validator(api_response): def api_validator(message, code):
if api_response: if message and code:
return Response(json.dumps(api_response[0], indent=2, sort_keys=True), mimetype='application/json'), api_response[1] return Response(json.dumps(message, indent=2, sort_keys=True), mimetype='application/json'), code
def create_json_response(data, status_code): def create_json_response(data, status_code):
return Response(json.dumps(data, indent=2, sort_keys=True), mimetype='application/json'), status_code return Response(json.dumps(data, indent=2, sort_keys=True), mimetype='application/json'), status_code
@ -62,26 +59,26 @@ def create_json_response(data, status_code):
@login_required @login_required
@login_read_only @login_read_only
def crawlers_dashboard(): def crawlers_dashboard():
# # TODO: get splash manager status is_manager_connected = crawlers.get_lacus_connection_metadata()
is_manager_connected = crawlers.get_splash_manager_connection_metadata() crawlers_status = crawlers.get_crawler_capture_status()
all_splash_crawler_status = crawlers.get_all_spash_crawler_status() print(crawlers_status)
splash_crawlers_latest_stats = crawlers.get_splash_crawler_latest_stats() crawlers_latest_stats = crawlers.get_crawlers_stats()
print(crawlers_latest_stats)
date = crawlers.get_current_date() date = crawlers.get_current_date()
return render_template("dashboard_crawler.html", date=date,
return render_template("dashboard_splash_crawler.html", all_splash_crawler_status = all_splash_crawler_status, is_manager_connected=is_manager_connected,
is_manager_connected=is_manager_connected, date=date, crawlers_status=crawlers_status,
splash_crawlers_latest_stats=splash_crawlers_latest_stats) crawlers_latest_stats=crawlers_latest_stats)
@crawler_splash.route("/crawlers/crawler_dashboard_json", methods=['GET']) @crawler_splash.route("/crawlers/crawler_dashboard_json", methods=['GET'])
@login_required @login_required
@login_read_only @login_read_only
def crawler_dashboard_json(): def crawler_dashboard_json():
crawlers_status = crawlers.get_crawler_capture_status()
crawlers_latest_stats = crawlers.get_crawlers_stats()
    all_splash_crawler_status = crawlers.get_all_spash_crawler_status()
    splash_crawlers_latest_stats = crawlers.get_splash_crawler_latest_stats()
    return jsonify({'all_splash_crawler_status': all_splash_crawler_status, return jsonify({'crawlers_status': crawlers_status,
    'splash_crawlers_latest_stats':splash_crawlers_latest_stats}) 'stats': crawlers_latest_stats})
@crawler_splash.route("/crawlers/manual", methods=['GET']) @crawler_splash.route("/crawlers/manual", methods=['GET'])
@login_required @login_required
@ -89,12 +86,12 @@ def crawler_dashboard_json():
def manual(): def manual():
user_id = current_user.get_id() user_id = current_user.get_id()
l_cookiejar = crawlers.api_get_cookies_list_select(user_id) l_cookiejar = crawlers.api_get_cookies_list_select(user_id)
all_crawlers_types = crawlers.get_all_crawlers_queues_types() crawlers_types = crawlers.get_crawler_all_types()
all_splash_name = crawlers.get_all_crawlers_to_launch_splash_name() proxies = [] # TODO HANDLE PROXIES
return render_template("crawler_manual.html", return render_template("crawler_manual.html",
is_manager_connected=crawlers.get_splash_manager_connection_metadata(), is_manager_connected=crawlers.get_lacus_connection_metadata(),
all_crawlers_types=all_crawlers_types, crawlers_types=crawlers_types,
all_splash_name=all_splash_name, proxies=proxies,
l_cookiejar=l_cookiejar) l_cookiejar=l_cookiejar)
@crawler_splash.route("/crawlers/send_to_spider", methods=['POST']) @crawler_splash.route("/crawlers/send_to_spider", methods=['POST'])
@ -106,17 +103,16 @@ def send_to_spider():
# POST val # POST val
url = request.form.get('url_to_crawl') url = request.form.get('url_to_crawl')
crawler_type = request.form.get('crawler_queue_type') crawler_type = request.form.get('crawler_queue_type')
splash_name = request.form.get('splash_name') proxy = request.form.get('proxy_name')
auto_crawler = request.form.get('crawler_type') auto_crawler = request.form.get('crawler_type') # TODO Auto Crawler
crawler_delta = request.form.get('crawler_epoch') crawler_delta = request.form.get('crawler_epoch') # TODO Auto Crawler
screenshot = request.form.get('screenshot') screenshot = request.form.get('screenshot')
har = request.form.get('har') har = request.form.get('har')
depth_limit = request.form.get('depth_limit') depth_limit = request.form.get('depth_limit')
max_pages = request.form.get('max_pages')
cookiejar_uuid = request.form.get('cookiejar') cookiejar_uuid = request.form.get('cookiejar')
if splash_name: if crawler_type == 'onion':
crawler_type = splash_name proxy = 'force_tor'
if cookiejar_uuid: if cookiejar_uuid:
if cookiejar_uuid == 'None': if cookiejar_uuid == 'None':
@ -125,13 +121,55 @@ def send_to_spider():
cookiejar_uuid = cookiejar_uuid.rsplit(':') cookiejar_uuid = cookiejar_uuid.rsplit(':')
cookiejar_uuid = cookiejar_uuid[-1].replace(' ', '') cookiejar_uuid = cookiejar_uuid[-1].replace(' ', '')
res = crawlers.api_create_crawler_task(user_id, url, screenshot=screenshot, har=har, depth_limit=depth_limit, max_pages=max_pages, data = {'url': url, 'depth': depth_limit, 'har': har, 'screenshot': screenshot}
crawler_type=crawler_type, if proxy:
auto_crawler=auto_crawler, crawler_delta=crawler_delta, cookiejar_uuid=cookiejar_uuid) data['proxy'] = proxy
if res: if cookiejar_uuid:
data['cookiejar'] = cookiejar_uuid
res = crawlers.api_add_crawler_task(data, user_id=user_id)
if res[1] != 200:
return create_json_response(res[0], res[1]) return create_json_response(res[0], res[1])
return redirect(url_for('crawler_splash.manual')) return redirect(url_for('crawler_splash.manual'))
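A minimal sketch of the new task submission path shown in this hunk, assuming an AIL environment with AIL_BIN set; the payload keys and the (message, status_code) return convention come from the diff, while the URL and user id are hypothetical:

import os
import sys
sys.path.append(os.environ['AIL_BIN'])

from lib import crawlers

data = {'url': 'http://example.onion',   # hypothetical URL to crawl
        'depth': 1,                      # depth_limit form field
        'har': True,                     # keep the HAR capture
        'screenshot': True,              # keep the screenshot
        'proxy': 'force_tor'}            # forced above for the onion queue type
# data['cookiejar'] = '<cookiejar uuid>'  # optional, same as the form flow
message, code = crawlers.api_add_crawler_task(data, user_id='admin@admin.test')  # hypothetical user id
if code != 200:
    print('error:', message)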
@crawler_splash.route("/crawlers/last/domains", methods=['GET'])
@login_required
@login_read_only
def crawlers_last_domains():
domain_type = request.args.get('type')
if domain_type not in crawlers.get_crawler_all_types():
return jsonify({'error': 'Invalid domain type'}), 400
# TODO STAT by EPOCH
domains = []
for domain_row in crawlers.get_last_crawled_domains(domain_type):
domain, epoch = domain_row.split(':', 1)
dom = Domains.Domain(domain)
meta = dom.get_meta()
meta['epoch'] = epoch
meta['status_epoch'] = dom.is_up_by_epoch(epoch)
domains.append(meta)
crawler_stats = crawlers.get_crawlers_stats(domain_type=domain_type)
now = datetime.now()
date = now.strftime("%Y%m%d")
date_string = '{}-{}-{}'.format(date[0:4], date[4:6], date[6:8])
return render_template("last_crawled.html", domains=domains, type=domain_type,
is_manager_connected=crawlers.get_lacus_connection_metadata(),
date_from=date_string, date_to=date_string,
crawler_stats=crawler_stats)
@crawler_splash.route('/crawlers/last/domains/json')
@login_required
@login_read_only
def crawlers_last_domains_json():
domain_type = request.args.get('type')
if domain_type not in crawlers.get_crawler_all_types():
return jsonify({'error': 'Invalid domain type'}), 400
stats = []
for date in Date.get_date_range(7):
stats.append(crawlers.get_crawlers_stats_by_day(date, domain_type))
return jsonify(stats)
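The JSON endpoint above aggregates one stats entry per day over the last week; a sketch of the same aggregation outside Flask, assuming an AIL environment (helper names are taken from this commit):

import os
import sys
sys.path.append(os.environ['AIL_BIN'])

from packages import Date
from lib import crawlers

# One stats dict per day for the last 7 days, for the 'onion' queue type.
stats = [crawlers.get_crawlers_stats_by_day(day, 'onion') for day in Date.get_date_range(7)]
print(stats)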
#### Domains #### #### Domains ####
@ -143,36 +181,69 @@ def showDomain():
if request.method == 'POST': if request.method == 'POST':
domain_name = request.form.get('in_show_domain') domain_name = request.form.get('in_show_domain')
epoch = None epoch = None
port = None
else: else:
domain_name = request.args.get('domain') domain_name = request.args.get('domain')
epoch = request.args.get('epoch') epoch = request.args.get('epoch')
port = request.args.get('port') try:
epoch = int(epoch)
res = api_validator(Domain.api_verify_if_domain_exist(domain_name)) except (ValueError, TypeError):
if res: epoch = None
return res
domain = Domains.Domain(domain_name) domain = Domains.Domain(domain_name)
dom = Domain.Domain(domain_name, port=port) if not domain.exists():
abort(404)
dict_domain = dom.get_domain_metadata() dict_domain = domain.get_meta(options=['last_origin', 'languages'])
dict_domain['domain'] = domain_name dict_domain['domain'] = domain.id
if dom.domain_was_up(): if domain.was_up():
dict_domain = {**dict_domain, **domain.get_correlations()} dict_domain = {**dict_domain, **domain.get_correlations()}
print(dict_domain)
dict_domain['correlation_nb'] = len(dict_domain['decoded']) + len(dict_domain['username']) + len(dict_domain['pgp']) + len(dict_domain['cryptocurrency']) + len(dict_domain['screenshot']) dict_domain['correlation_nb'] = len(dict_domain['decoded']) + len(dict_domain['username']) + len(dict_domain['pgp']) + len(dict_domain['cryptocurrency']) + len(dict_domain['screenshot'])
dict_domain['father'] = dom.get_domain_father()
dict_domain['languages'] = Language.get_languages_from_iso(dom.get_domain_languages(), sort=True)
dict_domain['tags'] = dom.get_domain_tags()
dict_domain['tags_safe'] = Tag.is_tags_safe(dict_domain['tags']) dict_domain['tags_safe'] = Tag.is_tags_safe(dict_domain['tags'])
dict_domain['history'] = dom.get_domain_history_with_status() dict_domain['history'] = domain.get_history(status=True)
dict_domain['crawler_history'] = dom.get_domain_items_crawled(items_link=True, epoch=epoch, item_screenshot=True, item_tag=True) # # TODO: handle multiple port curr_epoch = None
if dict_domain['crawler_history'].get('items', []): # Select valid epoch
dict_domain['crawler_history']['random_item'] = random.choice(dict_domain['crawler_history']['items']) if epoch:
for row in dict_domain['history']:
if row['epoch'] == epoch:
curr_epoch = row['epoch']
break
else:
curr_epoch = -1
for row in dict_domain['history']:
if row['epoch'] > curr_epoch:
curr_epoch = row['epoch']
dict_domain['epoch'] = curr_epoch
dict_domain["date"] = time.strftime('%Y/%m/%d - %H:%M.%S', time.gmtime(curr_epoch))
return render_template("showDomain.html", dict_domain=dict_domain, bootstrap_label=bootstrap_label, print(dict_domain['epoch'])
modal_add_tags=Tag.get_modal_add_tags(dict_domain['domain'], object_type="domain"))
dict_domain['crawler_history_items'] = []
for item_id in domain.get_crawled_items_by_epoch(epoch):
dict_domain['crawler_history_items'].append(Item(item_id).get_meta(options=['crawler']))
if dict_domain['crawler_history_items']:
dict_domain['random_item'] = random.choice(dict_domain['crawler_history_items'])
return render_template("showDomain.html",
dict_domain=dict_domain, bootstrap_label=bootstrap_label,
modal_add_tags=Tag.get_modal_add_tags(dict_domain['domain'], object_type="domain"))
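showDomain() now relies on the new Domains.Domain accessors instead of the old Domain/HiddenServices helpers. A short sketch of that accessor chain, with the domain id hypothetical and the return shapes inferred from how this hunk consumes them:

import os
import sys
sys.path.append(os.environ['AIL_BIN'])

from lib.objects import Domains

dom = Domains.Domain('example.onion')                  # hypothetical domain id
if dom.exists():
    meta = dom.get_meta(options=['last_origin', 'languages'])
    history = dom.get_history(status=True)             # each row carries an 'epoch' field
    if history:
        last_epoch = max(row['epoch'] for row in history)
        for item_id in dom.get_crawled_items_by_epoch(last_epoch):
            print(item_id)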
@crawler_splash.route('/crawlers/domain/download', methods=['GET'])
@login_required
@login_read_only
def crawlers_domain_download():
domain = request.args.get('domain')
epoch = request.args.get('epoch')
try:
epoch = int(epoch)
except (ValueError, TypeError):
epoch = None
dom = Domains.Domain(domain)
if not dom.exists():
abort(404)
zip_file = dom.get_download_zip(epoch=epoch)
if not zip_file:
abort(404)
return send_file(zip_file, download_name=f'{dom.get_id()}.zip', as_attachment=True)
@crawler_splash.route('/domains/explorer/domain_type_post', methods=['POST']) @crawler_splash.route('/domains/explorer/domain_type_post', methods=['POST'])
@login_required @login_required
@ -304,13 +375,36 @@ def domains_search_name():
l_dict_domains=l_dict_domains, bootstrap_label=bootstrap_label, l_dict_domains=l_dict_domains, bootstrap_label=bootstrap_label,
domains_types=domains_types) domains_types=domains_types)
@crawler_splash.route('/domains/TODO', methods=['GET']) @crawler_splash.route('/domains/date', methods=['GET'])
@login_required @login_required
@login_analyst @login_analyst
def domains_todo(): def domains_search_date():
# TODO sanitize type + date
domain_type = request.args.get('type') domain_type = request.args.get('type')
last_domains = Domain.get_last_crawled_domains(domain_type) date_from = request.args.get('date_from')
date_to = request.args.get('date_to')
# page = request.args.get('page')
date = Date.sanitise_date_range(date_from, date_to)
domains_date = Domains.get_domains_by_daterange(date['date_from'], date['date_to'], domain_type)
dict_domains = {}
for d in domains_date:
dict_domains[d] = Domains.get_domains_meta(domains_date[d])
date_from = f"{date['date_from'][0:4]}-{date['date_from'][4:6]}-{date['date_from'][6:8]}"
date_to = f"{date['date_to'][0:4]}-{date['date_to'][4:6]}-{date['date_to'][6:8]}"
return render_template("domains_daterange.html", date_from=date_from, date_to=date_to,
bootstrap_label=bootstrap_label,
dict_domains=dict_domains, type=domain_type)
@crawler_splash.route('/domains/date/post', methods=['POST'])
@login_required
@login_analyst
def domains_search_date_post():
domain_type = request.form.get('type')
date_from = request.form.get('date_from')
date_to = request.form.get('date_to')
return redirect(url_for('crawler_splash.domains_search_date', date_from=date_from, date_to=date_to, type=domain_type))
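A sketch of the daterange lookup used by domains_search_date(), assuming an AIL environment; the function names come from this diff and the per-day dict layout is inferred from how dict_domains is built above:

import os
import sys
sys.path.append(os.environ['AIL_BIN'])

from packages import Date
from lib.objects import Domains

date = Date.sanitise_date_range('20221001', '20221025')   # -> {'date_from': 'YYYYMMDD', 'date_to': 'YYYYMMDD'}
domains_by_day = Domains.get_domains_by_daterange(date['date_from'], date['date_to'], 'onion')
for day, domain_ids in domains_by_day.items():
    print(day, Domains.get_domains_meta(domain_ids))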
##-- --## ##-- --##
@ -521,49 +615,8 @@ def crawler_cookiejar_cookie_json_add_post():
return redirect(url_for('crawler_splash.crawler_cookiejar_cookie_add', cookiejar_uuid=cookiejar_uuid)) return redirect(url_for('crawler_splash.crawler_cookiejar_cookie_add', cookiejar_uuid=cookiejar_uuid))
@crawler_splash.route('/crawler/settings', methods=['GET']) #--- Cookiejar ---#
@login_required
@login_analyst
def crawler_splash_setings():
all_proxies = crawlers.get_all_proxies_metadata()
all_splash = crawlers.get_all_splash_crawler_metadata()
splash_manager_url = crawlers.get_splash_manager_url()
api_key = crawlers.get_hidden_splash_api_key()
is_manager_connected = crawlers.get_splash_manager_connection_metadata(force_ping=True)
nb_crawlers_to_launch = crawlers.get_nb_crawlers_to_launch()
#crawler_full_config = Config_DB.get_full_config_by_section('crawler')
is_crawler_working = crawlers.is_test_ail_crawlers_successful()
crawler_error_mess = crawlers.get_test_ail_crawlers_message()
return render_template("settings_splash_crawler.html",
is_manager_connected=is_manager_connected,
splash_manager_url=splash_manager_url, api_key=api_key,
all_splash=all_splash, all_proxies=all_proxies,
nb_crawlers_to_launch=nb_crawlers_to_launch,
is_crawler_working=is_crawler_working,
crawler_error_mess=crawler_error_mess,
#crawler_full_config=crawler_full_config
)
@crawler_splash.route('/crawler/settings/crawler_manager', methods=['GET', 'POST'])
@login_required
@login_admin
def crawler_splash_setings_crawler_manager():
if request.method == 'POST':
splash_manager_url = request.form.get('splash_manager_url')
api_key = request.form.get('api_key')
res = crawlers.api_save_splash_manager_url_api({'url':splash_manager_url, 'api_key':api_key})
if res[1] != 200:
return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1]
else:
return redirect(url_for('crawler_splash.crawler_splash_setings'))
else:
splash_manager_url = crawlers.get_splash_manager_url()
api_key = crawlers.get_splash_api_key()
return render_template("settings_edit_splash_crawler_manager.html",
splash_manager_url=splash_manager_url, api_key=api_key)
@crawler_splash.route('/crawler/settings/crawlers_to_lauch', methods=['GET', 'POST']) @crawler_splash.route('/crawler/settings/crawlers_to_lauch', methods=['GET', 'POST'])
@login_required @login_required
@ -583,13 +636,6 @@ def crawler_splash_setings_crawlers_to_lauch():
return render_template("settings_edit_crawlers_to_launch.html", return render_template("settings_edit_crawlers_to_launch.html",
nb_crawlers_to_launch=nb_crawlers_to_launch) nb_crawlers_to_launch=nb_crawlers_to_launch)
@crawler_splash.route('/crawler/settings/test_crawler', methods=['GET'])
@login_required
@login_admin
def crawler_splash_setings_test_crawler():
crawlers.test_ail_crawlers()
return redirect(url_for('crawler_splash.crawler_splash_setings'))
@crawler_splash.route('/crawler/settings/relaunch_crawler', methods=['GET']) @crawler_splash.route('/crawler/settings/relaunch_crawler', methods=['GET'])
@login_required @login_required
@login_admin @login_admin
@ -598,3 +644,59 @@ def crawler_splash_setings_relaunch_crawler():
return redirect(url_for('crawler_splash.crawler_splash_setings')) return redirect(url_for('crawler_splash.crawler_splash_setings'))
## - - ## ## - - ##
#### LACUS ####
@crawler_splash.route('/crawler/settings', methods=['GET'])
@login_required
@login_analyst
def crawler_settings():
lacus_url = crawlers.get_lacus_url()
api_key = crawlers.get_hidden_lacus_api_key()
is_manager_connected = crawlers.get_lacus_connection_metadata(force_ping=True)
is_crawler_working = crawlers.is_test_ail_crawlers_successful()
crawler_error_mess = crawlers.get_test_ail_crawlers_message()
# TODO REGISTER PROXY
# all_proxies = crawlers.get_all_proxies_metadata()
# nb_crawlers_to_launch = crawlers.get_nb_crawlers_to_launch()
# crawler_full_config = Config_DB.get_full_config_by_section('crawler')
return render_template("settings_crawler.html",
is_manager_connected=is_manager_connected,
lacus_url=lacus_url, api_key=api_key,
#all_proxies=all_proxies,
#nb_crawlers_to_launch=nb_crawlers_to_launch,
is_crawler_working=is_crawler_working,
crawler_error_mess=crawler_error_mess,
)
@crawler_splash.route('/crawler/settings/crawler/manager', methods=['GET', 'POST'])
@login_required
@login_admin
def crawler_lacus_settings_crawler_manager():
if request.method == 'POST':
lacus_url = request.form.get('lacus_url')
api_key = request.form.get('api_key')
res = crawlers.api_save_lacus_url_key({'url': lacus_url, 'api_key': api_key})
print(res)
if res[1] != 200:
return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1]
else:
return redirect(url_for('crawler_splash.crawler_settings'))
else:
lacus_url = crawlers.get_lacus_url()
api_key = crawlers.get_lacus_api_key()
return render_template("settings_edit_lacus_crawler.html", lacus_url=lacus_url, api_key=api_key)
@crawler_splash.route('/crawler/settings/crawler/test', methods=['GET'])
@login_required
@login_admin
def crawler_settings_crawler_test():
crawlers.test_ail_crawlers()
return redirect(url_for('crawler_splash.crawler_settings'))
#--- LACUS ---#
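The Lacus settings flow boils down to saving the URL/API key, pinging the instance and running the built-in crawler test. A sketch of the same round-trip with the helpers introduced in this commit; the Lacus URL and api_key value are placeholders:

import os
import sys
sys.path.append(os.environ['AIL_BIN'])

from lib import crawlers

message, code = crawlers.api_save_lacus_url_key({'url': 'http://127.0.0.1:7100',  # placeholder Lacus instance
                                                 'api_key': None})                # placeholder key
if code != 200:
    print('error:', message)
else:
    print('connection:', crawlers.get_lacus_connection_metadata(force_ping=True))
    crawlers.test_ail_crawlers()
    print('crawler test ok:', crawlers.is_test_ail_crawlers_successful())
    print('crawler test message:', crawlers.get_test_ail_crawlers_message())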

View file

@ -53,7 +53,7 @@ def show_investigation():
investigation_uuid = request.args.get("uuid") investigation_uuid = request.args.get("uuid")
investigation = Investigations.Investigation(investigation_uuid) investigation = Investigations.Investigation(investigation_uuid)
metadata = investigation.get_metadata(r_str=True) metadata = investigation.get_metadata(r_str=True)
objs = ail_objects.get_objects_meta(investigation.get_objects(), icon=True, url=True, flask_context=True) objs = ail_objects.get_objects_meta(investigation.get_objects(), flask_context=True)
return render_template("view_investigation.html", bootstrap_label=bootstrap_label, return render_template("view_investigation.html", bootstrap_label=bootstrap_label,
metadata=metadata, investigation_objs=objs) metadata=metadata, investigation_objs=objs)

View file

@ -0,0 +1,82 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
'''
Blueprint Flask: CVE objects endpoints: daterange view, search ...
'''
import os
import sys
import json
from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response, abort, send_file
from flask_login import login_required, current_user
# Import Role_Manager
from Role_Manager import login_admin, login_analyst, login_read_only
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.objects import Cves
from packages import Date
# ============ BLUEPRINT ============
objects_cve = Blueprint('objects_cve', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/objects/cve'))
# ============ VARIABLES ============
bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info']
# ============ FUNCTIONS ============
@objects_cve.route("/objects/cve", methods=['GET'])
@login_required
@login_read_only
def objects_cves():
date_from = request.args.get('date_from')
date_to = request.args.get('date_to')
show_objects = request.args.get('show_objects')
date = Date.sanitise_date_range(date_from, date_to)
date_from = date['date_from']
date_to = date['date_to']
# barchart_type
# correlation_type_search_endpoint
dict_objects = Cves.api_get_cves_meta_by_daterange(date_from, date_to)
print(date_from, date_to, dict_objects)
return render_template("CveDaterange.html", date_from=date_from, date_to=date_to,
dict_objects=dict_objects, show_objects=show_objects)
@objects_cve.route("/objects/cve/post", methods=['POST'])
@login_required
@login_read_only
def objects_cves_post():
date_from = request.form.get('date_from')
date_to = request.form.get('date_to')
show_objects = request.form.get('show_objects')
return redirect(url_for('objects_cve.objects_cves', date_from=date_from, date_to=date_to, show_objects=show_objects))
@objects_cve.route("/objects/cve/range/json", methods=['GET'])
@login_required
@login_read_only
def objects_cve_range_json():
return None
@objects_cve.route("/objects/cve/search", methods=['POST'])
@login_required
@login_read_only
def objects_cve_search():
to_search = request.form.get('object_id')
# TODO SANITIZE ID
# TODO Search all
cve = Cves.Cve(to_search)
if not cve.exists():
abort(404)
else:
return redirect(cve.get_link(flask_context=True))
# ============= ROUTES ==============
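The new blueprint leans on the Cves object API added elsewhere in this commit; a hedged sketch of that API as it is consumed above, with an example CVE id and YYYYMMDD date bounds:

import os
import sys
sys.path.append(os.environ['AIL_BIN'])

from lib.objects import Cves

# Metadata for every CVE object seen in the date range.
dict_objects = Cves.api_get_cves_meta_by_daterange('20221001', '20221025')
print(dict_objects)

cve = Cves.Cve('CVE-2021-44228')     # example CVE id
print(cve.exists())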

View file

@ -36,16 +36,16 @@ bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info']
# ============= ROUTES ============== # ============= ROUTES ==============
@objects_item.route("/object/item") #completely shows the paste in a new tab @objects_item.route("/object/item")
@login_required @login_required
@login_read_only @login_read_only
def showItem(): # # TODO: support post def showItem(): # # TODO: support post
item_id = request.args.get('id') item_id = request.args.get('id')
if not item_id or not item_basic.exist_item(item_id): if not item_id or not item_basic.exist_item(item_id):
abort(404) abort(404)
item = Item(item_id) item = Item(item_id)
meta = item.get_meta(options=set(['content', 'crawler', 'duplicates', 'lines', 'size'])) meta = item.get_meta(options=['content', 'crawler', 'duplicates', 'lines', 'size'])
meta['name'] = meta['id'].replace('/', ' / ') meta['name'] = meta['id'].replace('/', ' / ')
meta['father'] = item_basic.get_item_parent(item_id) meta['father'] = item_basic.get_item_parent(item_id)
@ -94,4 +94,4 @@ def item_download(): # # TODO: support post
if not item_id or not item_basic.exist_item(item_id): if not item_id or not item_basic.exist_item(item_id):
abort(404) abort(404)
item = Item(item_id) item = Item(item_id)
return send_file(item.get_raw_content(), attachment_filename=item_id, as_attachment=True) return send_file(item.get_raw_content(), download_name=item_id, as_attachment=True)
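The send_file change tracks Flask 2.x, where attachment_filename was replaced by download_name; a minimal, self-contained illustration (hypothetical route, not part of AIL):

from io import BytesIO
from flask import Flask, send_file

app = Flask(__name__)

@app.route('/download')
def download():
    payload = BytesIO(b'example content')
    # Flask >= 2.0: 'download_name' replaces the removed 'attachment_filename'
    return send_file(payload, mimetype='application/octet-stream',
                     download_name='item.gz', as_attachment=True)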

View file

@ -17,7 +17,6 @@ import redis
import unicodedata import unicodedata
import uuid import uuid
from io import BytesIO from io import BytesIO
from Date import Date
from functools import wraps from functools import wraps
@ -31,9 +30,9 @@ from flask_login import login_required
# Import Project packages # Import Project packages
################################## ##################################
from lib import Tag from lib import Tag
from lib.objects.Items import Item
import Paste from packages import Import_helper
import Import_helper
from pytaxonomies import Taxonomies from pytaxonomies import Taxonomies
from pymispgalaxies import Galaxies, Clusters from pymispgalaxies import Galaxies, Clusters
@ -98,8 +97,6 @@ def limit_content_length():
# ============ FUNCTIONS ============ # ============ FUNCTIONS ============
def one():
return 1
def allowed_file(filename): def allowed_file(filename):
if not '.' in filename: if not '.' in filename:
@ -126,15 +123,14 @@ def date_to_str(date):
def misp_create_event(distribution, threat_level_id, analysis, info, l_tags, publish, path): def misp_create_event(distribution, threat_level_id, analysis, info, l_tags, publish, path):
paste = Paste.Paste(path) item = Item(path)
source = path.split('/')[-6:] source = item.get_source()
source = '/'.join(source)[:-3]
ail_uuid = r_serv_db.get('ail:uuid') ail_uuid = r_serv_db.get('ail:uuid')
pseudofile = BytesIO(paste.get_p_content().encode()) pseudofile = BytesIO(item.get_content(binary=True))
temp = paste._get_p_duplicate() temp = item.get_duplicates()
#beautifier # beautifier
if not temp: if not temp:
temp = '' temp = ''
@ -181,7 +177,7 @@ def misp_create_event(distribution, threat_level_id, analysis, info, l_tags, pub
leak_obj = MISPObject(obj_name) leak_obj = MISPObject(obj_name)
leak_obj.add_attribute('sensor', value=ail_uuid, type="text") leak_obj.add_attribute('sensor', value=ail_uuid, type="text")
leak_obj.add_attribute('origin', value=source, type='text') leak_obj.add_attribute('origin', value=source, type='text')
leak_obj.add_attribute('last-seen', value=date_to_str(paste.p_date), type='datetime') leak_obj.add_attribute('last-seen', value=date_to_str(item.get_date()), type='datetime')
leak_obj.add_attribute('raw-data', value=source, data=pseudofile, type="attachment") leak_obj.add_attribute('raw-data', value=source, data=pseudofile, type="attachment")
if p_duplicate_number > 0: if p_duplicate_number > 0:
@ -192,7 +188,8 @@ def misp_create_event(distribution, threat_level_id, analysis, info, l_tags, pub
templateID = [x['ObjectTemplate']['id'] for x in pymisp.get_object_templates_list()['response'] if x['ObjectTemplate']['name'] == obj_name][0] templateID = [x['ObjectTemplate']['id'] for x in pymisp.get_object_templates_list()['response'] if x['ObjectTemplate']['name'] == obj_name][0]
except IndexError: except IndexError:
valid_types = ", ".join([x['ObjectTemplate']['name'] for x in pymisp.get_object_templates_list()]) valid_types = ", ".join([x['ObjectTemplate']['name'] for x in pymisp.get_object_templates_list()])
print ("Template for type {} not found! Valid types are: {%s}".format(obj_name, valid_types)) print (f"Template for type {obj_name} not found! Valid types are: {valid_types}")
return False
r = pymisp.add_object(eventid, templateID, leak_obj) r = pymisp.add_object(eventid, templateID, leak_obj)
if 'errors' in r: if 'errors' in r:
print(r) print(r)
@ -206,7 +203,7 @@ def hive_create_case(hive_tlp, threat_level, hive_description, hive_case_title,
ail_uuid = r_serv_db.get('ail:uuid') ail_uuid = r_serv_db.get('ail:uuid')
source = path.split('/')[-6:] source = path.split('/')[-6:]
source = '/'.join(source)[:-3] source = '/'.join(source)[:-3]
# get paste date # get item date
var = path.split('/') var = path.split('/')
last_seen = "{0}-{1}-{2}".format(var[-4], var[-3], var[-2]) last_seen = "{0}-{1}-{2}".format(var[-4], var[-3], var[-2])
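misp_create_event() now reads everything it needs from the Item object instead of the old Paste class; a sketch of those accessors with a hypothetical item id (method names come from this hunk, return types are inferred):

import os
import sys
sys.path.append(os.environ['AIL_BIN'])

from io import BytesIO
from lib.objects.Items import Item

item = Item('crawled/2022/10/25/example.gz')      # hypothetical item id
source = item.get_source()
pseudofile = BytesIO(item.get_content(binary=True))
duplicates = item.get_duplicates() or ''
print(source, item.get_date(), len(pseudofile.getvalue()))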

View file

@ -41,45 +41,6 @@ dic_type_name={'onion':'Onion', 'regular':'Website'}
# ============ FUNCTIONS ============ # ============ FUNCTIONS ============
def one():
return 1
def get_date_range(num_day):
curr_date = datetime.date.today()
date = Date( '{}{}{}'.format(str(curr_date.year), str(curr_date.month).zfill(2), str(curr_date.day).zfill(2)) )
date_list = []
for i in range(0, num_day):
date_list.append(date.substract_day(i))
return list(reversed(date_list))
def substract_date(date_from, date_to):
date_from = datetime.date(int(date_from[0:4]), int(date_from[4:6]), int(date_from[6:8]))
date_to = datetime.date(int(date_to[0:4]), int(date_to[4:6]), int(date_to[6:8]))
delta = date_to - date_from # timedelta
l_date = []
for i in range(delta.days + 1):
date = date_from + datetime.timedelta(i)
l_date.append( date.strftime('%Y%m%d') )
return l_date
def unpack_paste_tags(p_tags):
l_tags = []
for tag in p_tags:
complete_tag = tag
tag = tag.split('=')
if len(tag) > 1:
if tag[1] != '':
tag = tag[1][1:-1]
# no value
else:
tag = tag[0][1:-1]
# use for custom tags
else:
tag = tag[0]
l_tags.append( (tag, complete_tag) )
return l_tags
def is_valid_domain(domain): def is_valid_domain(domain):
faup.decode(domain) faup.decode(domain)
@ -89,26 +50,6 @@ def is_valid_domain(domain):
else: else:
return False return False
def is_valid_service_type(service_type):
accepted_service = ['onion', 'regular']
if service_type in accepted_service:
return True
else:
return False
def get_onion_status(domain, date):
if r_serv_onion.sismember('onion_up:'+date , domain):
return True
else:
return False
def get_domain_type(domain):
type_id = domain.split(':')[-1]
if type_id == 'onion':
return 'onion'
else:
return 'regular'
def get_type_domain(domain): def get_type_domain(domain):
if domain is None: if domain is None:
type = 'regular' type = 'regular'
@ -133,18 +74,6 @@ def get_domain_from_url(url):
def get_last_domains_crawled(type): # DONE def get_last_domains_crawled(type): # DONE
return r_serv_onion.lrange('last_{}'.format(type), 0 ,-1) return r_serv_onion.lrange('last_{}'.format(type), 0 ,-1)
def get_nb_domains_inqueue(type):
nb = r_serv_onion.scard('{}_crawler_queue'.format(type))
nb += r_serv_onion.scard('{}_crawler_priority_queue'.format(type))
return nb
def get_stats_last_crawled_domains(type, date):
statDomains = {}
statDomains['domains_up'] = r_serv_onion.scard('{}_up:{}'.format(type, date))
statDomains['domains_down'] = r_serv_onion.scard('{}_down:{}'.format(type, date))
statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down']
statDomains['domains_queue'] = get_nb_domains_inqueue(type)
return statDomains
def get_last_crawled_domains_metadata(list_domains_crawled, date, type=None, auto_mode=False): def get_last_crawled_domains_metadata(list_domains_crawled, date, type=None, auto_mode=False):
list_crawled_metadata = [] list_crawled_metadata = []
@ -201,22 +130,6 @@ def get_last_crawled_domains_metadata(list_domains_crawled, date, type=None, aut
list_crawled_metadata.append(metadata_domain) list_crawled_metadata.append(metadata_domain)
return list_crawled_metadata return list_crawled_metadata
def get_crawler_splash_status(type):
crawler_metadata = []
all_crawlers = r_cache.smembers('{}_crawlers'.format(type))
for crawler in all_crawlers:
crawling_domain = r_cache.hget('metadata_crawler:{}'.format(crawler), 'crawling_domain')
started_time = r_cache.hget('metadata_crawler:{}'.format(crawler), 'started_time')
status_info = r_cache.hget('metadata_crawler:{}'.format(crawler), 'status')
crawler_info = '{} - {}'.format(crawler, started_time)
if status_info=='Waiting' or status_info=='Crawling':
status=True
else:
status=False
crawler_metadata.append({'crawler_info': crawler_info, 'crawling_domain': crawling_domain, 'status_info': status_info, 'status': status})
return crawler_metadata
def delete_auto_crawler(url): def delete_auto_crawler(url):
domain = get_domain_from_url(url) domain = get_domain_from_url(url)
type = get_type_domain(domain) type = get_type_domain(domain)
@ -231,67 +144,6 @@ def delete_auto_crawler(url):
# ============= ROUTES ============== # ============= ROUTES ==============
# @hiddenServices.route("/crawlers/", methods=['GET'])
# @login_required
# @login_read_only
# def dashboard():
# crawler_metadata_onion = get_crawler_splash_status('onion')
# crawler_metadata_regular = get_crawler_splash_status('regular')
#
# now = datetime.datetime.now()
# date = now.strftime("%Y%m%d")
# statDomains_onion = get_stats_last_crawled_domains('onion', date)
# statDomains_regular = get_stats_last_crawled_domains('regular', date)
#
# return render_template("Crawler_dashboard.html", crawler_metadata_onion = crawler_metadata_onion,
# date=date,
# crawler_metadata_regular=crawler_metadata_regular,
# statDomains_onion=statDomains_onion, statDomains_regular=statDomains_regular)
@hiddenServices.route("/crawlers/crawler_splash_onion", methods=['GET'])
@login_required
@login_read_only
def crawler_splash_onion():
type = 'onion'
last_onions = get_last_domains_crawled(type)
list_onion = []
now = datetime.datetime.now()
date = now.strftime("%Y%m%d")
statDomains = get_stats_last_crawled_domains(type, date)
list_onion = get_last_crawled_domains_metadata(last_onions, date, type=type)
crawler_metadata = get_crawler_splash_status(type)
date_string = '{}-{}-{}'.format(date[0:4], date[4:6], date[6:8])
return render_template("Crawler_Splash_onion.html", last_onions=list_onion, statDomains=statDomains,
crawler_metadata=crawler_metadata, date_from=date_string, date_to=date_string)
@hiddenServices.route("/crawlers/Crawler_Splash_last_by_type", methods=['GET'])
@login_required
@login_read_only
def Crawler_Splash_last_by_type():
type = request.args.get('type')
# verify user input
if type not in list_types:
type = 'onion'
type_name = dic_type_name[type]
list_domains = []
now = datetime.datetime.now()
date = now.strftime("%Y%m%d")
date_string = '{}-{}-{}'.format(date[0:4], date[4:6], date[6:8])
statDomains = get_stats_last_crawled_domains(type, date)
list_domains = get_last_crawled_domains_metadata(get_last_domains_crawled(type), date, type=type)
crawler_metadata = get_crawler_splash_status(type)
return render_template("Crawler_Splash_last_by_type.html", type=type, type_name=type_name,
is_manager_connected=crawlers.get_splash_manager_connection_metadata(),
last_domains=list_domains, statDomains=statDomains,
crawler_metadata=crawler_metadata, date_from=date_string, date_to=date_string)
@hiddenServices.route("/crawlers/blacklisted_domains", methods=['GET']) @hiddenServices.route("/crawlers/blacklisted_domains", methods=['GET'])
@login_required @login_required
@login_read_only @login_read_only
@ -424,7 +276,7 @@ def auto_crawler():
return render_template("Crawler_auto.html", page=page, nb_page_max=nb_page_max, return render_template("Crawler_auto.html", page=page, nb_page_max=nb_page_max,
last_domains=last_domains, last_domains=last_domains,
is_manager_connected=crawlers.get_splash_manager_connection_metadata(), is_manager_connected=crawlers.get_lacus_connection_metadata(),
auto_crawler_domain_onions_metadata=auto_crawler_domain_onions_metadata, auto_crawler_domain_onions_metadata=auto_crawler_domain_onions_metadata,
auto_crawler_domain_regular_metadata=auto_crawler_domain_regular_metadata) auto_crawler_domain_regular_metadata=auto_crawler_domain_regular_metadata)
@ -439,285 +291,6 @@ def remove_auto_crawler():
delete_auto_crawler(url) delete_auto_crawler(url)
return redirect(url_for('hiddenServices.auto_crawler', page=page)) return redirect(url_for('hiddenServices.auto_crawler', page=page))
# # TODO: refractor
@hiddenServices.route("/hiddenServices/last_crawled_domains_with_stats_json", methods=['GET'])
@login_required
@login_read_only
def last_crawled_domains_with_stats_json():
last_onions = r_serv_onion.lrange('last_onion', 0 ,-1)
list_onion = []
now = datetime.datetime.now()
date = '{}{}{}'.format(now.strftime("%Y"), now.strftime("%m"), now.strftime("%d"))
statDomains = {}
statDomains['domains_up'] = r_serv_onion.scard('onion_up:{}'.format(date))
statDomains['domains_down'] = r_serv_onion.scard('onion_down:{}'.format(date))
statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down']
statDomains['domains_queue'] = r_serv_onion.scard('onion_domain_crawler_queue')
for onion in last_onions:
metadata_onion = {}
metadata_onion['domain'] = onion
metadata_onion['last_check'] = r_serv_onion.hget('onion_metadata:{}'.format(onion), 'last_check')
if metadata_onion['last_check'] is None:
metadata_onion['last_check'] = '********'
metadata_onion['first_seen'] = r_serv_onion.hget('onion_metadata:{}'.format(onion), 'first_seen')
if metadata_onion['first_seen'] is None:
metadata_onion['first_seen'] = '********'
if get_onion_status(onion, metadata_onion['last_check']):
metadata_onion['status_text'] = 'UP'
metadata_onion['status_color'] = 'Green'
metadata_onion['status_icon'] = 'fa-check-circle'
else:
metadata_onion['status_text'] = 'DOWN'
metadata_onion['status_color'] = 'Red'
metadata_onion['status_icon'] = 'fa-times-circle'
list_onion.append(metadata_onion)
crawler_metadata=[]
all_onion_crawler = r_cache.smembers('all_crawler:onion')
for crawler in all_onion_crawler:
crawling_domain = r_cache.hget('metadata_crawler:{}'.format(crawler), 'crawling_domain')
started_time = r_cache.hget('metadata_crawler:{}'.format(crawler), 'started_time')
status_info = r_cache.hget('metadata_crawler:{}'.format(crawler), 'status')
crawler_info = '{} - {}'.format(crawler, started_time)
if status_info=='Waiting' or status_info=='Crawling':
status=True
else:
status=False
crawler_metadata.append({'crawler_info': crawler_info, 'crawling_domain': crawling_domain, 'status_info': status_info, 'status': status})
date_string = '{}-{}-{}'.format(date[0:4], date[4:6], date[6:8])
return jsonify({'last_onions': list_onion, 'statDomains': statDomains, 'crawler_metadata':crawler_metadata})
@hiddenServices.route("/hiddenServices/get_onions_by_daterange", methods=['POST'])
@login_required
@login_read_only
def get_onions_by_daterange():
date_from = request.form.get('date_from')
date_to = request.form.get('date_to')
service_type = request.form.get('service_type')
domains_up = request.form.get('domains_up')
domains_down = request.form.get('domains_down')
domains_tags = request.form.get('domains_tags')
return redirect(url_for('hiddenServices.show_domains_by_daterange', date_from=date_from, date_to=date_to, service_type=service_type, domains_up=domains_up, domains_down=domains_down, domains_tags=domains_tags))
@hiddenServices.route("/hiddenServices/show_domains_by_daterange", methods=['GET'])
@login_required
@login_read_only
def show_domains_by_daterange():
date_from = request.args.get('date_from')
date_to = request.args.get('date_to')
service_type = request.args.get('service_type')
domains_up = request.args.get('domains_up')
domains_down = request.args.get('domains_down')
domains_tags = request.args.get('domains_tags')
# incorrect service type
if not is_valid_service_type(service_type):
service_type = 'onion'
type_name = dic_type_name[service_type]
date_range = []
if date_from is not None and date_to is not None:
#change format
try:
if len(date_from) != 8:
date_from = date_from[0:4] + date_from[5:7] + date_from[8:10]
date_to = date_to[0:4] + date_to[5:7] + date_to[8:10]
date_range = substract_date(date_from, date_to)
except:
pass
if not date_range:
date_range.append(datetime.date.today().strftime("%Y%m%d"))
date_from = date_range[0][0:4] + '-' + date_range[0][4:6] + '-' + date_range[0][6:8]
date_to = date_from
else:
date_from = date_from[0:4] + '-' + date_from[4:6] + '-' + date_from[6:8]
date_to = date_to[0:4] + '-' + date_to[4:6] + '-' + date_to[6:8]
statDomains = {}
statDomains['domains_up'] = 0
statDomains['domains_down'] = 0
statDomains['total'] = 0
statDomains['domains_queue'] = get_nb_domains_inqueue(service_type)
domains_by_day = {}
domain_metadata = {}
stats_by_date = {}
for date in date_range:
stats_by_date[date] = {}
stats_by_date[date]['domain_up'] = 0
stats_by_date[date]['domain_down'] = 0
if domains_up:
domains_up = True
domains_by_day[date] = list(r_serv_onion.smembers('{}_up:{}'.format(service_type, date)))
for domain in domains_by_day[date]:
h = HiddenServices(domain, 'onion')
domain_metadata[domain] = {}
if domains_tags:
domains_tags = True
domain_metadata[domain]['tags'] = h.get_domain_tags(update=True)
domain_metadata[domain]['last_check'] = r_serv_onion.hget('{}_metadata:{}'.format(service_type, domain), 'last_check')
if domain_metadata[domain]['last_check'] is None:
domain_metadata[domain]['last_check'] = '********'
domain_metadata[domain]['first_seen'] = r_serv_onion.hget('{}_metadata:{}'.format(service_type, domain), 'first_seen')
if domain_metadata[domain]['first_seen'] is None:
domain_metadata[domain]['first_seen'] = '********'
domain_metadata[domain]['status_text'] = 'UP'
domain_metadata[domain]['status_color'] = 'Green'
domain_metadata[domain]['status_icon'] = 'fa-check-circle'
statDomains['domains_up'] += 1
stats_by_date[date]['domain_up'] += 1
if domains_down:
domains_down = True
domains_by_day_down = list(r_serv_onion.smembers('{}_down:{}'.format(service_type, date)))
if domains_up:
domains_by_day[date].extend(domains_by_day_down)
else:
domains_by_day[date] = domains_by_day_down
for domain in domains_by_day_down:
#h = HiddenServices(onion_domain, 'onion')
domain_metadata[domain] = {}
#domain_metadata[domain]['tags'] = h.get_domain_tags()
domain_metadata[domain]['last_check'] = r_serv_onion.hget('{}_metadata:{}'.format(service_type, domain), 'last_check')
if domain_metadata[domain]['last_check'] is None:
domain_metadata[domain]['last_check'] = '********'
domain_metadata[domain]['first_seen'] = r_serv_onion.hget('{}_metadata:{}'.format(service_type, domain), 'first_seen')
if domain_metadata[domain]['first_seen'] is None:
domain_metadata[domain]['first_seen'] = '********'
domain_metadata[domain]['status_text'] = 'DOWN'
domain_metadata[domain]['status_color'] = 'Red'
domain_metadata[domain]['status_icon'] = 'fa-times-circle'
statDomains['domains_down'] += 1
stats_by_date[date]['domain_down'] += 1
statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down']
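# Illustrative shape of the stats passed to the template below (values are made up):
#   statDomains   = {'domains_up': 3, 'domains_down': 7, 'total': 10, 'domains_queue': 42}
#   stats_by_date = {'20221025': {'domain_up': 3, 'domain_down': 7}, ...}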
return render_template("domains.html", date_range=date_range, domains_by_day=domains_by_day,
statDomains=statDomains, type_name=type_name,
domain_metadata=domain_metadata,
stats_by_date=stats_by_date,
date_from=date_from, date_to=date_to, domains_up=domains_up, domains_down=domains_down,
domains_tags=domains_tags, type=service_type, bootstrap_label=bootstrap_label)
@hiddenServices.route("/crawlers/download_domain", methods=['GET'])
@login_required
@login_read_only
@no_cache
def download_domain():
domain = request.args.get('domain')
epoch = request.args.get('epoch')
try:
epoch = int(epoch)
except:
epoch = None
port = request.args.get('port')
faup.decode(domain)
unpack_url = faup.get()
## TODO: # FIXME: remove me
try:
domain = unpack_url['domain'].decode()
except:
domain = unpack_url['domain']
if not port:
if unpack_url['port']:
try:
port = unpack_url['port'].decode()
except:
port = unpack_url['port']
else:
port = 80
try:
port = int(port)
except:
port = 80
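# Editor's sketch (hypothetical helper, not in the original file): the two fallbacks above could be
# captured in one place, defaulting to 80 whenever the port is missing or invalid:
#   def sanitize_port(port, default=80):
#       try:
#           return int(port)
#       except (TypeError, ValueError):
#           return default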
type = get_type_domain(domain)
if domain is None or not r_serv_onion.exists('{}_metadata:{}'.format(type, domain)):
return '404'
# # TODO: FIXME return 404
origin_paste = r_serv_onion.hget('{}_metadata:{}'.format(type, domain), 'paste_parent')
h = HiddenServices(domain, type, port=port)
item_core = h.get_domain_crawled_core_item(epoch=epoch)
if item_core:
l_pastes = h.get_last_crawled_pastes(item_root=item_core['root_item'])
else:
l_pastes = []
#dict_links = h.get_all_links(l_pastes)
zip_file = h.create_domain_basic_archive(l_pastes)
filename = domain + '.zip'
return send_file(zip_file, attachment_filename=filename, as_attachment=True)
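# Example call (illustrative; the domain and epoch values are assumptions):
#   GET /crawlers/download_domain?domain=example.onion&epoch=1666706719
# returns 'example.onion.zip' built from the last crawled items of that domain.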
@hiddenServices.route("/hiddenServices/onion_son", methods=['GET'])
@login_required
@login_analyst
def onion_son():
onion_domain = request.args.get('onion_domain')
h = HiddenServices(onion_domain, 'onion')
l_pastes = h.get_last_crawled_pastes()
l_son = h.get_domain_son(l_pastes)
return jsonify(l_son)
# ============= JSON ==============
@hiddenServices.route("/hiddenServices/domain_crawled_7days_json", methods=['GET'])
@login_required
@login_read_only
def domain_crawled_7days_json():
type = 'onion'
## TODO: # FIXME: 404 error
date_range = get_date_range(7)
json_domain_stats = []
#try:
for date in date_range:
nb_domain_up = r_serv_onion.scard('{}_up:{}'.format(type, date))
nb_domain_down = r_serv_onion.scard('{}_down:{}'.format(type, date))
date = date[0:4] + '-' + date[4:6] + '-' + date[6:8]
json_domain_stats.append({ 'date': date, 'value': int( nb_domain_up ), 'nb_domain_down': int( nb_domain_down )})
#except:
#return jsonify()
return jsonify(json_domain_stats)
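# Illustrative JSON response (values are made up):
#   [{"date": "2022-10-19", "value": 4, "nb_domain_down": 1}, ..., {"date": "2022-10-25", "value": 2, "nb_domain_down": 3}]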
@hiddenServices.route('/hiddenServices/domain_crawled_by_type_json')
@login_required
@login_read_only
def domain_crawled_by_type_json():
current_date = request.args.get('date')
type = request.args.get('type')
if type in list_types:
num_day_type = 7
date_range = get_date_range(num_day_type)
range_decoder = []
for date in date_range:
day_crawled = {}
day_crawled['date']= date[0:4] + '-' + date[4:6] + '-' + date[6:8]
day_crawled['UP'] = r_serv_onion.scard('{}_up:{}'.format(type, date))
day_crawled['DOWN'] = r_serv_onion.scard('{}_down:{}'.format(type, date))
range_decoder.append(day_crawled)
return jsonify(range_decoder)
else:
return jsonify('Incorrect Type')
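# Illustrative JSON response, consumed by the stacked bar chart in the template (values are made up):
#   [{"date": "2022-10-24", "UP": 3, "DOWN": 1}, {"date": "2022-10-25", "UP": 5, "DOWN": 2}]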
# ========= REGISTRATION =========
app.register_blueprint(hiddenServices, url_prefix=baseUrl)

View file

@ -1,476 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<title>AIL-Framework</title>
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png')}}">
<!-- Core CSS -->
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/daterangepicker.min.css') }}" rel="stylesheet">
<!-- JS -->
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
<script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
<script language="javascript" src="{{ url_for('static', filename='js/moment.min.js') }}"></script>
<script language="javascript" src="{{ url_for('static', filename='js/jquery.daterangepicker.min.js') }}"></script>
<script language="javascript" src="{{ url_for('static', filename='js/d3.min.js') }}"></script>
<style>
.bar {
fill: steelblue;
}
.bar:hover{
fill: brown;
cursor: pointer;
}
.bar_stack:hover{
cursor: pointer;
}
div.tooltip {
position: absolute;
text-align: center;
padding: 2px;
font: 12px sans-serif;
background: #ebf4fb;
border: 2px solid #b7ddf2;
border-radius: 8px;
pointer-events: none;
color: #000000;
}
</style>
</head>
<body>
{% include 'nav_bar.html' %}
<div class="container-fluid">
<div class="row">
{% include 'crawler/menu_sidebar.html' %}
<div class="col-12 col-lg-10" id="core_content">
<div class="row">
<div class="col-12 col-xl-6">
<div class="table-responsive mt-1 table-hover table-borderless table-striped">
<table class="table">
<thead class="thead-dark">
<tr>
<th>Domain</th>
<th>First Seen</th>
<th>Last Check</th>
<th>Status</th>
</tr>
</thead>
<tbody id="tbody_last_crawled">
{% for metadata_onion in last_onions %}
<tr>
<td><a target="_blank" href="{{ url_for('hiddenServices.onion_domain') }}?onion_domain={{ metadata_onion['domain'] }}">{{ metadata_onion['domain'] }}</a></td>
<td>{{'{}/{}/{}'.format(metadata_onion['first_seen'][0:4], metadata_onion['first_seen'][4:6], metadata_onion['first_seen'][6:8])}}</td>
<td>{{'{}/{}/{}'.format(metadata_onion['last_check'][0:4], metadata_onion['last_check'][4:6], metadata_onion['last_check'][6:8])}}</td>
<td><div style="color:{{metadata_onion['status_color']}}; display:inline-block">
<i class="fas {{metadata_onion['status_icon']}} "></i>
{{metadata_onion['status_text']}}
</div>
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
<a href="{{ url_for('hiddenServices.blacklisted_onion') }}">
<button type="button" class="btn btn-outline-danger">Show Blacklisted Onion</button>
</a>
</div>
<div class="col-12 col-xl-6">
<div class="card text-white bg-dark mb-3 mt-1">
<div class="card-header">
<div class="row">
<div class="col-6">
<span class="badge badge-success">{{ statDomains['domains_up'] }}</span> UP
<span class="badge badge-danger ml-md-3">{{ statDomains['domains_down'] }}</span> DOWN
</div>
<div class="col-6">
<span class="badge badge-success">{{ statDomains['total'] }}</span> Crawled
<span class="badge badge-warning ml-md-3">{{ statDomains['domains_queue'] }}</span> Queue
</div>
</div>
</div>
<div class="card-body">
<h5 class="card-title">Select domains by date range :</h5>
<p class="card-text">Some quick example text to build on the card title and make up the bulk of the card's content.</p>
<form action="{{ url_for('hiddenServices.get_onions_by_daterange') }}" id="hash_selector_form" method='post'>
<div class="row">
<div class="col-6">
<div class="input-group" id="date-range-from">
<div class="input-group-prepend"><span class="input-group-text"><i class="far fa-calendar-alt" aria-hidden="true"></i></span></div>
<input class="form-control" id="date-range-from-input" placeholder="yyyy-mm-dd" value="{{ date_from }}" name="date_from">
</div>
<div class="input-group" id="date-range-to">
<div class="input-group-prepend"><span class="input-group-text"><i class="far fa-calendar-alt" aria-hidden="true"></i></span></div>
<input class="form-control" id="date-range-to-input" placeholder="yyyy-mm-dd" value="{{ date_to }}" name="date_to">
</div>
</div>
<div class="col-6">
<div class="custom-control custom-switch">
<input class="custom-control-input" type="checkbox" name="domains_up" value="True" id="domains_up_id" checked>
<label class="custom-control-label" for="domains_up_id">
<span class="badge badge-success"><i class="fas fa-check-circle"></i> Domains UP </span>
</label>
</div>
<div class="custom-control custom-switch">
<input class="custom-control-input" type="checkbox" name="domains_down" value="True" id="domains_down_id">
<label class="custom-control-label" for="domains_down_id">
<span class="badge badge-danger"><i class="fas fa-times-circle"></i> Domains DOWN</span>
</label>
</div>
<div class="custom-control custom-switch mt-2">
<input class="custom-control-input" type="checkbox" name="domains_tags" value="True" id="domains_tags_id">
<label class="custom-control-label" for="domains_tags_id">
<span class="badge badge-dark"><i class="fas fa-tags"></i> Domains Tags</span>
</label>
</div>
</div>
</div>
<button class="btn btn-primary">
<i class="fas fa-eye"></i> Show Onions
</button>
</form>
</div>
</div>
<div id="barchart_type">
</div>
<div class="card mt-1 mb-1">
<div class="card-header text-white bg-dark">
Crawlers Status
</div>
<div class="card-body px-0 py-0 ">
<table class="table">
<tbody id="tbody_crawler_info">
{% for crawler in crawler_metadata %}
<tr>
<td>
<i class="fas fa-{%if crawler['status']%}check{%else%}times{%endif%}-circle" style="color:{%if crawler['status']%}Green{%else%}Red{%endif%};"></i> {{crawler['crawler_info']}}
</td>
<td>
{{crawler['crawling_domain']}}
</td>
<td style="color:{%if crawler['status']%}Green{%else%}Red{%endif%};">
{{crawler['status_info']}}
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</body>
<script>
var chart = {};
$(document).ready(function(){
$("#page-Crawler").addClass("active");
$("#nav_onion_crawler").addClass("active");
$('#date-range-from').dateRangePicker({
separator : ' to ',
getValue: function(){
if ($('#date-range-from-input').val() && $('#date-range-to-input').val() )
return $('#date-range-from-input').val() + ' to ' + $('#date-range-to-input').val();
else
return '';
},
setValue: function(s,s1,s2){
$('#date-range-from-input').val(s1);
$('#date-range-to-input').val(s2);
}
});
$('#date-range-to').dateRangePicker({
separator : ' to ',
getValue: function(){
if ($('#date-range-from-input').val() && $('#date-range-to-input').val() )
return $('#date-range-from-input').val() + ' to ' + $('#date-range-to-input').val();
else
return '';
},
setValue: function(s,s1,s2){
$('#date-range-from-input').val(s1);
$('#date-range-to-input').val(s2);
}
});
chart.stackBarChart =barchart_type_stack("{{ url_for('hiddenServices.automatic_onion_crawler_json') }}", 'id');
chart.onResize();
$(window).on("resize", function() {
chart.onResize();
});
});
function toggle_sidebar(){
if($('#nav_menu').is(':visible')){
$('#nav_menu').hide();
$('#side_menu').removeClass('border-right')
$('#side_menu').removeClass('col-lg-2')
$('#core_content').removeClass('col-lg-10')
}else{
$('#nav_menu').show();
$('#side_menu').addClass('border-right')
$('#side_menu').addClass('col-lg-2')
$('#core_content').addClass('col-lg-10')
}
}
</script>
<script>/*
function refresh_list_crawled(){
$.getJSON("{{ url_for('hiddenServices.last_crawled_domains_with_stats_json') }}",
function(data) {
var tableRef = document.getElementById('tbody_last_crawled');
$("#tbody_last_crawled").empty()
for (var i = 0; i < data.last_onions.length; i++) {
var data_domain = data.last_onions[i]
var newRow = tableRef.insertRow(tableRef.rows.length);
var newCell = newRow.insertCell(0);
newCell.innerHTML = "<td><a target=\"_blank\" href=\"{{ url_for('hiddenServices.onion_domain') }}?onion_domain="+data_domain['domain']+"\">"+data_domain['domain']+"</a></td>";
newCell = newRow.insertCell(1);
newCell.innerHTML = "<td>"+data_domain['first_seen'].substr(0, 4)+"/"+data_domain['first_seen'].substr(4, 2)+"/"+data_domain['first_seen'].substr(6, 2)+"</td>"
newCell = newRow.insertCell(2);
newCell.innerHTML = "<td>"+data_domain['last_check'].substr(0, 4)+"/"+data_domain['last_check'].substr(4, 2)+"/"+data_domain['last_check'].substr(6, 2)+"</td>"
newCell = newRow.insertCell(3);
newCell.innerHTML = "<td><div style=\"color:"+data_domain['status_color']+"; display:inline-block\"><i class=\"fa "+data_domain['status_icon']+" fa-2x\"></i>"+data_domain['status_text']+"</div></td>"
}
var statDomains = data.statDomains
document.getElementById('text_domain_up').innerHTML = statDomains['domains_up']
document.getElementById('text_domain_down').innerHTML = statDomains['domains_down']
document.getElementById('text_domain_queue').innerHTML = statDomains['domains_queue']
document.getElementById('text_total_domains').innerHTML = statDomains['total']
if(data.crawler_metadata.length!=0){
$("#tbody_crawler_info").empty();
var tableRef = document.getElementById('tbody_crawler_info');
for (var i = 0; i < data.crawler_metadata.length; i++) {
var crawler = data.crawler_metadata[i];
var newRow = tableRef.insertRow(tableRef.rows.length);
var text_color;
var icon;
if(crawler['status']){
text_color = 'Green';
icon = 'check';
} else {
text_color = 'Red';
icon = 'times';
}
var newCell = newRow.insertCell(0);
newCell.innerHTML = "<td><i class=\"fa fa-"+icon+"-circle\" style=\"color:"+text_color+";\"></i>"+crawler['crawler_info']+"</td>";
newCell = newRow.insertCell(1);
newCell.innerHTML = "<td><a target=\"_blank\" href=\"{{ url_for('hiddenServices.onion_domain') }}?onion_domain="+crawler['crawling_domain']+"\">"+crawler['crawling_domain']+"</a></td>";
newCell = newRow.insertCell(2);
newCell.innerHTML = "<td><div style=\"color:"+text_color+";\">"+crawler['status_info']+"</div></td>";
$("#panel_crawler").show();
}
} else {
$("#panel_crawler").hide();
}
}
);
if (to_refresh) {
setTimeout("refresh_list_crawled()", 10000);
}
}*/
</script>
<script>
var margin = {top: 20, right: 90, bottom: 55, left: 0};
// the measured container width is not used: the chart is drawn at a fixed 1000px and rescaled via the viewBox
var width = 1000 - margin.left - margin.right;
var height = 500 - margin.top - margin.bottom;
var x = d3.scaleBand().rangeRound([0, width]).padding(0.1);
var y = d3.scaleLinear().rangeRound([height, 0]);
var xAxis = d3.axisBottom(x);
var yAxis = d3.axisLeft(y);
var color = d3.scaleOrdinal(d3.schemeSet3);
var svg = d3.select("#barchart_type").append("svg")
.attr("id", "thesvg")
.attr("viewBox", "0 0 "+width+" 500")
.attr("width", width + margin.left + margin.right)
.attr("height", height + margin.top + margin.bottom)
.append("g")
.attr("transform", "translate(" + margin.left + "," + margin.top + ")");
function barchart_type_stack(url, id) {
d3.json(url)
.then(function(data){
var labelVar = 'date'; //A
var varNames = d3.keys(data[0])
.filter(function (key) { return key !== labelVar;}); //B
data.forEach(function (d) { //D
var y0 = 0;
d.mapping = varNames.map(function (name) {
return {
name: name,
label: d[labelVar],
y0: y0,
y1: y0 += +d[name]
};
});
d.total = d.mapping[d.mapping.length - 1].y1;
});
x.domain(data.map(function (d) { return (d.date); })); //E
y.domain([0, d3.max(data, function (d) { return d.total; })]);
svg.append("g")
.attr("class", "x axis")
.attr("transform", "translate(0," + height + ")")
.call(xAxis)
.selectAll("text")
.attr("class", "bar")
.on("click", function (d) { window.location.href = "#" })
.attr("transform", "rotate(-18)" )
//.attr("transform", "rotate(-40)" )
.style("text-anchor", "end");
svg.append("g")
.attr("class", "y axis")
.call(yAxis)
.append("text")
.attr("transform", "rotate(-90)")
.attr("y", 6)
.attr("dy", ".71em")
.style("text-anchor", "end");
var selection = svg.selectAll(".series")
.data(data)
.enter().append("g")
.attr("class", "series")
.attr("transform", function (d) { return "translate(" + x((d.date)) + ",0)"; });
selection.selectAll("rect")
.data(function (d) { return d.mapping; })
.enter().append("rect")
.attr("class", "bar_stack")
.attr("width", x.bandwidth())
.attr("y", function (d) { return y(d.y1); })
.attr("height", function (d) { return y(d.y0) - y(d.y1); })
.style("fill", function (d) { return color(d.name); })
.style("stroke", "grey")
.on("mouseover", function (d) { showPopover.call(this, d); })
.on("mouseout", function (d) { removePopovers(); })
.on("click", function(d){ window.location.href = "#" });
data.forEach(function(d) {
if(d.total != 0){
svg.append("text")
.attr("class", "bar")
.attr("dy", "-.35em")
.attr('x', x(d.date) + x.bandwidth()/2)
.attr('y', y(d.total))
.on("click", function () {window.location.href = "#" })
.style("text-anchor", "middle")
.text(d.total);
}
});
drawLegend(varNames);
});
}
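// Editor's note (illustrative, not in the original file): barchart_type_stack expects rows keyed by
// 'date' plus one numeric column per stacked series, e.g. the output of domain_crawled_by_type_json:
//   [{ "date": "2022-10-24", "UP": 3, "DOWN": 1 }, { "date": "2022-10-25", "UP": 5, "DOWN": 2 }]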
function drawLegend (varNames) {
var legend = svg.selectAll(".legend")
.data(varNames.slice().reverse())
.enter().append("g")
.attr("class", "legend")
.attr("transform", function (d, i) { return "translate(0," + i * 20 + ")"; });
legend.append("rect")
.attr("x", 943)
.attr("width", 10)
.attr("height", 10)
.style("fill", color)
.style("stroke", "grey");
legend.append("text")
.attr("class", "svgText")
.attr("x", 941)
.attr("y", 6)
.attr("dy", ".35em")
.style("text-anchor", "end")
.text(function (d) { return d; });
}
function removePopovers () {
$('.popover').each(function() {
$(this).remove();
});
}
function showPopover (d) {
$(this).popover({
title: d.name,
placement: 'top',
container: 'body',
trigger: 'manual',
html : true,
content: function() {
return d.label +
"<br/>num: " + d3.format(",")(d.value ? d.value: d.y1 - d.y0); }
});
$(this).popover('show')
}
chart.onResize = function () {
var aspect = width / height, chart = $("#thesvg");
var targetWidth = chart.parent().width();
chart.attr("width", targetWidth);
chart.attr("height", targetWidth / 2);
}
window.chart = chart;
</script>

View file

@ -4,7 +4,6 @@
'''
Flask functions and routes for the trending modules page
'''
-import redis
import json
import os
import datetime
@ -14,11 +13,12 @@ from flask import Flask, render_template, jsonify, request, Blueprint
from Role_Manager import login_admin, login_analyst
from flask_login import login_required
-import Paste
from whoosh import index
from whoosh.fields import Schema, TEXT, ID
from whoosh.qparser import QueryParser
+from lib.objects.Items import Item
import time
# ============ VARIABLES ============
@ -27,7 +27,6 @@ import Flask_config
app = Flask_config.app
config_loader = Flask_config.config_loader
baseUrl = Flask_config.baseUrl
-r_serv_metadata = Flask_config.r_serv_metadata
max_preview_char = Flask_config.max_preview_char
max_preview_modal = Flask_config.max_preview_modal
bootstrap_label = Flask_config.bootstrap_label
@ -128,15 +127,14 @@ def search():
for x in results:
r.append(x.items()[0][1].replace(PASTES_FOLDER, '', 1))
path = x.items()[0][1].replace(PASTES_FOLDER, '', 1)
-paste = Paste.Paste(path)
-content = paste.get_p_content()
+item = Item(path)
+content = item.get_content()
content_range = max_preview_char if len(content)>max_preview_char else len(content)-1
c.append(content[0:content_range])
-curr_date = str(paste._get_p_date())
-curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:]
+curr_date = item.get_date(separator=True)
paste_date.append(curr_date)
-paste_size.append(paste._get_p_size())
-p_tags = r_serv_metadata.smembers('tag:'+path)
+paste_size.append(item.get_size())
+p_tags = item.get_tags()
l_tags = []
for tag in p_tags:
complete_tag = tag
@ -205,15 +203,14 @@ def get_more_search_result():
path = x.items()[0][1]
path = path.replace(PASTES_FOLDER, '', 1)
path_array.append(path)
-paste = Paste.Paste(path)
-content = paste.get_p_content()
+item = Item(path)
+content = item.get_content()
content_range = max_preview_char if len(content)>max_preview_char else len(content)-1
preview_array.append(content[0:content_range])
-curr_date = str(paste._get_p_date())
-curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:]
+curr_date = item.get_date(separator=True)
date_array.append(curr_date)
-size_array.append(paste._get_p_size())
-p_tags = r_serv_metadata.smembers('tag:'+path)
+size_array.append(item.get_size())
+p_tags = item.get_tags()
l_tags = []
for tag in p_tags:
complete_tag = tag

View file

@ -4,7 +4,6 @@
'''
Flask functions and routes for the trending modules page
'''
-import redis
import json
import os
import sys
@ -15,17 +14,14 @@ from Role_Manager import login_admin, login_analyst, login_read_only, no_cache
from flask_login import login_required
import difflib
-import ssdeep
import Paste
import requests
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
-import Tag
import Item
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
-import Domain
# ============ VARIABLES ============
import Flask_config
@ -52,214 +48,11 @@ showsavedpastes = Blueprint('showsavedpastes', __name__, template_folder='templa
def get_item_screenshot_path(item):
screenshot = r_serv_metadata.hget('paste_metadata:{}'.format(item), 'screenshot')
if screenshot:
screenshot = os.path.join(screenshot[0:2], screenshot[2:4], screenshot[4:6], screenshot[6:8], screenshot[8:10], screenshot[10:12], screenshot[12:])
else:
screenshot = ''
return screenshot
def showpaste(content_range, requested_path):
if PASTES_FOLDER not in requested_path:
# remove full path
requested_path_full = os.path.join(requested_path, PASTES_FOLDER)
else:
requested_path_full = requested_path
requested_path = requested_path.replace(PASTES_FOLDER, '', 1)
# escape directory transversal
if os.path.commonprefix((requested_path_full,PASTES_FOLDER)) != PASTES_FOLDER:
return 'path transversal detected'
vt_enabled = Flask_config.vt_enabled
try:
paste = Paste.Paste(requested_path)
except FileNotFoundError:
abort(404)
p_date = str(paste._get_p_date())
p_date = p_date[6:]+'/'+p_date[4:6]+'/'+p_date[0:4]
p_source = paste.p_source
p_encoding = paste._get_p_encoding()
p_language = 'None'
p_size = paste.p_size
p_mime = paste.p_mime
p_lineinfo = paste.get_lines_info()
p_content = paste.get_p_content()
p_duplicate_str_full_list = paste._get_p_duplicate()
p_duplicate_full_list = []
p_duplicate_list = []
p_simil_list = []
p_date_list = []
p_hashtype_list = []
for dup_list in p_duplicate_str_full_list:
dup_list = dup_list[1:-1].replace('\'', '').replace(' ', '').split(',')
if dup_list[0] == "tlsh":
dup_list[2] = 100 - int(dup_list[2])
else:
dup_list[2] = int(dup_list[2])
p_duplicate_full_list.append(dup_list)
#p_duplicate_full_list.sort(lambda x,y: cmp(x[2], y[2]), reverse=True)
# Combine multiple duplicate paste name and format for display
new_dup_list = []
dup_list_removed = []
for dup_list_index in range(0, len(p_duplicate_full_list)):
if dup_list_index in dup_list_removed:
continue
indices = [i for i, x in enumerate(p_duplicate_full_list) if x[1] == p_duplicate_full_list[dup_list_index][1]]
hash_types = []
comp_vals = []
for i in indices:
hash_types.append(p_duplicate_full_list[i][0])
comp_vals.append(p_duplicate_full_list[i][2])
dup_list_removed.append(i)
#hash_types = str(hash_types).replace("[","").replace("]","") if len(hash_types)==1 else str(hash_types)
#comp_vals = str(comp_vals).replace("[","").replace("]","") if len(comp_vals)==1 else str(comp_vals)
if len(p_duplicate_full_list[dup_list_index]) > 3:
try:
date_paste = str(int(p_duplicate_full_list[dup_list_index][3]))
date_paste = date_paste[0:4]+"-"+date_paste[4:6]+"-"+date_paste[6:8]
except ValueError:
date_paste = str(p_duplicate_full_list[dup_list_index][3])
else:
date_paste = "No date available"
new_dup_list.append([hash_types, p_duplicate_full_list[dup_list_index][1], comp_vals, date_paste])
# Create the list to pass to the webpage
for dup_list in new_dup_list:
hash_type, path, simil_percent, date_paste = dup_list
p_duplicate_list.append(path)
p_simil_list.append(simil_percent)
p_hashtype_list.append(hash_type)
p_date_list.append(date_paste)
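# Editor's note (illustrative, not in the original file): each raw duplicate entry parsed above is a
# stringified list such as "['ssdeep', 'archive/some_feeder/2022/10/25/item.gz', '87', '20221025']" (the path
# and values here are hypothetical); after the merge step, new_dup_list groups entries for the same item as
# [[hash_types], item_path, [similarity_values], 'YYYY-MM-DD'].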
if content_range != 0:
p_content = p_content[0:content_range]
#active taxonomies
active_taxonomies = r_serv_tags.smembers('active_taxonomies')
l_tags = r_serv_metadata.smembers('tag:'+requested_path)
tags_safe = Tag.is_tags_safe(l_tags)
#active galaxies
active_galaxies = r_serv_tags.smembers('active_galaxies')
list_tags = []
for tag in l_tags:
if(tag[9:28] == 'automatic-detection'):
automatic = True
else:
automatic = False
if r_serv_statistics.sismember('tp:'+tag, requested_path):
tag_status_tp = True
else:
tag_status_tp = False
if r_serv_statistics.sismember('fp:'+tag, requested_path):
tag_status_fp = True
else:
tag_status_fp = False
list_tags.append( (tag, automatic, tag_status_tp, tag_status_fp) )
l_64 = []
# load hash files
if r_serv_metadata.scard('hash_paste:'+requested_path) > 0:
set_b64 = r_serv_metadata.smembers('hash_paste:'+requested_path)
for hash in set_b64:
nb_in_file = r_serv_metadata.zscore('nb_seen_hash:'+hash, requested_path)
# item list not updated
if nb_in_file is None:
l_pastes = r_serv_metadata.zrange('nb_seen_hash:'+hash, 0, -1)
for paste_name in l_pastes:
# dynamic update
if PASTES_FOLDER in paste_name:
score = r_serv_metadata.zscore('nb_seen_hash:{}'.format(hash), paste_name)
r_serv_metadata.zrem('nb_seen_hash:{}'.format(hash), paste_name)
paste_name = paste_name.replace(PASTES_FOLDER, '', 1)
r_serv_metadata.zadd('nb_seen_hash:{}'.format(hash), score, paste_name)
nb_in_file = r_serv_metadata.zscore('nb_seen_hash:'+hash, requested_path)
nb_in_file = int(nb_in_file)
estimated_type = r_serv_metadata.hget('metadata_hash:'+hash, 'estimated_type')
file_type = estimated_type.split('/')[0]
# set file icon
if file_type == 'application':
file_icon = 'fa-file-o '
elif file_type == 'audio':
file_icon = 'fa-file-video-o '
elif file_type == 'image':
file_icon = 'fa-file-image-o'
elif file_type == 'text':
file_icon = 'fa-file-text-o'
else:
file_icon = 'fa-file'
saved_path = r_serv_metadata.hget('metadata_hash:'+hash, 'saved_path')
if r_serv_metadata.hexists('metadata_hash:'+hash, 'vt_link'):
b64_vt = True
b64_vt_link = r_serv_metadata.hget('metadata_hash:'+hash, 'vt_link')
b64_vt_report = r_serv_metadata.hget('metadata_hash:'+hash, 'vt_report')
else:
b64_vt = False
b64_vt_link = ''
b64_vt_report = r_serv_metadata.hget('metadata_hash:'+hash, 'vt_report')
# hash never refreshed
if b64_vt_report is None:
b64_vt_report = ''
l_64.append( (file_icon, estimated_type, hash, saved_path, nb_in_file, b64_vt, b64_vt_link, b64_vt_report) )
crawler_metadata = {}
if 'infoleak:submission="crawler"' in l_tags:
crawler_metadata['get_metadata'] = True
crawler_metadata['domain'] = r_serv_metadata.hget('paste_metadata:'+requested_path, 'domain')
crawler_metadata['domain'] = crawler_metadata['domain'].rsplit(':', 1)[0]
if tags_safe:
tags_safe = Tag.is_tags_safe(Domain.get_domain_tags(crawler_metadata['domain']))
crawler_metadata['paste_father'] = r_serv_metadata.hget('paste_metadata:'+requested_path, 'father')
crawler_metadata['real_link'] = r_serv_metadata.hget('paste_metadata:'+requested_path,'real_link')
crawler_metadata['screenshot'] = get_item_screenshot_path(requested_path)
else:
crawler_metadata['get_metadata'] = False
item_parent = Item.get_item_parent(requested_path)
if Flask_config.pymisp is False:
misp = False
else:
misp = True
if Flask_config.HiveApi is False:
hive = False
else:
hive = True
misp_event = r_serv_metadata.get('misp_events:' + requested_path)
if misp_event is None:
misp_eventid = False
misp_url = ''
else:
misp_eventid = True
misp_url = misp_event_url + misp_event
hive_case = r_serv_metadata.get('hive_cases:' + requested_path)
if hive_case is None:
hive_caseid = False
hive_url = ''
else:
hive_caseid = True
hive_url = hive_case_url.replace('id_here', hive_case)
return render_template("show_saved_paste.html", date=p_date, bootstrap_label=bootstrap_label, active_taxonomies=active_taxonomies, active_galaxies=active_galaxies, list_tags=list_tags, source=p_source, encoding=p_encoding, language=p_language, size=p_size, mime=p_mime, lineinfo=p_lineinfo, content=p_content, initsize=len(p_content), duplicate_list = p_duplicate_list, simil_list = p_simil_list, hashtype_list = p_hashtype_list, date_list=p_date_list,
crawler_metadata=crawler_metadata, tags_safe=tags_safe, item_parent=item_parent,
l_64=l_64, vt_enabled=vt_enabled, misp=misp, hive=hive, misp_eventid=misp_eventid, misp_url=misp_url, hive_caseid=hive_caseid, hive_url=hive_url)
def get_item_basic_info(item):
item_basic_info = {}
item_basic_info['date'] = str(item.get_p_date())
@ -286,7 +79,7 @@ def show_item_min(requested_path , content_range=0):
else:
relative_path = requested_path.replace(PASTES_FOLDER, '', 1)
# remove old full path
-#requested_path = requested_path.replace(PASTES_FOLDER, '')
+# requested_path = requested_path.replace(PASTES_FOLDER, '')
# escape directory transversal
if os.path.commonprefix((os.path.realpath(requested_path),PASTES_FOLDER)) != PASTES_FOLDER:
return 'path transversal detected'
@ -370,7 +163,7 @@ def show_item_min(requested_path , content_range=0):
crawler_metadata['paste_father'] = r_serv_metadata.hget('paste_metadata:'+relative_path, 'father')
crawler_metadata['real_link'] = r_serv_metadata.hget('paste_metadata:'+relative_path,'real_link')
crawler_metadata['screenshot'] = get_item_screenshot_path(relative_path)
-#crawler_metadata['har_file'] = Item.get_item_har(relative_path)
+# crawler_metadata['har_file'] = Item.get_item_har(relative_path)
else:
crawler_metadata['get_metadata'] = False
@ -462,13 +255,6 @@ def showDiff():
def screenshot(filename):
return send_from_directory(SCREENSHOT_FOLDER, filename+'.png', as_attachment=True)
-# @showsavedpastes.route('/har/paste/<path:filename>')
-# @login_required
-# @login_read_only
-# def har(filename):
-#     har_file = Item.get_item_har(filename)
-#     return jsonify(har_file)
@showsavedpastes.route('/send_file_to_vt/', methods=['POST'])
@login_required
@login_analyst

View file

@ -15,14 +15,11 @@ from flask import Flask, render_template, jsonify, request, Blueprint, url_for,
from Role_Manager import login_admin, login_analyst, login_user_no_api, login_read_only
from flask_login import login_required, current_user
-import re
-from pprint import pprint
import Levenshtein
# ---------------------------------------------------------------
-import Paste
+from lib.objects.Items import Item
import Term
# ============ VARIABLES ============
@ -262,21 +259,21 @@ def credentials_tracker():
@login_required
@login_user_no_api
def credentials_management_query_paste():
cred = request.args.get('cred')
allPath = request.json['allPath']
paste_info = []
for pathNum in allPath:
path = r_serv_cred.hget(REDIS_KEY_ALL_PATH_SET_REV, pathNum)
-paste = Paste.Paste(path)
-p_date = str(paste._get_p_date())
-p_date = p_date[0:4]+'/'+p_date[4:6]+'/'+p_date[6:8]
-p_source = paste.p_source
-p_encoding = paste._get_p_encoding()
-p_size = paste.p_size
-p_mime = paste.p_mime
-p_lineinfo = paste.get_lines_info()
-p_content = paste.get_p_content()
+item = Item(path)
+p_date = item.get_date(separator=True)
+p_source = item.get_source()
+p_content = item.get_content()
+p_encoding = item.get_mimetype()
+p_size = item.get_size()
+p_mime = p_encoding
+lineinfo = item.get_meta_lines(content=p_content)
+p_lineinfo = lineinfo['nb'], lineinfo['max_length']
if p_content != 0:
p_content = p_content[0:400]
paste_info.append({"path": path, "date": p_date, "source": p_source, "encoding": p_encoding, "size": p_size, "mime": p_mime, "lineinfo": p_lineinfo, "content": p_content})

View file

@ -51,7 +51,7 @@
<th>Total sent</th>
<th>Balance</th>
<th>Inputs address seen in AIL</th>
-<th>Ouputs address seen in AIL</th>
+<th>Outputs address seen in AIL</th>
</tr>
</thead>
<tbody>
@ -62,12 +62,12 @@
<td>{{ dict_object["metadata_card"]["related_btc"]["final_balance"] }}</td>
<td>
{% for btc_addr in dict_object["metadata_card"]["related_btc"]["btc_in"] %}
-<a target="_blank" href="{{ url_for('correlation.show_correlation') }}?object_type=cryptocurrency&type_id=bitcoin&correlation_id={{ btc_addr }}&correlation_objects=paste">{{ btc_addr }}</a>
+<a target="_blank" href="{{ url_for('correlation.show_correlation') }}?type=cryptocurrency&subtype=bitcoin&id={{ btc_addr }}">{{ btc_addr }}</a>
{% endfor %}
</td>
<td>
{% for btc_addr in dict_object["metadata_card"]["related_btc"]["btc_out"] %}
-<a target="_blank" href="{{ url_for('correlation.show_correlation') }}?object_type=cryptocurrency&type_id=bitcoin&correlation_id={{ btc_addr }}&correlation_objects=paste">{{ btc_addr }}</a>
+<a target="_blank" href="{{ url_for('correlation.show_correlation') }}?type=cryptocurrency&subtype=bitcoin&id={{ btc_addr }}">{{ btc_addr }}</a>
{% endfor %}
</td>
</tr>
@ -75,7 +75,7 @@
</table>
</div>
{% else %}
-<a class="btn btn-secondary" href="{{ url_for('correlation.show_correlation') }}?object_type=cryptocurrency&type_id=bitcoin&correlation_id={{ dict_object['correlation_id'] }}&expand_card=True&correlation_objects=paste">Expand Bitcoin address</a>
+<a class="btn btn-secondary" href="{{ url_for('correlation.show_correlation') }}?type=cryptocurrency&subtype=bitcoin&id={{ dict_object['correlation_id'] }}&related_btc=True">Expand Bitcoin address</a>
{% endif %}
{% endif %}

View file

@ -0,0 +1,172 @@
<link href="{{ url_for('static', filename='css/tags.css') }}" rel="stylesheet" type="text/css" />
<script src="{{ url_for('static', filename='js/tags.js') }}"></script>
{#{% with modal_add_tags=dict_object['metadata_card']['add_tags_modal']%}#}
{# {% include 'modals/add_tags.html' %}#}
{#{% endwith %}#}
{% include 'modals/edit_tag.html' %}
<div class="card my-3">
<div class="card-header" style="background-color:#d9edf7;font-size: 15px">
<h4 class="text-secondary">{{ dict_object["correlation_id"] }} :</h4>
<ul class="list-group mb-2">
<li class="list-group-item py-0">
<div class="row">
<div class="col-md-10">
<table class="table">
<thead>
<tr>
<th>Object type</th>
<th>First seen</th>
<th>Last seen</th>
<th>Nb seen</th>
</tr>
</thead>
<tbody>
<tr>
<td>{{ dict_object["object_type"] }}</td>
<td>
<svg height="26" width="26">
<g class="nodes">
<circle cx="13" cy="13" r="13" fill="orange"></circle>
<text x="13" y="13" text-anchor="middle" dominant-baseline="central" class="graph_node_icon {{ dict_object["metadata_card"]["icon"]["icon_class"] }}" font-size="16px">{{ dict_object["metadata_card"]["icon"]["icon_text"] }}</text>
</g>
</svg>
</td>
<td>{{ dict_object["metadata"]['first_seen'] }}</td>
<td>{{ dict_object["metadata"]['last_seen'] }}</td>
<td>{{ dict_object["metadata"]['nb_seen'] }}</td>
</tr>
</tbody>
</table>
</div>
<div class="col-md-1">
<div id="sparkline"></div>
</div>
</div>
</li>
{# <li class="list-group-item py-0">#}
{# <br>#}
{# <div class="mb-3">#}
{# Tags:#}
{# {% for tag in dict_object["metadata"]['tags'] %}#}
{# <button class="btn btn-{{ bootstrap_label[loop.index0 % 5] }}" data-toggle="modal" data-target="#edit_tags_modal"#}
{# data-tagid="{{ tag }}" data-objtype="decoded" data-objid="{{ dict_object["correlation_id"] }}">#}
{# {{ tag }}#}
{# </button>#}
{# {% endfor %}#}
{# <button type="button" class="btn btn-light" data-toggle="modal" data-target="#add_tags_modal">#}
{# <i class="far fa-plus-square"></i>#}
{# </button>#}
{# </div>#}
{# </li>#}
</ul>
{% with obj_type='decoded', obj_id=dict_object['correlation_id'], obj_subtype='' %}
{% include 'modals/investigations_register_obj.html' %}
{% endwith %}
<button type="button" class="btn btn-primary" data-toggle="modal" data-target="#investigations_register_obj_modal">
<i class="fas fa-microscope"></i> Investigations
</button>
</div>
</div>
<script src="{{ url_for('static', filename='js/d3/sparklines.js')}}"></script>
<script>
sparkline("sparkline", {{ dict_object["metadata_card"]["sparkline"] }}, {});
</script>
<script>
function create_line_chart(id, url){
var width = 900;
var height = Math.round(width / 4);
var margin = {top: 20, right: 55, bottom: 50, left: 40};
var x = d3.scaleTime().range([0, width]);
var y = d3.scaleLinear().rangeRound([height, 0]);
var xAxis = d3.axisBottom(x);
var yAxis = d3.axisLeft(y);
var parseTime = d3.timeParse("%Y-%m-%d");
var line = d3.line()
.x(function(d) {
return x(d.date);
}).y(function(d) {
return y(d.value);
});
var svg_line = d3.select('#'+id).append('svg')
.attr("id", "graph_div")
.attr("width", width + margin.left + margin.right)
.attr("height", height + margin.top + margin.bottom)
.append('g')
.attr('transform', "translate("+ margin.left +","+ margin.top +")");
var div = d3.select('body').append('div')
.attr('class', 'tooltip')
.style('opacity', 0);
//add div tooltip
d3.json(url)
.then(function(data){
data.forEach(function(d) {
d.date_label = d.date;
d.date = parseTime(d.date);
d.value = +d.value;
});
// fit the data
x.domain(d3.extent(data, function(d) { return d.date; }));
//x.domain(data.map(function (d) { return d.date; })); //E
y.domain([0, d3.max(data, function(d){ return d.value ; })]);
//line
svg_line.append("path")
.data([data])
.attr("class", "line_graph")
.attr("d", line);
// add X axis
svg_line.append("g")
.attr("transform", "translate(0," + height + ")")
.call(d3.axisBottom(x))
.selectAll("text")
.style("text-anchor", "end")
.attr("transform", "rotate(-45)" );
// Add the Y Axis
svg_line.append("g")
.call(d3.axisLeft(y));
//add a dot circle
svg_line.selectAll('dot')
.data(data).enter()
.append('circle')
.attr('r', 2)
.attr('cx', function(d) { return x(d.date); })
.attr('cy', function(d) { return y(d.value); })
.on('mouseover', function(d) {
div.transition().style('opacity', .9);
div.html('' + d.date_label+ '<br/>' + d.value).style('left', (d3.event.pageX) + 'px')
.style("left", (d3.event.pageX) + "px")
.style("top", (d3.event.pageY - 28) + "px");
})
.on('mouseout', function(d)
{
div.transition().style('opacity', 0);
});
});
}
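// Editor's note (illustrative, not in the original file): create_line_chart expects rows of the form
//   [{ "date": "2022-10-24", "value": 1 }, { "date": "2022-10-25", "value": 3 }]
// i.e. one numeric 'value' per day, as returned by the *_graph_line_json endpoints referenced in show_correlation.html.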
</script>

View file

@ -39,7 +39,7 @@
</div>
<div class="mb-2 float-right">
<a href="{{ url_for('objects_item.showItem')}}?id={{ dict_object["correlation_id"] }}" target="_blank" style="font-size: 15px">
-<button class="btn btn-info"><i class="fas fa-search"></i> Show Paste
+<button class="btn btn-info"><i class="fas fa-search"></i> Show Item
</button>
</a>
</div>

View file

@ -99,12 +99,14 @@
{% include 'correlation/metadata_card_username.html' %}
{% elif dict_object["object_type"] == "decoded" %}
{% include 'correlation/metadata_card_decoded.html' %}
+{% elif dict_object["object_type"] == "cve" %}
+{% include 'correlation/metadata_card_cve.html' %}
{% elif dict_object["object_type"] == "domain" %}
{% include 'correlation/metadata_card_domain.html' %}
{% elif dict_object["object_type"] == "screenshot" %}
{% include 'correlation/metadata_card_screenshot.html' %}
-{% elif dict_object["object_type"] == "paste" %}
-{% include 'correlation/metadata_card_paste.html' %}
+{% elif dict_object["object_type"] == "item" %}
+{% include 'correlation/metadata_card_item.html' %}
{% endif %}
<div class="row">
@ -146,82 +148,87 @@
<div class="card-body text-center px-0 py-0">
<ul class="list-group">
-<li class="list-group-item list-group-item-info">Select Correlation</i></li>
-<li class="list-group-item text-left">
-<form action="{{ url_for('correlation.show_correlation') }}" method="post">
-<input type="hidden" id="object_type" name="object_type" value="{{ dict_object["object_type"] }}">
-<input type="hidden" id="type_id" name="type_id" value="{{ dict_object["metadata"]["type_id"] }}">
-<input type="hidden" id="correlation_id" name="correlation_id" value="{{ dict_object["correlation_id"] }}">
-<div class="form-check">
-<input class="form-check-input" type="checkbox" value="True" id="CryptocurrencyCheck" name="CryptocurrencyCheck" {%if "cryptocurrency" in dict_object["correlation_names"]%}checked{%endif%}>
-<label class="form-check-label" for="CryptocurrencyCheck">Cryptocurrency</label>
-</div>
-<div class="form-check">
-<input class="form-check-input" type="checkbox" value="True" id="DecodedCheck" name="DecodedCheck" {%if "decoded" in dict_object["correlation_names"]%}checked{%endif%}>
-<label class="form-check-label" for="DecodedCheck">Decoded</label>
-</div>
-<div class="form-check">
-<input class="form-check-input" type="checkbox" value="True" id="ScreenshotCheck" name="ScreenshotCheck" {%if "screenshot" in dict_object["correlation_names"]%}checked{%endif%}>
-<label class="form-check-label" for="ScreenshotCheck">Screenshot</label>
-</div>
-<div class="form-check">
-<input class="form-check-input" type="checkbox" value="True" id="PgpCheck" name="PgpCheck" {%if "pgp" in dict_object["correlation_names"]%}checked{%endif%}>
-<label class="form-check-label" for="PgpCheck">PGP</label>
-</div>
-<div class="form-check">
-<input class="form-check-input" type="checkbox" value="True" id="UsernameCheck" name="UsernameCheck" {%if "username" in dict_object["correlation_names"]%}checked{%endif%}>
-<label class="form-check-label" for="UsernameCheck">Username</label>
-</div>
-<div class="form-check">
-<input class="form-check-input" type="checkbox" value="True" id="DomainCheck" name="DomainCheck" {%if "domain" in dict_object["correlation_objects"]%}checked{%endif%}>
-<label class="form-check-label" for="DomainCheck">Domain</label>
-</div>
-<div class="form-check">
-<input class="form-check-input" type="checkbox" value="True" id="PasteCheck" name="PasteCheck" {%if "item" in dict_object["correlation_objects"]%}checked{%endif%}>
-<label class="form-check-label" for="PasteCheck">Item</label>
-</div>
-</li>
-<li class="list-group-item text-left">
-<div class="d-flex mt-1">
-Union&nbsp;&nbsp;
-<div class="custom-control custom-switch">
-<input class="custom-control-input" type="checkbox" name="mode" value="True" id="mode" {%if dict_object["mode"]=="inter"%}checked{%endif%}>
-<label class="custom-control-label" for="mode">Intersection</label>
-</div>
-</div>
-</li>
-<li class="list-group-item text-left">
-<div class="form-group">
-<label for="max_nb_nodes_in">Max number of nodes:</label>
-<input class="form-control" type="number" value="{{dict_object["max_nodes"]}}" min="2" id="max_nb_nodes_in" name="max_nb_nodes_in">
-</div>
-<div class="text-center">
-<input class="btn btn-primary" type="submit" value="Redraw Graph">
-</div>
-</form>
-</li>
+<li class="list-group-item list-group-item-info">Select Correlation</li>
+<form action="{{ url_for('correlation.show_correlation') }}" method="post">
+<li class="list-group-item text-left">
+<input type="hidden" id="obj_type" name="obj_type" value="{{ dict_object["object_type"] }}">
+<input type="hidden" id="subtype" name="subtype" value="{{ dict_object["metadata"]["type_id"] }}">
+<input type="hidden" id="obj_id" name="obj_id" value="{{ dict_object["correlation_id"] }}">
+<div class="form-check">
+<input class="form-check-input" type="checkbox" value="True" id="CveCheck" name="CveCheck" {%if "cve" in dict_object["filter"]%}checked{%endif%}>
+<label class="form-check-label" for="CveCheck">Cve</label>
+</div>
+<div class="form-check">
+<input class="form-check-input" type="checkbox" value="True" id="CryptocurrencyCheck" name="CryptocurrencyCheck" {%if "cryptocurrency" in dict_object["filter"]%}checked{%endif%}>
+<label class="form-check-label" for="CryptocurrencyCheck">Cryptocurrency</label>
+</div>
+<div class="form-check">
+<input class="form-check-input" type="checkbox" value="True" id="DecodedCheck" name="DecodedCheck" {%if "decoded" in dict_object["filter"]%}checked{%endif%}>
+<label class="form-check-label" for="DecodedCheck">Decoded</label>
+</div>
+<div class="form-check">
+<input class="form-check-input" type="checkbox" value="True" id="ScreenshotCheck" name="ScreenshotCheck" {%if "screenshot" in dict_object["filter"]%}checked{%endif%}>
+<label class="form-check-label" for="ScreenshotCheck">Screenshot</label>
+</div>
+<div class="form-check">
+<input class="form-check-input" type="checkbox" value="True" id="PgpCheck" name="PgpCheck" {%if "pgp" in dict_object["filter"]%}checked{%endif%}>
+<label class="form-check-label" for="PgpCheck">PGP</label>
+</div>
+<div class="form-check">
+<input class="form-check-input" type="checkbox" value="True" id="UsernameCheck" name="UsernameCheck" {%if "username" in dict_object["filter"]%}checked{%endif%}>
+<label class="form-check-label" for="UsernameCheck">Username</label>
+</div>
+<div class="form-check">
+<input class="form-check-input" type="checkbox" value="True" id="DomainCheck" name="DomainCheck" {%if "domain" in dict_object["filter"]%}checked{%endif%}>
+<label class="form-check-label" for="DomainCheck">Domain</label>
+</div>
+<div class="form-check">
+<input class="form-check-input" type="checkbox" value="True" id="ItemCheck" name="ItemCheck" {%if "item" in dict_object["filter"]%}checked{%endif%}>
+<label class="form-check-label" for="ItemCheck">Item</label>
+</div>
+</li>
+<li class="list-group-item text-left">
+<div class="d-flex mt-1">
+Union&nbsp;&nbsp;
+<div class="custom-control custom-switch">
+<input class="custom-control-input" type="checkbox" name="mode" value="True" id="mode" {%if dict_object["mode"]=="inter"%}checked{%endif%}>
+<label class="custom-control-label" for="mode">Intersection</label>
+</div>
+</div>
+</li>
+<li class="list-group-item text-left">
+<div class="form-group">
+<label for="max_nb_nodes_in">Max number of nodes:</label>
+<input class="form-control" type="number" value="{{dict_object["max_nodes"]}}" min="2" id="max_nb_nodes_in" name="max_nb_nodes_in">
+</div>
+<div class="text-center">
+<input class="btn btn-primary" type="submit" value="Redraw Graph">
+</div>
+</li>
+</form>
</ul>
<ul class="list-group">
<li class="list-group-item list-group-item-info"><i class="fas fa-info-circle fa-2x"></i></li>
<li class="list-group-item text-left">
<p>Double click on a node to open this object<br><br>
<svg height="26" width="26">
<g class="nodes">
<circle cx="13" cy="13" r="13" fill="orange"></circle>
</g>
</svg>
Current Correlation<br>
</p>
</li>
</ul>
</div>
</div>
@ -236,7 +243,7 @@
</div>
</div>
-{% if dict_object["object_type"] in ["decoded", "pgp", "cryptocurrency"] %}
+{% if dict_object["object_type"] in ["cve", "decoded", "pgp", "cryptocurrency"] %}
<div class="card">
<div class="card-header">
<i class="fas fa-chart-bar"></i> Graph
@ -257,14 +264,16 @@ var all_graph = {};
$(document).ready(function(){
$("#page-Decoded").addClass("active");
-all_graph.node_graph = create_graph("{{ url_for('correlation.graph_node_json') }}?correlation_id={{ dict_object["correlation_id"] }}&object_type={{ dict_object["object_type"] }}&mode={{ dict_object["mode"] }}&correlation_names={{ dict_object["correlation_names_str"] }}&correlation_objects={{ dict_object["correlation_objects_str"] }}&max_nodes={{dict_object["max_nodes"]}}{% if 'type_id' in dict_object["metadata"] %}&type_id={{ dict_object["metadata"]["type_id"] }}{% endif %}");
+all_graph.node_graph = create_graph("{{ url_for('correlation.graph_node_json') }}?id={{ dict_object["correlation_id"] }}&type={{ dict_object["object_type"] }}&mode={{ dict_object["mode"] }}&filter={{ dict_object["filter_str"] }}&max_nodes={{dict_object["max_nodes"]}}{% if 'type_id' in dict_object["metadata"] %}&subtype={{ dict_object["metadata"]["type_id"] }}{% endif %}");
{% if dict_object["object_type"] == "pgp" %}
all_graph.line_chart = create_line_chart('graph_line', "{{ url_for('hashDecoded.pgpdump_graph_line_json') }}?type_id={{dict_object["metadata"]["type_id"]}}&key_id={{dict_object["correlation_id"]}}");
{% elif dict_object["object_type"] == "cryptocurrency" %}
all_graph.line_chart = create_line_chart('graph_line', "{{ url_for('hashDecoded.cryptocurrency_graph_line_json') }}?type_id={{dict_object["metadata"]["type_id"]}}&key_id={{dict_object["correlation_id"]}}");
{% elif dict_object["object_type"] == "decoded" %}
all_graph.line_chart = create_line_chart('graph_line', "{{ url_for('hashDecoded.hash_graph_line_json') }}?hash={{dict_object["correlation_id"]}}");
-{% endif %}
+{% elif dict_object["object_type"] == "cve" %}
+all_graph.line_chart = create_line_chart('graph_line', "{{ url_for('hashDecoded.hash_graph_line_json') }}?hash={{dict_object["correlation_id"]}}");
+{% endif %}
all_graph.onResize();
});

View file

@ -37,7 +37,7 @@
<h5 class="card-title">Crawl a Domain</h5> <h5 class="card-title">Crawl a Domain</h5>
</div> </div>
<div class="card-body"> <div class="card-body">
<p class="card-text">Enter a domain and choose what kind of data you want.</p> <p class="card-text">Enter an url or a domain and choose what kind of option you want.</p>
<form action="{{ url_for('crawler_splash.send_to_spider') }}" method='post'> <form action="{{ url_for('crawler_splash.send_to_spider') }}" method='post'>
<div class="row"> <div class="row">
<div class="col-12 col-lg-6"> <div class="col-12 col-lg-6">
@ -49,22 +49,22 @@
<div class="custom-control custom-switch"> <div class="custom-control custom-switch">
<input class="custom-control-input" type="checkbox" name="queue_type_selector" value="True" id="queue_type_selector"> <input class="custom-control-input" type="checkbox" name="queue_type_selector" value="True" id="queue_type_selector">
<label class="custom-control-label" for="queue_type_selector"> <label class="custom-control-label" for="queue_type_selector">
<i class="fas fa-splotch"></i> &nbsp;Splash Name <i class="fas fa-splotch"></i> &nbsp;Proxy
</label> </label>
</div> </div>
</div> </div>
<div id="div_crawler_queue_type"> <div id="div_crawler_queue_type">
<select class="custom-select form-control" name="crawler_queue_type" id="crawler_queue_type"> <select class="custom-select form-control" name="crawler_queue_type" id="crawler_queue_type">
{%for crawler_type in all_crawlers_types%} {%for crawler_type in crawlers_types%}
<option value="{{crawler_type}}" {%if crawler_type=='tor'%}selected{%endif%}>{{crawler_type}}</option> <option value="{{crawler_type}}" {%if crawler_type=='tor'%}selected{%endif%}>{{crawler_type}}</option>
{%endfor%} {%endfor%}
</select> </select>
</div> </div>
<div id="div_splash_name"> <div id="div_proxy_name">
<select class="custom-select form-control" name="splash_name" id="splash_name"> <select class="custom-select form-control" name="proxy_name" id="proxy_name">
<option value="None" selected>Don't use a special splash crawler</option> <option value="None" selected>Use a proxy</option>
{%for splash_name in all_splash_name%} {%for proxy in proxies%}
<option value="{{splash_name}}">{{splash_name}}</option> <option value="{{proxy}}">{{proxy}}</option>
{%endfor%} {%endfor%}
</select> </select>
</div> </div>
@@ -122,15 +122,16 @@
      <span class="input-group-text">Depth Limit</span>
    </div>
  </div>
- <div class="input-group mt-2">
-   <div class="input-group-prepend">
-     <span class="input-group-text bg-light"><i class="fas fa-copy"></i>&nbsp;</span>
-   </div>
-   <input class="form-control" type="number" id="max_pages" name="max_pages" min="1" value="1" required>
-   <div class="input-group-append">
-     <span class="input-group-text">Max Pages</span>
-   </div>
- </div>
+ {# TEMPORARY DISABLED #}
+ {# <div class="input-group mt-2">#}
+ {# <div class="input-group-prepend">#}
+ {# <span class="input-group-text bg-light"><i class="fas fa-copy"></i>&nbsp;</span>#}
+ {# </div>#}
+ {# <input class="form-control" type="number" id="max_pages" name="max_pages" min="1" value="1" required>#}
+ {# <div class="input-group-append">#}
+ {# <span class="input-group-text">Max Pages</span>#}
+ {# </div>#}
+ {# </div>#}
  </div>
</div>
@@ -204,10 +205,10 @@ function manual_crawler_input_controler() {
function queue_type_selector_input_controler() {
  if($('#queue_type_selector').is(':checked')){
    $("#div_crawler_queue_type").hide();
-   $("#div_splash_name").show();
+   $("#div_proxy_name").show();
  }else{
    $("#div_crawler_queue_type").show();
-   $("#div_splash_name").hide();
+   $("#div_proxy_name").hide();
  }
}

View file

@@ -7,10 +7,13 @@
  <!-- Core CSS -->
  <link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
  <link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
+ <link href="{{ url_for('static', filename='css/daterangepicker.min.css') }}" rel="stylesheet">
  <!-- JS -->
  <script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
  <script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
+ <script src="{{ url_for('static', filename='js/moment.min.js') }}"></script>
+ <script src="{{ url_for('static', filename='js/jquery.daterangepicker.min.js') }}"></script>
</head>
@@ -33,15 +36,15 @@
<div class="card mt-1 mb-1">
  <div class="card-header text-white bg-dark">
-   <h5><a class="text-info" href="{{ url_for('hiddenServices.Crawler_Splash_last_by_type')}}?type=onion"><i class="fas fa-user-secret"></i> Onions Crawlers</a></h5>
+   <h5><a class="text-info" href="{{ url_for('crawler_splash.crawlers_last_domains')}}?type=onion"><i class="fas fa-user-secret"></i> Onions Crawlers</a></h5>
    <div class="row">
      <div class="col-6">
-       <a href="{{ url_for('hiddenServices.show_domains_by_daterange') }}?service_type=onion&domains_up=True&date_from={{date}}&date_to={{date}}" class="badge badge-success" id="stat_onion_domain_up">{{ splash_crawlers_latest_stats['onion']['domains_up'] }}</a> UP
-       <a href="{{ url_for('hiddenServices.show_domains_by_daterange') }}?service_type=onion&domains_down=True&date_from={{date}}&date_to={{date}}" class="badge badge-danger ml-md-3" id="stat_onion_domain_down">{{ splash_crawlers_latest_stats['onion']['domains_down'] }}</a> DOWN
+       <a href="{{ url_for('crawler_splash.domains_search_date') }}?service_type=onion&domains_up=True&date_from={{date}}&date_to={{date}}" class="badge badge-success" id="stat_onion_domain_up">{{ crawlers_latest_stats['onion']['up'] }}</a> UP
+       <a href="{{ url_for('crawler_splash.domains_search_date') }}?service_type=onion&domains_down=True&date_from={{date}}&date_to={{date}}" class="badge badge-danger ml-md-3" id="stat_onion_domain_down">{{ crawlers_latest_stats['onion']['down'] }}</a> DOWN
      </div>
      <div class="col-6">
-       <a href="{{ url_for('hiddenServices.show_domains_by_daterange') }}?service_type=onion&domains_up=True&domains_down=True&date_from={{date}}&date_to={{date}}" class="badge badge-success" id="stat_onion_total">{{ splash_crawlers_latest_stats['onion']['total'] }}</a> Crawled
-       <span class="badge badge-warning ml-md-3" id="stat_onion_queue">{{ splash_crawlers_latest_stats['onion']['domains_queue'] }}</span> Queue
+       <a href="{{ url_for('crawler_splash.domains_search_date') }}?service_type=onion&domains_up=True&domains_down=True&date_from={{date}}&date_to={{date}}" class="badge badge-success" id="stat_onion_total">{{ crawlers_latest_stats['onion']['crawled'] }}</a> Crawled
+       <span class="badge badge-warning ml-md-3" id="stat_onion_queue">{{ crawlers_latest_stats['onion']['queue'] }}</span> Queue
      </div>
    </div>
  </div>
@@ -51,15 +54,15 @@
<div class="col-xl-6">
  <div class="card mt-1 mb-1">
    <div class="card-header text-white bg-dark">
-     <h5><a class="text-info" href="{{ url_for('hiddenServices.Crawler_Splash_last_by_type')}}?type=regular"><i class="fab fa-html5"></i> Regular Crawlers</a></h5>
+     <h5><a class="text-info" href="{{ url_for('crawler_splash.crawlers_last_domains')}}?type=web"><i class="fab fa-html5"></i> Web Crawlers</a></h5>
      <div class="row">
        <div class="col-6">
-         <a href="{{ url_for('hiddenServices.show_domains_by_daterange') }}?service_type=regular&domains_up=True&date_from={{date}}&date_to={{date}}" class="badge badge-success" id="stat_regular_domain_up">{{ splash_crawlers_latest_stats['regular']['domains_up'] }}</a> UP
-         <a href="{{ url_for('hiddenServices.show_domains_by_daterange') }}?service_type=regular&domains_down=True&date_from={{date}}&date_to={{date}}" class="badge badge-danger ml-md-3" id="stat_regular_domain_down">{{ splash_crawlers_latest_stats['regular']['domains_down'] }}</a> DOWN
+         <a href="{{ url_for('crawler_splash.domains_search_date') }}?service_type=web&domains_up=True&date_from={{date}}&date_to={{date}}" class="badge badge-success" id="stat_web_domain_up">{{ crawlers_latest_stats['web']['up'] }}</a> UP
+         <a href="{{ url_for('crawler_splash.domains_search_date') }}?service_type=web&domains_down=True&date_from={{date}}&date_to={{date}}" class="badge badge-danger ml-md-3" id="stat_web_domain_down">{{ crawlers_latest_stats['web']['down'] }}</a> DOWN
        </div>
        <div class="col-6">
-         <a href="{{ url_for('hiddenServices.show_domains_by_daterange') }}?service_type=regular&domains_up=True&domains_down=True&date_from={{date}}&date_to={{date}}" class="badge badge-success" id="stat_regular_total">{{ splash_crawlers_latest_stats['regular']['total'] }}</a> Crawled
-         <span class="badge badge-warning ml-md-3" id="stat_regular_queue">{{ splash_crawlers_latest_stats['regular']['domains_queue'] }}</span> Queue
+         <a href="{{ url_for('crawler_splash.domains_search_date') }}?service_type=web&domains_up=True&domains_down=True&date_from={{date}}&date_to={{date}}" class="badge badge-success" id="stat_web_total">{{ crawlers_latest_stats['web']['crawled'] }}</a> Crawled
+         <span class="badge badge-warning ml-md-3" id="stat_web_queue">{{ crawlers_latest_stats['web']['queue'] }}</span> Queue
        </div>
      </div>
    </div>
@@ -69,23 +72,23 @@
<table class="table">
  <tbody id="tbody_crawler_onion_info">
-   {% for splash_crawler in all_splash_crawler_status %}
+   {% for crawler in crawlers_status %}
      <tr>
        <td>
-         <i class="fas fa-{%if splash_crawler['status']%}check{%else%}times{%endif%}-circle" style="color:{%if splash_crawler['status']%}Green{%else%}Red{%endif%};"></i> {{splash_crawler['crawler_info']}}
+         <i class="fas fa-{%if crawler['status']%}check{%else%}times{%endif%}-circle" style="color:{%if crawler['status']%}Green{%else%}Red{%endif%};"></i> {{crawler['start_time']}}
        </td>
        <td>
-         {%if splash_crawler['type']=='onion'%}
+         {%if crawler['type']=='onion'%}
            <i class="fas fa-user-secret"></i>
          {%else%}
            <i class="fab fa-html5">
          {%endif%}
        </td>
        <td>
-         {{splash_crawler['crawling_domain']}}
+         {{crawler['domain']}}
        </td>
-       <td style="color:{%if splash_crawler['status']%}Green{%else%}Red{%endif%};">
-         {{splash_crawler['status_info']}}
+       <td style="color:{%if crawler['status']%}Green{%else%}Red{%endif%};">
+         {{crawler['status']}}
        </td>
      </tr>
    {% endfor %}
@@ -93,6 +96,9 @@
  </table>
  {% include 'domains/block_domains_name_search.html' %}
+ <div class="d-flex justify-content-center my-4">
+   {% include 'crawler/show_domains_by_daterange.html' %}
+ </div>
  <hr>
  <div class="row mb-3">
@@ -134,6 +140,32 @@ var to_refresh = false
$(document).ready(function(){
  $("#page-Crawler").addClass("active");
  $("#nav_dashboard").addClass("active");
+ $('#date-range-from').dateRangePicker({
+   separator : ' to ',
+   getValue: function(){
+     if ($('#date-range-from-input').val() && $('#date-range-to-input').val() )
+       return $('#date-range-from-input').val() + ' to ' + $('#date-range-to-input').val();
+     else
+       return '';
+   },
+   setValue: function(s,s1,s2){
+     $('#date-range-from-input').val(s1);
+     $('#date-range-to-input').val(s2);
+   }
+ });
+ $('#date-range-to').dateRangePicker({
+   separator : ' to ',
+   getValue: function(){
+     if ($('#date-range-from-input').val() && $('#date-range-to-input').val() )
+       return $('#date-range-from-input').val() + ' to ' + $('#date-range-to-input').val();
+     else
+       return '';
+   },
+   setValue: function(s,s1,s2){
+     $('#date-range-from-input').val(s1);
+     $('#date-range-to-input').val(s2);
+   }
+ });
  $( window ).on("focus", function() {
    to_refresh = true
    refresh_crawler_status();
@@ -144,6 +176,7 @@ $(document).ready(function(){
  to_refresh = true
  refresh_crawler_status();
});
+
function toggle_sidebar(){
@@ -165,21 +198,21 @@ function refresh_crawler_status(){
  $.getJSON("{{ url_for('crawler_splash.crawler_dashboard_json') }}",
    function(data) {
-     $('#stat_onion_domain_up').text(data.splash_crawlers_latest_stats['onion']['domains_up']);
-     $('#stat_onion_domain_down').text(data.splash_crawlers_latest_stats['onion']['domains_down']);
-     $('#stat_onion_total').text(data.splash_crawlers_latest_stats['onion']['total']);
-     $('#stat_onion_queue').text(data.splash_crawlers_latest_stats['onion']['domains_queue']);
-     $('#stat_regular_domain_up').text(data.splash_crawlers_latest_stats['regular']['domains_up']);
-     $('#stat_regular_domain_down').text(data.splash_crawlers_latest_stats['regular']['domains_down']);
-     $('#stat_regular_total').text(data.splash_crawlers_latest_stats['regular']['total']);
-     $('#stat_regular_queue').text(data.splash_crawlers_latest_stats['regular']['domains_queue']);
-     if(data.all_splash_crawler_status.length!=0){
+     $('#stat_onion_domain_up').text(data.stats['onion']['up']);
+     $('#stat_onion_domain_down').text(data.stats['onion']['down']);
+     $('#stat_onion_total').text(data.stats['onion']['crawled']);
+     $('#stat_onion_queue').text(data.stats['onion']['queue']);
+     $('#stat_web_domain_up').text(data.stats['web']['up']);
+     $('#stat_web_domain_down').text(data.stats['web']['down']);
+     $('#stat_web_total').text(data.stats['web']['crawled']);
+     $('#stat_web_queue').text(data.stats['web']['queue']);
+     if(data.crawlers_status.length!=0){
        $("#tbody_crawler_onion_info").empty();
        var tableRef = document.getElementById('tbody_crawler_onion_info');
-       for (var i = 0; i < data.all_splash_crawler_status.length; i++) {
-         var crawler = data.all_splash_crawler_status[i];
+       for (var i = 0; i < data.crawlers_status.length; i++) {
+         var crawler = data.crawlers_status[i];
          var newRow = tableRef.insertRow(tableRef.rows.length);
          var text_color;
          var icon;
@@ -198,16 +231,16 @@ function refresh_crawler_status(){
          }
          var newCell = newRow.insertCell(0);
-         newCell.innerHTML = "<td><i class=\"fas fa-"+icon+"-circle\" style=\"color:"+text_color+";\"></i> "+crawler['crawler_info']+"</td>";
+         newCell.innerHTML = "<td><i class=\"fas fa-"+icon+"-circle\" style=\"color:"+text_color+";\"></i> "+crawler['start_time']+"</td>";
          var newCell = newRow.insertCell(1);
          newCell.innerHTML = "<td><i class=\""+icon_t+"\"></i></td>";
          newCell = newRow.insertCell(2);
-         newCell.innerHTML = "<td>"+crawler['crawling_domain']+"</td>";
+         newCell.innerHTML = "<td>"+crawler['domain']+"</td>";
          newCell = newRow.insertCell(3);
-         newCell.innerHTML = "<td><div style=\"color:"+text_color+";\">"+crawler['status_info']+"</div></td>";
+         newCell.innerHTML = "<td><div style=\"color:"+text_color+";\">"+crawler['status']+"</div></td>";
          //$("#panel_crawler").show();
        }
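The refresh loop above relies on two top-level keys in the crawler_dashboard_json response, stats and crawlers_status. A sketch of that payload shape, inferred only from the fields the JavaScript reads; the sample values are placeholders, not real output:

import json

# Illustrative payload for refresh_crawler_status(); keys mirror the
# data.stats[...] and data.crawlers_status[...] accesses above.
dashboard_payload = {
    'stats': {
        'onion': {'up': 3, 'down': 1, 'crawled': 4, 'queue': 12},
        'web':   {'up': 7, 'down': 2, 'crawled': 9, 'queue': 5},
    },
    'crawlers_status': [
        {
            'start_time': '2022-10-25 16:25',  # first column of the status table
            'type': 'onion',                   # selects the onion/web icon
            'domain': 'example.onion',         # placeholder domain
            'status': 'Crawling',              # printed in the last column, also used as the up/down test
        },
    ],
}

print(json.dumps(dashboard_payload, indent=2))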

View file

@@ -0,0 +1,154 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>AIL-Framework</title>
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png')}}">
<!-- Core CSS -->
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/daterangepicker.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/dataTables.bootstrap.min.css') }}" rel="stylesheet">
<!-- JS -->
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
<script src="{{ url_for('static', filename='js/jquery.dataTables.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/dataTables.bootstrap.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/moment.min.js') }}"></script>
<script src="{{ url_for('static', filename='js/jquery.daterangepicker.min.js') }}"></script>
</head>
<body>
{% include 'nav_bar.html' %}
<div class="container-fluid">
<div class="row">
{% include 'crawler/menu_sidebar.html' %}
<div class="col-12 col-lg-10" id="core_content">
{% include 'crawler/show_domains_by_daterange.html' %}
{% for date in dict_domains %}
<div class="card text-center mt-1 mb-3">
<div class="card-header bg-dark text-white">
<h3 style="text-align:center;">{{'{}/{}/{}'.format(date[0:4], date[4:6], date[6:8])}}</h3>
</div>
<div class="card-body px-1">
<table id="table_{{date}}" class="table table-striped table-bordered">
<thead class="bg-dark text-white">
<tr>
<th>Domain</th>
<th>First Seen</th>
<th>Last Check</th>
<th>Status</th>
</tr>
</thead>
<tbody>
{% for dict_domain in dict_domains[date] %}
<tr>
<td>
<a target="_blank" href="{{ url_for('crawler_splash.showDomain') }}?domain={{ dict_domain['domain'] }}">{{ dict_domain['domain'] }}</a>
<div>
{% for tag in dict_domain['tags'] %}
<a href="{{ url_for('tags_ui.get_obj_by_tags') }}?object_type=domain&ltags={{ tag }}">
<span class="badge badge-{{ bootstrap_label[loop.index0 % 5] }}">{{ tag }}</span>
</a>
{% endfor %}
</div>
</td>
<td>{{dict_domain['first_seen']}}</td>
<td>{{dict_domain['last_check']}}</td>
<td>
{% if dict_domain['status'] %}
<div style="color:Green; display:inline-block">
<i class="fas fa-check-circle"></i> UP
</div>
{% else %}
<div style="color:Red; display:inline-block">
<i class="fas fa-times-circle"></i> DOWN
</div>
{% endif %}
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
{% endfor %}
</div>
</div>
</div>
</body>
<script>
$(document).ready(function(){
$("#page-Crawler").addClass("active");
$('#date-range-from').dateRangePicker({
separator : ' to ',
getValue: function(){
if ($('#date-range-from-input').val() && $('#date-range-to-input').val() )
return $('#date-range-from-input').val() + ' to ' + $('#date-range-to-input').val();
else
return '';
},
setValue: function(s,s1,s2){
$('#date-range-from-input').val(s1);
$('#date-range-to-input').val(s2);
}
});
$('#date-range-to').dateRangePicker({
separator : ' to ',
getValue: function(){
if ($('#date-range-from-input').val() && $('#date-range-to-input').val() )
return $('#date-range-from-input').val() + ' to ' + $('#date-range-to-input').val();
else
return '';
},
setValue: function(s,s1,s2){
$('#date-range-from-input').val(s1);
$('#date-range-to-input').val(s2);
}
});
{% for date in dict_domains %}
$('#table_{{date}}').DataTable({
"aLengthMenu": [[5, 15, 30, -1], [5, 15, 30, "All"]],
"iDisplayLength": 15,
"order": [[ 0, "asc" ]]
});
{% endfor %}
});
function toggle_sidebar(){
if($('#nav_menu').is(':visible')){
$('#nav_menu').hide();
$('#side_menu').removeClass('border-right')
$('#side_menu').removeClass('col-lg-2')
$('#core_content').removeClass('col-lg-10')
}else{
$('#nav_menu').show();
$('#side_menu').addClass('border-right')
$('#side_menu').addClass('col-lg-2')
$('#core_content').addClass('col-lg-10')
}
}
</script>
</html>
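For reference, a sketch of the dict_domains context variable this template iterates over, inferred from the fields used above (one key per YYYYMMDD date); the values are placeholders:

# Illustrative structure only; field names come from the template above.
dict_domains = {
    '20221025': [
        {
            'domain': 'example.onion',    # linked to crawler_splash.showDomain
            'tags': ['placeholder:tag'],  # rendered as badges (illustrative tag)
            'first_seen': '20221020',
            'last_check': '20221025',
            'status': True,               # True renders UP, False renders DOWN
        },
    ],
}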

View file

@@ -0,0 +1,338 @@
<!DOCTYPE html>
<html>
<head>
<title>AIL-Framework</title>
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png')}}">
<!-- Core CSS -->
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/daterangepicker.min.css') }}" rel="stylesheet">
<!-- JS -->
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
<script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
<script language="javascript" src="{{ url_for('static', filename='js/moment.min.js') }}"></script>
<script language="javascript" src="{{ url_for('static', filename='js/jquery.daterangepicker.min.js') }}"></script>
<script language="javascript" src="{{ url_for('static', filename='js/d3.min.js') }}"></script>
<style>
.bar {
fill: steelblue;
}
.bar:hover{
fill: brown;
cursor: pointer;
}
.bar_stack:hover{
cursor: pointer;
}
.popover{
max-width: 100%;
}
.domain_name {
display:inline-block;
overflow: hidden;
white-space: nowrap;
text-overflow: ellipsis;
max-width: 400px;
}
</style>
</head>
<body>
{% include 'nav_bar.html' %}
<div class="container-fluid">
<div class="row">
{% include 'crawler/menu_sidebar.html' %}
<div class="col-12 col-lg-10" id="core_content">
{% include 'crawler/crawler_disabled.html' %}
<div class="row">
<div class="col-12 col-xl-6">
<div class="table-responsive mt-1 table-hover table-borderless table-striped">
<table class="table">
<thead class="thead-dark">
<tr>
<th>Domain</th>
<th>First Seen</th>
<th>Last Check</th>
<th>Status</th>
</tr>
</thead>
<tbody id="tbody_last_crawled">
{% for domain in domains %}
<tr data-toggle="popover" data-trigger="hover"
title="<span class='badge badge-dark'>{{domain['domain']}}</span>"
data-content="epoch: {{domain['epoch']}}<br>last status: {{ domain['status'] }}">
<td><a target="_blank" class="domain_name" href="{{ url_for('crawler_splash.showDomain') }}?domain={{ domain['domain'] }}&epoch={{domain['epoch']}}">{{ domain['domain'] }}</a></td>
<td>{{domain['first_seen']}}</td>
<td>{{domain['last_check']}}</td>
<td>
{% if domain['status_epoch'] %}
<div style="color:Green; display:inline-block">
<i class="fas fa-check-circle"></i> UP
</div>
{% else %}
<div style="color:Red; display:inline-block">
<i class="fas fa-times-circle"></i> DOWN
</div>
{% endif %}
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
<a href="{{ url_for('hiddenServices.blacklisted_domains') }}?type={{type}}">
<button type="button" class="btn btn-outline-danger">Show Blacklisted {{type_name}}s</button>
</a>
</div>
<div class="col-12 col-xl-6">
{% include 'crawler/show_domains_by_daterange.html' %}
<div id="barchart_type"></div>
</div>
</div>
</div>
</div>
</div>
</body>
<script>
var chart = {};
$(document).ready(function(){
$("#page-Crawler").addClass("active");
$("#nav_{{type}}_crawler").addClass("active");
$('#date-range-from').dateRangePicker({
separator : ' to ',
getValue: function(){
if ($('#date-range-from-input').val() && $('#date-range-to-input').val() )
return $('#date-range-from-input').val() + ' to ' + $('#date-range-to-input').val();
else
return '';
},
setValue: function(s,s1,s2){
$('#date-range-from-input').val(s1);
$('#date-range-to-input').val(s2);
}
});
$('#date-range-to').dateRangePicker({
separator : ' to ',
getValue: function(){
if ($('#date-range-from-input').val() && $('#date-range-to-input').val() )
return $('#date-range-from-input').val() + ' to ' + $('#date-range-to-input').val();
else
return '';
},
setValue: function(s,s1,s2){
$('#date-range-from-input').val(s1);
$('#date-range-to-input').val(s2);
}
});
chart.stackBarChart =barchart_type_stack("{{ url_for('crawler_splash.crawlers_last_domains_json') }}?type={{type}}", 'id');
chart.onResize();
$(window).on("resize", function() {
chart.onResize();
});
$('[data-toggle="popover"]').popover({
placement: 'top',
container: 'body',
html : true,
});
});
function toggle_sidebar(){
if($('#nav_menu').is(':visible')){
$('#nav_menu').hide();
$('#side_menu').removeClass('border-right')
$('#side_menu').removeClass('col-lg-2')
$('#core_content').removeClass('col-lg-10')
}else{
$('#nav_menu').show();
$('#side_menu').addClass('border-right')
$('#side_menu').addClass('col-lg-2')
$('#core_content').addClass('col-lg-10')
}
}
</script>
<script>
var margin = {top: 20, right: 90, bottom: 55, left: 0},
width = parseInt(d3.select('#barchart_type').style('width'), 10);
width = 1000 - margin.left - margin.right,
height = 500 - margin.top - margin.bottom;
var x = d3.scaleBand().rangeRound([0, width]).padding(0.1);
var y = d3.scaleLinear().rangeRound([height, 0]);
var xAxis = d3.axisBottom(x);
var yAxis = d3.axisLeft(y);
var color = d3.scaleOrdinal(d3.schemeSet3);
var svg = d3.select("#barchart_type").append("svg")
.attr("id", "thesvg")
.attr("viewBox", "0 0 "+width+" 500")
.attr("width", width + margin.left + margin.right)
.attr("height", height + margin.top + margin.bottom)
.append("g")
.attr("transform", "translate(" + margin.left + "," + margin.top + ")");
function barchart_type_stack(url, id) {
d3.json(url)
.then(function(data){
var labelVar = 'date'; //A
var varNames = d3.keys(data[0])
.filter(function (key) { return key !== labelVar;}); //B
data.forEach(function (d) { //D
var y0 = 0;
d.mapping = varNames.map(function (name) {
return {
name: name,
label: d[labelVar],
y0: y0,
y1: y0 += +d[name]
};
});
d.total = d.mapping[d.mapping.length - 1].y1;
});
x.domain(data.map(function (d) { return (d.date); })); //E
y.domain([0, d3.max(data, function (d) { return d.total; })]);
svg.append("g")
.attr("class", "x axis")
.attr("transform", "translate(0," + height + ")")
.call(xAxis)
.selectAll("text")
.attr("class", "bar")
.on("click", function (d) { window.location.href = "#" })
.attr("transform", "rotate(-18)" )
//.attr("transform", "rotate(-40)" )
.style("text-anchor", "end");
svg.append("g")
.attr("class", "y axis")
.call(yAxis)
.append("text")
.attr("transform", "rotate(-90)")
.attr("y", 6)
.attr("dy", ".71em")
.style("text-anchor", "end");
var selection = svg.selectAll(".series")
.data(data)
.enter().append("g")
.attr("class", "series")
.attr("transform", function (d) { return "translate(" + x((d.date)) + ",0)"; });
selection.selectAll("rect")
.data(function (d) { return d.mapping; })
.enter().append("rect")
.attr("class", "bar_stack")
.attr("width", x.bandwidth())
.attr("y", function (d) { return y(d.y1); })
.attr("height", function (d) { return y(d.y0) - y(d.y1); })
.style("fill", function (d) { return color(d.name); })
.style("stroke", "grey")
.on("mouseover", function (d) { showPopover.call(this, d); })
.on("mouseout", function (d) { removePopovers(); })
.on("click", function(d){ window.location.href = "#" });
data.forEach(function(d) {
if(d.total != 0){
svg.append("text")
.attr("class", "bar")
.attr("dy", "-.35em")
.attr('x', x(d.date) + x.bandwidth()/2)
.attr('y', y(d.total))
.on("click", function () {window.location.href = "#" })
.style("text-anchor", "middle")
.text(d.total);
}
});
drawLegend(varNames);
});
}
function drawLegend (varNames) {
var legend = svg.selectAll(".legend")
.data(varNames.slice().reverse())
.enter().append("g")
.attr("class", "legend")
.attr("transform", function (d, i) { return "translate(0," + i * 20 + ")"; });
legend.append("rect")
.attr("x", 943)
.attr("width", 10)
.attr("height", 10)
.style("fill", color)
.style("stroke", "grey");
legend.append("text")
.attr("class", "svgText")
.attr("x", 941)
.attr("y", 6)
.attr("dy", ".35em")
.style("text-anchor", "end")
.text(function (d) { return d; });
}
function removePopovers () {
$('.popover').each(function() {
$(this).remove();
});
}
function showPopover (d) {
$(this).popover({
title: d.name,
placement: 'top',
container: 'body',
trigger: 'manual',
html : true,
content: function() {
return d.label +
"<br/>num: " + d3.format(",")(d.value ? d.value: d.y1 - d.y0); }
});
$(this).popover('show')
}
chart.onResize = function () {
var aspect = width / height, chart = $("#thesvg");
var targetWidth = chart.parent().width();
chart.attr("width", targetWidth);
chart.attr("height", targetWidth / 2);
}
window.chart = chart;
</script>
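barchart_type_stack() treats every key other than date as one stacked series, so crawlers_last_domains_json is expected to return one row per day of numeric columns. A sketch of that format; the up/down column names follow the wording used elsewhere on the page and are an assumption:

# Illustrative rows for the stacked bar chart above; only 'date' is special,
# every other key becomes a series.
rows = [
    {'date': '2022-10-23', 'up': 4, 'down': 2},
    {'date': '2022-10-24', 'up': 6, 'down': 1},
    {'date': '2022-10-25', 'up': 3, 'down': 5},
]

# The JavaScript computes d.total as the sum of the stacked values; same idea here:
for row in rows:
    row['total'] = sum(v for k, v in row.items() if k != 'date')
print(rows)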

View file

@@ -68,17 +68,17 @@
  <table class="table table-sm">
    <tbody>
      <tr>
-       <td>Splash Manager URL</td>
-       <td>{{splash_manager_url}}</td>
+       <td>Lacus URL</td>
+       <td>{{lacus_url}}</td>
      </tr>
      <tr>
-       <td>API Key</td>
+       {# <td>API Key</td>#}
+       {# <td>#}
+       {# {{api_key}}#}
+       {# <!-- <a class="ml-3" href="/settings/new_token"><i class="fa fa-random"></i></a> -->#}
+       {# </td>#}
        <td>
-         {{api_key}}
-         <!-- <a class="ml-3" href="/settings/new_token"><i class="fa fa-random"></i></a> -->
-       </td>
-       <td>
-         <a href="{{ url_for('crawler_splash.crawler_splash_setings_crawler_manager') }}">
+         <a href="{{ url_for('crawler_splash.crawler_lacus_settings_crawler_manager') }}">
          <button type="button" class="btn btn-info">
            Edit <i class="fas fa-pencil-alt"></i>
          </button>
@@ -92,126 +92,52 @@
    </div>
  </div>
- <div {%if not is_manager_connected%}class="hidden"{%endif%}>
-   <div class="card border-secondary mb-4">
-     <div class="card-body text-dark">
-       <h5 class="card-title">All Splash Crawlers:</h5>
-       <table class="table table-striped">
-         <thead class="bg-info text-white">
-           <th>
-             Splash name
-           </th>
-           <th>
-             Proxy
-           </th>
-           <th>
-             Crawler type
-           </th>
-           <th>
-             Description
-           </th>
-           <th></th>
-         </thead>
-         <tbody>
-           {% for splash_name in all_splash %}
-             <tr>
-               <td>
-                 {{splash_name}}
-               </td>
-               <td>
-                 {{all_splash[splash_name]['proxy']}}
-               </td>
-               <td>
-                 {%if all_splash[splash_name]['type']=='tor'%}
-                   <i class="fas fa-user-secret"></i>
-                 {%else%}
-                   <i class="fab fa-html5">
-                 {%endif%}
-                 {{all_splash[splash_name]['type']}}
-               </td>
-               <td>
-                 {{all_splash[splash_name]['description']}}
-               </td>
-               <td>
-                 <div class="d-flex justify-content-end">
-                   <!-- <button class="btn btn-outline-dark px-1 py-0">
-                     <i class="fas fa-pencil-alt"></i>
-                   </button> -->
-                 </div>
-               </td>
-             </tr>
-           {% endfor %}
-         </tbody>
-       </table>
-     </div>
-   </div>
-   <div class="card border-secondary">
-     <div class="card-body text-dark">
-       <h5 class="card-title">All Proxies:</h5>
-       <table class="table table-striped">
-         <thead class="bg-info text-white">
-           <th>
-             Proxy name
-           </th>
-           <th>
-             Host
-           </th>
-           <th>
-             Port
-           </th>
-           <th>
-             Type
-           </th>
-           <th>
-             Crawler Type
-           </th>
-           <th>
-             Description
-           </th>
-           <th></th>
-         </thead>
-         <tbody>
-           {% for proxy_name in all_proxies %}
-             <tr>
-               <td>
-                 {{proxy_name}}
-               </td>
-               <td>
-                 {{all_proxies[proxy_name]['host']}}
-               </td>
-               <td>
-                 {{all_proxies[proxy_name]['port']}}
-               </td>
-               <td>
-                 {{all_proxies[proxy_name]['type']}}
-               </td>
-               <td>
-                 {%if all_proxies[proxy_name]['crawler_type']=='tor'%}
-                   <i class="fas fa-user-secret"></i>
-                 {%else%}
-                   <i class="fab fa-html5">
-                 {%endif%}
-                 {{all_proxies[proxy_name]['crawler_type']}}
-               </td>
-               <td>
-                 {{all_proxies[proxy_name]['description']}}
-               </td>
-               <td>
-                 <div class="d-flex justify-content-end">
-                   <!-- <button class="btn btn-outline-dark px-1 py-0">
-                     <i class="fas fa-pencil-alt"></i>
-                   </button> -->
-                 </div>
-               </td>
-             </tr>
-           {% endfor %}
-         </tbody>
-       </table>
-     </div>
-   </div>
- </div>
+ <div class="card border-secondary">
+   <div class="card-body text-dark">
+     <h5 class="card-title">All Proxies:</h5>
+     <table class="table table-striped">
+       <thead class="bg-info text-white">
+         <tr>
+           <th>Proxy name</th>
+           <th>URL</th>
+           <th>Crawler Type</th>
+           <th>Description</th>
+           <th></th>
+         </tr>
+       </thead>
+       <tbody>
+         {% for proxy_name in all_proxies %}
+           <tr>
+             <td>
+               {{proxy_name}}
+             </td>
+             <td>
+               {{all_proxies[proxy_name]['url']}}
+             </td>
+             <td>
+               {%if all_proxies[proxy_name]['crawler_type']=='tor'%}
+                 <i class="fas fa-user-secret"></i>
+               {%else%}
+                 <i class="fab fa-html5"></i>
+               {%endif%}
+               {{all_proxies[proxy_name]['crawler_type']}}
+             </td>
+             <td>
+               {{all_proxies[proxy_name]['description']}}
+             </td>
+             <td>
+               <div class="d-flex justify-content-end">
+                 <!-- <button class="btn btn-outline-dark px-1 py-0">
+                   <i class="fas fa-pencil-alt"></i>
+                 </button> -->
+               </div>
+             </td>
+           </tr>
+         {% endfor %}
+       </tbody>
+     </table>
+   </div>
+ </div>
  </div>
</div>
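The new table reads three fields per entry, which suggests all_proxies is a mapping from proxy name to a small metadata dict. A sketch under that assumption; the names and URLs are illustrative only:

# Hypothetical all_proxies mapping matching the fields rendered above.
all_proxies = {
    'web': {'url': 'http://127.0.0.1:8080',   'crawler_type': 'web',
            'description': 'default proxy for regular web crawls'},
    'tor': {'url': 'socks5://127.0.0.1:9050', 'crawler_type': 'tor',
            'description': 'Tor SOCKS proxy for .onion crawls'},
}

for name, meta in all_proxies.items():
    # mirrors the table columns: name, URL, crawler type, description
    print(f"{name}: {meta['url']} ({meta['crawler_type']}) - {meta['description']}")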
@@ -242,7 +168,7 @@
  {{crawler_error_mess}}
</pre>
- <a href="{{ url_for('crawler_splash.crawler_splash_setings_test_crawler') }}">
+ <a href="{{ url_for('crawler_splash.crawler_settings_crawler_test') }}">
  <button type="button" class="btn btn-primary">
    ReRun Test <i class="fas fa-rocket"></i>
  </button>

View file

@@ -0,0 +1,61 @@
<!DOCTYPE html>
<html>
<head>
<title>AIL-Framework</title>
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png')}}">
<!-- Core CSS -->
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
<!-- JS -->
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
</head>
<body>
{% include 'nav_bar.html' %}
<div class="container-fluid">
<div class="row">
{% include 'crawler/menu_sidebar.html' %}
<div class="col-12 col-lg-10" id="core_content">
<div class="card my-2">
<div class="card-header bg-dark text-white">
<h3 class="card-title"> Lacus Config:</h3>
<form action="{{ url_for('crawler_splash.crawler_lacus_settings_crawler_manager') }}" method="post" enctype="multipart/form-data">
<div class="form-group">
<label for="splash_manager_url">Lacus Server URL</label>
<input type="text" class="form-control" id="splash_manager_url" required placeholder="https://lacus_url" name="lacus_url" {%if lacus_url%}value="{{lacus_url}}"{%endif%}>
</div>
{# <div class="form-group">#}
{# <label for="api_key">API Key</label>#}
{# <input type="text" class="form-control" id="api_key" placeholder="API Key" name="api_key" {%if api_key%}value="{{api_key}}"{%endif%}>#}
{# </div>#}
<button type="submit" class="btn btn-primary">Edit Lacus <i class="fas fa-pencil-alt"></i></button>
</form>
</div>
</div>
</div>
</div>
</div>
</body>
<script>
var to_refresh = false
$(document).ready(function(){
$("#page-Crawler").addClass("active");
$("#nav_settings").addClass("active");
});
</script>
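The form above posts a single lacus_url field back to crawler_lacus_settings_crawler_manager. A minimal Flask-style sketch of what such a handler could look like; the route path, persistence, redirect target, and template filename are assumptions, not the actual AIL code:

from flask import Blueprint, request, render_template, redirect, url_for

crawler_splash = Blueprint('crawler_splash', __name__)

_config = {'lacus_url': None}  # stand-in for wherever AIL actually persists the setting

@crawler_splash.route('/crawler/settings/crawler_manager', methods=['GET', 'POST'])  # path is a guess
def crawler_lacus_settings_crawler_manager():
    if request.method == 'POST':
        lacus_url = request.form.get('lacus_url', '').strip()  # field name from the form above
        if lacus_url:
            _config['lacus_url'] = lacus_url  # real code would persist it and probe the Lacus server
        return redirect(url_for('crawler_splash.crawler_lacus_settings_crawler_manager'))
    # template filename is a guess for the page shown above
    return render_template('crawler/settings_edit_lacus.html', lacus_url=_config['lacus_url'])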

View file

@@ -1,55 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<title>AIL-Framework</title>
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png')}}">
<!-- Core CSS -->
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
<!-- JS -->
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
</head>
<body>
{% include 'nav_bar.html' %}
<div class="container-fluid">
<div class="row">
{% include 'crawler/menu_sidebar.html' %}
<div class="col-12 col-lg-10" id="core_content">
<form action="{{ url_for('crawler_splash.crawler_splash_setings_crawler_manager') }}" method="post" enctype="multipart/form-data">
<div class="form-group">
<label for="splash_manager_url">Splash Manager URL</label>
<input type="text" class="form-control" id="splash_manager_url" placeholder="https://splash_manager_url" name="splash_manager_url" {%if splash_manager_url%}value="{{splash_manager_url}}"{%endif%}>
</div>
<div class="form-group">
<label for="api_key">API Key</label>
<input type="text" class="form-control" id="api_key" placeholder="API Key" name="api_key" {%if api_key%}value="{{api_key}}"{%endif%}>
</div>
<button type="submit" class="btn btn-primary">Edit <i class="fas fa-pencil-alt"></i></button>
</form>
</div>
</div>
</div>
</body>
<script>
var to_refresh = false
$(document).ready(function(){
$("#page-Crawler").addClass("active");
$("#nav_settings").addClass("active");
});
</script>

Some files were not shown because too many files have changed in this diff.