chg: [merge] merge master into update branch

This commit is contained in:
Terrtia 2019-04-10 15:43:15 +02:00
commit 2589fc2161
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
28 changed files with 1453 additions and 165 deletions

3
.gitignore vendored
View file

@ -36,6 +36,9 @@ bin/packages/config.cfg.backup
configs/keys configs/keys
files files
# Pystemon archives
pystemon/archives
# installed files # installed files
nltk_data/ nltk_data/
doc/all_modules.txt doc/all_modules.txt

View file

@ -27,8 +27,18 @@ WORKDIR /opt/AIL
# Default to UTF-8 file.encoding # Default to UTF-8 file.encoding
ENV LANG C.UTF-8 ENV LANG C.UTF-8
ENV AIL_HOME /opt/AIL
ENV AIL_BIN ${AIL_HOME}/bin
ENV AIL_FLASK ${AIL_HOME}/var/www
ENV AIL_REDIS ${AIL_HOME}/redis/src
ENV AIL_ARDB ${AIL_HOME}/ardb/src
ENV AIL_VENV ${AIL_HOME}/AILENV
ENV PATH ${AIL_VENV}/bin:${AIL_HOME}:${AIL_REDIS}:${AIL_ARDB}:${AIL_BIN}:${AIL_FLASK}:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
RUN ./pystemon/install.sh RUN ./pystemon/install.sh
RUN pip install -r /opt/pystemon/requirements.txt
RUN pip install -r /opt/AIL/crawler_requirements.txt
COPY docker_start.sh /docker_start.sh COPY docker_start.sh /docker_start.sh
ENTRYPOINT ["/bin/bash", "docker_start.sh"] ENTRYPOINT ["/bin/bash", "docker_start.sh"]

View file

@ -101,7 +101,6 @@ ARDB_DB
ZADD - 'base64_hash:'+hash paste * nb_seen_in_paste ZADD - 'base64_hash:'+hash paste * nb_seen_in_paste
ZADD - 'binary_hash:'+hash paste * nb_seen_in_paste ZADD - 'binary_hash:'+hash paste * nb_seen_in_paste
ZADD - 'hash_type:'+type date nb_seen
ZADD - 'base64_type:'+type date nb_seen ZADD - 'base64_type:'+type date nb_seen
ZADD - 'binary_type:'+type date nb_seen ZADD - 'binary_type:'+type date nb_seen

View file

@ -12,7 +12,7 @@ AIL is a modular framework to analyse potential information leaks from unstructu
<table> <table>
<tr> <tr>
<td>Latest Release</td> <td>Latest Release</td>
<td><a href="https://badge.fury.io/gh/CIRCL%2FAIL-Framework"><img src="https://badge.fury.io/gh/CIRCL%2FAIL-Framework.svg" alt="GitHub version" height="18"></a></td> <td><a href="https://github.com/CIRCL/AIL-framework/releases/latest"><img src="https://img.shields.io/github/release/CIRCL/AIL-framework/all.svg"></a></td>
</tr> </tr>
<tr> <tr>
<td>Contributors</td> <td>Contributors</td>
@ -168,6 +168,22 @@ Privacy and GDPR
[AIL information leaks analysis and the GDPR in the context of collection, analysis and sharing information leaks](https://www.circl.lu/assets/files/information-leaks-analysis-and-gdpr.pdf) document provides an overview how to use AIL in a lawfulness context especially in the scope of General Data Protection Regulation. [AIL information leaks analysis and the GDPR in the context of collection, analysis and sharing information leaks](https://www.circl.lu/assets/files/information-leaks-analysis-and-gdpr.pdf) document provides an overview how to use AIL in a lawfulness context especially in the scope of General Data Protection Regulation.
Research using AIL
------------------
If you write an academic paper relying on or using AIL, it can be cited with the following BibTeX:
~~~~
@inproceedings{mokaddem2018ail,
title={AIL-The design and implementation of an Analysis Information Leak framework},
author={Mokaddem, Sami and Wagener, G{\'e}rard and Dulaunoy, Alexandre},
booktitle={2018 IEEE International Conference on Big Data (Big Data)},
pages={5049--5057},
year={2018},
organization={IEEE}
}
~~~~
Screenshots Screenshots
=========== ===========
@ -237,11 +253,11 @@ License
``` ```
Copyright (C) 2014 Jules Debra Copyright (C) 2014 Jules Debra
Copyright (C) 2014-2018 CIRCL - Computer Incident Response Center Luxembourg (c/o smile, security made in Lëtzebuerg, Groupement d'Intérêt Economique) Copyright (C) 2014-2019 CIRCL - Computer Incident Response Center Luxembourg (c/o smile, security made in Lëtzebuerg, Groupement d'Intérêt Economique)
Copyright (c) 2014-2018 Raphaël Vinot Copyright (c) 2014-2019 Raphaël Vinot
Copyright (c) 2014-2018 Alexandre Dulaunoy Copyright (c) 2014-2019 Alexandre Dulaunoy
Copyright (c) 2016-2018 Sami Mokaddem Copyright (c) 2016-2019 Sami Mokaddem
Copyright (c) 2018 Thirion Aurélien Copyright (c) 2018-2019 Thirion Aurélien
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by it under the terms of the GNU Affero General Public License as published by

83
bin/CVE_check.py Executable file
View file

@ -0,0 +1,83 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
from packages import Paste
from Helper import Process
import os
import re
import time
import redis
import configparser
from collections import defaultdict
def get_dict_cve(list_paste_cve, only_one_same_cve_by_paste=False):
dict_keyword = {}
for paste_cve in list_paste_cve:
paste_content = Paste.Paste(paste_cve).get_p_content()
cve_list = reg_cve.findall(paste_content)
if only_one_same_cve_by_paste:
cve_list = set(cve_list)
for cve in cve_list:
try:
dict_keyword[cve] += 1
except KeyError:
dict_keyword[cve] = 1
print('------------------------------------------------')
if dict_keyword:
res = [(k, dict_keyword[k]) for k in sorted(dict_keyword, key=dict_keyword.get, reverse=True)]
for item in res:
    print(item)
if __name__ == '__main__':
# CONFIG #
configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
if not os.path.exists(configfile):
raise Exception('Unable to find the configuration file. \
Did you set environment variables? \
Or activate the virtualenv.')
cfg = configparser.ConfigParser()
cfg.read(configfile)
serv_metadata = redis.StrictRedis(
host=cfg.get("ARDB_Metadata", "host"),
port=cfg.getint("ARDB_Metadata", "port"),
db=cfg.getint("ARDB_Metadata", "db"),
decode_responses=True)
serv_tags = redis.StrictRedis(
host=cfg.get("ARDB_Tags", "host"),
port=cfg.get("ARDB_Tags", "port"),
db=cfg.get("ARDB_Tags", "db"),
decode_responses=True)
reg_cve = re.compile(r'CVE-[1-2]\d{1,4}-\d{1,7}')
#all_past_cve = serv_tags.smembers('infoleak:automatic-detection="cve"')
#all_past_cve_regular = serv_tags.sdiff('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"')
#all_past_cve_crawler = serv_tags.sinter('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"')
#print('{} + {} = {}'.format(len(all_past_cve_regular), len(all_past_cve_crawler), len(all_past_cve)))
print('ALL_CVE')
get_dict_cve(serv_tags.smembers('infoleak:automatic-detection="cve"'), True)
print()
print()
print()
print('REGULAR_CVE')
get_dict_cve(serv_tags.sdiff('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"'), True)
print()
print()
print()
print('CRAWLER_CVE')
get_dict_cve(serv_tags.sinter('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"'), True)

View file

@ -10,6 +10,8 @@ import time
import subprocess import subprocess
import requests import requests
from pyfaup.faup import Faup
sys.path.append(os.environ['AIL_BIN']) sys.path.append(os.environ['AIL_BIN'])
from Helper import Process from Helper import Process
from pubsublogger import publisher from pubsublogger import publisher
@ -18,10 +20,13 @@ def on_error_send_message_back_in_queue(type_hidden_service, domain, message):
# send this msg back in the queue # send this msg back in the queue
if not r_onion.sismember('{}_domain_crawler_queue'.format(type_hidden_service), domain): if not r_onion.sismember('{}_domain_crawler_queue'.format(type_hidden_service), domain):
r_onion.sadd('{}_domain_crawler_queue'.format(type_hidden_service), domain) r_onion.sadd('{}_domain_crawler_queue'.format(type_hidden_service), domain)
r_onion.sadd('{}_crawler_queue'.format(type_hidden_service), message) r_onion.sadd('{}_crawler_priority_queue'.format(type_hidden_service), message)
def crawl_onion(url, domain, date, date_month, message): def crawl_onion(url, domain, date, date_month, message):
r_cache.hset('metadata_crawler:{}'.format(splash_port), 'crawling_domain', domain)
r_cache.hset('metadata_crawler:{}'.format(splash_port), 'started_time', datetime.datetime.now().strftime("%Y/%m/%d - %H:%M.%S"))
#if not r_onion.sismember('full_onion_up', domain) and not r_onion.sismember('onion_down:'+date , domain): #if not r_onion.sismember('full_onion_up', domain) and not r_onion.sismember('onion_down:'+date , domain):
super_father = r_serv_metadata.hget('paste_metadata:'+paste, 'super_father') super_father = r_serv_metadata.hget('paste_metadata:'+paste, 'super_father')
if super_father is None: if super_father is None:
@ -37,19 +42,21 @@ def crawl_onion(url, domain, date, date_month, message):
# TODO: relaunch docker or send error message # TODO: relaunch docker or send error message
nb_retry += 1 nb_retry += 1
if nb_retry == 30: if nb_retry == 6:
on_error_send_message_back_in_queue(type_hidden_service, domain, message) on_error_send_message_back_in_queue(type_hidden_service, domain, message)
publisher.error('{} SPLASH DOWN'.format(splash_url)) publisher.error('{} SPLASH DOWN'.format(splash_url))
print('--------------------------------------') print('--------------------------------------')
print(' \033[91m DOCKER SPLASH DOWN\033[0m') print(' \033[91m DOCKER SPLASH DOWN\033[0m')
print(' {} DOWN'.format(splash_url)) print(' {} DOWN'.format(splash_url))
exit(1) r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'SPLASH DOWN')
nb_retry = 0
print(' \033[91m DOCKER SPLASH NOT AVAILABLE\033[0m') print(' \033[91m DOCKER SPLASH NOT AVAILABLE\033[0m')
print(' Retry({}) in 10 seconds'.format(nb_retry)) print(' Retry({}) in 10 seconds'.format(nb_retry))
time.sleep(10) time.sleep(10)
if r.status_code == 200: if r.status_code == 200:
r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Crawling')
process = subprocess.Popen(["python", './torcrawler/tor_crawler.py', splash_url, type_hidden_service, url, domain, paste, super_father], process = subprocess.Popen(["python", './torcrawler/tor_crawler.py', splash_url, type_hidden_service, url, domain, paste, super_father],
stdout=subprocess.PIPE) stdout=subprocess.PIPE)
while process.poll() is None: while process.poll() is None:
@ -67,6 +74,7 @@ def crawl_onion(url, domain, date, date_month, message):
print('') print('')
print(' PROXY DOWN OR BAD CONFIGURATION\033[0m'.format(splash_url)) print(' PROXY DOWN OR BAD CONFIGURATION\033[0m'.format(splash_url))
print('------------------------------------------------------------------------') print('------------------------------------------------------------------------')
r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Error')
exit(-2) exit(-2)
else: else:
print(process.stdout.read()) print(process.stdout.read())
@ -76,6 +84,7 @@ def crawl_onion(url, domain, date, date_month, message):
print('--------------------------------------') print('--------------------------------------')
print(' \033[91m DOCKER SPLASH DOWN\033[0m') print(' \033[91m DOCKER SPLASH DOWN\033[0m')
print(' {} DOWN'.format(splash_url)) print(' {} DOWN'.format(splash_url))
r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Crawling')
exit(1) exit(1)
@ -119,6 +128,7 @@ if __name__ == '__main__':
print('splash url: {}'.format(splash_url)) print('splash url: {}'.format(splash_url))
crawler_depth_limit = p.config.getint("Crawler", "crawler_depth_limit") crawler_depth_limit = p.config.getint("Crawler", "crawler_depth_limit")
faup = Faup()
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes")) PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes"))
@ -140,6 +150,10 @@ if __name__ == '__main__':
db=p.config.getint("ARDB_Onion", "db"), db=p.config.getint("ARDB_Onion", "db"),
decode_responses=True) decode_responses=True)
r_cache.sadd('all_crawler:{}'.format(type_hidden_service), splash_port)
r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Waiting')
r_cache.hset('metadata_crawler:{}'.format(splash_port), 'started_time', datetime.datetime.now().strftime("%Y/%m/%d - %H:%M.%S"))
# load domains blacklist # load domains blacklist
try: try:
with open(os.environ['AIL_BIN']+'/torcrawler/blacklist_onion.txt', 'r') as f: with open(os.environ['AIL_BIN']+'/torcrawler/blacklist_onion.txt', 'r') as f:
@ -152,6 +166,10 @@ if __name__ == '__main__':
while True: while True:
# Priority Queue - Recovering the streamed message informations.
message = r_onion.spop('{}_crawler_priority_queue'.format(type_hidden_service))
if message is None:
# Recovering the streamed message informations. # Recovering the streamed message informations.
message = r_onion.spop('{}_crawler_queue'.format(type_hidden_service)) message = r_onion.spop('{}_crawler_queue'.format(type_hidden_service))
@ -173,6 +191,8 @@ if __name__ == '__main__':
domain_url = 'http://{}'.format(domain) domain_url = 'http://{}'.format(domain)
print()
print()
print('\033[92m------------------START CRAWLER------------------\033[0m') print('\033[92m------------------START CRAWLER------------------\033[0m')
print('crawler type: {}'.format(type_hidden_service)) print('crawler type: {}'.format(type_hidden_service))
print('\033[92m-------------------------------------------------\033[0m') print('\033[92m-------------------------------------------------\033[0m')
@ -180,12 +200,24 @@ if __name__ == '__main__':
print('domain: {}'.format(domain)) print('domain: {}'.format(domain))
print('domain_url: {}'.format(domain_url)) print('domain_url: {}'.format(domain_url))
if not r_onion.sismember('blacklist_{}'.format(type_hidden_service), domain): faup.decode(domain)
onion_domain=faup.get()['domain'].decode()
if not r_onion.sismember('blacklist_{}'.format(type_hidden_service), domain) and not r_onion.sismember('blacklist_{}'.format(type_hidden_service), onion_domain):
date = datetime.datetime.now().strftime("%Y%m%d") date = datetime.datetime.now().strftime("%Y%m%d")
date_month = datetime.datetime.now().strftime("%Y%m") date_month = datetime.datetime.now().strftime("%Y%m")
if not r_onion.sismember('month_{}_up:{}'.format(type_hidden_service, date_month), domain) and not r_onion.sismember('{}_down:{}'.format(type_hidden_service, date), domain): if not r_onion.sismember('month_{}_up:{}'.format(type_hidden_service, date_month), domain) and not r_onion.sismember('{}_down:{}'.format(type_hidden_service, date), domain):
# first seen
if not r_onion.hexists('{}_metadata:{}'.format(type_hidden_service, domain), 'first_seen'):
r_onion.hset('{}_metadata:{}'.format(type_hidden_service, domain), 'first_seen', date)
# last_father
r_onion.hset('{}_metadata:{}'.format(type_hidden_service, domain), 'paste_parent', paste)
# last check
r_onion.hset('{}_metadata:{}'.format(type_hidden_service, domain), 'last_check', date)
crawl_onion(url, domain, date, date_month, message) crawl_onion(url, domain, date, date_month, message)
if url != domain_url: if url != domain_url:
@ -198,21 +230,12 @@ if __name__ == '__main__':
r_onion.sadd('{}_down:{}'.format(type_hidden_service, date), domain) r_onion.sadd('{}_down:{}'.format(type_hidden_service, date), domain)
#r_onion.sadd('{}_down_link:{}'.format(type_hidden_service, date), url) #r_onion.sadd('{}_down_link:{}'.format(type_hidden_service, date), url)
#r_onion.hincrby('{}_link_down'.format(type_hidden_service), url, 1) #r_onion.hincrby('{}_link_down'.format(type_hidden_service), url, 1)
if not r_onion.exists('{}_metadata:{}'.format(type_hidden_service, domain)):
r_onion.hset('{}_metadata:{}'.format(type_hidden_service, domain), 'first_seen', date)
r_onion.hset('{}_metadata:{}'.format(type_hidden_service,domain), 'last_seen', date)
else: else:
#r_onion.hincrby('{}_link_up'.format(type_hidden_service), url, 1) #r_onion.hincrby('{}_link_up'.format(type_hidden_service), url, 1)
if r_onion.sismember('month_{}_up:{}'.format(type_hidden_service, date_month), domain) and r_serv_metadata.exists('paste_children:'+paste): if r_onion.sismember('month_{}_up:{}'.format(type_hidden_service, date_month), domain) and r_serv_metadata.exists('paste_children:'+paste):
msg = 'infoleak:automatic-detection="{}";{}'.format(type_hidden_service, paste) msg = 'infoleak:automatic-detection="{}";{}'.format(type_hidden_service, paste)
p.populate_set_out(msg, 'Tags') p.populate_set_out(msg, 'Tags')
# last check
r_onion.hset('{}_metadata:{}'.format(type_hidden_service, domain), 'last_check', date)
# last_father
r_onion.hset('{}_metadata:{}'.format(type_hidden_service, domain), 'paste_parent', paste)
# add onion screenshot history # add onion screenshot history
# add crawled days # add crawled days
if r_onion.lindex('{}_history:{}'.format(type_hidden_service, domain), 0) != date: if r_onion.lindex('{}_history:{}'.format(type_hidden_service, domain), 0) != date:
@ -243,6 +266,14 @@ if __name__ == '__main__':
r_onion.lpush('last_{}'.format(type_hidden_service), domain) r_onion.lpush('last_{}'.format(type_hidden_service), domain)
r_onion.ltrim('last_{}'.format(type_hidden_service), 0, 15) r_onion.ltrim('last_{}'.format(type_hidden_service), 0, 15)
#update crawler status
r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Waiting')
r_cache.hdel('metadata_crawler:{}'.format(splash_port), 'crawling_domain')
else:
print(' Blacklisted Onion')
print()
print()
else: else:
continue continue
else: else:
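The hunks above have Crawler.py report its state into the Redis cache: each Splash-backed instance registers itself in `all_crawler:<type>` and keeps a `metadata_crawler:<splash_port>` hash with its status, start time and current domain, which the Flask hidden-services view reads back further down. A minimal sketch of that bookkeeping, assuming a local Redis cache on port 6379 as in the shipped configuration:

```
import datetime
import redis

# Assumed connection values; the module actually reads them from config.cfg ([Redis_Cache]).
r_cache = redis.StrictRedis(host='localhost', port=6379, db=0, decode_responses=True)

def register_crawler(splash_port, type_hidden_service='onion'):
    # Announce the crawler instance and start it in the 'Waiting' state.
    r_cache.sadd('all_crawler:{}'.format(type_hidden_service), splash_port)
    r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Waiting')
    r_cache.hset('metadata_crawler:{}'.format(splash_port), 'started_time',
                 datetime.datetime.now().strftime("%Y/%m/%d - %H:%M.%S"))

def start_crawl(splash_port, domain):
    # Called when a domain is popped from the (priority) queue.
    r_cache.hset('metadata_crawler:{}'.format(splash_port), 'crawling_domain', domain)
    r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Crawling')

def finish_crawl(splash_port):
    # Back to 'Waiting' once the domain is done, so the UI no longer shows it as in progress.
    r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Waiting')
    r_cache.hdel('metadata_crawler:{}'.format(splash_port), 'crawling_domain')
```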

View file

@ -23,23 +23,17 @@ Requirements
import base64 import base64
import os import os
import time import time
import uuid
from pubsublogger import publisher from pubsublogger import publisher
from Helper import Process from Helper import Process
import magic import magic
import io
#import gzip
-'''
-def gunzip_bytes_obj(bytes_obj):
-    in_ = io.BytesIO()
-    in_.write(bytes_obj)
-    in_.seek(0)
-    with gzip.GzipFile(fileobj=in_, mode='rb') as fo:
-        gunzipped_bytes_obj = fo.read()
-    return gunzipped_bytes_obj.decode()'''
+def rreplace(s, old, new, occurrence):
+    li = s.rsplit(old, occurrence)
+    return new.join(li)
if __name__ == '__main__': if __name__ == '__main__':
publisher.port = 6380 publisher.port = 6380
@ -79,6 +73,12 @@ if __name__ == '__main__':
processed_paste = 0 processed_paste = 0
time.sleep(1) time.sleep(1)
continue continue
file_name_paste = paste.split('/')[-1]
if len(file_name_paste)>255:
new_file_name_paste = '{}{}.gz'.format(file_name_paste[:215], str(uuid.uuid4()))
paste = rreplace(paste, file_name_paste, new_file_name_paste, 1)
# Creating the full filepath # Creating the full filepath
filename = os.path.join(PASTES_FOLDER, paste) filename = os.path.join(PASTES_FOLDER, paste)
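The new `rreplace` helper and the length check above keep paste filenames below the usual 255-character filesystem limit: an over-long basename is truncated and a UUID plus `.gz` suffix is appended. A standalone sketch of that logic (the 215/255 thresholds come from the hunk above):

```
import uuid

def rreplace(s, old, new, occurrence):
    # Replace the last `occurrence` occurrences of `old` in `s`.
    li = s.rsplit(old, occurrence)
    return new.join(li)

def cap_paste_name(paste):
    # Hypothetical helper mirroring the Global.py logic: rename only the basename,
    # keeping the directory part of the relative paste path intact.
    file_name_paste = paste.split('/')[-1]
    if len(file_name_paste) > 255:
        new_file_name_paste = '{}{}.gz'.format(file_name_paste[:215], str(uuid.uuid4()))
        paste = rreplace(paste, file_name_paste, new_file_name_paste, 1)
    return paste
```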

View file

@ -31,7 +31,7 @@ lastTimeKillCommand = {}
current_selected_value = 0 current_selected_value = 0
current_selected_queue = "" current_selected_queue = ""
current_selected_action = "" current_selected_action = ""
current_selected_action = 0 current_selected_amount = 0
# Map PID to Queue name (For restart and killing) # Map PID to Queue name (For restart and killing)
PID_NAME_DICO = {} PID_NAME_DICO = {}
@ -480,7 +480,10 @@ class Show_paste(Frame):
self.label_list[i]._text = "" self.label_list[i]._text = ""
except Exception as e: except Exception as e:
if current_selected_value in COMPLETE_PASTE_PATH_PER_PID:
self.label_list[0]._text = "Error while displaying the paste: " + COMPLETE_PASTE_PATH_PER_PID[current_selected_value] self.label_list[0]._text = "Error while displaying the paste: " + COMPLETE_PASTE_PATH_PER_PID[current_selected_value]
else:
self.label_list[0]._text = "Error Generic exception caught"
self.label_list[1]._text = str(e) self.label_list[1]._text = str(e)
for i in range(2,self.num_label): for i in range(2,self.num_label):
self.label_list[i]._text = "" self.label_list[i]._text = ""

View file

@ -29,10 +29,18 @@ import os
import base64 import base64
import subprocess import subprocess
import redis import redis
import signal
import re import re
from Helper import Process from Helper import Process
class TimeoutException(Exception):
pass
def timeout_handler(signum, frame):
raise TimeoutException
signal.signal(signal.SIGALRM, timeout_handler)
def fetch(p, r_cache, urls, domains, path): def fetch(p, r_cache, urls, domains, path):
failed = [] failed = []
@ -113,6 +121,8 @@ if __name__ == "__main__":
message = p.get_from_set() message = p.get_from_set()
prec_filename = None prec_filename = None
max_execution_time = p.config.getint("Onion", "max_execution_time")
# send to crawler: # send to crawler:
activate_crawler = p.config.get("Crawler", "activate_crawler") activate_crawler = p.config.get("Crawler", "activate_crawler")
if activate_crawler == 'True': if activate_crawler == 'True':
@ -130,6 +140,7 @@ if __name__ == "__main__":
while True: while True:
message = p.get_from_set()
if message is not None: if message is not None:
print(message) print(message)
filename, score = message.split() filename, score = message.split()
@ -140,6 +151,9 @@ if __name__ == "__main__":
urls = [] urls = []
PST = Paste.Paste(filename) PST = Paste.Paste(filename)
# max execution time on regex
signal.alarm(max_execution_time)
try:
for x in PST.get_regex(url_regex): for x in PST.get_regex(url_regex):
print(x) print(x)
# Extracting url with regex # Extracting url with regex
@ -150,6 +164,13 @@ if __name__ == "__main__":
print(url) print(url)
domains_list.append(domain) domains_list.append(domain)
urls.append(url) urls.append(url)
except TimeoutException:
encoded_list = []
p.incr_module_timeout_statistic()
print ("{0} processing timeout".format(PST.p_path))
continue
signal.alarm(0)
''' '''
for x in PST.get_regex(i2p_regex): for x in PST.get_regex(i2p_regex):
@ -177,8 +198,12 @@ if __name__ == "__main__":
print(len(domains_list)) print(len(domains_list))
if len(domains_list) > 0: if len(domains_list) > 0:
if not activate_crawler:
publisher.warning('{}Detected {} .onion(s);{}'.format( publisher.warning('{}Detected {} .onion(s);{}'.format(
to_print, len(domains_list),PST.p_rel_path)) to_print, len(domains_list),PST.p_rel_path))
else:
publisher.info('{}Detected {} .onion(s);{}'.format(
to_print, len(domains_list),PST.p_rel_path))
now = datetime.datetime.now() now = datetime.datetime.now()
path = os.path.join('onions', str(now.year).zfill(4), path = os.path.join('onions', str(now.year).zfill(4),
str(now.month).zfill(2), str(now.month).zfill(2),
@ -199,11 +224,19 @@ if __name__ == "__main__":
else: else:
continue continue
# too many subdomain
if len(domain.split('.')) > 5:
continue
if not r_onion.sismember('month_onion_up:{}'.format(date_month), domain) and not r_onion.sismember('onion_down:'+date , domain): if not r_onion.sismember('month_onion_up:{}'.format(date_month), domain) and not r_onion.sismember('onion_down:'+date , domain):
if not r_onion.sismember('onion_domain_crawler_queue', domain): if not r_onion.sismember('onion_domain_crawler_queue', domain):
print('send to onion crawler') print('send to onion crawler')
r_onion.sadd('onion_domain_crawler_queue', domain) r_onion.sadd('onion_domain_crawler_queue', domain)
msg = '{};{}'.format(url,PST.p_rel_path) msg = '{};{}'.format(url,PST.p_rel_path)
if not r_onion.hexists('onion_metadata:{}'.format(domain), 'first_seen'):
r_onion.sadd('onion_crawler_priority_queue', msg)
print('send to priority queue')
else:
r_onion.sadd('onion_crawler_queue', msg) r_onion.sadd('onion_crawler_queue', msg)
#p.populate_set_out(msg, 'Crawler') #p.populate_set_out(msg, 'Crawler')
@ -222,4 +255,3 @@ if __name__ == "__main__":
publisher.debug("Script url is Idling 10s") publisher.debug("Script url is Idling 10s")
#print('Sleeping') #print('Sleeping')
time.sleep(10) time.sleep(10)
message = p.get_from_set()
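Onion.py now wraps the URL-regex extraction in a SIGALRM-based timeout driven by the new `[Onion] max_execution_time` setting (180 s in the configuration added below), incrementing the module's timeout statistic instead of hanging on pathological pastes. A minimal, self-contained sketch of the pattern:

```
import re
import signal

class TimeoutException(Exception):
    pass

def timeout_handler(signum, frame):
    raise TimeoutException

# SIGALRM only works on Unix and in the main thread, which is how the AIL modules run.
signal.signal(signal.SIGALRM, timeout_handler)

def extract_with_timeout(content, regex, max_execution_time=180):
    signal.alarm(max_execution_time)      # arm the alarm before the expensive scan
    try:
        matches = re.findall(regex, content)
    except TimeoutException:
        print('processing timeout')
        return []
    signal.alarm(0)                       # disarm once the scan finished in time
    return matches
```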

View file

@ -67,7 +67,7 @@ while True:
print(paste) print(paste)
with open(pystemonpath+paste, 'rb') as f: #.read() with open(pystemonpath+paste, 'rb') as f: #.read()
messagedata = f.read() messagedata = f.read()
path_to_send = pastes_directory+paste path_to_send = os.path.join(pastes_directory,paste)
s = b' '.join( [ topic.encode(), path_to_send.encode(), base64.b64encode(messagedata) ] ) s = b' '.join( [ topic.encode(), path_to_send.encode(), base64.b64encode(messagedata) ] )
socket.send(s) socket.send(s)
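Switching the feeder to `os.path.join` avoids the doubled or missing separator you can get with plain string concatenation when `pastes_directory` does or does not end with a slash. A quick illustration (the paste name is made up):

```
import os

pastes_directory = 'PASTES'          # note: no trailing slash
paste = 'pastebin.com_pro/2019/04/10/example.gz'

print(pastes_directory + paste)                # 'PASTESpastebin.com_pro/...': separator lost
print(os.path.join(pastes_directory, paste))   # 'PASTES/pastebin.com_pro/...': exactly one separator
```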

View file

@ -0,0 +1,253 @@
[Directories]
bloomfilters = Blooms
dicofilters = Dicos
pastes = PASTES
hash = HASHS
crawled = crawled
crawled_screenshot = CRAWLED_SCREENSHOT
wordtrending_csv = var/www/static/csv/wordstrendingdata
wordsfile = files/wordfile
protocolstrending_csv = var/www/static/csv/protocolstrendingdata
protocolsfile = files/protocolsfile
tldstrending_csv = var/www/static/csv/tldstrendingdata
tldsfile = faup/src/data/mozilla.tlds
domainstrending_csv = var/www/static/csv/domainstrendingdata
pystemonpath = /opt/pystemon/
sentiment_lexicon_file = sentiment/vader_lexicon.zip/vader_lexicon/vader_lexicon.txt
##### Notifications ######
[Notifications]
ail_domain = http://localhost:7000
sender = sender@example.com
sender_host = smtp.example.com
sender_port = 1337
sender_pw = None
# optional for using with authenticated SMTP over SSL
# sender_pw = securepassword
##### Flask #####
[Flask]
#Proxying requests to the app
baseUrl = /
#Number of logs to display in the dashboard
max_dashboard_logs = 15
#Maximum number of character to display in the toolip
max_preview_char = 250
#Maximum number of character to display in the modal
max_preview_modal = 800
#Default number of header to display in trending graphs
default_display = 10
#Number of minutes displayed for the number of processed pastes.
minute_processed_paste = 10
#Maximum line length authorized to make a diff between duplicates
DiffMaxLineLength = 10000
#### Modules ####
[BankAccount]
max_execution_time = 60
[Categ]
#Minimum number of match between the paste and the category file
matchingThreshold=1
[Credential]
#Minimum length that a credential must have to be considered as such
minimumLengthThreshold=3
#Will be pushed as alert if the number of credentials is greater to that number
criticalNumberToAlert=8
#Will be considered as false positive if less that X matches from the top password list
minTopPassList=5
[Curve]
max_execution_time = 90
[Onion]
max_execution_time = 180
[Base64]
path = Base64/
max_execution_time = 60
[Binary]
path = Base64/
max_execution_time = 60
[Hex]
path = Base64/
max_execution_time = 60
[Modules_Duplicates]
#Number of month to look back
maximum_month_range = 3
#The value where two pastes are considerate duplicate for ssdeep.
threshold_duplicate_ssdeep = 50
#The value where two pastes are considerate duplicate for tlsh.
threshold_duplicate_tlsh = 52
#Minimum size of the paste considered
min_paste_size = 0.3
[Module_ModuleInformation]
#Threshold to deduce if a module is stuck or not, in seconds.
threshold_stucked_module=600
[Module_Mixer]
#Define the configuration of the mixer, possible value: 1, 2 or 3
operation_mode = 3
#Define the time that a paste will be considerate duplicate. in seconds (1day = 86400)
ttl_duplicate = 86400
default_unnamed_feed_name = unnamed_feeder
[RegexForTermsFrequency]
max_execution_time = 60
##### Redis #####
[Redis_Cache]
host = localhost
port = 6379
db = 0
[Redis_Log]
host = localhost
port = 6380
db = 0
[Redis_Log_submit]
host = localhost
port = 6380
db = 1
[Redis_Queues]
host = localhost
port = 6381
db = 0
[Redis_Data_Merging]
host = localhost
port = 6379
db = 1
[Redis_Paste_Name]
host = localhost
port = 6379
db = 2
[Redis_Mixer_Cache]
host = localhost
port = 6381
db = 1
##### ARDB #####
[ARDB_Curve]
host = localhost
port = 6382
db = 1
[ARDB_Sentiment]
host = localhost
port = 6382
db = 4
[ARDB_TermFreq]
host = localhost
port = 6382
db = 2
[ARDB_TermCred]
host = localhost
port = 6382
db = 5
[ARDB_DB]
host = localhost
port = 6382
db = 0
[ARDB_Trending]
host = localhost
port = 6382
db = 3
[ARDB_Hashs]
host = localhost
db = 1
[ARDB_Tags]
host = localhost
port = 6382
db = 6
[ARDB_Metadata]
host = localhost
port = 6382
db = 7
[ARDB_Statistics]
host = localhost
port = 6382
db = 8
[ARDB_Onion]
host = localhost
port = 6382
db = 9
[Url]
cc_critical = DE
[DomClassifier]
cc = DE
cc_tld = r'\.de$'
dns = 8.8.8.8
[Mail]
dns = 8.8.8.8
[Web]
dns = 149.13.33.69
# Indexer configuration
[Indexer]
type = whoosh
path = indexdir
register = indexdir/all_index.txt
#size in Mb
index_max_size = 2000
[ailleakObject]
maxDuplicateToPushToMISP=10
###############################################################################
# For multiple feed, add them with "," without space
# e.g.: tcp://127.0.0.1:5556,tcp://127.0.0.1:5557
[ZMQ_Global]
#address = tcp://crf.circl.lu:5556
address = tcp://127.0.0.1:5556,tcp://crf.circl.lu:5556
channel = 102
bind = tcp://127.0.0.1:5556
[ZMQ_Url]
address = tcp://127.0.0.1:5004
channel = urls
[ZMQ_FetchedOnion]
address = tcp://127.0.0.1:5005
channel = FetchedOnion
[RedisPubSub]
host = localhost
port = 6381
db = 0
[Crawler]
activate_crawler = False
crawler_depth_limit = 1
splash_url_onion = http://172.17.0.1
splash_onion_port = 8050
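This Docker-oriented config.cfg is consumed the same way the modules above read the main configuration: through `configparser`, with the Redis/ARDB sections feeding `redis.StrictRedis` connections. A short sketch of reading the new `[Crawler]` and `[ARDB_Onion]` sections (the path is hypothetical; values are the defaults listed above):

```
import configparser
import redis

cfg = configparser.ConfigParser()
cfg.read('configs/docker/config.cfg')   # hypothetical location of the file added here

activate_crawler = cfg.get("Crawler", "activate_crawler")            # 'False'
crawler_depth_limit = cfg.getint("Crawler", "crawler_depth_limit")   # 1

r_onion = redis.StrictRedis(
    host=cfg.get("ARDB_Onion", "host"),
    port=cfg.getint("ARDB_Onion", "port"),
    db=cfg.getint("ARDB_Onion", "db"),
    decode_responses=True)
```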

View file

@ -68,6 +68,9 @@ minTopPassList=5
[Curve] [Curve]
max_execution_time = 90 max_execution_time = 90
[Onion]
max_execution_time = 180
[Base64] [Base64]
path = Base64/ path = Base64/
max_execution_time = 60 max_execution_time = 60

View file

@ -10,10 +10,12 @@ import datetime
import base64 import base64
import redis import redis
import json import json
import time
from scrapy.spidermiddlewares.httperror import HttpError from scrapy.spidermiddlewares.httperror import HttpError
from twisted.internet.error import DNSLookupError from twisted.internet.error import DNSLookupError
from twisted.internet.error import TimeoutError from twisted.internet.error import TimeoutError
from twisted.web._newclient import ResponseNeverReceived
from scrapy import Spider from scrapy import Spider
from scrapy.linkextractors import LinkExtractor from scrapy.linkextractors import LinkExtractor
@ -39,6 +41,8 @@ class TorSplashCrawler():
'SPIDER_MIDDLEWARES': {'scrapy_splash.SplashDeduplicateArgsMiddleware': 100,}, 'SPIDER_MIDDLEWARES': {'scrapy_splash.SplashDeduplicateArgsMiddleware': 100,},
'DUPEFILTER_CLASS': 'scrapy_splash.SplashAwareDupeFilter', 'DUPEFILTER_CLASS': 'scrapy_splash.SplashAwareDupeFilter',
'HTTPERROR_ALLOW_ALL': True, 'HTTPERROR_ALLOW_ALL': True,
'RETRY_TIMES': 2,
'CLOSESPIDER_PAGECOUNT': 50,
'DEPTH_LIMIT': crawler_depth_limit 'DEPTH_LIMIT': crawler_depth_limit
}) })
@ -97,7 +101,7 @@ class TorSplashCrawler():
yield SplashRequest( yield SplashRequest(
self.start_urls, self.start_urls,
self.parse, self.parse,
#errback=self.errback_catcher, errback=self.errback_catcher,
endpoint='render.json', endpoint='render.json',
meta={'father': self.original_paste}, meta={'father': self.original_paste},
args={ 'html': 1, args={ 'html': 1,
@ -122,6 +126,10 @@ class TorSplashCrawler():
print('Connection to proxy refused') print('Connection to proxy refused')
else: else:
#avoid filename too big
if len(self.domains[0]) > 215:
UUID = self.domains[0][-215:]+str(uuid.uuid4())
else:
UUID = self.domains[0]+str(uuid.uuid4()) UUID = self.domains[0]+str(uuid.uuid4())
filename_paste = os.path.join(self.crawled_paste_filemame, UUID) filename_paste = os.path.join(self.crawled_paste_filemame, UUID)
relative_filename_paste = os.path.join(self.crawler_path, UUID) relative_filename_paste = os.path.join(self.crawler_path, UUID)
@ -174,7 +182,7 @@ class TorSplashCrawler():
yield SplashRequest( yield SplashRequest(
link.url, link.url,
self.parse, self.parse,
#errback=self.errback_catcher, errback=self.errback_catcher,
endpoint='render.json', endpoint='render.json',
meta={'father': relative_filename_paste}, meta={'father': relative_filename_paste},
args={ 'html': 1, args={ 'html': 1,
@ -184,17 +192,39 @@ class TorSplashCrawler():
'wait': 10} 'wait': 10}
) )
'''
def errback_catcher(self, failure): def errback_catcher(self, failure):
# catch all errback failures, # catch all errback failures,
self.logger.error(repr(failure)) self.logger.error(repr(failure))
if failure.check(ResponseNeverReceived):
request = failure.request
url = request.meta['splash']['args']['url']
father = request.meta['father']
self.logger.error('Splash, ResponseNeverReceived for %s, retry in 10s ...', url)
time.sleep(10)
yield SplashRequest(
url,
self.parse,
errback=self.errback_catcher,
endpoint='render.json',
meta={'father': father},
args={ 'html': 1,
'png': 1,
'render_all': 1,
'har': 1,
'wait': 10}
)
else:
print('failure') print('failure')
#print(failure) #print(failure)
print(failure.type) print(failure.type)
#print(failure.request.meta['item']) #print(failure.request.meta['item'])
'''
#if isinstance(failure.value, HttpError): #if isinstance(failure.value, HttpError):
if failure.check(HttpError): elif failure.check(HttpError):
# you can get the response # you can get the response
response = failure.value.response response = failure.value.response
print('HttpError') print('HttpError')

View file

@ -3,3 +3,5 @@ facebookcorewwwi.onion
graylady3jvrrxbe.onion graylady3jvrrxbe.onion
expyuzz4wqqyqhjn.onion expyuzz4wqqyqhjn.onion
dccbbv6cooddgcrq.onion dccbbv6cooddgcrq.onion
pugljpwjhbiagkrn.onion
jld3zkuo4b5mbios.onion

View file

@ -5,12 +5,15 @@ usage() { echo "Usage: sudo $0 [-f <config_absolute_path>] [-p <port_start>] [-n
echo " -p: number of the first splash server port number. This number is incremented for the others splash server"; echo " -p: number of the first splash server port number. This number is incremented for the others splash server";
echo " -n: number of splash servers to start"; echo " -n: number of splash servers to start";
echo ""; echo "";
echo " -options:";
echo " -u: max unbound in-memory cache (Mb, Restart Splash when full, default=3000 Mb)";
echo "";
echo "example:"; echo "example:";
echo "sudo ./launch_splash_crawler.sh -f /home/my_user/AIL-framework/configs/docker/splash_onion/etc/splash/proxy-profiles/ -p 8050 -n 3"; echo "sudo ./launch_splash_crawler.sh -f /home/my_user/AIL-framework/configs/docker/splash_onion/etc/splash/proxy-profiles/ -p 8050 -n 3";
exit 1; exit 1;
} }
while getopts ":p:f:n:" o; do while getopts ":p:f:n:u:" o; do
case "${o}" in case "${o}" in
p) p)
p=${OPTARG} p=${OPTARG}
@ -21,6 +24,9 @@ while getopts ":p:f:n:" o; do
n) n)
n=${OPTARG} n=${OPTARG}
;; ;;
u)
u=${OPTARG}
;;
*) *)
usage usage
;; ;;
@ -28,6 +34,10 @@ while getopts ":p:f:n:" o; do
done done
shift $((OPTIND-1)) shift $((OPTIND-1))
if [ -z "${u}" ]; then
u=3000;
fi
if [ -z "${p}" ] || [ -z "${f}" ] || [ -z "${n}" ]; then if [ -z "${p}" ] || [ -z "${f}" ] || [ -z "${n}" ]; then
usage; usage;
fi fi
@ -52,7 +62,7 @@ sleep 0.1
for ((i=0;i<=$((${n} - 1));i++)); do for ((i=0;i<=$((${n} - 1));i++)); do
port_number=$((${p} + $i)) port_number=$((${p} + $i))
screen -S "Docker_Splash" -X screen -t "docker_splash:$port_number" bash -c 'sudo docker run -p '$port_number':8050 --cpus=1 --memory=4.5G -v '$f':/etc/splash/proxy-profiles/ --net="bridge" scrapinghub/splash; read x' screen -S "Docker_Splash" -X screen -t "docker_splash:$port_number" bash -c 'sudo docker run -d -p '$port_number':8050 --restart=always --cpus=1 --memory=4.5G -v '$f':/etc/splash/proxy-profiles/ --net="bridge" scrapinghub/splash --maxrss '$u'; read x'
sleep 0.1 sleep 0.1
printf "$GREEN Splash server launched on port $port_number$DEFAULT\n" printf "$GREEN Splash server launched on port $port_number$DEFAULT\n"
done done

View file

@ -61,7 +61,7 @@ tcp-backlog 511
# Examples: # Examples:
# #
# bind 192.168.1.100 10.0.0.1 # bind 192.168.1.100 10.0.0.1
# bind 127.0.0.1 bind 127.0.0.1
# Specify the path for the Unix socket that will be used to listen for # Specify the path for the Unix socket that will be used to listen for
# incoming connections. There is no default, so Redis will not listen # incoming connections. There is no default, so Redis will not listen

View file

@ -61,7 +61,7 @@ tcp-backlog 511
# Examples: # Examples:
# #
# bind 192.168.1.100 10.0.0.1 # bind 192.168.1.100 10.0.0.1
# bind 127.0.0.1 bind 127.0.0.1
# Specify the path for the Unix socket that will be used to listen for # Specify the path for the Unix socket that will be used to listen for
# incoming connections. There is no default, so Redis will not listen # incoming connections. There is no default, so Redis will not listen

View file

@ -61,7 +61,7 @@ tcp-backlog 511
# Examples: # Examples:
# #
# bind 192.168.1.100 10.0.0.1 # bind 192.168.1.100 10.0.0.1
# bind 127.0.0.1 bind 127.0.0.1
# Specify the path for the Unix socket that will be used to listen for # Specify the path for the Unix socket that will be used to listen for
# incoming connections. There is no default, so Redis will not listen # incoming connections. There is no default, so Redis will not listen

Binary file not shown.

523
docker-compose.yml Normal file
View file

@ -0,0 +1,523 @@
version: '3'
services:
ardb:
entrypoint:
- ardb-server
- /opt/AIL/configs/6382.conf
healthcheck:
test: ["CMD", "redis-cli", "-p", "6382", "ping"]
interval: 30s
timeout: 10s
retries: 5
network_mode: service:flask
image: ail-framework
volumes:
- ./configs:/opt/AIL/configs:ro
crawler:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Crawler.py
- onion
- "8050"
network_mode: service:flask
image: ail-framework
volumes:
- ./CRAWLED_SCREENSHOT/:/opt/AIL/CRAWLED_SCREENSHOT
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
flask:
build: .
entrypoint:
- /opt/AIL/var/www/Flask_server.py
ports:
- "7000:7000"
image: ail-framework
volumes:
- ./CRAWLED_SCREENSHOT/:/opt/AIL/CRAWLED_SCREENSHOT
- ./PASTES/:/opt/AIL/PASTES
- ./indexdir:/opt/AIL/indexdir
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/var/www
log-queue:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/AILENV/bin/log_subscriber
- -p
- "6380"
- -c
- Queing
- -l
- /opt/AIL/logs/
network_mode: service:flask
image: ail-framework
volumes:
- ./configs:/opt/AIL/configs:ro
log-script:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/AILENV/bin/log_subscriber
- -p
- "6380"
- -c
- Script
- -l
- /opt/AIL/logs/
network_mode: service:flask
image: ail-framework
volumes:
- ./configs:/opt/AIL/configs:ro
pystemon:
depends_on:
- redis-log
entrypoint:
- /opt/pystemon/pystemon.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./pystemon/archives:/opt/pystemon/archive
- ./pystemon/proxies.txt:/opt/pystemon/proxies.txt:ro
- ./pystemon/pystemon.yaml:/opt/pystemon/pystemon.yaml:ro
working_dir: /opt/pystemon
pystemon-feeder:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/feeder/pystemon-feeder.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./pystemon/archives:/opt/pystemon/archive
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
queues:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/launch_queues.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
redis-cache:
entrypoint:
- redis-server
- /opt/AIL/configs/6379.conf
healthcheck:
test: ["CMD", "redis-cli", "-p", "6379", "ping"]
interval: 30s
timeout: 10s
retries: 5
image: ail-framework
network_mode: service:flask
volumes:
- ./configs:/opt/AIL/configs:ro
redis-log:
entrypoint:
- redis-server
- /opt/AIL/configs/6380.conf
healthcheck:
test: ["CMD", "redis-cli", "-p", "6380", "ping"]
interval: 30s
timeout: 10s
retries: 5
network_mode: service:flask
image: ail-framework
volumes:
- ./configs:/opt/AIL/configs:ro
redis-mixer-cache:
entrypoint:
- redis-server
- /opt/AIL/configs/6381.conf
healthcheck:
test: ["CMD", "redis-cli", "-p", "6381", "ping"]
interval: 30s
timeout: 10s
retries: 5
image: ail-framework
network_mode: service:flask
volumes:
- ./configs:/opt/AIL/configs:ro
script-alerthandler:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/alertHandler.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-apikey:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/ApiKey.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-bankaccount:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/BankAccount.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-bitcoin:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Bitcoin.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-categ:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Categ.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-credential:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Credential.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-creditcards:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/CreditCards.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-curve:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Curve.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-curvemanagetopsets:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/CurveManageTopSets.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-cve:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Cve.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-decoder:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Decoder.py
network_mode: service:flask
image: ail-framework
volumes:
- ./HASHS:/opt/AIL/HASHS
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-domclassifier:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/DomClassifier.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-duplicates:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Duplicates.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-global:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Global.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-indexer:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Indexer.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./indexdir:/opt/AIL/indexdir
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-keys:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Keys.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-libinjection:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/LibInjection.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-lines:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Lines.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-mail:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Mail.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-mispthehivefeeder:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/MISP_The_Hive_feeder.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-mixer:
depends_on:
- redis-mixer-cache
entrypoint:
- /opt/AIL/bin/Mixer.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-modulestats:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/ModuleStats.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-onion:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Onion.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-phone:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Phone.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-regexfortermsfrequency:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/RegexForTermsFrequency.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-release:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Release.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-sentimentanalysis:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/SentimentAnalysis.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-setfortermsfrequency:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/SetForTermsFrequency.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-sqlinjectiondetection:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/SQLInjectionDetection.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-submitpaste:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/submit_paste.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-tags:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Tags.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-tokenize:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Tokenize.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-web:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Web.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-webstats:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/WebStats.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin

View file

@ -95,6 +95,7 @@ popd
mkdir -p $AIL_HOME/PASTES mkdir -p $AIL_HOME/PASTES
pip3 install -U pip pip3 install -U pip
pip3 install 'git+https://github.com/D4-project/BGP-Ranking.git/@7e698f87366e6f99b4d0d11852737db28e3ddc62#egg=pybgpranking&subdirectory=client'
pip3 install -U -r pip3_packages_requirement.txt pip3 install -U -r pip3_packages_requirement.txt
# Pyfaup # Pyfaup

1
pystemon/proxies.txt Normal file
View file

@ -0,0 +1 @@
http://127.0.0.1:8080

View file

@ -30,6 +30,12 @@ r_serv = redis.StrictRedis(
db=cfg.getint("Redis_Queues", "db"), db=cfg.getint("Redis_Queues", "db"),
decode_responses=True) decode_responses=True)
r_cache = redis.StrictRedis(
host=cfg.get("Redis_Cache", "host"),
port=cfg.getint("Redis_Cache", "port"),
db=cfg.getint("Redis_Cache", "db"),
decode_responses=True)
r_serv_log = redis.StrictRedis( r_serv_log = redis.StrictRedis(
host=cfg.get("Redis_Log", "host"), host=cfg.get("Redis_Log", "host"),
port=cfg.getint("Redis_Log", "port"), port=cfg.getint("Redis_Log", "port"),

View file

@ -101,7 +101,8 @@ def all_hash_search():
date_to = request.form.get('date_to') date_to = request.form.get('date_to')
type = request.form.get('type') type = request.form.get('type')
encoding = request.form.get('encoding') encoding = request.form.get('encoding')
return redirect(url_for('hashDecoded.hashDecoded_page', date_from=date_from, date_to=date_to, type=type, encoding=encoding)) show_decoded_files = request.form.get('show_decoded_files')
return redirect(url_for('hashDecoded.hashDecoded_page', date_from=date_from, date_to=date_to, type=type, encoding=encoding, show_decoded_files=show_decoded_files))
@hashDecoded.route("/hashDecoded/", methods=['GET']) @hashDecoded.route("/hashDecoded/", methods=['GET'])
def hashDecoded_page(): def hashDecoded_page():
@ -109,6 +110,7 @@ def hashDecoded_page():
date_to = request.args.get('date_to') date_to = request.args.get('date_to')
type = request.args.get('type') type = request.args.get('type')
encoding = request.args.get('encoding') encoding = request.args.get('encoding')
show_decoded_files = request.args.get('show_decoded_files')
if type == 'All types': if type == 'All types':
type = None type = None
@ -161,6 +163,8 @@ def hashDecoded_page():
daily_date = None daily_date = None
l_64 = set() l_64 = set()
if show_decoded_files:
show_decoded_files = True
for date in date_range: for date in date_range:
if encoding is None: if encoding is None:
l_hash = r_serv_metadata.zrange('hash_date:' +date, 0, -1) l_hash = r_serv_metadata.zrange('hash_date:' +date, 0, -1)
@ -214,7 +218,7 @@ def hashDecoded_page():
l_type = r_serv_metadata.smembers('hash_all_type') l_type = r_serv_metadata.smembers('hash_all_type')
return render_template("hashDecoded.html", l_64=b64_metadata, vt_enabled=vt_enabled, l_type=l_type, type=type, daily_type_chart=daily_type_chart, daily_date=daily_date, return render_template("hashDecoded.html", l_64=b64_metadata, vt_enabled=vt_enabled, l_type=l_type, type=type, daily_type_chart=daily_type_chart, daily_date=daily_date,
encoding=encoding, all_encoding=all_encoding, date_from=date_from, date_to=date_to) encoding=encoding, all_encoding=all_encoding, date_from=date_from, date_to=date_to, show_decoded_files=show_decoded_files)
@hashDecoded.route('/hashDecoded/hash_by_type') @hashDecoded.route('/hashDecoded/hash_by_type')
def hash_by_type(): def hash_by_type():
@ -400,6 +404,63 @@ def decoder_type_json():
to_json.append({'name': decoder, 'value': nb_decoded[decoder]}) to_json.append({'name': decoder, 'value': nb_decoded[decoder]})
return jsonify(to_json) return jsonify(to_json)
@hashDecoded.route('/hashDecoded/top5_type_json')
def top5_type_json():
date_from = request.args.get('date_from')
date_to = request.args.get('date_to')
typ = request.args.get('type')
decoder = request.args.get('encoding')
if decoder == 'All encoding' or decoder is None:
all_decoder = r_serv_metadata.smembers('all_decoder')
else:
if not r_serv_metadata.sismember('all_decoder', decoder):
return jsonify({'Error': 'This decoder do not exist'})
else:
all_decoder = [decoder]
if typ == 'All types' or typ is None or typ=='None':
all_type = r_serv_metadata.smembers('hash_all_type')
else:
typ = typ.replace(' ', '+')
if not r_serv_metadata.sismember('hash_all_type', typ):
return jsonify({'Error': 'This type do not exist'})
else:
all_type = [typ]
date_range = []
if date_from is not None and date_to is not None:
#change format
try:
if len(date_from) != 8:
date_from = date_from[0:4] + date_from[5:7] + date_from[8:10]
date_to = date_to[0:4] + date_to[5:7] + date_to[8:10]
date_range = substract_date(date_from, date_to)
except:
pass
if not date_range:
date_range.append(datetime.date.today().strftime("%Y%m%d"))
# TODO replace with ZUNIONSTORE
nb_types_decoded = {}
for date in date_range:
for typ in all_type:
for decoder in all_decoder:
nb_decoded = r_serv_metadata.zscore('{}_type:{}'.format(decoder, typ), date)
if nb_decoded is not None:
if typ in nb_types_decoded:
nb_types_decoded[typ] = nb_types_decoded[typ] + int(nb_decoded)
else:
nb_types_decoded[typ] = int(nb_decoded)
to_json = []
top5_types = sorted(nb_types_decoded, key=nb_types_decoded.get, reverse=True)[:5]
for typ in top5_types:
to_json.append({'name': typ, 'value': nb_types_decoded[typ]})
return jsonify(to_json)
@hashDecoded.route('/hashDecoded/daily_type_json') @hashDecoded.route('/hashDecoded/daily_type_json')
def daily_type_json(): def daily_type_json():
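The new `top5_type_json` endpoint sums the per-day ZSET scores for every decoder/type pair over the requested date range and returns the five largest totals. Stripped of the Flask and Redis plumbing, the aggregation step looks like this (counts are made up for illustration):

```
# Hypothetical totals accumulated from r_serv_metadata.zscore('<decoder>_type:<type>', date)
nb_types_decoded = {
    'application/zip': 12,
    'image/png': 48,
    'application/pdf': 7,
    'text/html': 30,
    'application/x-dosexec': 95,
    'audio/mpeg': 3,
}

top5_types = sorted(nb_types_decoded, key=nb_types_decoded.get, reverse=True)[:5]
to_json = [{'name': typ, 'value': nb_types_decoded[typ]} for typ in top5_types]
print(to_json)   # 'application/x-dosexec' first; 'audio/mpeg' is dropped
```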

View file

@ -121,7 +121,14 @@
{% endif %} {% endif %}
{% endfor %} {% endfor %}
</select> </select>
<br> <div class="checkbox">
<label>
<input type="checkbox" name="show_decoded_files" value="True" {% if show_decoded_files %}checked{% endif %}>
<div style="color:#286090; display:inline-block">
Show decoded files <i class="fa fa-file"></i>
</div>
</label>
</div>
<button class="btn btn-primary" style="text-align:center;"> <button class="btn btn-primary" style="text-align:center;">
<i class="fa fa-files-o"></i> Search <i class="fa fa-files-o"></i> Search
</button> </button>
@ -129,6 +136,8 @@
</div> </div>
</div> </div>
<div id="pie_chart_encoded"> <div id="pie_chart_encoded">
</div>
<div id="pie_chart_top5_types">
</div> </div>
</div> </div>
</div> </div>
@ -189,12 +198,14 @@
</tbody> </tbody>
</table> </table>
{% else %} {% else %}
{% if show_decoded_files %}
{% if date_from|string == date_to|string %} {% if date_from|string == date_to|string %}
<h3> {{ date_from }}, No Hashes</h3> <h3> {{ date_from }}, No Hashes</h3>
{% else %} {% else %}
<h3> {{ date_from }} to {{ date_to }}, No Hashes</h3> <h3> {{ date_from }} to {{ date_to }}, No Hashes</h3>
{% endif %} {% endif %}
{% endif %} {% endif %}
{% endif %}
</div> </div>
</div> </div>
@ -248,9 +259,12 @@
{% elif daily_type_chart %} {% elif daily_type_chart %}
chart.stackBarChart =barchart_type_stack("{{ url_for('hashDecoded.range_type_json') }}?date_from={{daily_date}}&date_to={{daily_date}}", 'id'); chart.stackBarChart =barchart_type_stack("{{ url_for('hashDecoded.range_type_json') }}?date_from={{daily_date}}&date_to={{daily_date}}", 'id');
{% else %} {% else %}
chart.stackBarChart = barchart_type_stack("{{ url_for('hashDecoded.range_type_json') }}?date_from={{date_from}}&date_to={{date_to}}", 'id') chart.stackBarChart = barchart_type_stack("{{ url_for('hashDecoded.range_type_json') }}?date_from={{date_from}}&date_to={{date_to}}", 'id');
{% endif %} {% endif %}
draw_pie_chart("pie_chart_encoded" ,"{{ url_for('hashDecoded.decoder_type_json') }}?date_from={{date_from}}&date_to={{date_to}}&type={{type}}", "{{ url_for('hashDecoded.hashDecoded_page') }}?date_from={{date_from}}&date_to={{date_to}}&type={{type}}&encoding=");
draw_pie_chart("pie_chart_top5_types" ,"{{ url_for('hashDecoded.top5_type_json') }}?date_from={{date_from}}&date_to={{date_to}}&type={{type}}", "{{ url_for('hashDecoded.hashDecoded_page') }}?date_from={{date_from}}&date_to={{date_to}}&type=");
chart.onResize(); chart.onResize();
$(window).on("resize", function() { $(window).on("resize", function() {
chart.onResize(); chart.onResize();
@ -498,6 +512,7 @@ window.chart = chart;
</script> </script>
<script> <script>
function draw_pie_chart(id, url_json, pie_on_click_url) {
var width_pie = 200;
var height_pie = 200;
@@ -512,7 +527,7 @@ var div_pie = d3.select("body").append("div")
.attr("class", "tooltip")
.style("opacity", 0);
var svg_pie = d3.select("#pie_chart_encoded")
var svg_pie = d3.select("#"+id)
.append('svg')
.attr("width", '100%')
.attr("height", '100%')
@@ -527,7 +542,7 @@ var arc_pie = d3.arc()
.innerRadius(0)
.outerRadius(radius_pie);
d3.json("{{ url_for('hashDecoded.decoder_type_json') }}?date_from={{date_from}}&date_to={{date_to}}&type={{type}}")
d3.json(url_json)
.then(function(data){
var pie_pie = d3.pie()
@@ -544,11 +559,12 @@ d3.json("{{ url_for('hashDecoded.decoder_type_json') }}?date_from={{date_from}}&
.attr('class', 'pie_path')
.on("mouseover", mouseovered_pie)
.on("mouseout", mouseouted_pie)
.on("click", function (d) {window.location.href = "{{ url_for('hashDecoded.hashDecoded_page') }}?date_from={{date_from}}&date_to={{date_to}}&type={{type}}&encoding="+d.data.name })
.on("click", function (d) {window.location.href = pie_on_click_url+d.data.name })
.style('opacity', opacity_pie)
.style('stroke', 'white');
});
function mouseovered_pie(d) {
// tooltip
@@ -571,6 +587,8 @@ function mouseouted_pie() {
.duration(500)
.style("opacity", 0);
}
}
</script>
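`draw_pie_chart(id, url_json, pie_on_click_url)` is now generic: it only assumes the JSON endpoint returns a list of `{'name': ..., 'value': ...}` objects, which is what both `decoder_type_json` and the new `top5_type_json` produce. A minimal sketch of such an endpoint (the route name and values are made up, purely for illustration):

```python
from flask import Flask, jsonify

app = Flask(__name__)

@app.route('/hashDecoded/example_pie_json')
def example_pie_json():
    # Any endpoint feeding draw_pie_chart() must return this shape.
    return jsonify([
        {'name': 'application/zip', 'value': 42},
        {'name': 'image/png', 'value': 17},
    ])
```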

View file

@@ -19,6 +19,7 @@ import Flask_config
app = Flask_config.app
cfg = Flask_config.cfg
baseUrl = Flask_config.baseUrl
r_cache = Flask_config.r_cache
r_serv_onion = Flask_config.r_serv_onion
r_serv_metadata = Flask_config.r_serv_metadata
bootstrap_label = Flask_config.bootstrap_label
@@ -90,7 +91,11 @@ def hiddenServices_page():
metadata_onion = {}
metadata_onion['domain'] = onion
metadata_onion['last_check'] = r_serv_onion.hget('onion_metadata:{}'.format(onion), 'last_check')
if metadata_onion['last_check'] is None:
metadata_onion['last_check'] = '********'
metadata_onion['first_seen'] = r_serv_onion.hget('onion_metadata:{}'.format(onion), 'first_seen')
if metadata_onion['first_seen'] is None:
metadata_onion['first_seen'] = '********'
if get_onion_status(onion, metadata_onion['last_check']):
metadata_onion['status_text'] = 'UP'
metadata_onion['status_color'] = 'Green'
@@ -101,7 +106,71 @@
metadata_onion['status_icon'] = 'fa-times-circle'
list_onion.append(metadata_onion)
return render_template("hiddenServices.html", last_onions=list_onion, statDomains=statDomains)
crawler_metadata=[]
all_onion_crawler = r_cache.smembers('all_crawler:onion')
for crawler in all_onion_crawler:
crawling_domain = r_cache.hget('metadata_crawler:{}'.format(crawler), 'crawling_domain')
started_time = r_cache.hget('metadata_crawler:{}'.format(crawler), 'started_time')
status_info = r_cache.hget('metadata_crawler:{}'.format(crawler), 'status')
crawler_info = '{} - {}'.format(crawler, started_time)
if status_info=='Waiting' or status_info=='Crawling':
status=True
else:
status=False
crawler_metadata.append({'crawler_info': crawler_info, 'crawling_domain': crawling_domain, 'status_info': status_info, 'status': status})
date_string = '{}-{}-{}'.format(date[0:4], date[4:6], date[6:8])
return render_template("hiddenServices.html", last_onions=list_onion, statDomains=statDomains,
crawler_metadata=crawler_metadata, date_from=date_string, date_to=date_string)
@hiddenServices.route("/hiddenServices/last_crawled_domains_with_stats_json", methods=['GET'])
def last_crawled_domains_with_stats_json():
last_onions = r_serv_onion.lrange('last_onion', 0 ,-1)
list_onion = []
now = datetime.datetime.now()
date = '{}{}{}'.format(now.strftime("%Y"), now.strftime("%m"), now.strftime("%d"))
statDomains = {}
statDomains['domains_up'] = r_serv_onion.scard('onion_up:{}'.format(date))
statDomains['domains_down'] = r_serv_onion.scard('onion_down:{}'.format(date))
statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down']
statDomains['domains_queue'] = r_serv_onion.scard('onion_domain_crawler_queue')
for onion in last_onions:
metadata_onion = {}
metadata_onion['domain'] = onion
metadata_onion['last_check'] = r_serv_onion.hget('onion_metadata:{}'.format(onion), 'last_check')
if metadata_onion['last_check'] is None:
metadata_onion['last_check'] = '********'
metadata_onion['first_seen'] = r_serv_onion.hget('onion_metadata:{}'.format(onion), 'first_seen')
if metadata_onion['first_seen'] is None:
metadata_onion['first_seen'] = '********'
if get_onion_status(onion, metadata_onion['last_check']):
metadata_onion['status_text'] = 'UP'
metadata_onion['status_color'] = 'Green'
metadata_onion['status_icon'] = 'fa-check-circle'
else:
metadata_onion['status_text'] = 'DOWN'
metadata_onion['status_color'] = 'Red'
metadata_onion['status_icon'] = 'fa-times-circle'
list_onion.append(metadata_onion)
crawler_metadata=[]
all_onion_crawler = r_cache.smembers('all_crawler:onion')
for crawler in all_onion_crawler:
crawling_domain = r_cache.hget('metadata_crawler:{}'.format(crawler), 'crawling_domain')
started_time = r_cache.hget('metadata_crawler:{}'.format(crawler), 'started_time')
status_info = r_cache.hget('metadata_crawler:{}'.format(crawler), 'status')
crawler_info = '{} - {}'.format(crawler, started_time)
if status_info=='Waiting' or status_info=='Crawling':
status=True
else:
status=False
crawler_metadata.append({'crawler_info': crawler_info, 'crawling_domain': crawling_domain, 'status_info': status_info, 'status': status})
date_string = '{}-{}-{}'.format(date[0:4], date[4:6], date[6:8])
return jsonify({'last_onions': list_onion, 'statDomains': statDomains, 'crawler_metadata':crawler_metadata})
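Both `hiddenServices_page` and the new `last_crawled_domains_with_stats_json` build `crawler_metadata` the same way; a sketch of that shared logic pulled into one helper (the helper itself is hypothetical, but the `all_crawler:onion` set and `metadata_crawler:<id>` hashes are the keys used above):

```python
import redis

# Illustrative cache connection; AIL resolves the real one from Flask_config.
r_cache = redis.StrictRedis(host='localhost', port=6379, db=0, decode_responses=True)

def get_crawler_metadata():
    """One entry per registered onion crawler, flagged up while Waiting or Crawling."""
    crawler_metadata = []
    for crawler in r_cache.smembers('all_crawler:onion'):
        meta_key = 'metadata_crawler:{}'.format(crawler)
        started_time = r_cache.hget(meta_key, 'started_time')
        status_info = r_cache.hget(meta_key, 'status')
        crawler_metadata.append({
            'crawler_info': '{} - {}'.format(crawler, started_time),
            'crawling_domain': r_cache.hget(meta_key, 'crawling_domain'),
            'status_info': status_info,
            'status': status_info in ('Waiting', 'Crawling'),
        })
    return crawler_metadata
```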
@hiddenServices.route("/hiddenServices/get_onions_by_daterange", methods=['POST']) @hiddenServices.route("/hiddenServices/get_onions_by_daterange", methods=['POST'])
def get_onions_by_daterange(): def get_onions_by_daterange():
@ -199,8 +268,12 @@ def onion_domain():
# # TODO: FIXME return 404 # # TODO: FIXME return 404
last_check = r_serv_onion.hget('onion_metadata:{}'.format(onion_domain), 'last_check') last_check = r_serv_onion.hget('onion_metadata:{}'.format(onion_domain), 'last_check')
if last_check is None:
last_check = '********'
last_check = '{}/{}/{}'.format(last_check[0:4], last_check[4:6], last_check[6:8])
first_seen = r_serv_onion.hget('onion_metadata:{}'.format(onion_domain), 'first_seen')
if first_seen is None:
first_seen = '********'
first_seen = '{}/{}/{}'.format(first_seen[0:4], first_seen[4:6], first_seen[6:8])
origin_paste = r_serv_onion.hget('onion_metadata:{}'.format(onion_domain), 'paste_parent')
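The `'********'` placeholder keeps the `'{}/{}/{}'` slicing safe when a domain has no recorded `first_seen` or `last_check`; as a small sketch (hypothetical helper name):

```python
def format_date_or_placeholder(yyyymmdd):
    # hget() returns None when the field was never set; substituting
    # '********' keeps the 8-character slicing valid and renders as an
    # obvious "unknown" date in the page.
    if yyyymmdd is None:
        yyyymmdd = '********'
    return '{}/{}/{}'.format(yyyymmdd[0:4], yyyymmdd[4:6], yyyymmdd[6:8])
```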

View file

@@ -66,7 +66,7 @@
<th>Status</th>
</tr>
</thead>
<tbody>
<tbody id="tbody_last_crawled">
{% for metadata_onion in last_onions %}
<tr>
<td><a target="_blank" href="{{ url_for('hiddenServices.onion_domain') }}?onion_domain={{ metadata_onion['domain'] }}">{{ metadata_onion['domain'] }}</a></td>
@@ -142,7 +142,6 @@
</div>
</div>
<div class="panel panel-info">
<div class="panel-heading">
<i class="fa fa-eye-slash"></i> Domains Crawled Today
@@ -152,41 +151,81 @@
<tbody>
<tr>
<td>
<a target="_blank" href="{{ url_for('hiddenServices.show_domains_by_daterange') }}?date_from={{ date_from }}&domains_up=True">
<div style="color:Green; display:inline-block"> <div style="color:Green; display:inline-block">
<i class="fa fa-check-circle fa-2x"></i> <i class="fa fa-check-circle fa-2x"></i>
Domains UP Domains UP
</div> </div>
</a>
</td>
<td>
<a target="_blank" href="{{ url_for('hiddenServices.show_domains_by_daterange') }}?date_from={{ date_from }}&domains_up=True">
<div style="color:Green; display:inline-block"> <div style="color:Green; display:inline-block">
{{ statDomains['domains_up'] }} <div id="text_domain_up">{{ statDomains['domains_up'] }}</div>
</div> </div>
</a>
</td>
</tr>
<tr>
<td>
<a target="_blank" href="{{ url_for('hiddenServices.show_domains_by_daterange') }}?date_from={{ date_from }}&domains_down=True">
<div style="color:Red; display:inline-block"> <div style="color:Red; display:inline-block">
<i class="fa fa-times-circle fa-2x"></i> <i class="fa fa-times-circle fa-2x"></i>
Domains DOWN Domains DOWN
</div> </div>
</a>
</td>
<td>
<a target="_blank" href="{{ url_for('hiddenServices.show_domains_by_daterange') }}?date_from={{ date_from }}&domains_down=True">
<div style="color:Red; display:inline-block"> <div style="color:Red; display:inline-block">
{{ statDomains['domains_down'] }} <div id="text_domain_down">{{ statDomains['domains_down'] }}</div>
</div> </div>
</a>
</td>
</tr>
<tr>
<td>Crawled Domains</td>
<td>
<td>{{ statDomains['total'] }}</td>
<a target="_blank" href="{{ url_for('hiddenServices.show_domains_by_daterange') }}?date_from={{ date_from }}&domains_up=True&domains_down=True">
Crawled Domains
</a>
</td>
<td>
<a target="_blank" href="{{ url_for('hiddenServices.show_domains_by_daterange') }}?date_from={{ date_from }}&domains_up=True&domains_down=True">
<div id="text_total_domains">{{ statDomains['total'] }}</div>
</a>
</td>
</tr>
<tr>
<td>Domains in Queue</td>
<td>{{ statDomains['domains_queue'] }}</td>
<td><div id="text_domain_queue">{{ statDomains['domains_queue'] }}</div></td>
</tr>
</tbody>
</table>
</div>
<div class="panel panel-info" {%if not crawler_metadata%}hidden{%endif%} id="panel_crawler">
<div class="panel-heading">
Crawlers Status
</div>
<table class="table table-hover table-striped">
<tbody id="tbody_crawler_info">
{% for crawler in crawler_metadata %}
<tr>
<td>
<i class="fa fa-{%if crawler['status']%}check{%else%}times{%endif%}-circle fa-2x" style="color:{%if crawler['status']%}Green{%else%}Red{%endif%};"></i> {{crawler['crawler_info']}}
</td>
<td>
{{crawler['crawling_domain']}}
</td>
<td style="color:{%if crawler['status']%}Green{%else%}Red{%endif%};">
{{crawler['status_info']}}
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
</div>
@@ -196,6 +235,7 @@
<script>
var all_graph = {};
var to_refresh = false
$(document).ready(function(){
activePage = "page-hiddenServices"
$("#"+activePage).addClass("active");
@@ -232,10 +272,22 @@
});
all_graph.line_chart = create_line_chart('graph_line', "{{ url_for('hiddenServices.domain_crawled_7days_json') }}?type=onion");
$( window ).focus(function() {
to_refresh = true
refresh_list_crawled();
});
$( window ).blur(function() {
to_refresh = false
});
to_refresh = true
refresh_list_crawled();
});
$(window).on("resize", function() {
all_graph.onResize();
});
</script>
<script>
@@ -319,6 +371,80 @@ d3.json(url)
});
});
}
</script>
<script>
function refresh_list_crawled(){
$.getJSON("{{ url_for('hiddenServices.last_crawled_domains_with_stats_json') }}",
function(data) {
var tableRef = document.getElementById('tbody_last_crawled');
$("#tbody_last_crawled").empty()
for (var i = 0; i < data.last_onions.length; i++) {
var data_domain = data.last_onions[i]
var newRow = tableRef.insertRow(tableRef.rows.length);
var newCell = newRow.insertCell(0);
newCell.innerHTML = "<td><a target=\"_blank\" href=\"{{ url_for('hiddenServices.onion_domain') }}?onion_domain="+data_domain['domain']+"\">"+data_domain['domain']+"</a></td>";
newCell = newRow.insertCell(1);
newCell.innerHTML = "<td>"+data_domain['first_seen'].substr(0, 4)+"/"+data_domain['first_seen'].substr(4, 2)+"/"+data_domain['first_seen'].substr(6, 2)+"</td>"
newCell = newRow.insertCell(2);
newCell.innerHTML = "<td>"+data_domain['last_check'].substr(0, 4)+"/"+data_domain['last_check'].substr(4, 2)+"/"+data_domain['last_check'].substr(6, 2)+"</td>"
newCell = newRow.insertCell(3);
newCell.innerHTML = "<td><div style=\"color:"+data_domain['status_color']+"; display:inline-block\"><i class=\"fa "+data_domain['status_icon']+" fa-2x\"></i>"+data_domain['status_text']+"</div></td>"
}
var statDomains = data.statDomains
document.getElementById('text_domain_up').innerHTML = statDomains['domains_up']
document.getElementById('text_domain_down').innerHTML = statDomains['domains_down']
document.getElementById('text_domain_queue').innerHTML = statDomains['domains_queue']
document.getElementById('text_total_domains').innerHTML = statDomains['total']
if(data.crawler_metadata.length!=0){
$("#tbody_crawler_info").empty();
var tableRef = document.getElementById('tbody_crawler_info');
for (var i = 0; i < data.crawler_metadata.length; i++) {
var crawler = data.crawler_metadata[i];
var newRow = tableRef.insertRow(tableRef.rows.length);
var text_color;
var icon;
if(crawler['status']){
text_color = 'Green';
icon = 'check';
} else {
text_color = 'Red';
icon = 'times';
}
var newCell = newRow.insertCell(0);
newCell.innerHTML = "<td><i class=\"fa fa-"+icon+"-circle fa-2x\" style=\"color:"+text_color+";\"></i>"+crawler['crawler_info']+"</td>";
newCell = newRow.insertCell(1);
newCell.innerHTML = "<td><a target=\"_blank\" href=\"{{ url_for('hiddenServices.onion_domain') }}?onion_domain="+crawler['crawling_domain']+"\">"+crawler['crawling_domain']+"</a></td>";
newCell = newRow.insertCell(2);
newCell.innerHTML = "<td><div style=\"color:"+text_color+";\">"+crawler['status_info']+"</div></td>";
$("#panel_crawler").show();
}
} else {
$("#panel_crawler").hide();
}
}
);
if (to_refresh) {
setTimeout("refresh_list_crawled()", 10000);
}
}
</script>
</body>
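The template polls `last_crawled_domains_with_stats_json` every 10 seconds while the tab has focus. The same endpoint can be consumed outside the browser; a hedged sketch, where the base URL and port are assumptions about the local Flask instance:

```python
import time
import requests

URL = 'http://127.0.0.1:7000/hiddenServices/last_crawled_domains_with_stats_json'

def poll_crawler_status(interval=10):
    # Same cadence as the page's refresh_list_crawled() loop.
    while True:
        data = requests.get(URL).json()
        stats = data['statDomains']
        print('up={} down={} queue={} crawlers={}'.format(
            stats['domains_up'], stats['domains_down'],
            stats['domains_queue'], len(data['crawler_metadata'])))
        time.sleep(interval)
```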

View file

@@ -60,10 +60,10 @@ wget https://cdn.datatables.net/plug-ins/1.10.7/integration/bootstrap/3/dataTabl
wget https://cdn.datatables.net/plug-ins/1.10.7/integration/bootstrap/3/dataTables.bootstrap.js -O ./static/js/dataTables.bootstrap.js
#Ressource for graph
wget https://raw.githubusercontent.com/flot/flot/master/jquery.flot.js -O ./static/js/jquery.flot.js
wget https://raw.githubusercontent.com/flot/flot/958e5fd43c6dff4bab3e1fd5cb6109df5c1e8003/jquery.flot.js -O ./static/js/jquery.flot.js
wget https://raw.githubusercontent.com/flot/flot/master/jquery.flot.pie.js -O ./static/js/jquery.flot.pie.js
wget https://raw.githubusercontent.com/flot/flot/958e5fd43c6dff4bab3e1fd5cb6109df5c1e8003/jquery.flot.pie.js -O ./static/js/jquery.flot.pie.js
wget https://raw.githubusercontent.com/flot/flot/master/jquery.flot.time.js -O ./static/js/jquery.flot.time.js
wget https://raw.githubusercontent.com/flot/flot/958e5fd43c6dff4bab3e1fd5cb6109df5c1e8003/jquery.flot.time.js -O ./static/js/jquery.flot.time.js
wget https://raw.githubusercontent.com/flot/flot/master/jquery.flot.stack.js -O ./static/js/jquery.flot.stack.js
wget https://raw.githubusercontent.com/flot/flot/958e5fd43c6dff4bab3e1fd5cb6109df5c1e8003/jquery.flot.stack.js -O ./static/js/jquery.flot.stack.js
#Ressources for sparkline and canvasJS and slider
wget http://omnipotent.net/jquery.sparkline/2.1.2/jquery.sparkline.min.js -O ./static/js/jquery.sparkline.min.js
@@ -83,8 +83,12 @@ pushd static/image
wget https://www.circl.lu/assets/images/logos/AIL.png -O AIL.png
popd
if ! [[ -n "$AIL_HOME" ]]
then
#active virtualenv
source ./../../AILENV/bin/activate
fi
#Update MISP Taxonomies and Galaxies
python3 -m pip install git+https://github.com/MISP/PyTaxonomies --upgrade
python3 -m pip install git+https://github.com/MISP/PyMISPGalaxies --upgrade