chg: [merge] merge master into update branche

This commit is contained in:
Terrtia 2019-04-10 15:43:15 +02:00
commit 2589fc2161
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
28 changed files with 1453 additions and 165 deletions

3
.gitignore vendored
View file

@ -36,6 +36,9 @@ bin/packages/config.cfg.backup
configs/keys
files
# Pystemon archives
pystemon/archives
# installed files
nltk_data/
doc/all_modules.txt

View file

@ -27,8 +27,18 @@ WORKDIR /opt/AIL
# Default to UTF-8 file.encoding
ENV LANG C.UTF-8
ENV AIL_HOME /opt/AIL
ENV AIL_BIN ${AIL_HOME}/bin
ENV AIL_FLASK ${AIL_HOME}/var/www
ENV AIL_REDIS ${AIL_HOME}/redis/src
ENV AIL_ARDB ${AIL_HOME}/ardb/src
ENV AIL_VENV ${AIL_HOME}/AILENV
ENV PATH ${AIL_VENV}/bin:${AIL_HOME}:${AIL_REDIS}:${AIL_ARDB}:${AIL_BIN}:${AIL_FLASK}:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
RUN ./pystemon/install.sh
RUN pip install -r /opt/pystemon/requirements.txt
RUN pip install -r /opt/AIL/crawler_requirements.txt
COPY docker_start.sh /docker_start.sh
ENTRYPOINT ["/bin/bash", "docker_start.sh"]

View file

@ -101,7 +101,6 @@ ARDB_DB
ZADD - 'base64_hash:'+hash paste * nb_seen_in_paste
ZADD - 'binary_hash:'+hash paste * nb_seen_in_paste
ZADD - 'hash_type:'+type date nb_seen
ZADD - 'base64_type:'+type date nb_seen
ZADD - 'binary_type:'+type date nb_seen

View file

@ -12,7 +12,7 @@ AIL is a modular framework to analyse potential information leaks from unstructu
<table>
<tr>
<td>Latest Release</td>
<td><a href="https://badge.fury.io/gh/CIRCL%2FAIL-Framework"><img src="https://badge.fury.io/gh/CIRCL%2FAIL-Framework.svg" alt="GitHub version" height="18"></a></td>
<td><a href="https://github.com/CIRCL/AIL-framework/releases/latest"><img src="https://img.shields.io/github/release/CIRCL/AIL-framework/all.svg"></a></td>
</tr>
<tr>
<td>Contributors</td>
@ -168,6 +168,22 @@ Privacy and GDPR
The [AIL information leaks analysis and the GDPR in the context of collection, analysis and sharing information leaks](https://www.circl.lu/assets/files/information-leaks-analysis-and-gdpr.pdf) document provides an overview of how to use AIL in a lawful context, especially within the scope of the General Data Protection Regulation.
Research using AIL
------------------
If you write an academic paper relying on or using AIL, it can be cited with the following BibTeX:
~~~~
@inproceedings{mokaddem2018ail,
title={AIL-The design and implementation of an Analysis Information Leak framework},
author={Mokaddem, Sami and Wagener, G{\'e}rard and Dulaunoy, Alexandre},
booktitle={2018 IEEE International Conference on Big Data (Big Data)},
pages={5049--5057},
year={2018},
organization={IEEE}
}
~~~~
Screenshots
===========
@ -237,11 +253,11 @@ License
```
Copyright (C) 2014 Jules Debra
Copyright (C) 2014-2018 CIRCL - Computer Incident Response Center Luxembourg (c/o smile, security made in Lëtzebuerg, Groupement d'Intérêt Economique)
Copyright (c) 2014-2018 Raphaël Vinot
Copyright (c) 2014-2018 Alexandre Dulaunoy
Copyright (c) 2016-2018 Sami Mokaddem
Copyright (c) 2018 Thirion Aurélien
Copyright (C) 2014-2019 CIRCL - Computer Incident Response Center Luxembourg (c/o smile, security made in Lëtzebuerg, Groupement d'Intérêt Economique)
Copyright (c) 2014-2019 Raphaël Vinot
Copyright (c) 2014-2019 Alexandre Dulaunoy
Copyright (c) 2016-2019 Sami Mokaddem
Copyright (c) 2018-2019 Thirion Aurélien
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by

83
bin/CVE_check.py Executable file
View file

@ -0,0 +1,83 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
from packages import Paste
from Helper import Process
import os
import re
import time
import redis
import configparser
from collections import defaultdict
def get_dict_cve(list_paste_cve, only_one_same_cve_by_paste=False):
    """Count CVE identifiers found in a set of pastes and print the ranking.

    Args:
        list_paste_cve: iterable of paste identifiers, each passed to
            ``Paste.Paste`` to load the paste content.
        only_one_same_cve_by_paste: when true, a CVE that appears several
            times inside one paste is counted only once for that paste.

    Prints a separator line followed by one ``(cve, count)`` tuple per CVE,
    ordered by descending count. Returns None.

    Uses the module-level ``reg_cve`` pattern, which the script section of
    this file compiles before calling this function.
    """
    dict_keyword = defaultdict(int)

    for paste_cve in list_paste_cve:
        paste_content = Paste.Paste(paste_cve).get_p_content()

        cve_list = reg_cve.findall(paste_content)
        if only_one_same_cve_by_paste:
            # De-duplicate while keeping first-seen order so that the
            # printed order of equally frequent CVEs stays deterministic.
            cve_list = list(dict.fromkeys(cve_list))

        # Count from cve_list: the previous code re-ran findall() here,
        # which silently ignored only_one_same_cve_by_paste.
        for cve in cve_list:
            dict_keyword[cve] += 1

    print('------------------------------------------------')
    for cve in sorted(dict_keyword, key=dict_keyword.get, reverse=True):
        print((cve, dict_keyword[cve]))
if __name__ == '__main__':
    # Script entry point: print CVE frequency rankings for all tagged pastes,
    # then for non-crawler pastes, then for crawler-submitted pastes.

    # CONFIG #
    configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
    if not os.path.exists(configfile):
        raise Exception('Unable to find the configuration file. '
                        'Did you set environment variables? '
                        'Or activate the virtualenv.')

    cfg = configparser.ConfigParser()
    cfg.read(configfile)

    # Created but not used by the code visible in this script.
    serv_metadata = redis.StrictRedis(
        host=cfg.get("ARDB_Metadata", "host"),
        port=cfg.getint("ARDB_Metadata", "port"),
        db=cfg.getint("ARDB_Metadata", "db"),
        decode_responses=True)

    # port/db are read with getint(), consistent with serv_metadata above,
    # so the client receives integers rather than raw config strings.
    serv_tags = redis.StrictRedis(
        host=cfg.get("ARDB_Tags", "host"),
        port=cfg.getint("ARDB_Tags", "port"),
        db=cfg.getint("ARDB_Tags", "db"),
        decode_responses=True)

    # Module-level pattern read by get_dict_cve().
    reg_cve = re.compile(r'CVE-[1-2]\d{1,4}-\d{1,7}')

    #all_past_cve = serv_tags.smembers('infoleak:automatic-detection="cve"')
    #all_past_cve_regular = serv_tags.sdiff('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"')
    #all_past_cve_crawler = serv_tags.sinter('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"')

    #print('{} + {} = {}'.format(len(all_past_cve_regular), len(all_past_cve_crawler), len(all_past_cve)))

    # Every paste tagged as containing a CVE.
    print('ALL_CVE')
    get_dict_cve(serv_tags.smembers('infoleak:automatic-detection="cve"'), True)
    print()
    print()
    print()

    # CVE-tagged pastes that are NOT tagged as crawler submissions.
    print('REGULAR_CVE')
    get_dict_cve(serv_tags.sdiff('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"'), True)
    print()
    print()
    print()

    # CVE-tagged pastes that are also tagged as crawler submissions.
    print('CRAWLER_CVE')
    get_dict_cve(serv_tags.sinter('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"'), True)

View file

@ -10,6 +10,8 @@ import time
import subprocess
import requests
from pyfaup.faup import Faup
sys.path.append(os.environ['AIL_BIN'])
from Helper import Process
from pubsublogger import publisher
@ -18,10 +20,13 @@ def on_error_send_message_back_in_queue(type_hidden_service, domain, message):
# send this msg back in the queue
if not r_onion.sismember('{}_domain_crawler_queue'.format(type_hidden_service), domain):
r_onion.sadd('{}_domain_crawler_queue'.format(type_hidden_service), domain)
r_onion.sadd('{}_crawler_queue'.format(type_hidden_service), message)
r_onion.sadd('{}_crawler_priority_queue'.format(type_hidden_service), message)
def crawl_onion(url, domain, date, date_month, message):
r_cache.hset('metadata_crawler:{}'.format(splash_port), 'crawling_domain', domain)
r_cache.hset('metadata_crawler:{}'.format(splash_port), 'started_time', datetime.datetime.now().strftime("%Y/%m/%d - %H:%M.%S"))
#if not r_onion.sismember('full_onion_up', domain) and not r_onion.sismember('onion_down:'+date , domain):
super_father = r_serv_metadata.hget('paste_metadata:'+paste, 'super_father')
if super_father is None:
@ -37,19 +42,21 @@ def crawl_onion(url, domain, date, date_month, message):
# TODO: relaunch docker or send error message
nb_retry += 1
if nb_retry == 30:
if nb_retry == 6:
on_error_send_message_back_in_queue(type_hidden_service, domain, message)
publisher.error('{} SPASH DOWN'.format(splash_url))
print('--------------------------------------')
print(' \033[91m DOCKER SPLASH DOWN\033[0m')
print(' {} DOWN'.format(splash_url))
exit(1)
r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'SPLASH DOWN')
nb_retry == 0
print(' \033[91m DOCKER SPLASH NOT AVAILABLE\033[0m')
print(' Retry({}) in 10 seconds'.format(nb_retry))
time.sleep(10)
if r.status_code == 200:
r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Crawling')
process = subprocess.Popen(["python", './torcrawler/tor_crawler.py', splash_url, type_hidden_service, url, domain, paste, super_father],
stdout=subprocess.PIPE)
while process.poll() is None:
@ -67,6 +74,7 @@ def crawl_onion(url, domain, date, date_month, message):
print('')
print(' PROXY DOWN OR BAD CONFIGURATION\033[0m'.format(splash_url))
print('------------------------------------------------------------------------')
r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Error')
exit(-2)
else:
print(process.stdout.read())
@ -76,6 +84,7 @@ def crawl_onion(url, domain, date, date_month, message):
print('--------------------------------------')
print(' \033[91m DOCKER SPLASH DOWN\033[0m')
print(' {} DOWN'.format(splash_url))
r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Crawling')
exit(1)
@ -119,6 +128,7 @@ if __name__ == '__main__':
print('splash url: {}'.format(splash_url))
crawler_depth_limit = p.config.getint("Crawler", "crawler_depth_limit")
faup = Faup()
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes"))
@ -140,6 +150,10 @@ if __name__ == '__main__':
db=p.config.getint("ARDB_Onion", "db"),
decode_responses=True)
r_cache.sadd('all_crawler:{}'.format(type_hidden_service), splash_port)
r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Waiting')
r_cache.hset('metadata_crawler:{}'.format(splash_port), 'started_time', datetime.datetime.now().strftime("%Y/%m/%d - %H:%M.%S"))
# load domains blacklist
try:
with open(os.environ['AIL_BIN']+'/torcrawler/blacklist_onion.txt', 'r') as f:
@ -152,6 +166,10 @@ if __name__ == '__main__':
while True:
# Priority Queue - Recovering the streamed message informations.
message = r_onion.spop('{}_crawler_priority_queue'.format(type_hidden_service))
if message is None:
# Recovering the streamed message informations.
message = r_onion.spop('{}_crawler_queue'.format(type_hidden_service))
@ -173,6 +191,8 @@ if __name__ == '__main__':
domain_url = 'http://{}'.format(domain)
print()
print()
print('\033[92m------------------START CRAWLER------------------\033[0m')
print('crawler type: {}'.format(type_hidden_service))
print('\033[92m-------------------------------------------------\033[0m')
@ -180,12 +200,24 @@ if __name__ == '__main__':
print('domain: {}'.format(domain))
print('domain_url: {}'.format(domain_url))
if not r_onion.sismember('blacklist_{}'.format(type_hidden_service), domain):
faup.decode(domain)
onion_domain=faup.get()['domain'].decode()
if not r_onion.sismember('blacklist_{}'.format(type_hidden_service), domain) and not r_onion.sismember('blacklist_{}'.format(type_hidden_service), onion_domain):
date = datetime.datetime.now().strftime("%Y%m%d")
date_month = datetime.datetime.now().strftime("%Y%m")
if not r_onion.sismember('month_{}_up:{}'.format(type_hidden_service, date_month), domain) and not r_onion.sismember('{}_down:{}'.format(type_hidden_service, date), domain):
# first seen
if not r_onion.hexists('{}_metadata:{}'.format(type_hidden_service, domain), 'first_seen'):
r_onion.hset('{}_metadata:{}'.format(type_hidden_service, domain), 'first_seen', date)
# last_father
r_onion.hset('{}_metadata:{}'.format(type_hidden_service, domain), 'paste_parent', paste)
# last check
r_onion.hset('{}_metadata:{}'.format(type_hidden_service, domain), 'last_check', date)
crawl_onion(url, domain, date, date_month, message)
if url != domain_url:
@ -198,21 +230,12 @@ if __name__ == '__main__':
r_onion.sadd('{}_down:{}'.format(type_hidden_service, date), domain)
#r_onion.sadd('{}_down_link:{}'.format(type_hidden_service, date), url)
#r_onion.hincrby('{}_link_down'.format(type_hidden_service), url, 1)
if not r_onion.exists('{}_metadata:{}'.format(type_hidden_service, domain)):
r_onion.hset('{}_metadata:{}'.format(type_hidden_service, domain), 'first_seen', date)
r_onion.hset('{}_metadata:{}'.format(type_hidden_service,domain), 'last_seen', date)
else:
#r_onion.hincrby('{}_link_up'.format(type_hidden_service), url, 1)
if r_onion.sismember('month_{}_up:{}'.format(type_hidden_service, date_month), domain) and r_serv_metadata.exists('paste_children:'+paste):
msg = 'infoleak:automatic-detection="{}";{}'.format(type_hidden_service, paste)
p.populate_set_out(msg, 'Tags')
# last check
r_onion.hset('{}_metadata:{}'.format(type_hidden_service, domain), 'last_check', date)
# last_father
r_onion.hset('{}_metadata:{}'.format(type_hidden_service, domain), 'paste_parent', paste)
# add onion screenshot history
# add crawled days
if r_onion.lindex('{}_history:{}'.format(type_hidden_service, domain), 0) != date:
@ -243,6 +266,14 @@ if __name__ == '__main__':
r_onion.lpush('last_{}'.format(type_hidden_service), domain)
r_onion.ltrim('last_{}'.format(type_hidden_service), 0, 15)
#update crawler status
r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Waiting')
r_cache.hdel('metadata_crawler:{}'.format(splash_port), 'crawling_domain')
else:
print(' Blacklisted Onion')
print()
print()
else:
continue
else:

View file

@ -23,23 +23,17 @@ Requirements
import base64
import os
import time
import uuid
from pubsublogger import publisher
from Helper import Process
import magic
import io
#import gzip
'''
def gunzip_bytes_obj(bytes_obj):
in_ = io.BytesIO()
in_.write(bytes_obj)
in_.seek(0)
with gzip.GzipFile(fileobj=in_, mode='rb') as fo:
gunzipped_bytes_obj = fo.read()
def rreplace(s, old, new, occurrence):
    """Replace the last `occurrence` occurrences of `old` in `s` with `new`.

    The string is split from the right at most `occurrence` times on `old`
    and the pieces are re-joined with `new`.
    """
    return new.join(s.rsplit(old, occurrence))
return gunzipped_bytes_obj.decode()'''
if __name__ == '__main__':
publisher.port = 6380
@ -79,6 +73,12 @@ if __name__ == '__main__':
processed_paste = 0
time.sleep(1)
continue
file_name_paste = paste.split('/')[-1]
if len(file_name_paste)>255:
new_file_name_paste = '{}{}.gz'.format(file_name_paste[:215], str(uuid.uuid4()))
paste = rreplace(paste, file_name_paste, new_file_name_paste, 1)
# Creating the full filepath
filename = os.path.join(PASTES_FOLDER, paste)

View file

@ -31,7 +31,7 @@ lastTimeKillCommand = {}
current_selected_value = 0
current_selected_queue = ""
current_selected_action = ""
current_selected_action = 0
current_selected_amount = 0
# Map PID to Queue name (For restart and killing)
PID_NAME_DICO = {}
@ -480,7 +480,10 @@ class Show_paste(Frame):
self.label_list[i]._text = ""
except Exception as e:
if current_selected_value in COMPLETE_PASTE_PATH_PER_PID:
self.label_list[0]._text = "Error while displaying the paste: " + COMPLETE_PASTE_PATH_PER_PID[current_selected_value]
else:
self.label_list[0]._text = "Error Generic exception caught"
self.label_list[1]._text = str(e)
for i in range(2,self.num_label):
self.label_list[i]._text = ""

View file

@ -29,10 +29,18 @@ import os
import base64
import subprocess
import redis
import signal
import re
from Helper import Process
class TimeoutException(Exception):
    """Raised by the SIGALRM handler below when the alarm fires."""


def timeout_handler(signum, frame):
    """Signal handler: turn a delivered signal into a TimeoutException.

    `signum` and `frame` are the standard signal-handler arguments and are
    not used.
    """
    raise TimeoutException()


# Register the handler so that signal.alarm() expiry raises TimeoutException.
signal.signal(signal.SIGALRM, timeout_handler)
def fetch(p, r_cache, urls, domains, path):
failed = []
@ -113,6 +121,8 @@ if __name__ == "__main__":
message = p.get_from_set()
prec_filename = None
max_execution_time = p.config.getint("Onion", "max_execution_time")
# send to crawler:
activate_crawler = p.config.get("Crawler", "activate_crawler")
if activate_crawler == 'True':
@ -130,6 +140,7 @@ if __name__ == "__main__":
while True:
message = p.get_from_set()
if message is not None:
print(message)
filename, score = message.split()
@ -140,6 +151,9 @@ if __name__ == "__main__":
urls = []
PST = Paste.Paste(filename)
# max execution time on regex
signal.alarm(max_execution_time)
try:
for x in PST.get_regex(url_regex):
print(x)
# Extracting url with regex
@ -150,6 +164,13 @@ if __name__ == "__main__":
print(url)
domains_list.append(domain)
urls.append(url)
except TimeoutException:
encoded_list = []
p.incr_module_timeout_statistic()
print ("{0} processing timeout".format(PST.p_path))
continue
signal.alarm(0)
'''
for x in PST.get_regex(i2p_regex):
@ -177,8 +198,12 @@ if __name__ == "__main__":
print(len(domains_list))
if len(domains_list) > 0:
if not activate_crawler:
publisher.warning('{}Detected {} .onion(s);{}'.format(
to_print, len(domains_list),PST.p_rel_path))
else:
publisher.info('{}Detected {} .onion(s);{}'.format(
to_print, len(domains_list),PST.p_rel_path))
now = datetime.datetime.now()
path = os.path.join('onions', str(now.year).zfill(4),
str(now.month).zfill(2),
@ -199,11 +224,19 @@ if __name__ == "__main__":
else:
continue
# too many subdomain
if len(domain.split('.')) > 5:
continue
if not r_onion.sismember('month_onion_up:{}'.format(date_month), domain) and not r_onion.sismember('onion_down:'+date , domain):
if not r_onion.sismember('onion_domain_crawler_queue', domain):
print('send to onion crawler')
r_onion.sadd('onion_domain_crawler_queue', domain)
msg = '{};{}'.format(url,PST.p_rel_path)
if not r_onion.hexists('onion_metadata:{}'.format(domain), 'first_seen'):
r_onion.sadd('onion_crawler_priority_queue', msg)
print('send to priority queue')
else:
r_onion.sadd('onion_crawler_queue', msg)
#p.populate_set_out(msg, 'Crawler')
@ -222,4 +255,3 @@ if __name__ == "__main__":
publisher.debug("Script url is Idling 10s")
#print('Sleeping')
time.sleep(10)
message = p.get_from_set()

View file

@ -67,7 +67,7 @@ while True:
print(paste)
with open(pystemonpath+paste, 'rb') as f: #.read()
messagedata = f.read()
path_to_send = pastes_directory+paste
path_to_send = os.path.join(pastes_directory,paste)
s = b' '.join( [ topic.encode(), path_to_send.encode(), base64.b64encode(messagedata) ] )
socket.send(s)

View file

@ -0,0 +1,253 @@
[Directories]
bloomfilters = Blooms
dicofilters = Dicos
pastes = PASTES
hash = HASHS
crawled = crawled
crawled_screenshot = CRAWLED_SCREENSHOT
wordtrending_csv = var/www/static/csv/wordstrendingdata
wordsfile = files/wordfile
protocolstrending_csv = var/www/static/csv/protocolstrendingdata
protocolsfile = files/protocolsfile
tldstrending_csv = var/www/static/csv/tldstrendingdata
tldsfile = faup/src/data/mozilla.tlds
domainstrending_csv = var/www/static/csv/domainstrendingdata
pystemonpath = /opt/pystemon/
sentiment_lexicon_file = sentiment/vader_lexicon.zip/vader_lexicon/vader_lexicon.txt
##### Notifications ######
[Notifications]
ail_domain = http://localhost:7000
sender = sender@example.com
sender_host = smtp.example.com
sender_port = 1337
sender_pw = None
# optional for using with authenticated SMTP over SSL
# sender_pw = securepassword
##### Flask #####
[Flask]
#Proxying requests to the app
baseUrl = /
#Number of logs to display in the dashboard
max_dashboard_logs = 15
#Maximum number of characters to display in the tooltip
max_preview_char = 250
#Maximum number of characters to display in the modal
max_preview_modal = 800
#Default number of headers to display in trending graphs
default_display = 10
#Number of minutes displayed for the number of processed pastes.
minute_processed_paste = 10
#Maximum line length authorized to make a diff between duplicates
DiffMaxLineLength = 10000
#### Modules ####
[BankAccount]
max_execution_time = 60
[Categ]
#Minimum number of match between the paste and the category file
matchingThreshold=1
[Credential]
#Minimum length that a credential must have to be considered as such
minimumLengthThreshold=3
#Will be pushed as an alert if the number of credentials is greater than that number
criticalNumberToAlert=8
#Will be considered a false positive if fewer than X matches from the top password list
minTopPassList=5
[Curve]
max_execution_time = 90
[Onion]
max_execution_time = 180
[Base64]
path = Base64/
max_execution_time = 60
[Binary]
path = Base64/
max_execution_time = 60
[Hex]
path = Base64/
max_execution_time = 60
[Modules_Duplicates]
#Number of months to look back
maximum_month_range = 3
#The value above which two pastes are considered duplicates for ssdeep.
threshold_duplicate_ssdeep = 50
#The value above which two pastes are considered duplicates for tlsh.
threshold_duplicate_tlsh = 52
#Minimum size of the paste considered
min_paste_size = 0.3
[Module_ModuleInformation]
#Threshold to deduce if a module is stuck or not, in seconds.
threshold_stucked_module=600
[Module_Mixer]
#Define the configuration of the mixer, possible value: 1, 2 or 3
operation_mode = 3
#Define how long a paste will be considered a duplicate, in seconds (1 day = 86400)
ttl_duplicate = 86400
default_unnamed_feed_name = unnamed_feeder
[RegexForTermsFrequency]
max_execution_time = 60
##### Redis #####
[Redis_Cache]
host = localhost
port = 6379
db = 0
[Redis_Log]
host = localhost
port = 6380
db = 0
[Redis_Log_submit]
host = localhost
port = 6380
db = 1
[Redis_Queues]
host = localhost
port = 6381
db = 0
[Redis_Data_Merging]
host = localhost
port = 6379
db = 1
[Redis_Paste_Name]
host = localhost
port = 6379
db = 2
[Redis_Mixer_Cache]
host = localhost
port = 6381
db = 1
##### ARDB #####
[ARDB_Curve]
host = localhost
port = 6382
db = 1
[ARDB_Sentiment]
host = localhost
port = 6382
db = 4
[ARDB_TermFreq]
host = localhost
port = 6382
db = 2
[ARDB_TermCred]
host = localhost
port = 6382
db = 5
[ARDB_DB]
host = localhost
port = 6382
db = 0
[ARDB_Trending]
host = localhost
port = 6382
db = 3
[ARDB_Hashs]
host = localhost
# port was missing here; every other ARDB section in this file uses 6382
port = 6382
db = 1
[ARDB_Tags]
host = localhost
port = 6382
db = 6
[ARDB_Metadata]
host = localhost
port = 6382
db = 7
[ARDB_Statistics]
host = localhost
port = 6382
db = 8
[ARDB_Onion]
host = localhost
port = 6382
db = 9
[Url]
cc_critical = DE
[DomClassifier]
cc = DE
cc_tld = r'\.de$'
dns = 8.8.8.8
[Mail]
dns = 8.8.8.8
[Web]
dns = 149.13.33.69
# Indexer configuration
[Indexer]
type = whoosh
path = indexdir
register = indexdir/all_index.txt
#size in Mb
index_max_size = 2000
[ailleakObject]
maxDuplicateToPushToMISP=10
###############################################################################
# For multiple feeds, separate them with "," without spaces
# e.g.: tcp://127.0.0.1:5556,tcp://127.0.0.1:5557
[ZMQ_Global]
#address = tcp://crf.circl.lu:5556
address = tcp://127.0.0.1:5556,tcp://crf.circl.lu:5556
channel = 102
bind = tcp://127.0.0.1:5556
[ZMQ_Url]
address = tcp://127.0.0.1:5004
channel = urls
[ZMQ_FetchedOnion]
address = tcp://127.0.0.1:5005
channel = FetchedOnion
[RedisPubSub]
host = localhost
port = 6381
db = 0
[Crawler]
activate_crawler = False
crawler_depth_limit = 1
splash_url_onion = http://172.17.0.1
splash_onion_port = 8050

View file

@ -68,6 +68,9 @@ minTopPassList=5
[Curve]
max_execution_time = 90
[Onion]
max_execution_time = 180
[Base64]
path = Base64/
max_execution_time = 60

View file

@ -10,10 +10,12 @@ import datetime
import base64
import redis
import json
import time
from scrapy.spidermiddlewares.httperror import HttpError
from twisted.internet.error import DNSLookupError
from twisted.internet.error import TimeoutError
from twisted.web._newclient import ResponseNeverReceived
from scrapy import Spider
from scrapy.linkextractors import LinkExtractor
@ -39,6 +41,8 @@ class TorSplashCrawler():
'SPIDER_MIDDLEWARES': {'scrapy_splash.SplashDeduplicateArgsMiddleware': 100,},
'DUPEFILTER_CLASS': 'scrapy_splash.SplashAwareDupeFilter',
'HTTPERROR_ALLOW_ALL': True,
'RETRY_TIMES': 2,
'CLOSESPIDER_PAGECOUNT': 50,
'DEPTH_LIMIT': crawler_depth_limit
})
@ -97,7 +101,7 @@ class TorSplashCrawler():
yield SplashRequest(
self.start_urls,
self.parse,
#errback=self.errback_catcher,
errback=self.errback_catcher,
endpoint='render.json',
meta={'father': self.original_paste},
args={ 'html': 1,
@ -122,6 +126,10 @@ class TorSplashCrawler():
print('Connection to proxy refused')
else:
#avoid filename too big
if len(self.domains[0]) > 215:
UUID = self.domains[0][-215:]+str(uuid.uuid4())
else:
UUID = self.domains[0]+str(uuid.uuid4())
filename_paste = os.path.join(self.crawled_paste_filemame, UUID)
relative_filename_paste = os.path.join(self.crawler_path, UUID)
@ -174,7 +182,7 @@ class TorSplashCrawler():
yield SplashRequest(
link.url,
self.parse,
#errback=self.errback_catcher,
errback=self.errback_catcher,
endpoint='render.json',
meta={'father': relative_filename_paste},
args={ 'html': 1,
@ -184,17 +192,39 @@ class TorSplashCrawler():
'wait': 10}
)
'''
def errback_catcher(self, failure):
# catch all errback failures,
self.logger.error(repr(failure))
if failure.check(ResponseNeverReceived):
request = failure.request
url = request.meta['splash']['args']['url']
father = request.meta['father']
self.logger.error('Splash, ResponseNeverReceived for %s, retry in 10s ...', url)
time.sleep(10)
yield SplashRequest(
url,
self.parse,
errback=self.errback_catcher,
endpoint='render.json',
meta={'father': father},
args={ 'html': 1,
'png': 1,
'render_all': 1,
'har': 1,
'wait': 10}
)
else:
print('failure')
#print(failure)
print(failure.type)
#print(failure.request.meta['item'])
'''
#if isinstance(failure.value, HttpError):
if failure.check(HttpError):
elif failure.check(HttpError):
# you can get the response
response = failure.value.response
print('HttpError')

View file

@ -3,3 +3,5 @@ facebookcorewwwi.onion
graylady3jvrrxbe.onion
expyuzz4wqqyqhjn.onion
dccbbv6cooddgcrq.onion
pugljpwjhbiagkrn.onion
jld3zkuo4b5mbios.onion

View file

@ -5,12 +5,15 @@ usage() { echo "Usage: sudo $0 [-f <config_absolute_path>] [-p <port_start>] [-n
echo " -p: number of the first splash server port number. This number is incremented for the others splash server";
echo " -n: number of splash servers to start";
echo "";
echo " -options:";
echo " -u: max unbound in-memory cache (Mb, Restart Splash when full, default=3000 Mb)";
echo "";
echo "example:";
echo "sudo ./launch_splash_crawler.sh -f /home/my_user/AIL-framework/configs/docker/splash_onion/etc/splash/proxy-profiles/ -p 8050 -n 3";
exit 1;
}
while getopts ":p:f:n:" o; do
while getopts ":p:f:n:u:" o; do
case "${o}" in
p)
p=${OPTARG}
@ -21,6 +24,9 @@ while getopts ":p:f:n:" o; do
n)
n=${OPTARG}
;;
u)
u=${OPTARG}
;;
*)
usage
;;
@ -28,6 +34,10 @@ while getopts ":p:f:n:" o; do
done
shift $((OPTIND-1))
if [ -z "${u}" ]; then
u=3000;
fi
if [ -z "${p}" ] || [ -z "${f}" ] || [ -z "${n}" ]; then
usage;
fi
@ -52,7 +62,7 @@ sleep 0.1
for ((i=0;i<=$((${n} - 1));i++)); do
port_number=$((${p} + $i))
screen -S "Docker_Splash" -X screen -t "docker_splash:$port_number" bash -c 'sudo docker run -p '$port_number':8050 --cpus=1 --memory=4.5G -v '$f':/etc/splash/proxy-profiles/ --net="bridge" scrapinghub/splash; read x'
screen -S "Docker_Splash" -X screen -t "docker_splash:$port_number" bash -c 'sudo docker run -d -p '$port_number':8050 --restart=always --cpus=1 --memory=4.5G -v '$f':/etc/splash/proxy-profiles/ --net="bridge" scrapinghub/splash --maxrss '$u'; read x'
sleep 0.1
printf "$GREEN Splash server launched on port $port_number$DEFAULT\n"
done

View file

@ -61,7 +61,7 @@ tcp-backlog 511
# Examples:
#
# bind 192.168.1.100 10.0.0.1
# bind 127.0.0.1
bind 127.0.0.1
# Specify the path for the Unix socket that will be used to listen for
# incoming connections. There is no default, so Redis will not listen

View file

@ -61,7 +61,7 @@ tcp-backlog 511
# Examples:
#
# bind 192.168.1.100 10.0.0.1
# bind 127.0.0.1
bind 127.0.0.1
# Specify the path for the Unix socket that will be used to listen for
# incoming connections. There is no default, so Redis will not listen

View file

@ -61,7 +61,7 @@ tcp-backlog 511
# Examples:
#
# bind 192.168.1.100 10.0.0.1
# bind 127.0.0.1
bind 127.0.0.1
# Specify the path for the Unix socket that will be used to listen for
# incoming connections. There is no default, so Redis will not listen

Binary file not shown.

523
docker-compose.yml Normal file
View file

@ -0,0 +1,523 @@
version: '3'
services:
ardb:
entrypoint:
- ardb-server
- /opt/AIL/configs/6382.conf
healthcheck:
test: ["CMD", "redis-cli", "-p", "6382", "ping"]
interval: 30s
timeout: 10s
retries: 5
network_mode: service:flask
image: ail-framework
volumes:
- ./configs:/opt/AIL/configs:ro
crawler:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Crawler.py
- onion
- "8050"
network_mode: service:flask
image: ail-framework
volumes:
- ./CRAWLED_SCREENSHOT/:/opt/AIL/CRAWLED_SCREENSHOT
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
flask:
build: .
entrypoint:
- /opt/AIL/var/www/Flask_server.py
ports:
- "7000:7000"
image: ail-framework
volumes:
- ./CRAWLED_SCREENSHOT/:/opt/AIL/CRAWLED_SCREENSHOT
- ./PASTES/:/opt/AIL/PASTES
- ./indexdir:/opt/AIL/indexdir
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/var/www
log-queue:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/AILENV/bin/log_subscriber
- -p
- "6380"
- -c
- Queing
- -l
- /opt/AIL/logs/
network_mode: service:flask
image: ail-framework
volumes:
- ./configs:/opt/AIL/configs:ro
log-script:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/AILENV/bin/log_subscriber
- -p
- "6380"
- -c
- Script
- -l
- /opt/AIL/logs/
network_mode: service:flask
image: ail-framework
volumes:
- ./configs:/opt/AIL/configs:ro
pystemon:
depends_on:
- redis-log
entrypoint:
- /opt/pystemon/pystemon.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./pystemon/archives:/opt/pystemon/archive
- ./pystemon/proxies.txt:/opt/pystemon/proxies.txt:ro
- ./pystemon/pystemon.yaml:/opt/pystemon/pystemon.yaml:ro
working_dir: /opt/pystemon
pystemon-feeder:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/feeder/pystemon-feeder.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./pystemon/archives:/opt/pystemon/archive
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
queues:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/launch_queues.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
redis-cache:
entrypoint:
- redis-server
- /opt/AIL/configs/6379.conf
healthcheck:
test: ["CMD", "redis-cli", "-p", "6379", "ping"]
interval: 30s
timeout: 10s
retries: 5
image: ail-framework
network_mode: service:flask
volumes:
- ./configs:/opt/AIL/configs:ro
redis-log:
entrypoint:
- redis-server
- /opt/AIL/configs/6380.conf
healthcheck:
test: ["CMD", "redis-cli", "-p", "6380", "ping"]
interval: 30s
timeout: 10s
retries: 5
network_mode: service:flask
image: ail-framework
volumes:
- ./configs:/opt/AIL/configs:ro
redis-mixer-cache:
entrypoint:
- redis-server
- /opt/AIL/configs/6381.conf
healthcheck:
test: ["CMD", "redis-cli", "-p", "6381", "ping"]
interval: 30s
timeout: 10s
retries: 5
image: ail-framework
network_mode: service:flask
volumes:
- ./configs:/opt/AIL/configs:ro
script-alerthandler:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/alertHandler.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-apikey:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/ApiKey.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-bankaccount:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/BankAccount.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-bitcoin:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Bitcoin.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-categ:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Categ.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-credential:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Credential.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-creditcards:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/CreditCards.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-curve:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Curve.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-curvemanagetopsets:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/CurveManageTopSets.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-cve:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Cve.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-decoder:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Decoder.py
network_mode: service:flask
image: ail-framework
volumes:
- ./HASHS:/opt/AIL/HASHS
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-domclassifier:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/DomClassifier.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-duplicates:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Duplicates.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-global:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Global.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-indexer:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Indexer.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./indexdir:/opt/AIL/indexdir
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-keys:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Keys.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-libinjection:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/LibInjection.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-lines:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Lines.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-mail:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Mail.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-mispthehivefeeder:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/MISP_The_Hive_feeder.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-mixer:
depends_on:
- redis-mixer-cache
entrypoint:
- /opt/AIL/bin/Mixer.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-modulestats:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/ModuleStats.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-onion:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Onion.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-phone:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Phone.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-regexfortermsfrequency:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/RegexForTermsFrequency.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-release:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Release.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-sentimentanalysis:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/SentimentAnalysis.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-setfortermsfrequency:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/SetForTermsFrequency.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-sqlinjectiondetection:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/SQLInjectionDetection.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-submitpaste:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/submit_paste.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-tags:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Tags.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-tokenize:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Tokenize.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-web:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Web.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin
script-webstats:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/WebStats.py
network_mode: service:flask
image: ail-framework
volumes:
- ./PASTES/:/opt/AIL/PASTES
- ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro
working_dir: /opt/AIL/bin

View file

@ -95,6 +95,7 @@ popd
mkdir -p $AIL_HOME/PASTES
pip3 install -U pip
pip3 install 'git+https://github.com/D4-project/BGP-Ranking.git/@7e698f87366e6f99b4d0d11852737db28e3ddc62#egg=pybgpranking&subdirectory=client'
pip3 install -U -r pip3_packages_requirement.txt
# Pyfaup

1
pystemon/proxies.txt Normal file
View file

@ -0,0 +1 @@
http://127.0.0.1:8080

View file

@ -30,6 +30,12 @@ r_serv = redis.StrictRedis(
db=cfg.getint("Redis_Queues", "db"),
decode_responses=True)
r_cache = redis.StrictRedis(
host=cfg.get("Redis_Cache", "host"),
port=cfg.getint("Redis_Cache", "port"),
db=cfg.getint("Redis_Cache", "db"),
decode_responses=True)
r_serv_log = redis.StrictRedis(
host=cfg.get("Redis_Log", "host"),
port=cfg.getint("Redis_Log", "port"),

View file

@ -101,7 +101,8 @@ def all_hash_search():
date_to = request.form.get('date_to')
type = request.form.get('type')
encoding = request.form.get('encoding')
return redirect(url_for('hashDecoded.hashDecoded_page', date_from=date_from, date_to=date_to, type=type, encoding=encoding))
show_decoded_files = request.form.get('show_decoded_files')
return redirect(url_for('hashDecoded.hashDecoded_page', date_from=date_from, date_to=date_to, type=type, encoding=encoding, show_decoded_files=show_decoded_files))
@hashDecoded.route("/hashDecoded/", methods=['GET'])
def hashDecoded_page():
@ -109,6 +110,7 @@ def hashDecoded_page():
date_to = request.args.get('date_to')
type = request.args.get('type')
encoding = request.args.get('encoding')
show_decoded_files = request.args.get('show_decoded_files')
if type == 'All types':
type = None
@ -161,6 +163,8 @@ def hashDecoded_page():
daily_date = None
l_64 = set()
if show_decoded_files:
show_decoded_files = True
for date in date_range:
if encoding is None:
l_hash = r_serv_metadata.zrange('hash_date:' +date, 0, -1)
@ -214,7 +218,7 @@ def hashDecoded_page():
l_type = r_serv_metadata.smembers('hash_all_type')
return render_template("hashDecoded.html", l_64=b64_metadata, vt_enabled=vt_enabled, l_type=l_type, type=type, daily_type_chart=daily_type_chart, daily_date=daily_date,
encoding=encoding, all_encoding=all_encoding, date_from=date_from, date_to=date_to)
encoding=encoding, all_encoding=all_encoding, date_from=date_from, date_to=date_to, show_decoded_files=show_decoded_files)
@hashDecoded.route('/hashDecoded/hash_by_type')
def hash_by_type():
@ -400,6 +404,63 @@ def decoder_type_json():
to_json.append({'name': decoder, 'value': nb_decoded[decoder]})
return jsonify(to_json)
@hashDecoded.route('/hashDecoded/top5_type_json')
def top5_type_json():
    """Return the five most frequently decoded file types as JSON.

    Query parameters (all optional):
        date_from, date_to: bounds of the date range, either 'YYYYMMDD' or a
            10-character form such as 'YYYY-MM-DD'. When missing or
            unparsable, the range falls back to today only.
        type: restrict to one file type ('All types', 'None' or absent means
            every member of the 'hash_all_type' set).
        encoding: restrict to one decoder ('All encoding' or absent means
            every member of the 'all_decoder' set).

    Returns:
        A JSON list of at most five ``{'name': <type>, 'value': <count>}``
        objects sorted by decreasing count, or a JSON ``{'Error': ...}``
        object when the requested decoder or type is unknown.
    """
    date_from = request.args.get('date_from')
    date_to = request.args.get('date_to')
    typ = request.args.get('type')
    decoder = request.args.get('encoding')

    if decoder == 'All encoding' or decoder is None:
        all_decoder = r_serv_metadata.smembers('all_decoder')
    else:
        if not r_serv_metadata.sismember('all_decoder', decoder):
            return jsonify({'Error': 'This decoder do not exist'})
        all_decoder = [decoder]

    if typ == 'All types' or typ is None or typ == 'None':
        all_type = r_serv_metadata.smembers('hash_all_type')
    else:
        # a '+' inside a query string is decoded as a space; restore it
        typ = typ.replace(' ', '+')
        if not r_serv_metadata.sismember('hash_all_type', typ):
            return jsonify({'Error': 'This type do not exist'})
        all_type = [typ]

    date_range = []
    if date_from is not None and date_to is not None:
        try:
            # Normalise each bound on its own: previously date_to was only
            # converted when date_from was not already in compact form, so
            # mixed formats were mangled.
            if len(date_from) != 8:
                date_from = date_from[0:4] + date_from[5:7] + date_from[8:10]
            if len(date_to) != 8:
                date_to = date_to[0:4] + date_to[5:7] + date_to[8:10]
            date_range = substract_date(date_from, date_to)
        except Exception:
            # Best effort: an invalid range falls back to today (below).
            # Unlike a bare except, this lets SystemExit/KeyboardInterrupt
            # propagate.
            date_range = []

    if not date_range:
        date_range.append(datetime.date.today().strftime("%Y%m%d"))

    # TODO replace with ZUNIONSTORE
    nb_types_decoded = {}
    for date in date_range:
        for type_name in all_type:
            for decoder_name in all_decoder:
                nb_decoded = r_serv_metadata.zscore('{}_type:{}'.format(decoder_name, type_name), date)
                if nb_decoded is not None:
                    nb_types_decoded[type_name] = nb_types_decoded.get(type_name, 0) + int(nb_decoded)

    top5_types = sorted(nb_types_decoded, key=nb_types_decoded.get, reverse=True)[:5]
    to_json = [{'name': type_name, 'value': nb_types_decoded[type_name]} for type_name in top5_types]
    return jsonify(to_json)
@hashDecoded.route('/hashDecoded/daily_type_json')
def daily_type_json():

View file

@ -121,7 +121,14 @@
{% endif %}
{% endfor %}
</select>
<br>
<div class="checkbox">
<label>
<input type="checkbox" name="show_decoded_files" value="True" {% if show_decoded_files %}checked{% endif %}>
<div style="color:#286090; display:inline-block">
Show decoded files <i class="fa fa-file"></i>
</div>
</label>
</div>
<button class="btn btn-primary" style="text-align:center;">
<i class="fa fa-files-o"></i> Search
</button>
@ -129,6 +136,8 @@
</div>
</div>
<div id="pie_chart_encoded">
</div>
<div id="pie_chart_top5_types">
</div>
</div>
</div>
@ -189,12 +198,14 @@
</tbody>
</table>
{% else %}
{% if show_decoded_files %}
{% if date_from|string == date_to|string %}
<h3> {{ date_from }}, No Hashes</h3>
{% else %}
<h3> {{ date_from }} to {{ date_to }}, No Hashes</h3>
{% endif %}
{% endif %}
{% endif %}
</div>
</div>
@ -248,9 +259,12 @@
{% elif daily_type_chart %}
chart.stackBarChart =barchart_type_stack("{{ url_for('hashDecoded.range_type_json') }}?date_from={{daily_date}}&date_to={{daily_date}}", 'id');
{% else %}
chart.stackBarChart = barchart_type_stack("{{ url_for('hashDecoded.range_type_json') }}?date_from={{date_from}}&date_to={{date_to}}", 'id')
chart.stackBarChart = barchart_type_stack("{{ url_for('hashDecoded.range_type_json') }}?date_from={{date_from}}&date_to={{date_to}}", 'id');
{% endif %}
draw_pie_chart("pie_chart_encoded" ,"{{ url_for('hashDecoded.decoder_type_json') }}?date_from={{date_from}}&date_to={{date_to}}&type={{type}}", "{{ url_for('hashDecoded.hashDecoded_page') }}?date_from={{date_from}}&date_to={{date_to}}&type={{type}}&encoding=");
draw_pie_chart("pie_chart_top5_types" ,"{{ url_for('hashDecoded.top5_type_json') }}?date_from={{date_from}}&date_to={{date_to}}&type={{type}}", "{{ url_for('hashDecoded.hashDecoded_page') }}?date_from={{date_from}}&date_to={{date_to}}&type=");
chart.onResize();
$(window).on("resize", function() {
chart.onResize();
@ -498,21 +512,22 @@ window.chart = chart;
</script>
<script>
function draw_pie_chart(id, url_json, pie_on_click_url) {
var width_pie = 200;
var height_pie = 200;
var padding_pie = 10;
var opacity_pie = .8;
var width_pie = 200;
var height_pie = 200;
var padding_pie = 10;
var opacity_pie = .8;
var radius_pie = Math.min(width_pie - padding_pie, height_pie - padding_pie) / 2;
//var color_pie = d3.scaleOrdinal(d3.schemeCategory10);
var color_pie = d3.scaleOrdinal(d3.schemeSet3);
var radius_pie = Math.min(width_pie - padding_pie, height_pie - padding_pie) / 2;
//var color_pie = d3.scaleOrdinal(d3.schemeCategory10);
var color_pie = d3.scaleOrdinal(d3.schemeSet3);
var div_pie = d3.select("body").append("div")
var div_pie = d3.select("body").append("div")
.attr("class", "tooltip")
.style("opacity", 0);
var svg_pie = d3.select("#pie_chart_encoded")
var svg_pie = d3.select("#"+id)
.append('svg')
.attr("width", '100%')
.attr("height", '100%')
@ -520,14 +535,14 @@ var svg_pie = d3.select("#pie_chart_encoded")
.attr('preserveAspectRatio','xMinYMin')
var g_pie = svg_pie.append('g')
var g_pie = svg_pie.append('g')
.attr('transform', 'translate(' + (width_pie/2) + ',' + (height_pie/2) + ')');
var arc_pie = d3.arc()
var arc_pie = d3.arc()
.innerRadius(0)
.outerRadius(radius_pie);
d3.json("{{ url_for('hashDecoded.decoder_type_json') }}?date_from={{date_from}}&date_to={{date_to}}&type={{type}}")
d3.json(url_json)
.then(function(data){
var pie_pie = d3.pie()
@ -544,12 +559,13 @@ d3.json("{{ url_for('hashDecoded.decoder_type_json') }}?date_from={{date_from}}&
.attr('class', 'pie_path')
.on("mouseover", mouseovered_pie)
.on("mouseout", mouseouted_pie)
.on("click", function (d) {window.location.href = "{{ url_for('hashDecoded.hashDecoded_page') }}?date_from={{date_from}}&date_to={{date_to}}&type={{type}}&encoding="+d.data.name })
.on("click", function (d) {window.location.href = pie_on_click_url+d.data.name })
.style('opacity', opacity_pie)
.style('stroke', 'white');
});
function mouseovered_pie(d) {
function mouseovered_pie(d) {
// tooltip
var content;
@ -564,13 +580,15 @@ function mouseovered_pie(d) {
div_pie.html(content)
.style("left", (d3.event.pageX) + "px")
.style("top", (d3.event.pageY - 28) + "px");
}
}
function mouseouted_pie() {
function mouseouted_pie() {
div_pie.transition()
.duration(500)
.style("opacity", 0);
}
}
</script>

View file

@ -19,6 +19,7 @@ import Flask_config
app = Flask_config.app
cfg = Flask_config.cfg
baseUrl = Flask_config.baseUrl
r_cache = Flask_config.r_cache
r_serv_onion = Flask_config.r_serv_onion
r_serv_metadata = Flask_config.r_serv_metadata
bootstrap_label = Flask_config.bootstrap_label
@ -90,7 +91,11 @@ def hiddenServices_page():
metadata_onion = {}
metadata_onion['domain'] = onion
metadata_onion['last_check'] = r_serv_onion.hget('onion_metadata:{}'.format(onion), 'last_check')
if metadata_onion['last_check'] is None:
metadata_onion['last_check'] = '********'
metadata_onion['first_seen'] = r_serv_onion.hget('onion_metadata:{}'.format(onion), 'first_seen')
if metadata_onion['first_seen'] is None:
metadata_onion['first_seen'] = '********'
if get_onion_status(onion, metadata_onion['last_check']):
metadata_onion['status_text'] = 'UP'
metadata_onion['status_color'] = 'Green'
@ -101,7 +106,71 @@ def hiddenServices_page():
metadata_onion['status_icon'] = 'fa-times-circle'
list_onion.append(metadata_onion)
return render_template("hiddenServices.html", last_onions=list_onion, statDomains=statDomains)
crawler_metadata=[]
all_onion_crawler = r_cache.smembers('all_crawler:onion')
for crawler in all_onion_crawler:
crawling_domain = r_cache.hget('metadata_crawler:{}'.format(crawler), 'crawling_domain')
started_time = r_cache.hget('metadata_crawler:{}'.format(crawler), 'started_time')
status_info = r_cache.hget('metadata_crawler:{}'.format(crawler), 'status')
crawler_info = '{} - {}'.format(crawler, started_time)
if status_info=='Waiting' or status_info=='Crawling':
status=True
else:
status=False
crawler_metadata.append({'crawler_info': crawler_info, 'crawling_domain': crawling_domain, 'status_info': status_info, 'status': status})
date_string = '{}-{}-{}'.format(date[0:4], date[4:6], date[6:8])
return render_template("hiddenServices.html", last_onions=list_onion, statDomains=statDomains,
crawler_metadata=crawler_metadata, date_from=date_string, date_to=date_string)
@hiddenServices.route("/hiddenServices/last_crawled_domains_with_stats_json", methods=['GET'])
def last_crawled_domains_with_stats_json():
    """Return the last crawled onions, today's domain counters and crawler status as JSON.

    Returns:
        A JSON object with three keys:
        - 'last_onions': one entry per member of the 'last_onion' list with
          'domain', 'first_seen', 'last_check' ('********' when the field is
          missing) and the 'status_text' / 'status_color' / 'status_icon'
          display values.
        - 'statDomains': 'domains_up', 'domains_down', 'total' and
          'domains_queue' counters for the current day.
        - 'crawler_metadata': one entry per member of 'all_crawler:onion'
          with 'crawler_info', 'crawling_domain', 'status_info' and the
          boolean 'status'.
    """
    last_onions = r_serv_onion.lrange('last_onion', 0, -1)

    date = datetime.datetime.now().strftime("%Y%m%d")

    statDomains = {}
    statDomains['domains_up'] = r_serv_onion.scard('onion_up:{}'.format(date))
    statDomains['domains_down'] = r_serv_onion.scard('onion_down:{}'.format(date))
    statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down']
    statDomains['domains_queue'] = r_serv_onion.scard('onion_domain_crawler_queue')

    list_onion = []
    for onion in last_onions:
        onion_key = 'onion_metadata:{}'.format(onion)
        metadata_onion = {'domain': onion}

        # '********' is the placeholder for a missing date so that the
        # client-side slicing into YYYY/MM/DD still works
        metadata_onion['last_check'] = r_serv_onion.hget(onion_key, 'last_check')
        if metadata_onion['last_check'] is None:
            metadata_onion['last_check'] = '********'
        metadata_onion['first_seen'] = r_serv_onion.hget(onion_key, 'first_seen')
        if metadata_onion['first_seen'] is None:
            metadata_onion['first_seen'] = '********'

        if get_onion_status(onion, metadata_onion['last_check']):
            metadata_onion['status_text'] = 'UP'
            metadata_onion['status_color'] = 'Green'
            metadata_onion['status_icon'] = 'fa-check-circle'
        else:
            metadata_onion['status_text'] = 'DOWN'
            metadata_onion['status_color'] = 'Red'
            metadata_onion['status_icon'] = 'fa-times-circle'
        list_onion.append(metadata_onion)

    crawler_metadata = []
    all_onion_crawler = r_cache.smembers('all_crawler:onion')
    for crawler in all_onion_crawler:
        crawler_key = 'metadata_crawler:{}'.format(crawler)
        crawling_domain = r_cache.hget(crawler_key, 'crawling_domain')
        started_time = r_cache.hget(crawler_key, 'started_time')
        status_info = r_cache.hget(crawler_key, 'status')
        crawler_info = '{} - {}'.format(crawler, started_time)
        # a crawler counts as healthy only while waiting for work or crawling
        status = status_info == 'Waiting' or status_info == 'Crawling'
        crawler_metadata.append({'crawler_info': crawler_info, 'crawling_domain': crawling_domain,
                                 'status_info': status_info, 'status': status})

    return jsonify({'last_onions': list_onion, 'statDomains': statDomains, 'crawler_metadata': crawler_metadata})
@hiddenServices.route("/hiddenServices/get_onions_by_daterange", methods=['POST'])
def get_onions_by_daterange():
@ -199,8 +268,12 @@ def onion_domain():
# # TODO: FIXME return 404
last_check = r_serv_onion.hget('onion_metadata:{}'.format(onion_domain), 'last_check')
if last_check is None:
last_check = '********'
last_check = '{}/{}/{}'.format(last_check[0:4], last_check[4:6], last_check[6:8])
first_seen = r_serv_onion.hget('onion_metadata:{}'.format(onion_domain), 'first_seen')
if first_seen is None:
first_seen = '********'
first_seen = '{}/{}/{}'.format(first_seen[0:4], first_seen[4:6], first_seen[6:8])
origin_paste = r_serv_onion.hget('onion_metadata:{}'.format(onion_domain), 'paste_parent')

View file

@ -66,7 +66,7 @@
<th>Status</th>
</tr>
</thead>
<tbody>
<tbody id="tbody_last_crawled">
{% for metadata_onion in last_onions %}
<tr>
<td><a target="_blank" href="{{ url_for('hiddenServices.onion_domain') }}?onion_domain={{ metadata_onion['domain'] }}">{{ metadata_onion['domain'] }}</a></td>
@ -142,7 +142,6 @@
</div>
</div>
<div class="panel panel-info">
<div class="panel-heading">
<i class="fa fa-eye-slash"></i> Domains Crawled Today
@ -152,41 +151,81 @@
<tbody>
<tr>
<td>
<a target="_blank" href="{{ url_for('hiddenServices.show_domains_by_daterange') }}?date_from={{ date_from }}&domains_up=True">
<div style="color:Green; display:inline-block">
<i class="fa fa-check-circle fa-2x"></i>
Domains UP
</div>
</a>
</td>
<td>
<a target="_blank" href="{{ url_for('hiddenServices.show_domains_by_daterange') }}?date_from={{ date_from }}&domains_up=True">
<div style="color:Green; display:inline-block">
{{ statDomains['domains_up'] }}
<div id="text_domain_up">{{ statDomains['domains_up'] }}</div>
</div>
</a>
</td>
</tr>
<tr>
<td>
<a target="_blank" href="{{ url_for('hiddenServices.show_domains_by_daterange') }}?date_from={{ date_from }}&domains_down=True">
<div style="color:Red; display:inline-block">
<i class="fa fa-times-circle fa-2x"></i>
Domains DOWN
</div>
</a>
</td>
<td>
<a target="_blank" href="{{ url_for('hiddenServices.show_domains_by_daterange') }}?date_from={{ date_from }}&domains_down=True">
<div style="color:Red; display:inline-block">
{{ statDomains['domains_down'] }}
<div id="text_domain_down">{{ statDomains['domains_down'] }}</div>
</div>
</a>
</td>
</tr>
<tr>
<td>Crawled Domains</td>
<td>{{ statDomains['total'] }}</td>
<td>
<a target="_blank" href="{{ url_for('hiddenServices.show_domains_by_daterange') }}?date_from={{ date_from }}&domains_up=True&domains_down=True">
Crawled Domains
</a>
</td>
<td>
<a target="_blank" href="{{ url_for('hiddenServices.show_domains_by_daterange') }}?date_from={{ date_from }}&domains_up=True&domains_down=True">
<div id="text_total_domains">{{ statDomains['total'] }}</div>
</a>
</td>
</tr>
<tr>
<td>Domains in Queue</td>
<td>{{ statDomains['domains_queue'] }}</td>
<td><div id="text_domain_queue">{{ statDomains['domains_queue'] }}</div></td>
</tr>
</tbody>
</table>
</div>
<div class="panel panel-info" {%if not crawler_metadata%}hidden{%endif%} id="panel_crawler">
<div class="panel-heading">
Crawlers Status
</div>
<table class="table table-hover table-striped">
<tbody id="tbody_crawler_info">
{% for crawler in crawler_metadata %}
<tr>
<td>
<i class="fa fa-{%if crawler['status']%}check{%else%}times{%endif%}-circle fa-2x" style="color:{%if crawler['status']%}Green{%else%}Red{%endif%};"></i> {{crawler['crawler_info']}}
</td>
<td>
{{crawler['crawling_domain']}}
</td>
<td style="color:{%if crawler['status']%}Green{%else%}Red{%endif%};">
{{crawler['status_info']}}
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
</div>
@ -196,6 +235,7 @@
<script>
var all_graph = {};
var to_refresh = false
$(document).ready(function(){
activePage = "page-hiddenServices"
$("#"+activePage).addClass("active");
@ -232,10 +272,22 @@
});
all_graph.line_chart = create_line_chart('graph_line', "{{ url_for('hiddenServices.domain_crawled_7days_json') }}?type=onion");
$( window ).focus(function() {
to_refresh = true
refresh_list_crawled();
});
$( window ).blur(function() {
to_refresh = false
});
to_refresh = true
refresh_list_crawled();
});
$(window).on("resize", function() {
all_graph.onResize();
});
</script>
<script>
@ -319,6 +371,80 @@ d3.json(url)
});
});
}
</script>
<script>
function refresh_list_crawled(){
  // Reload the "last crawled" table, the daily domain counters and the
  // crawler status panel from the JSON endpoint, then re-arm the 10 s refresh
  // timer while `to_refresh` is true (it is toggled by window focus/blur).
  //
  // All server-supplied strings are inserted as DOM text nodes / attributes
  // (never through innerHTML) so they cannot be interpreted as markup.

  // Format an 8-character date string as "YYYY/MM/DD".
  function format_date(d) {
    return d.substr(0, 4)+"/"+d.substr(4, 2)+"/"+d.substr(6, 2);
  }

  // Build a link to the onion_domain page for the given domain.
  function domain_link(domain) {
    var link = document.createElement('a');
    link.target = "_blank";
    link.href = "{{ url_for('hiddenServices.onion_domain') }}?onion_domain="+encodeURIComponent(domain);
    link.textContent = domain;
    return link;
  }

  // Build a Font Awesome <i> icon, optionally coloured.
  function fa_icon(icon_class, color) {
    var icon = document.createElement('i');
    icon.className = "fa "+icon_class+" fa-2x";
    if (color) {
      icon.style.color = color;
    }
    return icon;
  }

  $.getJSON("{{ url_for('hiddenServices.last_crawled_domains_with_stats_json') }}",
    function(data) {
      // --- last crawled domains ---
      var tbody_crawled = document.getElementById('tbody_last_crawled');
      $("#tbody_last_crawled").empty();

      for (var i = 0; i < data.last_onions.length; i++) {
        var data_domain = data.last_onions[i];
        var domain_row = tbody_crawled.insertRow(tbody_crawled.rows.length);

        // insertCell() already creates the <td>; only its content is added here
        domain_row.insertCell(0).appendChild(domain_link(data_domain['domain']));
        domain_row.insertCell(1).textContent = format_date(data_domain['first_seen']);
        domain_row.insertCell(2).textContent = format_date(data_domain['last_check']);

        var domain_status = document.createElement('div');
        domain_status.style.color = data_domain['status_color'];
        domain_status.style.display = "inline-block";
        domain_status.appendChild(fa_icon(data_domain['status_icon']));
        domain_status.appendChild(document.createTextNode(data_domain['status_text']));
        domain_row.insertCell(3).appendChild(domain_status);
      }

      // --- daily counters ---
      var statDomains = data.statDomains;
      document.getElementById('text_domain_up').textContent = statDomains['domains_up'];
      document.getElementById('text_domain_down').textContent = statDomains['domains_down'];
      document.getElementById('text_domain_queue').textContent = statDomains['domains_queue'];
      document.getElementById('text_total_domains').textContent = statDomains['total'];

      // --- crawler status panel (hidden when no crawler is registered) ---
      if(data.crawler_metadata.length!=0){
        $("#tbody_crawler_info").empty();
        var tbody_crawler = document.getElementById('tbody_crawler_info');

        for (var j = 0; j < data.crawler_metadata.length; j++) {
          var crawler = data.crawler_metadata[j];
          var text_color = crawler['status'] ? 'Green' : 'Red';
          var icon = crawler['status'] ? 'check' : 'times';
          var crawler_row = tbody_crawler.insertRow(tbody_crawler.rows.length);

          var info_cell = crawler_row.insertCell(0);
          info_cell.appendChild(fa_icon("fa-"+icon+"-circle", text_color));
          info_cell.appendChild(document.createTextNode(" "+crawler['crawler_info']));

          // crawling_domain can be null in the JSON; leave the cell empty then
          var crawling_cell = crawler_row.insertCell(1);
          if (crawler['crawling_domain']) {
            crawling_cell.appendChild(domain_link(crawler['crawling_domain']));
          }

          var crawler_status = document.createElement('div');
          crawler_status.style.color = text_color;
          crawler_status.textContent = crawler['status_info'];
          crawler_row.insertCell(2).appendChild(crawler_status);
        }
        $("#panel_crawler").show();
      } else {
        $("#panel_crawler").hide();
      }
    }
  );

  // Keep at most one pending refresh: a quick blur/focus would otherwise start
  // a second, parallel refresh chain. clearTimeout() ignores an unset id.
  clearTimeout(refresh_list_crawled.timer);
  if (to_refresh) {
    // pass the function itself; a string argument would be evaluated as code
    refresh_list_crawled.timer = setTimeout(refresh_list_crawled, 10000);
  }
}
</script>
</body>

View file

@ -60,10 +60,10 @@ wget https://cdn.datatables.net/plug-ins/1.10.7/integration/bootstrap/3/dataTabl
wget https://cdn.datatables.net/plug-ins/1.10.7/integration/bootstrap/3/dataTables.bootstrap.js -O ./static/js/dataTables.bootstrap.js
#Resources for graphs
wget https://raw.githubusercontent.com/flot/flot/master/jquery.flot.js -O ./static/js/jquery.flot.js
wget https://raw.githubusercontent.com/flot/flot/master/jquery.flot.pie.js -O ./static/js/jquery.flot.pie.js
wget https://raw.githubusercontent.com/flot/flot/master/jquery.flot.time.js -O ./static/js/jquery.flot.time.js
wget https://raw.githubusercontent.com/flot/flot/master/jquery.flot.stack.js -O ./static/js/jquery.flot.stack.js
wget https://raw.githubusercontent.com/flot/flot/958e5fd43c6dff4bab3e1fd5cb6109df5c1e8003/jquery.flot.js -O ./static/js/jquery.flot.js
wget https://raw.githubusercontent.com/flot/flot/958e5fd43c6dff4bab3e1fd5cb6109df5c1e8003/jquery.flot.pie.js -O ./static/js/jquery.flot.pie.js
wget https://raw.githubusercontent.com/flot/flot/958e5fd43c6dff4bab3e1fd5cb6109df5c1e8003/jquery.flot.time.js -O ./static/js/jquery.flot.time.js
wget https://raw.githubusercontent.com/flot/flot/958e5fd43c6dff4bab3e1fd5cb6109df5c1e8003/jquery.flot.stack.js -O ./static/js/jquery.flot.stack.js
#Resources for sparkline, canvasJS and slider
wget http://omnipotent.net/jquery.sparkline/2.1.2/jquery.sparkline.min.js -O ./static/js/jquery.sparkline.min.js
@ -83,8 +83,12 @@ pushd static/image
wget https://www.circl.lu/assets/images/logos/AIL.png -O AIL.png
popd
#active virtualenv
source ./../../AILENV/bin/activate
if ! [[ -n "$AIL_HOME" ]]
then
#active virtualenv
source ./../../AILENV/bin/activate
fi
#Update MISP Taxonomies and Galaxies
python3 -m pip install git+https://github.com/MISP/PyTaxonomies --upgrade
python3 -m pip install git+https://github.com/MISP/PyMISPGalaxies --upgrade