diff --git a/.gitignore b/.gitignore index 2d276111..83c83b29 100644 --- a/.gitignore +++ b/.gitignore @@ -36,6 +36,9 @@ bin/packages/config.cfg.backup configs/keys files +# Pystemon archives +pystemon/archives + # installed files nltk_data/ doc/all_modules.txt diff --git a/Dockerfile b/Dockerfile index 340e5014..533c44c4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -27,8 +27,18 @@ WORKDIR /opt/AIL # Default to UTF-8 file.encoding ENV LANG C.UTF-8 +ENV AIL_HOME /opt/AIL +ENV AIL_BIN ${AIL_HOME}/bin +ENV AIL_FLASK ${AIL_HOME}/var/www +ENV AIL_REDIS ${AIL_HOME}/redis/src +ENV AIL_ARDB ${AIL_HOME}/ardb/src +ENV AIL_VENV ${AIL_HOME}/AILENV + +ENV PATH ${AIL_VENV}/bin:${AIL_HOME}:${AIL_REDIS}:${AIL_ARDB}:${AIL_BIN}:${AIL_FLASK}:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin RUN ./pystemon/install.sh +RUN pip install -r /opt/pystemon/requirements.txt +RUN pip install -r /opt/AIL/crawler_requirements.txt COPY docker_start.sh /docker_start.sh ENTRYPOINT ["/bin/bash", "docker_start.sh"] diff --git a/OVERVIEW.md b/OVERVIEW.md index 32eae1d8..f80590bf 100644 --- a/OVERVIEW.md +++ b/OVERVIEW.md @@ -101,7 +101,6 @@ ARDB_DB ZADD - 'base64_hash:'+hash paste * nb_seen_in_paste ZADD - 'binary_hash:'+hash paste * nb_seen_in_paste - ZADD - 'hash_type:'+type date nb_seen ZADD - 'base64_type:'+type date nb_seen ZADD - 'binary_type:'+type date nb_seen diff --git a/README.md b/README.md index e8af8e5f..9ca65d2a 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ AIL is a modular framework to analyse potential information leaks from unstructu - + @@ -168,6 +168,22 @@ Privacy and GDPR [AIL information leaks analysis and the GDPR in the context of collection, analysis and sharing information leaks](https://www.circl.lu/assets/files/information-leaks-analysis-and-gdpr.pdf) document provides an overview how to use AIL in a lawfulness context especially in the scope of General Data Protection Regulation. 
+Research using AIL +------------------ + +If you write an academic paper relying on or using AIL, it can be cited with the following BibTeX: + +~~~~ +@inproceedings{mokaddem2018ail, + title={AIL-The design and implementation of an Analysis Information Leak framework}, + author={Mokaddem, Sami and Wagener, G{\'e}rard and Dulaunoy, Alexandre}, + booktitle={2018 IEEE International Conference on Big Data (Big Data)}, + pages={5049--5057}, + year={2018}, + organization={IEEE} +} +~~~~ + Screenshots =========== @@ -237,11 +253,11 @@ License ``` Copyright (C) 2014 Jules Debra - Copyright (C) 2014-2018 CIRCL - Computer Incident Response Center Luxembourg (c/o smile, security made in Lëtzebuerg, Groupement d'Intérêt Economique) - Copyright (c) 2014-2018 Raphaël Vinot - Copyright (c) 2014-2018 Alexandre Dulaunoy - Copyright (c) 2016-2018 Sami Mokaddem - Copyright (c) 2018 Thirion Aurélien + Copyright (C) 2014-2019 CIRCL - Computer Incident Response Center Luxembourg (c/o smile, security made in Lëtzebuerg, Groupement d'Intérêt Economique) + Copyright (c) 2014-2019 Raphaël Vinot + Copyright (c) 2014-2019 Alexandre Dulaunoy + Copyright (c) 2016-2019 Sami Mokaddem + Copyright (c) 2018-2019 Thirion Aurélien This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by diff --git a/bin/CVE_check.py b/bin/CVE_check.py new file mode 100755 index 00000000..63f611de --- /dev/null +++ b/bin/CVE_check.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +from packages import Paste +from Helper import Process + +import os +import re +import time +import redis +import configparser + +from collections import defaultdict + +def get_dict_cve(list_paste_cve, only_one_same_cve_by_paste=False): + dict_keyword = {} + + for paste_cve in list_paste_cve: + paste_content = Paste.Paste(paste_cve).get_p_content() + + cve_list = reg_cve.findall(paste_content) + if only_one_same_cve_by_paste: + cve_list = set(cve_list) + + for cve in cve_list: + try: + dict_keyword[cve] += 1 + except KeyError: + dict_keyword[cve] = 1 + + print('------------------------------------------------') + if dict_keyword: + res = [(k, dict_keyword[k]) for k in sorted(dict_keyword, key=dict_keyword.get, reverse=True)] + for item in res: + pass + print(item) + + + +if __name__ == '__main__': + + # CONFIG # + configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') + if not os.path.exists(configfile): + raise Exception('Unable to find the configuration file. \ + Did you set environment variables? 
\ + Or activate the virtualenv.') + + cfg = configparser.ConfigParser() + cfg.read(configfile) + + serv_metadata = redis.StrictRedis( + host=cfg.get("ARDB_Metadata", "host"), + port=cfg.getint("ARDB_Metadata", "port"), + db=cfg.getint("ARDB_Metadata", "db"), + decode_responses=True) + + serv_tags = redis.StrictRedis( + host=cfg.get("ARDB_Tags", "host"), + port=cfg.get("ARDB_Tags", "port"), + db=cfg.get("ARDB_Tags", "db"), + decode_responses=True) + + reg_cve = re.compile(r'CVE-[1-2]\d{1,4}-\d{1,7}') + + #all_past_cve = serv_tags.smembers('infoleak:automatic-detection="cve"') + #all_past_cve_regular = serv_tags.sdiff('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"') + #all_past_cve_crawler = serv_tags.sinter('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"') + + #print('{} + {} = {}'.format(len(all_past_cve_regular), len(all_past_cve_crawler), len(all_past_cve))) + + print('ALL_CVE') + get_dict_cve(serv_tags.smembers('infoleak:automatic-detection="cve"'), True) + print() + print() + print() + print('REGULAR_CVE') + get_dict_cve(serv_tags.sdiff('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"'), True) + print() + print() + print() + print('CRAWLER_CVE') + get_dict_cve(serv_tags.sinter('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"'), True) diff --git a/bin/Crawler.py b/bin/Crawler.py index 0f69cfe6..e6b61a99 100755 --- a/bin/Crawler.py +++ b/bin/Crawler.py @@ -10,6 +10,8 @@ import time import subprocess import requests +from pyfaup.faup import Faup + sys.path.append(os.environ['AIL_BIN']) from Helper import Process from pubsublogger import publisher @@ -18,10 +20,13 @@ def on_error_send_message_back_in_queue(type_hidden_service, domain, message): # send this msg back in the queue if not r_onion.sismember('{}_domain_crawler_queue'.format(type_hidden_service), domain): r_onion.sadd('{}_domain_crawler_queue'.format(type_hidden_service), domain) - r_onion.sadd('{}_crawler_queue'.format(type_hidden_service), message) + r_onion.sadd('{}_crawler_priority_queue'.format(type_hidden_service), message) def crawl_onion(url, domain, date, date_month, message): + r_cache.hset('metadata_crawler:{}'.format(splash_port), 'crawling_domain', domain) + r_cache.hset('metadata_crawler:{}'.format(splash_port), 'started_time', datetime.datetime.now().strftime("%Y/%m/%d - %H:%M.%S")) + #if not r_onion.sismember('full_onion_up', domain) and not r_onion.sismember('onion_down:'+date , domain): super_father = r_serv_metadata.hget('paste_metadata:'+paste, 'super_father') if super_father is None: @@ -37,19 +42,21 @@ def crawl_onion(url, domain, date, date_month, message): # TODO: relaunch docker or send error message nb_retry += 1 - if nb_retry == 30: + if nb_retry == 6: on_error_send_message_back_in_queue(type_hidden_service, domain, message) publisher.error('{} SPASH DOWN'.format(splash_url)) print('--------------------------------------') print(' \033[91m DOCKER SPLASH DOWN\033[0m') print(' {} DOWN'.format(splash_url)) - exit(1) + r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'SPLASH DOWN') + nb_retry = 0 print(' \033[91m DOCKER SPLASH NOT AVAILABLE\033[0m') print(' Retry({}) in 10 seconds'.format(nb_retry)) time.sleep(10) if r.status_code == 200: + r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Crawling') process = subprocess.Popen(["python", './torcrawler/tor_crawler.py', splash_url, type_hidden_service, url, domain, paste, super_father], stdout=subprocess.PIPE) while process.poll() is None: @@ 
-67,6 +74,7 @@ def crawl_onion(url, domain, date, date_month, message): print('') print(' PROXY DOWN OR BAD CONFIGURATION\033[0m'.format(splash_url)) print('------------------------------------------------------------------------') + r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Error') exit(-2) else: print(process.stdout.read()) @@ -76,6 +84,7 @@ def crawl_onion(url, domain, date, date_month, message): print('--------------------------------------') print(' \033[91m DOCKER SPLASH DOWN\033[0m') print(' {} DOWN'.format(splash_url)) + r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'SPLASH DOWN') exit(1) @@ -119,6 +128,7 @@ if __name__ == '__main__': print('splash url: {}'.format(splash_url)) crawler_depth_limit = p.config.getint("Crawler", "crawler_depth_limit") + faup = Faup() PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes")) @@ -140,6 +150,10 @@ if __name__ == '__main__': db=p.config.getint("ARDB_Onion", "db"), decode_responses=True) + r_cache.sadd('all_crawler:{}'.format(type_hidden_service), splash_port) + r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Waiting') + r_cache.hset('metadata_crawler:{}'.format(splash_port), 'started_time', datetime.datetime.now().strftime("%Y/%m/%d - %H:%M.%S")) + # load domains blacklist try: with open(os.environ['AIL_BIN']+'/torcrawler/blacklist_onion.txt', 'r') as f: @@ -152,8 +166,12 @@ if __name__ == '__main__': while True: - # Recovering the streamed message informations. - message = r_onion.spop('{}_crawler_queue'.format(type_hidden_service)) + # Priority Queue - Recovering the streamed message information. + message = r_onion.spop('{}_crawler_priority_queue'.format(type_hidden_service)) + + if message is None: + # Recovering the streamed message information. 
+ message = r_onion.spop('{}_crawler_queue'.format(type_hidden_service)) if message is not None: @@ -173,6 +191,8 @@ if __name__ == '__main__': domain_url = 'http://{}'.format(domain) + print() + print() print('\033[92m------------------START CRAWLER------------------\033[0m') print('crawler type: {}'.format(type_hidden_service)) print('\033[92m-------------------------------------------------\033[0m') @@ -180,12 +200,24 @@ if __name__ == '__main__': print('domain: {}'.format(domain)) print('domain_url: {}'.format(domain_url)) - if not r_onion.sismember('blacklist_{}'.format(type_hidden_service), domain): + faup.decode(domain) + onion_domain=faup.get()['domain'].decode() + + if not r_onion.sismember('blacklist_{}'.format(type_hidden_service), domain) and not r_onion.sismember('blacklist_{}'.format(type_hidden_service), onion_domain): date = datetime.datetime.now().strftime("%Y%m%d") date_month = datetime.datetime.now().strftime("%Y%m") if not r_onion.sismember('month_{}_up:{}'.format(type_hidden_service, date_month), domain) and not r_onion.sismember('{}_down:{}'.format(type_hidden_service, date), domain): + # first seen + if not r_onion.hexists('{}_metadata:{}'.format(type_hidden_service, domain), 'first_seen'): + r_onion.hset('{}_metadata:{}'.format(type_hidden_service, domain), 'first_seen', date) + + # last_father + r_onion.hset('{}_metadata:{}'.format(type_hidden_service, domain), 'paste_parent', paste) + + # last check + r_onion.hset('{}_metadata:{}'.format(type_hidden_service, domain), 'last_check', date) crawl_onion(url, domain, date, date_month, message) if url != domain_url: @@ -198,21 +230,12 @@ if __name__ == '__main__': r_onion.sadd('{}_down:{}'.format(type_hidden_service, date), domain) #r_onion.sadd('{}_down_link:{}'.format(type_hidden_service, date), url) #r_onion.hincrby('{}_link_down'.format(type_hidden_service), url, 1) - if not r_onion.exists('{}_metadata:{}'.format(type_hidden_service, domain)): - r_onion.hset('{}_metadata:{}'.format(type_hidden_service, domain), 'first_seen', date) - r_onion.hset('{}_metadata:{}'.format(type_hidden_service,domain), 'last_seen', date) else: #r_onion.hincrby('{}_link_up'.format(type_hidden_service), url, 1) if r_onion.sismember('month_{}_up:{}'.format(type_hidden_service, date_month), domain) and r_serv_metadata.exists('paste_children:'+paste): msg = 'infoleak:automatic-detection="{}";{}'.format(type_hidden_service, paste) p.populate_set_out(msg, 'Tags') - # last check - r_onion.hset('{}_metadata:{}'.format(type_hidden_service, domain), 'last_check', date) - - # last_father - r_onion.hset('{}_metadata:{}'.format(type_hidden_service, domain), 'paste_parent', paste) - # add onion screenshot history # add crawled days if r_onion.lindex('{}_history:{}'.format(type_hidden_service, domain), 0) != date: @@ -243,6 +266,14 @@ if __name__ == '__main__': r_onion.lpush('last_{}'.format(type_hidden_service), domain) r_onion.ltrim('last_{}'.format(type_hidden_service), 0, 15) + #update crawler status + r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Waiting') + r_cache.hdel('metadata_crawler:{}'.format(splash_port), 'crawling_domain') + else: + print(' Blacklisted Onion') + print() + print() + else: continue else: diff --git a/bin/Global.py b/bin/Global.py index c1e16496..6f26ad0e 100755 --- a/bin/Global.py +++ b/bin/Global.py @@ -23,23 +23,17 @@ Requirements import base64 import os import time +import uuid from pubsublogger import publisher from Helper import Process import magic -import io -#import gzip -''' -def 
gunzip_bytes_obj(bytes_obj): - in_ = io.BytesIO() - in_.write(bytes_obj) - in_.seek(0) - with gzip.GzipFile(fileobj=in_, mode='rb') as fo: - gunzipped_bytes_obj = fo.read() +def rreplace(s, old, new, occurrence): + li = s.rsplit(old, occurrence) + return new.join(li) - return gunzipped_bytes_obj.decode()''' if __name__ == '__main__': publisher.port = 6380 @@ -79,6 +73,12 @@ if __name__ == '__main__': processed_paste = 0 time.sleep(1) continue + + file_name_paste = paste.split('/')[-1] + if len(file_name_paste)>255: + new_file_name_paste = '{}{}.gz'.format(file_name_paste[:215], str(uuid.uuid4())) + paste = rreplace(paste, file_name_paste, new_file_name_paste, 1) + # Creating the full filepath filename = os.path.join(PASTES_FOLDER, paste) diff --git a/bin/ModulesInformationV2.py b/bin/ModulesInformationV2.py index 30a24f15..cef6301c 100755 --- a/bin/ModulesInformationV2.py +++ b/bin/ModulesInformationV2.py @@ -31,7 +31,7 @@ lastTimeKillCommand = {} current_selected_value = 0 current_selected_queue = "" current_selected_action = "" -current_selected_action = 0 +current_selected_amount = 0 # Map PID to Queue name (For restart and killing) PID_NAME_DICO = {} @@ -480,7 +480,10 @@ class Show_paste(Frame): self.label_list[i]._text = "" except Exception as e: - self.label_list[0]._text = "Error while displaying the paste: " + COMPLETE_PASTE_PATH_PER_PID[current_selected_value] + if current_selected_value in COMPLETE_PASTE_PATH_PER_PID: + self.label_list[0]._text = "Error while displaying the paste: " + COMPLETE_PASTE_PATH_PER_PID[current_selected_value] + else: + self.label_list[0]._text = "Error Generic exception caught" self.label_list[1]._text = str(e) for i in range(2,self.num_label): self.label_list[i]._text = "" diff --git a/bin/Onion.py b/bin/Onion.py index e38f363a..ddae9afc 100755 --- a/bin/Onion.py +++ b/bin/Onion.py @@ -29,10 +29,18 @@ import os import base64 import subprocess import redis +import signal import re from Helper import Process +class TimeoutException(Exception): + pass + +def timeout_handler(signum, frame): + raise TimeoutException + +signal.signal(signal.SIGALRM, timeout_handler) def fetch(p, r_cache, urls, domains, path): failed = [] @@ -113,6 +121,8 @@ if __name__ == "__main__": message = p.get_from_set() prec_filename = None + max_execution_time = p.config.getint("Onion", "max_execution_time") + # send to crawler: activate_crawler = p.config.get("Crawler", "activate_crawler") if activate_crawler == 'True': @@ -130,6 +140,7 @@ if __name__ == "__main__": while True: + message = p.get_from_set() if message is not None: print(message) filename, score = message.split() @@ -140,16 +151,26 @@ if __name__ == "__main__": urls = [] PST = Paste.Paste(filename) - for x in PST.get_regex(url_regex): - print(x) - # Extracting url with regex - url, s, credential, subdomain, domain, host, port, \ - resource_path, query_string, f1, f2, f3, f4 = x + # max execution time on regex + signal.alarm(max_execution_time) + try: + for x in PST.get_regex(url_regex): + print(x) + # Extracting url with regex + url, s, credential, subdomain, domain, host, port, \ + resource_path, query_string, f1, f2, f3, f4 = x - if '.onion' in url: - print(url) - domains_list.append(domain) - urls.append(url) + if '.onion' in url: + print(url) + domains_list.append(domain) + urls.append(url) + except TimeoutException: + encoded_list = [] + p.incr_module_timeout_statistic() + print ("{0} processing timeout".format(PST.p_path)) + continue + + signal.alarm(0) ''' for x in PST.get_regex(i2p_regex): @@ -177,8 +198,12 @@ 
if __name__ == "__main__": print(len(domains_list)) if len(domains_list) > 0: - publisher.warning('{}Detected {} .onion(s);{}'.format( - to_print, len(domains_list),PST.p_rel_path)) + if not activate_crawler: + publisher.warning('{}Detected {} .onion(s);{}'.format( + to_print, len(domains_list),PST.p_rel_path)) + else: + publisher.info('{}Detected {} .onion(s);{}'.format( + to_print, len(domains_list),PST.p_rel_path)) now = datetime.datetime.now() path = os.path.join('onions', str(now.year).zfill(4), str(now.month).zfill(2), @@ -199,12 +224,20 @@ if __name__ == "__main__": else: continue + # too many subdomain + if len(domain.split('.')) > 5: + continue + if not r_onion.sismember('month_onion_up:{}'.format(date_month), domain) and not r_onion.sismember('onion_down:'+date , domain): if not r_onion.sismember('onion_domain_crawler_queue', domain): print('send to onion crawler') r_onion.sadd('onion_domain_crawler_queue', domain) msg = '{};{}'.format(url,PST.p_rel_path) - r_onion.sadd('onion_crawler_queue', msg) + if not r_onion.hexists('onion_metadata:{}'.format(domain), 'first_seen'): + r_onion.sadd('onion_crawler_priority_queue', msg) + print('send to priority queue') + else: + r_onion.sadd('onion_crawler_queue', msg) #p.populate_set_out(msg, 'Crawler') else: @@ -222,4 +255,3 @@ if __name__ == "__main__": publisher.debug("Script url is Idling 10s") #print('Sleeping') time.sleep(10) - message = p.get_from_set() diff --git a/bin/feeder/pystemon-feeder.py b/bin/feeder/pystemon-feeder.py index 280849ba..5c9f743c 100755 --- a/bin/feeder/pystemon-feeder.py +++ b/bin/feeder/pystemon-feeder.py @@ -67,7 +67,7 @@ while True: print(paste) with open(pystemonpath+paste, 'rb') as f: #.read() messagedata = f.read() - path_to_send = pastes_directory+paste + path_to_send = os.path.join(pastes_directory,paste) s = b' '.join( [ topic.encode(), path_to_send.encode(), base64.b64encode(messagedata) ] ) socket.send(s) diff --git a/bin/packages/config.cfg.docker-compose-sample b/bin/packages/config.cfg.docker-compose-sample new file mode 100644 index 00000000..2f563493 --- /dev/null +++ b/bin/packages/config.cfg.docker-compose-sample @@ -0,0 +1,253 @@ +[Directories] +bloomfilters = Blooms +dicofilters = Dicos +pastes = PASTES +hash = HASHS +crawled = crawled +crawled_screenshot = CRAWLED_SCREENSHOT + +wordtrending_csv = var/www/static/csv/wordstrendingdata +wordsfile = files/wordfile + +protocolstrending_csv = var/www/static/csv/protocolstrendingdata +protocolsfile = files/protocolsfile + +tldstrending_csv = var/www/static/csv/tldstrendingdata +tldsfile = faup/src/data/mozilla.tlds + +domainstrending_csv = var/www/static/csv/domainstrendingdata + +pystemonpath = /opt/pystemon/ + +sentiment_lexicon_file = sentiment/vader_lexicon.zip/vader_lexicon/vader_lexicon.txt + +##### Notifications ###### +[Notifications] +ail_domain = http://localhost:7000 +sender = sender@example.com +sender_host = smtp.example.com +sender_port = 1337 +sender_pw = None + +# optional for using with authenticated SMTP over SSL +# sender_pw = securepassword + +##### Flask ##### +[Flask] +#Proxying requests to the app +baseUrl = / +#Number of logs to display in the dashboard +max_dashboard_logs = 15 +#Maximum number of character to display in the toolip +max_preview_char = 250 +#Maximum number of character to display in the modal +max_preview_modal = 800 +#Default number of header to display in trending graphs +default_display = 10 +#Number of minutes displayed for the number of processed pastes. 
+minute_processed_paste = 10 +#Maximum line length authorized to make a diff between duplicates +DiffMaxLineLength = 10000 + +#### Modules #### +[BankAccount] +max_execution_time = 60 + +[Categ] +#Minimum number of match between the paste and the category file +matchingThreshold=1 + +[Credential] +#Minimum length that a credential must have to be considered as such +minimumLengthThreshold=3 +#Will be pushed as alert if the number of credentials is greater to that number +criticalNumberToAlert=8 +#Will be considered as false positive if less that X matches from the top password list +minTopPassList=5 + +[Curve] +max_execution_time = 90 + +[Onion] +max_execution_time = 180 + +[Base64] +path = Base64/ +max_execution_time = 60 + +[Binary] +path = Base64/ +max_execution_time = 60 + +[Hex] +path = Base64/ +max_execution_time = 60 + +[Modules_Duplicates] +#Number of month to look back +maximum_month_range = 3 +#The value where two pastes are considerate duplicate for ssdeep. +threshold_duplicate_ssdeep = 50 +#The value where two pastes are considerate duplicate for tlsh. +threshold_duplicate_tlsh = 52 +#Minimum size of the paste considered +min_paste_size = 0.3 + +[Module_ModuleInformation] +#Threshold to deduce if a module is stuck or not, in seconds. +threshold_stucked_module=600 + +[Module_Mixer] +#Define the configuration of the mixer, possible value: 1, 2 or 3 +operation_mode = 3 +#Define the time that a paste will be considerate duplicate. in seconds (1day = 86400) +ttl_duplicate = 86400 +default_unnamed_feed_name = unnamed_feeder + +[RegexForTermsFrequency] +max_execution_time = 60 + +##### Redis ##### +[Redis_Cache] +host = localhost +port = 6379 +db = 0 + +[Redis_Log] +host = localhost +port = 6380 +db = 0 + +[Redis_Log_submit] +host = localhost +port = 6380 +db = 1 + +[Redis_Queues] +host = localhost +port = 6381 +db = 0 + +[Redis_Data_Merging] +host = localhost +port = 6379 +db = 1 + +[Redis_Paste_Name] +host = localhost +port = 6379 +db = 2 + +[Redis_Mixer_Cache] +host = localhost +port = 6381 +db = 1 + +##### ARDB ##### +[ARDB_Curve] +host = localhost +port = 6382 +db = 1 + +[ARDB_Sentiment] +host = localhost +port = 6382 +db = 4 + +[ARDB_TermFreq] +host = localhost +port = 6382 +db = 2 + +[ARDB_TermCred] +host = localhost +port = 6382 +db = 5 + +[ARDB_DB] +host = localhost +port = 6382 +db = 0 + +[ARDB_Trending] +host = localhost +port = 6382 +db = 3 + +[ARDB_Hashs] +host = localhost +db = 1 + +[ARDB_Tags] +host = localhost +port = 6382 +db = 6 + +[ARDB_Metadata] +host = localhost +port = 6382 +db = 7 + +[ARDB_Statistics] +host = localhost +port = 6382 +db = 8 + +[ARDB_Onion] +host = localhost +port = 6382 +db = 9 + +[Url] +cc_critical = DE + +[DomClassifier] +cc = DE +cc_tld = r'\.de$' +dns = 8.8.8.8 + +[Mail] +dns = 8.8.8.8 + +[Web] +dns = 149.13.33.69 + +# Indexer configuration +[Indexer] +type = whoosh +path = indexdir +register = indexdir/all_index.txt +#size in Mb +index_max_size = 2000 + +[ailleakObject] +maxDuplicateToPushToMISP=10 + +############################################################################### + +# For multiple feed, add them with "," without space +# e.g.: tcp://127.0.0.1:5556,tcp://127.0.0.1:5557 +[ZMQ_Global] +#address = tcp://crf.circl.lu:5556 +address = tcp://127.0.0.1:5556,tcp://crf.circl.lu:5556 +channel = 102 +bind = tcp://127.0.0.1:5556 + +[ZMQ_Url] +address = tcp://127.0.0.1:5004 +channel = urls + +[ZMQ_FetchedOnion] +address = tcp://127.0.0.1:5005 +channel = FetchedOnion + +[RedisPubSub] +host = localhost +port = 6381 +db = 0 + +[Crawler] 
+activate_crawler = False +crawler_depth_limit = 1 +splash_url_onion = http://172.17.0.1 +splash_onion_port = 8050 diff --git a/bin/packages/config.cfg.sample b/bin/packages/config.cfg.sample index b5980766..ace656cc 100644 --- a/bin/packages/config.cfg.sample +++ b/bin/packages/config.cfg.sample @@ -68,6 +68,9 @@ minTopPassList=5 [Curve] max_execution_time = 90 +[Onion] +max_execution_time = 180 + [Base64] path = Base64/ max_execution_time = 60 diff --git a/bin/torcrawler/TorSplashCrawler.py b/bin/torcrawler/TorSplashCrawler.py index 47486dd9..6bb4d938 100644 --- a/bin/torcrawler/TorSplashCrawler.py +++ b/bin/torcrawler/TorSplashCrawler.py @@ -10,10 +10,12 @@ import datetime import base64 import redis import json +import time from scrapy.spidermiddlewares.httperror import HttpError from twisted.internet.error import DNSLookupError from twisted.internet.error import TimeoutError +from twisted.web._newclient import ResponseNeverReceived from scrapy import Spider from scrapy.linkextractors import LinkExtractor @@ -39,6 +41,8 @@ class TorSplashCrawler(): 'SPIDER_MIDDLEWARES': {'scrapy_splash.SplashDeduplicateArgsMiddleware': 100,}, 'DUPEFILTER_CLASS': 'scrapy_splash.SplashAwareDupeFilter', 'HTTPERROR_ALLOW_ALL': True, + 'RETRY_TIMES': 2, + 'CLOSESPIDER_PAGECOUNT': 50, 'DEPTH_LIMIT': crawler_depth_limit }) @@ -97,7 +101,7 @@ class TorSplashCrawler(): yield SplashRequest( self.start_urls, self.parse, - #errback=self.errback_catcher, + errback=self.errback_catcher, endpoint='render.json', meta={'father': self.original_paste}, args={ 'html': 1, @@ -122,7 +126,11 @@ class TorSplashCrawler(): print('Connection to proxy refused') else: - UUID = self.domains[0]+str(uuid.uuid4()) + #avoid filename too big + if len(self.domains[0]) > 215: + UUID = self.domains[0][-215:]+str(uuid.uuid4()) + else: + UUID = self.domains[0]+str(uuid.uuid4()) filename_paste = os.path.join(self.crawled_paste_filemame, UUID) relative_filename_paste = os.path.join(self.crawler_path, UUID) filename_screenshot = os.path.join(self.crawled_screenshot, UUID +'.png') @@ -174,7 +182,7 @@ class TorSplashCrawler(): yield SplashRequest( link.url, self.parse, - #errback=self.errback_catcher, + errback=self.errback_catcher, endpoint='render.json', meta={'father': relative_filename_paste}, args={ 'html': 1, @@ -184,17 +192,39 @@ class TorSplashCrawler(): 'wait': 10} ) - ''' def errback_catcher(self, failure): # catch all errback failures, self.logger.error(repr(failure)) - print('failure') - #print(failure) - print(failure.type) - #print(failure.request.meta['item']) + if failure.check(ResponseNeverReceived): + request = failure.request + url = request.meta['splash']['args']['url'] + father = request.meta['father'] + + self.logger.error('Splash, ResponseNeverReceived for %s, retry in 10s ...', url) + time.sleep(10) + yield SplashRequest( + url, + self.parse, + errback=self.errback_catcher, + endpoint='render.json', + meta={'father': father}, + args={ 'html': 1, + 'png': 1, + 'render_all': 1, + 'har': 1, + 'wait': 10} + ) + + else: + print('failure') + #print(failure) + print(failure.type) + #print(failure.request.meta['item']) + + ''' #if isinstance(failure.value, HttpError): - if failure.check(HttpError): + elif failure.check(HttpError): # you can get the response response = failure.value.response print('HttpError') @@ -214,7 +244,7 @@ class TorSplashCrawler(): print('TimeoutError') print(TimeoutError) self.logger.error('TimeoutError on %s', request.url) - ''' + ''' def save_crawled_paste(self, filename, content): diff --git 
a/bin/torcrawler/blacklist_onion.txt b/bin/torcrawler/blacklist_onion.txt index 15dfa0de..3718fc96 100644 --- a/bin/torcrawler/blacklist_onion.txt +++ b/bin/torcrawler/blacklist_onion.txt @@ -3,3 +3,5 @@ facebookcorewwwi.onion graylady3jvrrxbe.onion expyuzz4wqqyqhjn.onion dccbbv6cooddgcrq.onion +pugljpwjhbiagkrn.onion +jld3zkuo4b5mbios.onion diff --git a/bin/torcrawler/launch_splash_crawler.sh b/bin/torcrawler/launch_splash_crawler.sh index e47efc36..5c7f21ee 100755 --- a/bin/torcrawler/launch_splash_crawler.sh +++ b/bin/torcrawler/launch_splash_crawler.sh @@ -5,12 +5,15 @@ usage() { echo "Usage: sudo $0 [-f ] [-p ] [-n echo " -p: number of the first splash server port number. This number is incremented for the others splash server"; echo " -n: number of splash servers to start"; echo ""; + echo " -options:"; + echo " -u: max unbound in-memory cache (Mb, Restart Splash when full, default=3000 Mb)"; + echo ""; echo "example:"; echo "sudo ./launch_splash_crawler.sh -f /home/my_user/AIL-framework/configs/docker/splash_onion/etc/splash/proxy-profiles/ -p 8050 -n 3"; exit 1; } -while getopts ":p:f:n:" o; do +while getopts ":p:f:n:u:" o; do case "${o}" in p) p=${OPTARG} @@ -21,6 +24,9 @@ while getopts ":p:f:n:" o; do n) n=${OPTARG} ;; + u) + u=${OPTARG} + ;; *) usage ;; @@ -28,6 +34,10 @@ while getopts ":p:f:n:" o; do done shift $((OPTIND-1)) +if [ -z "${u}" ]; then + u=3000; +fi + if [ -z "${p}" ] || [ -z "${f}" ] || [ -z "${n}" ]; then usage; fi @@ -52,7 +62,7 @@ sleep 0.1 for ((i=0;i<=$((${n} - 1));i++)); do port_number=$((${p} + $i)) - screen -S "Docker_Splash" -X screen -t "docker_splash:$port_number" bash -c 'sudo docker run -p '$port_number':8050 --cpus=1 --memory=4.5G -v '$f':/etc/splash/proxy-profiles/ --net="bridge" scrapinghub/splash; read x' + screen -S "Docker_Splash" -X screen -t "docker_splash:$port_number" bash -c 'sudo docker run -d -p '$port_number':8050 --restart=always --cpus=1 --memory=4.5G -v '$f':/etc/splash/proxy-profiles/ --net="bridge" scrapinghub/splash --maxrss '$u'; read x' sleep 0.1 printf "$GREEN Splash server launched on port $port_number$DEFAULT\n" done diff --git a/configs/6379.conf b/configs/6379.conf index d799cd17..9a535bed 100644 --- a/configs/6379.conf +++ b/configs/6379.conf @@ -61,7 +61,7 @@ tcp-backlog 511 # Examples: # # bind 192.168.1.100 10.0.0.1 -# bind 127.0.0.1 +bind 127.0.0.1 # Specify the path for the Unix socket that will be used to listen for # incoming connections. There is no default, so Redis will not listen diff --git a/configs/6380.conf b/configs/6380.conf index 2a30b0d1..31c7a6e0 100644 --- a/configs/6380.conf +++ b/configs/6380.conf @@ -61,7 +61,7 @@ tcp-backlog 511 # Examples: # # bind 192.168.1.100 10.0.0.1 -# bind 127.0.0.1 +bind 127.0.0.1 # Specify the path for the Unix socket that will be used to listen for # incoming connections. There is no default, so Redis will not listen diff --git a/configs/6381.conf b/configs/6381.conf index 95a5c07d..8360a199 100644 --- a/configs/6381.conf +++ b/configs/6381.conf @@ -61,7 +61,7 @@ tcp-backlog 511 # Examples: # # bind 192.168.1.100 10.0.0.1 -# bind 127.0.0.1 +bind 127.0.0.1 # Specify the path for the Unix socket that will be used to listen for # incoming connections. 
There is no default, so Redis will not listen diff --git a/doc/presentation/ail-training-december-2018.pdf b/doc/presentation/ail-training-december-2018.pdf new file mode 100644 index 00000000..5aa6d9d9 Binary files /dev/null and b/doc/presentation/ail-training-december-2018.pdf differ diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 00000000..fc7f2a91 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,523 @@ +version: '3' +services: + ardb: + entrypoint: + - ardb-server + - /opt/AIL/configs/6382.conf + healthcheck: + test: ["CMD", "redis-cli", "-p", "6382", "ping"] + interval: 30s + timeout: 10s + retries: 5 + network_mode: service:flask + image: ail-framework + volumes: + - ./configs:/opt/AIL/configs:ro + crawler: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/bin/Crawler.py + - onion + - "8050" + network_mode: service:flask + image: ail-framework + volumes: + - ./CRAWLED_SCREENSHOT/:/opt/AIL/CRAWLED_SCREENSHOT + - ./PASTES/:/opt/AIL/PASTES + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin + flask: + build: . + entrypoint: + - /opt/AIL/var/www/Flask_server.py + ports: + - "7000:7000" + image: ail-framework + volumes: + - ./CRAWLED_SCREENSHOT/:/opt/AIL/CRAWLED_SCREENSHOT + - ./PASTES/:/opt/AIL/PASTES + - ./indexdir:/opt/AIL/indexdir + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/var/www + log-queue: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/AILENV/bin/log_subscriber + - -p + - "6380" + - -c + - Queing + - -l + - /opt/AIL/logs/ + network_mode: service:flask + image: ail-framework + volumes: + - ./configs:/opt/AIL/configs:ro + log-script: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/AILENV/bin/log_subscriber + - -p + - "6380" + - -c + - Script + - -l + - /opt/AIL/logs/ + network_mode: service:flask + image: ail-framework + volumes: + - ./configs:/opt/AIL/configs:ro + pystemon: + depends_on: + - redis-log + entrypoint: + - /opt/pystemon/pystemon.py + network_mode: service:flask + image: ail-framework + volumes: + - ./PASTES/:/opt/AIL/PASTES + - ./pystemon/archives:/opt/pystemon/archive + - ./pystemon/proxies.txt:/opt/pystemon/proxies.txt:ro + - ./pystemon/pystemon.yaml:/opt/pystemon/pystemon.yaml:ro + working_dir: /opt/pystemon + pystemon-feeder: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/bin/feeder/pystemon-feeder.py + network_mode: service:flask + image: ail-framework + volumes: + - ./PASTES/:/opt/AIL/PASTES + - ./pystemon/archives:/opt/pystemon/archive + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin + queues: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/bin/launch_queues.py + network_mode: service:flask + image: ail-framework + volumes: + - ./PASTES/:/opt/AIL/PASTES + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin + redis-cache: + entrypoint: + - redis-server + - /opt/AIL/configs/6379.conf + healthcheck: + test: ["CMD", "redis-cli", "-p", "6379", "ping"] + interval: 30s + timeout: 10s + retries: 5 + image: ail-framework + network_mode: service:flask + volumes: + - ./configs:/opt/AIL/configs:ro + redis-log: + entrypoint: + - redis-server + - /opt/AIL/configs/6380.conf + healthcheck: + test: ["CMD", "redis-cli", "-p", "6380", "ping"] + interval: 30s + timeout: 10s + retries: 5 + network_mode: service:flask + image: ail-framework + volumes: + - ./configs:/opt/AIL/configs:ro + redis-mixer-cache: + entrypoint: + - redis-server + - 
/opt/AIL/configs/6381.conf + healthcheck: + test: ["CMD", "redis-cli", "-p", "6381", "ping"] + interval: 30s + timeout: 10s + retries: 5 + image: ail-framework + network_mode: service:flask + volumes: + - ./configs:/opt/AIL/configs:ro + script-alerthandler: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/bin/alertHandler.py + network_mode: service:flask + image: ail-framework + volumes: + - ./PASTES/:/opt/AIL/PASTES + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin + script-apikey: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/bin/ApiKey.py + network_mode: service:flask + image: ail-framework + volumes: + - ./PASTES/:/opt/AIL/PASTES + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin + script-bankaccount: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/bin/BankAccount.py + network_mode: service:flask + image: ail-framework + volumes: + - ./PASTES/:/opt/AIL/PASTES + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin + script-bitcoin: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/bin/Bitcoin.py + network_mode: service:flask + image: ail-framework + volumes: + - ./PASTES/:/opt/AIL/PASTES + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin + script-categ: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/bin/Categ.py + network_mode: service:flask + image: ail-framework + volumes: + - ./PASTES/:/opt/AIL/PASTES + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin + script-credential: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/bin/Credential.py + network_mode: service:flask + image: ail-framework + volumes: + - ./PASTES/:/opt/AIL/PASTES + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin + script-creditcards: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/bin/CreditCards.py + network_mode: service:flask + image: ail-framework + volumes: + - ./PASTES/:/opt/AIL/PASTES + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin + script-curve: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/bin/Curve.py + network_mode: service:flask + image: ail-framework + volumes: + - ./PASTES/:/opt/AIL/PASTES + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin + script-curvemanagetopsets: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/bin/CurveManageTopSets.py + network_mode: service:flask + image: ail-framework + volumes: + - ./PASTES/:/opt/AIL/PASTES + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin + script-cve: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/bin/Cve.py + network_mode: service:flask + image: ail-framework + volumes: + - ./PASTES/:/opt/AIL/PASTES + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin + script-decoder: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/bin/Decoder.py + network_mode: service:flask + image: ail-framework + volumes: + - ./HASHS:/opt/AIL/HASHS + - ./PASTES/:/opt/AIL/PASTES + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin + script-domclassifier: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/bin/DomClassifier.py + network_mode: service:flask + image: ail-framework + volumes: + - ./PASTES/:/opt/AIL/PASTES + - 
./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin + script-duplicates: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/bin/Duplicates.py + network_mode: service:flask + image: ail-framework + volumes: + - ./PASTES/:/opt/AIL/PASTES + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin + script-global: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/bin/Global.py + network_mode: service:flask + image: ail-framework + volumes: + - ./PASTES/:/opt/AIL/PASTES + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin + script-indexer: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/bin/Indexer.py + network_mode: service:flask + image: ail-framework + volumes: + - ./PASTES/:/opt/AIL/PASTES + - ./indexdir:/opt/AIL/indexdir + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin + script-keys: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/bin/Keys.py + network_mode: service:flask + image: ail-framework + volumes: + - ./PASTES/:/opt/AIL/PASTES + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin + script-libinjection: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/bin/LibInjection.py + network_mode: service:flask + image: ail-framework + volumes: + - ./PASTES/:/opt/AIL/PASTES + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin + script-lines: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/bin/Lines.py + network_mode: service:flask + image: ail-framework + volumes: + - ./PASTES/:/opt/AIL/PASTES + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin + script-mail: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/bin/Mail.py + network_mode: service:flask + image: ail-framework + volumes: + - ./PASTES/:/opt/AIL/PASTES + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin + script-mispthehivefeeder: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/bin/MISP_The_Hive_feeder.py + network_mode: service:flask + image: ail-framework + volumes: + - ./PASTES/:/opt/AIL/PASTES + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin + script-mixer: + depends_on: + - redis-mixer-cache + entrypoint: + - /opt/AIL/bin/Mixer.py + network_mode: service:flask + image: ail-framework + volumes: + - ./PASTES/:/opt/AIL/PASTES + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin + script-modulestats: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/bin/ModuleStats.py + network_mode: service:flask + image: ail-framework + volumes: + - ./PASTES/:/opt/AIL/PASTES + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin + script-onion: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/bin/Onion.py + network_mode: service:flask + image: ail-framework + volumes: + - ./PASTES/:/opt/AIL/PASTES + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin + script-phone: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/bin/Phone.py + network_mode: service:flask + image: ail-framework + volumes: + - ./PASTES/:/opt/AIL/PASTES + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin + script-regexfortermsfrequency: + depends_on: + - redis-log + entrypoint: + - 
/opt/AIL/bin/RegexForTermsFrequency.py + network_mode: service:flask + image: ail-framework + volumes: + - ./PASTES/:/opt/AIL/PASTES + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin + script-release: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/bin/Release.py + network_mode: service:flask + image: ail-framework + volumes: + - ./PASTES/:/opt/AIL/PASTES + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin + script-sentimentanalysis: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/bin/SentimentAnalysis.py + network_mode: service:flask + image: ail-framework + volumes: + - ./PASTES/:/opt/AIL/PASTES + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin + script-setfortermsfrequency: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/bin/SetForTermsFrequency.py + network_mode: service:flask + image: ail-framework + volumes: + - ./PASTES/:/opt/AIL/PASTES + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin + script-sqlinjectiondetection: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/bin/SQLInjectionDetection.py + network_mode: service:flask + image: ail-framework + volumes: + - ./PASTES/:/opt/AIL/PASTES + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin + script-submitpaste: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/bin/submit_paste.py + network_mode: service:flask + image: ail-framework + volumes: + - ./PASTES/:/opt/AIL/PASTES + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin + script-tags: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/bin/Tags.py + network_mode: service:flask + image: ail-framework + volumes: + - ./PASTES/:/opt/AIL/PASTES + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin + script-tokenize: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/bin/Tokenize.py + network_mode: service:flask + image: ail-framework + volumes: + - ./PASTES/:/opt/AIL/PASTES + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin + script-web: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/bin/Web.py + network_mode: service:flask + image: ail-framework + volumes: + - ./PASTES/:/opt/AIL/PASTES + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin + script-webstats: + depends_on: + - redis-log + entrypoint: + - /opt/AIL/bin/WebStats.py + network_mode: service:flask + image: ail-framework + volumes: + - ./PASTES/:/opt/AIL/PASTES + - ./bin/packages/config.cfg:/opt/AIL/bin/packages/config.cfg:ro + working_dir: /opt/AIL/bin diff --git a/installing_deps.sh b/installing_deps.sh index 484ca770..579c2a9d 100755 --- a/installing_deps.sh +++ b/installing_deps.sh @@ -95,6 +95,7 @@ popd mkdir -p $AIL_HOME/PASTES pip3 install -U pip +pip3 install 'git+https://github.com/D4-project/BGP-Ranking.git/@7e698f87366e6f99b4d0d11852737db28e3ddc62#egg=pybgpranking&subdirectory=client' pip3 install -U -r pip3_packages_requirement.txt # Pyfaup diff --git a/pystemon/proxies.txt b/pystemon/proxies.txt new file mode 100644 index 00000000..a4ae3812 --- /dev/null +++ b/pystemon/proxies.txt @@ -0,0 +1 @@ +http://127.0.0.1:8080 diff --git a/var/www/modules/Flask_config.py b/var/www/modules/Flask_config.py index 104a1c25..7e74c4d1 100644 --- a/var/www/modules/Flask_config.py +++ 
b/var/www/modules/Flask_config.py @@ -30,6 +30,12 @@ r_serv = redis.StrictRedis( db=cfg.getint("Redis_Queues", "db"), decode_responses=True) +r_cache = redis.StrictRedis( + host=cfg.get("Redis_Cache", "host"), + port=cfg.getint("Redis_Cache", "port"), + db=cfg.getint("Redis_Cache", "db"), + decode_responses=True) + r_serv_log = redis.StrictRedis( host=cfg.get("Redis_Log", "host"), port=cfg.getint("Redis_Log", "port"), diff --git a/var/www/modules/hashDecoded/Flask_hashDecoded.py b/var/www/modules/hashDecoded/Flask_hashDecoded.py index faf6af34..62a32f75 100644 --- a/var/www/modules/hashDecoded/Flask_hashDecoded.py +++ b/var/www/modules/hashDecoded/Flask_hashDecoded.py @@ -101,7 +101,8 @@ def all_hash_search(): date_to = request.form.get('date_to') type = request.form.get('type') encoding = request.form.get('encoding') - return redirect(url_for('hashDecoded.hashDecoded_page', date_from=date_from, date_to=date_to, type=type, encoding=encoding)) + show_decoded_files = request.form.get('show_decoded_files') + return redirect(url_for('hashDecoded.hashDecoded_page', date_from=date_from, date_to=date_to, type=type, encoding=encoding, show_decoded_files=show_decoded_files)) @hashDecoded.route("/hashDecoded/", methods=['GET']) def hashDecoded_page(): @@ -109,6 +110,7 @@ def hashDecoded_page(): date_to = request.args.get('date_to') type = request.args.get('type') encoding = request.args.get('encoding') + show_decoded_files = request.args.get('show_decoded_files') if type == 'All types': type = None @@ -161,14 +163,16 @@ def hashDecoded_page(): daily_date = None l_64 = set() - for date in date_range: - if encoding is None: - l_hash = r_serv_metadata.zrange('hash_date:' +date, 0, -1) - else: - l_hash = r_serv_metadata.zrange(encoding+'_date:' +date, 0, -1) - if l_hash: - for hash in l_hash: - l_64.add(hash) + if show_decoded_files: + show_decoded_files = True + for date in date_range: + if encoding is None: + l_hash = r_serv_metadata.zrange('hash_date:' +date, 0, -1) + else: + l_hash = r_serv_metadata.zrange(encoding+'_date:' +date, 0, -1) + if l_hash: + for hash in l_hash: + l_64.add(hash) num_day_sparkline = 6 date_range_sparkline = get_date_range(num_day_sparkline) @@ -214,7 +218,7 @@ def hashDecoded_page(): l_type = r_serv_metadata.smembers('hash_all_type') return render_template("hashDecoded.html", l_64=b64_metadata, vt_enabled=vt_enabled, l_type=l_type, type=type, daily_type_chart=daily_type_chart, daily_date=daily_date, - encoding=encoding, all_encoding=all_encoding, date_from=date_from, date_to=date_to) + encoding=encoding, all_encoding=all_encoding, date_from=date_from, date_to=date_to, show_decoded_files=show_decoded_files) @hashDecoded.route('/hashDecoded/hash_by_type') def hash_by_type(): @@ -400,6 +404,63 @@ def decoder_type_json(): to_json.append({'name': decoder, 'value': nb_decoded[decoder]}) return jsonify(to_json) +@hashDecoded.route('/hashDecoded/top5_type_json') +def top5_type_json(): + date_from = request.args.get('date_from') + date_to = request.args.get('date_to') + + typ = request.args.get('type') + decoder = request.args.get('encoding') + + if decoder == 'All encoding' or decoder is None: + all_decoder = r_serv_metadata.smembers('all_decoder') + else: + if not r_serv_metadata.sismember('all_decoder', decoder): + return jsonify({'Error': 'This decoder do not exist'}) + else: + all_decoder = [decoder] + + if typ == 'All types' or typ is None or typ=='None': + all_type = r_serv_metadata.smembers('hash_all_type') + else: + typ = typ.replace(' ', '+') + if not 
r_serv_metadata.sismember('hash_all_type', typ): + return jsonify({'Error': 'This type do not exist'}) + else: + all_type = [typ] + + date_range = [] + if date_from is not None and date_to is not None: + #change format + try: + if len(date_from) != 8: + date_from = date_from[0:4] + date_from[5:7] + date_from[8:10] + date_to = date_to[0:4] + date_to[5:7] + date_to[8:10] + date_range = substract_date(date_from, date_to) + except: + pass + + if not date_range: + date_range.append(datetime.date.today().strftime("%Y%m%d")) + + # TODO replace with ZUNIONSTORE + nb_types_decoded = {} + for date in date_range: + for typ in all_type: + for decoder in all_decoder: + nb_decoded = r_serv_metadata.zscore('{}_type:{}'.format(decoder, typ), date) + if nb_decoded is not None: + if typ in nb_types_decoded: + nb_types_decoded[typ] = nb_types_decoded[typ] + int(nb_decoded) + else: + nb_types_decoded[typ] = int(nb_decoded) + + to_json = [] + top5_types = sorted(nb_types_decoded, key=nb_types_decoded.get, reverse=True)[:5] + for typ in top5_types: + to_json.append({'name': typ, 'value': nb_types_decoded[typ]}) + return jsonify(to_json) + @hashDecoded.route('/hashDecoded/daily_type_json') def daily_type_json(): diff --git a/var/www/modules/hashDecoded/templates/hashDecoded.html b/var/www/modules/hashDecoded/templates/hashDecoded.html index 995ecad9..19ddcdaf 100644 --- a/var/www/modules/hashDecoded/templates/hashDecoded.html +++ b/var/www/modules/hashDecoded/templates/hashDecoded.html @@ -121,7 +121,14 @@ {% endif %} {% endfor %} -
[hashDecoded.html template hunks garbled by extraction: the HTML markup of the added and removed lines was stripped and cannot be recovered here. The surviving Jinja fragments show the "{{ date_from }}, No Hashes" / "{{ date_from }} to {{ date_to }}, No Hashes" messages being wrapped in a new {% if show_decoded_files %} block; the earlier hunks presumably add the show_decoded_files form control and the pie_chart_encoded / pie_chart_top5_types containers used by the draw_pie_chart() calls added in the script hunk below.]
+ {% endif %} {% endif %} {% endif %} @@ -248,9 +259,12 @@ {% elif daily_type_chart %} chart.stackBarChart =barchart_type_stack("{{ url_for('hashDecoded.range_type_json') }}?date_from={{daily_date}}&date_to={{daily_date}}", 'id'); {% else %} - chart.stackBarChart = barchart_type_stack("{{ url_for('hashDecoded.range_type_json') }}?date_from={{date_from}}&date_to={{date_to}}", 'id') + chart.stackBarChart = barchart_type_stack("{{ url_for('hashDecoded.range_type_json') }}?date_from={{date_from}}&date_to={{date_to}}", 'id'); {% endif %} + draw_pie_chart("pie_chart_encoded" ,"{{ url_for('hashDecoded.decoder_type_json') }}?date_from={{date_from}}&date_to={{date_to}}&type={{type}}", "{{ url_for('hashDecoded.hashDecoded_page') }}?date_from={{date_from}}&date_to={{date_to}}&type={{type}}&encoding="); + draw_pie_chart("pie_chart_top5_types" ,"{{ url_for('hashDecoded.top5_type_json') }}?date_from={{date_from}}&date_to={{date_to}}&type={{type}}", "{{ url_for('hashDecoded.hashDecoded_page') }}?date_from={{date_from}}&date_to={{date_to}}&type="); + chart.onResize(); $(window).on("resize", function() { chart.onResize(); @@ -498,79 +512,83 @@ window.chart = chart; diff --git a/var/www/modules/hiddenServices/Flask_hiddenServices.py b/var/www/modules/hiddenServices/Flask_hiddenServices.py index 3cc19e55..7097b57a 100644 --- a/var/www/modules/hiddenServices/Flask_hiddenServices.py +++ b/var/www/modules/hiddenServices/Flask_hiddenServices.py @@ -19,6 +19,7 @@ import Flask_config app = Flask_config.app cfg = Flask_config.cfg baseUrl = Flask_config.baseUrl +r_cache = Flask_config.r_cache r_serv_onion = Flask_config.r_serv_onion r_serv_metadata = Flask_config.r_serv_metadata bootstrap_label = Flask_config.bootstrap_label @@ -90,7 +91,11 @@ def hiddenServices_page(): metadata_onion = {} metadata_onion['domain'] = onion metadata_onion['last_check'] = r_serv_onion.hget('onion_metadata:{}'.format(onion), 'last_check') + if metadata_onion['last_check'] is None: + metadata_onion['last_check'] = '********' metadata_onion['first_seen'] = r_serv_onion.hget('onion_metadata:{}'.format(onion), 'first_seen') + if metadata_onion['first_seen'] is None: + metadata_onion['first_seen'] = '********' if get_onion_status(onion, metadata_onion['last_check']): metadata_onion['status_text'] = 'UP' metadata_onion['status_color'] = 'Green' @@ -101,7 +106,71 @@ def hiddenServices_page(): metadata_onion['status_icon'] = 'fa-times-circle' list_onion.append(metadata_onion) - return render_template("hiddenServices.html", last_onions=list_onion, statDomains=statDomains) + crawler_metadata=[] + all_onion_crawler = r_cache.smembers('all_crawler:onion') + for crawler in all_onion_crawler: + crawling_domain = r_cache.hget('metadata_crawler:{}'.format(crawler), 'crawling_domain') + started_time = r_cache.hget('metadata_crawler:{}'.format(crawler), 'started_time') + status_info = r_cache.hget('metadata_crawler:{}'.format(crawler), 'status') + crawler_info = '{} - {}'.format(crawler, started_time) + if status_info=='Waiting' or status_info=='Crawling': + status=True + else: + status=False + crawler_metadata.append({'crawler_info': crawler_info, 'crawling_domain': crawling_domain, 'status_info': status_info, 'status': status}) + + date_string = '{}-{}-{}'.format(date[0:4], date[4:6], date[6:8]) + return render_template("hiddenServices.html", last_onions=list_onion, statDomains=statDomains, + crawler_metadata=crawler_metadata, date_from=date_string, date_to=date_string) + +@hiddenServices.route("/hiddenServices/last_crawled_domains_with_stats_json", 
methods=['GET']) +def last_crawled_domains_with_stats_json(): + last_onions = r_serv_onion.lrange('last_onion', 0 ,-1) + list_onion = [] + + now = datetime.datetime.now() + date = '{}{}{}'.format(now.strftime("%Y"), now.strftime("%m"), now.strftime("%d")) + statDomains = {} + statDomains['domains_up'] = r_serv_onion.scard('onion_up:{}'.format(date)) + statDomains['domains_down'] = r_serv_onion.scard('onion_down:{}'.format(date)) + statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down'] + statDomains['domains_queue'] = r_serv_onion.scard('onion_domain_crawler_queue') + + for onion in last_onions: + metadata_onion = {} + metadata_onion['domain'] = onion + metadata_onion['last_check'] = r_serv_onion.hget('onion_metadata:{}'.format(onion), 'last_check') + if metadata_onion['last_check'] is None: + metadata_onion['last_check'] = '********' + metadata_onion['first_seen'] = r_serv_onion.hget('onion_metadata:{}'.format(onion), 'first_seen') + if metadata_onion['first_seen'] is None: + metadata_onion['first_seen'] = '********' + if get_onion_status(onion, metadata_onion['last_check']): + metadata_onion['status_text'] = 'UP' + metadata_onion['status_color'] = 'Green' + metadata_onion['status_icon'] = 'fa-check-circle' + else: + metadata_onion['status_text'] = 'DOWN' + metadata_onion['status_color'] = 'Red' + metadata_onion['status_icon'] = 'fa-times-circle' + list_onion.append(metadata_onion) + + crawler_metadata=[] + all_onion_crawler = r_cache.smembers('all_crawler:onion') + for crawler in all_onion_crawler: + crawling_domain = r_cache.hget('metadata_crawler:{}'.format(crawler), 'crawling_domain') + started_time = r_cache.hget('metadata_crawler:{}'.format(crawler), 'started_time') + status_info = r_cache.hget('metadata_crawler:{}'.format(crawler), 'status') + crawler_info = '{} - {}'.format(crawler, started_time) + if status_info=='Waiting' or status_info=='Crawling': + status=True + else: + status=False + crawler_metadata.append({'crawler_info': crawler_info, 'crawling_domain': crawling_domain, 'status_info': status_info, 'status': status}) + + date_string = '{}-{}-{}'.format(date[0:4], date[4:6], date[6:8]) + + return jsonify({'last_onions': list_onion, 'statDomains': statDomains, 'crawler_metadata':crawler_metadata}) @hiddenServices.route("/hiddenServices/get_onions_by_daterange", methods=['POST']) def get_onions_by_daterange(): @@ -199,8 +268,12 @@ def onion_domain(): # # TODO: FIXME return 404 last_check = r_serv_onion.hget('onion_metadata:{}'.format(onion_domain), 'last_check') + if last_check is None: + last_check = '********' last_check = '{}/{}/{}'.format(last_check[0:4], last_check[4:6], last_check[6:8]) first_seen = r_serv_onion.hget('onion_metadata:{}'.format(onion_domain), 'first_seen') + if first_seen is None: + first_seen = '********' first_seen = '{}/{}/{}'.format(first_seen[0:4], first_seen[4:6], first_seen[6:8]) origin_paste = r_serv_onion.hget('onion_metadata:{}'.format(onion_domain), 'paste_parent') diff --git a/var/www/modules/hiddenServices/templates/hiddenServices.html b/var/www/modules/hiddenServices/templates/hiddenServices.html index b781a26a..58b5937f 100644 --- a/var/www/modules/hiddenServices/templates/hiddenServices.html +++ b/var/www/modules/hiddenServices/templates/hiddenServices.html @@ -66,7 +66,7 @@ Status - + {% for metadata_onion in last_onions %} {{ metadata_onion['domain'] }} @@ -142,7 +142,6 @@ -
[hiddenServices.html template hunks garbled by extraction: the HTML markup was stripped and cannot be recovered here. The surviving fragments show the "Domains Crawled Today" statistics panel being reworked ("Domains UP" {{ statDomains['domains_up'] }}, "Domains DOWN" {{ statDomains['domains_down'] }}, "Crawled Domains" {{ statDomains['total'] }}, "Domains in Queue" {{ statDomains['domains_queue'] }}) and a new "Crawlers Status" table that loops over crawler_metadata and displays {{crawler['crawler_info']}}, {{crawler['crawling_domain']}} and {{crawler['status_info']}} for each crawler.]
@@ -196,6 +235,7 @@ + + diff --git a/var/www/update_thirdparty.sh b/var/www/update_thirdparty.sh index 47fd4ecc..317de4e4 100755 --- a/var/www/update_thirdparty.sh +++ b/var/www/update_thirdparty.sh @@ -60,10 +60,10 @@ wget https://cdn.datatables.net/plug-ins/1.10.7/integration/bootstrap/3/dataTabl wget https://cdn.datatables.net/plug-ins/1.10.7/integration/bootstrap/3/dataTables.bootstrap.js -O ./static/js/dataTables.bootstrap.js #Ressource for graph -wget https://raw.githubusercontent.com/flot/flot/master/jquery.flot.js -O ./static/js/jquery.flot.js -wget https://raw.githubusercontent.com/flot/flot/master/jquery.flot.pie.js -O ./static/js/jquery.flot.pie.js -wget https://raw.githubusercontent.com/flot/flot/master/jquery.flot.time.js -O ./static/js/jquery.flot.time.js -wget https://raw.githubusercontent.com/flot/flot/master/jquery.flot.stack.js -O ./static/js/jquery.flot.stack.js +wget https://raw.githubusercontent.com/flot/flot/958e5fd43c6dff4bab3e1fd5cb6109df5c1e8003/jquery.flot.js -O ./static/js/jquery.flot.js +wget https://raw.githubusercontent.com/flot/flot/958e5fd43c6dff4bab3e1fd5cb6109df5c1e8003/jquery.flot.pie.js -O ./static/js/jquery.flot.pie.js +wget https://raw.githubusercontent.com/flot/flot/958e5fd43c6dff4bab3e1fd5cb6109df5c1e8003/jquery.flot.time.js -O ./static/js/jquery.flot.time.js +wget https://raw.githubusercontent.com/flot/flot/958e5fd43c6dff4bab3e1fd5cb6109df5c1e8003/jquery.flot.stack.js -O ./static/js/jquery.flot.stack.js #Ressources for sparkline and canvasJS and slider wget http://omnipotent.net/jquery.sparkline/2.1.2/jquery.sparkline.min.js -O ./static/js/jquery.sparkline.min.js @@ -83,8 +83,12 @@ pushd static/image wget https://www.circl.lu/assets/images/logos/AIL.png -O AIL.png popd -#active virtualenv -source ./../../AILENV/bin/activate +if ! [[ -n "$AIL_HOME" ]] +then + #active virtualenv + source ./../../AILENV/bin/activate +fi + #Update MISP Taxonomies and Galaxies python3 -m pip install git+https://github.com/MISP/PyTaxonomies --upgrade python3 -m pip install git+https://github.com/MISP/PyMISPGalaxies --upgrade