diff --git a/HOWTO.md b/HOWTO.md index 3fe312ba..629e8fa8 100644 --- a/HOWTO.md +++ b/HOWTO.md @@ -51,9 +51,9 @@ If you want to add a new webpage for a module in AIL, follow these simple steps: 3. Edit the created html files under the template folder as well as the Flask_* python script so that they fit your needs. -4. You can change the order of your module in the top navigation header in the file [./var/www/templates/header_base.html](./var/www/templates/header_base.html) +4. You can change the order of your module in the top navigation header in the file [./var/www/templates/header_base.html](./var/www/templates/header_base.html) -5. You can ignore module, and so, not display them in the top navigation header by adding the module name in the file [./var/www/templates/ignored_modules.txt](./var/www/templates/ignored_modules.txt) +5. You can ignore module, and so, not display them in the top navigation header by adding the module name in the file [./var/www/templates/ignored_modules.txt](./var/www/templates/ignored_modules.txt) How to contribute a module -------------------------- @@ -100,7 +100,8 @@ In AIL, you can track terms, set of terms and even regexes without creating a de Crawler --------------------- -In AIL, you can crawl hidden services. + +In AIL, you can crawl Tor hidden services. Don't forget to review the proxy configuration of your Tor client, in particular that the SOCKS5 proxy is enabled and bound to an IP address reachable from the Docker containers where Splash runs. There are two types of installation. You can install a *local* or a *remote* Splash server. ``(Splash host) = the server running the splash service`` @@ -110,7 +111,7 @@ There are two types of installation. You can install a *local* or a *remote* Spl 1. *(Splash host)* Launch ``crawler_hidden_services_install.sh`` to install all requirements (type ``y`` if a localhost splah server is used or use the ``-y`` option) -2. *(Splash host)* To install and setup your tor proxy: +2. 
*(Splash host)* To install and setup your tor proxy: - Install the tor proxy: ``sudo apt-get install tor -y`` (Not required if ``Splah host == AIL host`` - The tor proxy is installed by default in AIL) - Add the following line ``SOCKSPolicy accept 172.17.0.0/16`` in ``/etc/tor/torrc`` @@ -126,7 +127,7 @@ There are two types of installation. You can install a *local* or a *remote* Spl ### Starting the scripts -- *(Splash host)* Launch all Splash servers with: +- *(Splash host)* Launch all Splash servers with: ```sudo ./bin/torcrawler/launch_splash_crawler.sh -f <config absolute_path> -p <port_start> -n <number_of_splash_servers>``` With ``<port_start>`` and ``<number_of_splash_servers>`` matching those specified at ``splash_onion_port`` in the configuration file of point 3 (``/bin/packages/config.cfg``) @@ -137,6 +138,7 @@ All Splash dockers are launched inside the ``Docker_Splash`` screen. You can use ### TL;DR - Local setup + #### Installation - ```crawler_hidden_services_install.sh -y``` - Add the following line in ``SOCKSPolicy accept 172.17.0.0/16`` in ``/etc/tor/torrc`` diff --git a/README.md b/README.md index 83bb0b86..72166c58 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,11 @@ Features * Automatic paste export at detection on [MISP](https://github.com/MISP/MISP) (events) and [The Hive](https://github.com/TheHive-Project/TheHive) (alerts) on selected tags * Extracted and decoded files can be searched by date range, type of file (mime-type) and encoding discovered * Graph relationships between decoded file (hashes) +* Tor hidden services crawler to crawl and parse output +* Tor onion availability is monitored to detect when hidden services go up or down +* Browser hidden services are screenshotted and integrated in the analysed output, including a blurring screenshot interface (to avoid "burning the eyes" of the security analyst with specific content) +* Tor hidden services crawling is part of the standard framework; all the AIL modules are available to the crawled hidden services + Installation ------------ @@ -100,7 +105,7 @@ curl https://get.docker.com | /bin/bash 
2. Type these commands to build the Docker image: ```bash -git clone https://github.com/CIRCL/ail-framework +git clone https://github.com/CIRCL/AIL-framework.git cd AIL-framework docker build -t ail-framework . ``` @@ -163,6 +168,12 @@ Privacy and GDPR Screenshots =========== + +Tor hidden service crawler +-------------------------- + +![Tor hidden service](./doc/screenshots/ail-bitcoinmixer.png?raw=true "Tor hidden service crawler") + Trending charts --------------- diff --git a/bin/NotificationHelper.py b/bin/NotificationHelper.py index 6dad63c7..d8f7fe92 100755 --- a/bin/NotificationHelper.py +++ b/bin/NotificationHelper.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 # -*-coding:UTF-8 -* +import argparse import configparser import os import smtplib @@ -21,6 +22,7 @@ TrackedTermsNotificationEnabled_Name = "TrackedNotifications" # Keys will be e.g. TrackedNotificationEmails TrackedTermsNotificationEmailsPrefix_Name = "TrackedNotificationEmails_" + def sendEmailNotification(recipient, alert_name, content): if not os.path.exists(configfile): @@ -69,19 +71,24 @@ def sendEmailNotification(recipient, alert_name, content): else: smtp_server = smtplib.SMTP(sender_host, sender_port) - mime_msg = MIMEMultipart() mime_msg['From'] = sender mime_msg['To'] = recipient - mime_msg['Subject'] = "AIL Framework "+ alert_name + " Alert" + mime_msg['Subject'] = "AIL Framework " + alert_name + " Alert" body = content mime_msg.attach(MIMEText(body, 'plain')) smtp_server.sendmail(sender, recipient, mime_msg.as_string()) smtp_server.quit() - print('Send notification '+ alert_name + ' to '+recipient) + print('Send notification ' + alert_name + ' to '+recipient) except Exception as e: print(str(e)) # raise e + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Test notification sender.') + parser.add_argument("addr", help="Test mail 'to' address") + args = parser.parse_args() + sendEmailNotification(args.addr, '_mail test_', 'Success.') diff --git 
a/bin/RegexForTermsFrequency.py b/bin/RegexForTermsFrequency.py index ecca8e4d..fae7a03a 100755 --- a/bin/RegexForTermsFrequency.py +++ b/bin/RegexForTermsFrequency.py @@ -9,35 +9,43 @@ supplied in the term webpage. import redis import time from pubsublogger import publisher -from packages import lib_words from packages import Paste -import os -from os import environ -import datetime import calendar import re +import signal +import time from Helper import Process - # Email notifications from NotificationHelper import * + +class TimeoutException(Exception): + pass + + +def timeout_handler(signum, frame): + raise TimeoutException + +signal.signal(signal.SIGALRM, timeout_handler) + # Config Variables -DICO_REFRESH_TIME = 60 #s +DICO_REFRESH_TIME = 60 # s BlackListTermsSet_Name = "BlackListSetTermSet" TrackedTermsSet_Name = "TrackedSetTermSet" TrackedRegexSet_Name = "TrackedRegexSet" -top_term_freq_max_set_cardinality = 20 # Max cardinality of the terms frequences set +top_term_freq_max_set_cardinality = 20 # Max cardinality of the terms frequences set oneDay = 60*60*24 top_termFreq_setName_day = ["TopTermFreq_set_day_", 1] top_termFreq_setName_week = ["TopTermFreq_set_week", 7] top_termFreq_setName_month = ["TopTermFreq_set_month", 31] -top_termFreq_set_array = [top_termFreq_setName_day,top_termFreq_setName_week, top_termFreq_setName_month] +top_termFreq_set_array = [top_termFreq_setName_day, top_termFreq_setName_week, top_termFreq_setName_month] # create direct link in mail full_paste_url = "/showsavedpaste/?paste=" + def refresh_dicos(): dico_regex = {} dico_regexname_to_redis = {} @@ -53,6 +61,7 @@ if __name__ == "__main__": config_section = 'RegexForTermsFrequency' p = Process(config_section) + max_execution_time = p.config.getint(config_section, "max_execution_time") # REDIS # server_term = redis.StrictRedis( @@ -67,7 +76,7 @@ if __name__ == "__main__": # create direct link in mail full_paste_url = p.config.get("Notifications", "ail_domain") + full_paste_url - 
#compile the regex + # compile the regex dico_refresh_cooldown = time.time() dico_regex, dico_regexname_to_redis = refresh_dicos() @@ -87,13 +96,22 @@ if __name__ == "__main__": timestamp = calendar.timegm((int(temp[-4]), int(temp[-3]), int(temp[-2]), 0, 0, 0)) curr_set = top_termFreq_setName_day[0] + str(timestamp) - content = Paste.Paste(filename).get_p_content() + paste = Paste.Paste(filename) + content = paste.get_p_content() - #iterate the word with the regex + # iterate the word with the regex for regex_str, compiled_regex in dico_regex.items(): - matched = compiled_regex.search(content) - if matched is not None: #there is a match + signal.alarm(max_execution_time) + try: + matched = compiled_regex.search(content) + except TimeoutException: + print ("{0} processing timeout".format(paste.p_path)) + continue + else: + signal.alarm(0) + + if matched is not None: # there is a match print('regex matched {}'.format(regex_str)) matched = matched.group(0) regex_str_complete = "/" + regex_str + "/" @@ -104,8 +122,8 @@ if __name__ == "__main__": # create mail body mail_body = ("AIL Framework,\n" - "New occurrence for regex: " + regex_str + "\n" - ''+full_paste_url + filename) + "New occurrence for regex: " + regex_str + "\n" + ''+full_paste_url + filename) # Send to every associated email adress for email in server_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + regex_str_complete): @@ -115,9 +133,9 @@ if __name__ == "__main__": new_to_the_set = server_term.sadd(set_name, filename) new_to_the_set = True if new_to_the_set == 1 else False - #consider the num of occurence of this term + # consider the num of occurence of this term regex_value = int(server_term.hincrby(timestamp, dico_regexname_to_redis[regex_str], int(1))) - #1 term per paste + # 1 term per paste if new_to_the_set: regex_value_perPaste = int(server_term.hincrby("per_paste_" + str(timestamp), dico_regexname_to_redis[regex_str], int(1))) server_term.zincrby("per_paste_" + curr_set, 
dico_regexname_to_redis[regex_str], float(1)) diff --git a/bin/packages/config.cfg.sample b/bin/packages/config.cfg.sample index b459f011..fbe4f6f3 100644 --- a/bin/packages/config.cfg.sample +++ b/bin/packages/config.cfg.sample @@ -99,6 +99,9 @@ operation_mode = 3 #Define the time that a paste will be considerate duplicate. in seconds (1day = 86400) ttl_duplicate = 86400 +[RegexForTermsFrequency] +max_execution_time = 60 + ##### Redis ##### [Redis_Cache] host = localhost diff --git a/bin/torcrawler/blacklist_onion.txt b/bin/torcrawler/blacklist_onion.txt index a96b0bb8..15dfa0de 100644 --- a/bin/torcrawler/blacklist_onion.txt +++ b/bin/torcrawler/blacklist_onion.txt @@ -1 +1,5 @@ www.facebookcorewwwi.onion +facebookcorewwwi.onion +graylady3jvrrxbe.onion +expyuzz4wqqyqhjn.onion +dccbbv6cooddgcrq.onion diff --git a/doc/screenshots/ail-bitcoinmixer.png b/doc/screenshots/ail-bitcoinmixer.png new file mode 100644 index 00000000..8339c4d9 Binary files /dev/null and b/doc/screenshots/ail-bitcoinmixer.png differ