This commit is contained in:
Terrtia 2018-10-24 15:16:41 +02:00
commit 5afbc3179c
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
7 changed files with 71 additions and 26 deletions

View file

@ -100,7 +100,8 @@ In AIL, you can track terms, set of terms and even regexes without creating a de
Crawler Crawler
--------------------- ---------------------
In AIL, you can crawl hidden services.
In AIL, you can crawl Tor hidden services. Don't forget to review the proxy configuration of your Tor client — especially whether you enabled the SOCKS5 proxy and bound it to an IP address reachable from the Docker containers where Splash runs.
There are two types of installation. You can install a *local* or a *remote* Splash server. There are two types of installation. You can install a *local* or a *remote* Splash server.
``(Splash host) = the server running the splash service`` ``(Splash host) = the server running the splash service``
@ -137,6 +138,7 @@ All Splash dockers are launched inside the ``Docker_Splash`` screen. You can use
### TL;DR - Local setup ### TL;DR - Local setup
#### Installation #### Installation
- ```crawler_hidden_services_install.sh -y``` - ```crawler_hidden_services_install.sh -y```
- Add the line ``SOCKSPolicy accept 172.17.0.0/16`` in ``/etc/tor/torrc`` - Add the line ``SOCKSPolicy accept 172.17.0.0/16`` in ``/etc/tor/torrc``

View file

@ -56,6 +56,11 @@ Features
* Automatic paste export at detection on [MISP](https://github.com/MISP/MISP) (events) and [The Hive](https://github.com/TheHive-Project/TheHive) (alerts) on selected tags * Automatic paste export at detection on [MISP](https://github.com/MISP/MISP) (events) and [The Hive](https://github.com/TheHive-Project/TheHive) (alerts) on selected tags
* Extracted and decoded files can be searched by date range, type of file (mime-type) and encoding discovered * Extracted and decoded files can be searched by date range, type of file (mime-type) and encoding discovered
* Graph relationships between decoded file (hashes) * Graph relationships between decoded file (hashes)
* Tor hidden services crawler to crawl and parse output
* Tor onion availability is monitored to detect when hidden services go up or down
* Browsed hidden services are screenshotted and integrated in the analysed output, including a screenshot-blurring interface (to avoid "burning the eyes" of the security analyst with specific content)
* Tor hidden services are part of the standard framework; all the AIL modules are available for the crawled hidden services
Installation Installation
------------ ------------
@ -100,7 +105,7 @@ curl https://get.docker.com | /bin/bash
2. Type these commands to build the Docker image: 2. Type these commands to build the Docker image:
```bash ```bash
git clone https://github.com/CIRCL/ail-framework git clone https://github.com/CIRCL/AIL-framework.git
cd AIL-framework cd AIL-framework
docker build -t ail-framework . docker build -t ail-framework .
``` ```
@ -163,6 +168,12 @@ Privacy and GDPR
Screenshots Screenshots
=========== ===========
Tor hidden service crawler
--------------------------
![Tor hidden service](./doc/screenshots/ail-bitcoinmixer.png?raw=true "Tor hidden service crawler")
Trending charts Trending charts
--------------- ---------------

View file

@ -1,6 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
import argparse
import configparser import configparser
import os import os
import smtplib import smtplib
@ -21,6 +22,7 @@ TrackedTermsNotificationEnabled_Name = "TrackedNotifications"
# Keys will be e.g. TrackedNotificationEmails<TERMNAME> # Keys will be e.g. TrackedNotificationEmails<TERMNAME>
TrackedTermsNotificationEmailsPrefix_Name = "TrackedNotificationEmails_" TrackedTermsNotificationEmailsPrefix_Name = "TrackedNotificationEmails_"
def sendEmailNotification(recipient, alert_name, content): def sendEmailNotification(recipient, alert_name, content):
if not os.path.exists(configfile): if not os.path.exists(configfile):
@ -69,19 +71,24 @@ def sendEmailNotification(recipient, alert_name, content):
else: else:
smtp_server = smtplib.SMTP(sender_host, sender_port) smtp_server = smtplib.SMTP(sender_host, sender_port)
mime_msg = MIMEMultipart() mime_msg = MIMEMultipart()
mime_msg['From'] = sender mime_msg['From'] = sender
mime_msg['To'] = recipient mime_msg['To'] = recipient
mime_msg['Subject'] = "AIL Framework "+ alert_name + " Alert" mime_msg['Subject'] = "AIL Framework " + alert_name + " Alert"
body = content body = content
mime_msg.attach(MIMEText(body, 'plain')) mime_msg.attach(MIMEText(body, 'plain'))
smtp_server.sendmail(sender, recipient, mime_msg.as_string()) smtp_server.sendmail(sender, recipient, mime_msg.as_string())
smtp_server.quit() smtp_server.quit()
print('Send notification '+ alert_name + ' to '+recipient) print('Send notification ' + alert_name + ' to '+recipient)
except Exception as e: except Exception as e:
print(str(e)) print(str(e))
# raise e # raise e
# CLI entry point: send a single test e-mail so the notification
# configuration (SMTP host, credentials, sender) can be verified
# without triggering a real alert.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Test notification sender.')
    # Positional argument: the recipient address for the test message.
    parser.add_argument("addr", help="Test mail 'to' address")
    args = parser.parse_args()
    sendEmailNotification(args.addr, '_mail test_', 'Success.')

View file

@ -9,20 +9,27 @@ supplied in the term webpage.
import redis import redis
import time import time
from pubsublogger import publisher from pubsublogger import publisher
from packages import lib_words
from packages import Paste from packages import Paste
import os
from os import environ
import datetime
import calendar import calendar
import re import re
import signal
import time
from Helper import Process from Helper import Process
# Email notifications # Email notifications
from NotificationHelper import * from NotificationHelper import *
class TimeoutException(Exception):
pass
def timeout_handler(signum, frame):
raise TimeoutException
signal.signal(signal.SIGALRM, timeout_handler)
# Config Variables # Config Variables
DICO_REFRESH_TIME = 60 #s DICO_REFRESH_TIME = 60 # s
BlackListTermsSet_Name = "BlackListSetTermSet" BlackListTermsSet_Name = "BlackListSetTermSet"
TrackedTermsSet_Name = "TrackedSetTermSet" TrackedTermsSet_Name = "TrackedSetTermSet"
@ -33,11 +40,12 @@ oneDay = 60*60*24
top_termFreq_setName_day = ["TopTermFreq_set_day_", 1] top_termFreq_setName_day = ["TopTermFreq_set_day_", 1]
top_termFreq_setName_week = ["TopTermFreq_set_week", 7] top_termFreq_setName_week = ["TopTermFreq_set_week", 7]
top_termFreq_setName_month = ["TopTermFreq_set_month", 31] top_termFreq_setName_month = ["TopTermFreq_set_month", 31]
top_termFreq_set_array = [top_termFreq_setName_day,top_termFreq_setName_week, top_termFreq_setName_month] top_termFreq_set_array = [top_termFreq_setName_day, top_termFreq_setName_week, top_termFreq_setName_month]
# create direct link in mail # create direct link in mail
full_paste_url = "/showsavedpaste/?paste=" full_paste_url = "/showsavedpaste/?paste="
def refresh_dicos(): def refresh_dicos():
dico_regex = {} dico_regex = {}
dico_regexname_to_redis = {} dico_regexname_to_redis = {}
@ -53,6 +61,7 @@ if __name__ == "__main__":
config_section = 'RegexForTermsFrequency' config_section = 'RegexForTermsFrequency'
p = Process(config_section) p = Process(config_section)
max_execution_time = p.config.getint(config_section, "max_execution_time")
# REDIS # # REDIS #
server_term = redis.StrictRedis( server_term = redis.StrictRedis(
@ -67,7 +76,7 @@ if __name__ == "__main__":
# create direct link in mail # create direct link in mail
full_paste_url = p.config.get("Notifications", "ail_domain") + full_paste_url full_paste_url = p.config.get("Notifications", "ail_domain") + full_paste_url
#compile the regex # compile the regex
dico_refresh_cooldown = time.time() dico_refresh_cooldown = time.time()
dico_regex, dico_regexname_to_redis = refresh_dicos() dico_regex, dico_regexname_to_redis = refresh_dicos()
@ -87,13 +96,22 @@ if __name__ == "__main__":
timestamp = calendar.timegm((int(temp[-4]), int(temp[-3]), int(temp[-2]), 0, 0, 0)) timestamp = calendar.timegm((int(temp[-4]), int(temp[-3]), int(temp[-2]), 0, 0, 0))
curr_set = top_termFreq_setName_day[0] + str(timestamp) curr_set = top_termFreq_setName_day[0] + str(timestamp)
content = Paste.Paste(filename).get_p_content() paste = Paste.Paste(filename)
content = paste.get_p_content()
#iterate the word with the regex # iterate the word with the regex
for regex_str, compiled_regex in dico_regex.items(): for regex_str, compiled_regex in dico_regex.items():
matched = compiled_regex.search(content)
if matched is not None: #there is a match signal.alarm(max_execution_time)
try:
matched = compiled_regex.search(content)
except TimeoutException:
print ("{0} processing timeout".format(paste.p_path))
continue
else:
signal.alarm(0)
if matched is not None: # there is a match
print('regex matched {}'.format(regex_str)) print('regex matched {}'.format(regex_str))
matched = matched.group(0) matched = matched.group(0)
regex_str_complete = "/" + regex_str + "/" regex_str_complete = "/" + regex_str + "/"
@ -115,9 +133,9 @@ if __name__ == "__main__":
new_to_the_set = server_term.sadd(set_name, filename) new_to_the_set = server_term.sadd(set_name, filename)
new_to_the_set = True if new_to_the_set == 1 else False new_to_the_set = True if new_to_the_set == 1 else False
#consider the num of occurrence of this term # consider the num of occurrence of this term
regex_value = int(server_term.hincrby(timestamp, dico_regexname_to_redis[regex_str], int(1))) regex_value = int(server_term.hincrby(timestamp, dico_regexname_to_redis[regex_str], int(1)))
#1 term per paste # 1 term per paste
if new_to_the_set: if new_to_the_set:
regex_value_perPaste = int(server_term.hincrby("per_paste_" + str(timestamp), dico_regexname_to_redis[regex_str], int(1))) regex_value_perPaste = int(server_term.hincrby("per_paste_" + str(timestamp), dico_regexname_to_redis[regex_str], int(1)))
server_term.zincrby("per_paste_" + curr_set, dico_regexname_to_redis[regex_str], float(1)) server_term.zincrby("per_paste_" + curr_set, dico_regexname_to_redis[regex_str], float(1))

View file

@ -99,6 +99,9 @@ operation_mode = 3
#Define the time that a paste will be considerate duplicate. in seconds (1day = 86400) #Define the time that a paste will be considerate duplicate. in seconds (1day = 86400)
ttl_duplicate = 86400 ttl_duplicate = 86400
[RegexForTermsFrequency]
max_execution_time = 60
##### Redis ##### ##### Redis #####
[Redis_Cache] [Redis_Cache]
host = localhost host = localhost

View file

@ -1 +1,5 @@
www.facebookcorewwwi.onion www.facebookcorewwwi.onion
facebookcorewwwi.onion
graylady3jvrrxbe.onion
expyuzz4wqqyqhjn.onion
dccbbv6cooddgcrq.onion

Binary file not shown.

After

Width:  |  Height:  |  Size: 360 KiB