diff --git a/.gitignore b/.gitignore index 6973080f..2d276111 100644 --- a/.gitignore +++ b/.gitignore @@ -32,6 +32,7 @@ var/www/submitted # Local config bin/packages/config.cfg +bin/packages/config.cfg.backup configs/keys files @@ -41,3 +42,4 @@ doc/all_modules.txt # auto generated doc/module-data-flow.png doc/data-flow.png +doc/statistics diff --git a/Dockerfile b/Dockerfile index cdd26d6c..71318ba4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -23,4 +23,7 @@ WORKDIR /opt/AIL # Default to UTF-8 file.encoding ENV LANG C.UTF-8 -CMD bash docker_start.sh +RUN ./pystemon/install.sh + +COPY docker_start.sh /docker_start.sh +ENTRYPOINT ["/bin/bash", "docker_start.sh"] diff --git a/OVERVIEW.md b/OVERVIEW.md index b62a063f..effb387d 100644 --- a/OVERVIEW.md +++ b/OVERVIEW.md @@ -23,6 +23,15 @@ Redis and ARDB overview ARDB overview --------------------------- +ARDB_DB +* DB 1 - Curve +* DB 2 - TermFreq +* DB 3 - Trending +* DB 4 - Sentiment +* DB 5 - TermCred +* DB 6 - Tags +* DB 7 - Metadata +* DB 8 - Statistics * DB 7 - Metadata: ----------------------------------------- BASE64 ---------------------------------------- @@ -40,7 +49,7 @@ ARDB overview SET - 'all_decoder' decoder* - SET - 'hash_all_type' hash_type * + SET - 'hash_all_type' hash_type * SET - 'hash_base64_all_type' hash_type * SET - 'hash_binary_all_type' hash_type * @@ -62,4 +71,3 @@ ARDB overview GET - 'base64_decoded:'+date nd_decoded GET - 'binary_decoded:'+date nd_decoded - diff --git a/bin/BankAccount.py b/bin/BankAccount.py index 58fa3e64..06e86d06 100755 --- a/bin/BankAccount.py +++ b/bin/BankAccount.py @@ -11,6 +11,8 @@ It apply IBAN regexes on paste content and warn if above a threshold. import redis import time +import redis +import datetime import re import string from itertools import chain @@ -54,11 +56,13 @@ def check_all_iban(l_iban, paste, filename): iban = ''.join(e for e in iban if e.isalnum()) #iban = iban.upper() res = iban_regex_verify.findall(iban) + date = datetime.datetime.now().strftime("%Y%m") if res: print('checking '+iban) if is_valid_iban(iban): print('------') nb_valid_iban = nb_valid_iban + 1 + server_statistics.hincrby('iban_by_country:'+date, iban[0:2], 1) if(nb_valid_iban > 0): to_print = 'Iban;{};{};{};'.format(paste.p_source, paste.p_date, paste.p_name) @@ -79,6 +83,13 @@ if __name__ == "__main__": p = Process(config_section) max_execution_time = p.config.getint("BankAccount", "max_execution_time") + # ARDB # + server_statistics = redis.StrictRedis( + host=p.config.get("ARDB_Statistics", "host"), + port=p.config.getint("ARDB_Statistics", "port"), + db=p.config.getint("ARDB_Statistics", "db"), + decode_responses=True) + publisher.info("BankAccount started") message = p.get_from_set() diff --git a/bin/Base64.py b/bin/Base64.py deleted file mode 100755 index e8b3fbc5..00000000 --- a/bin/Base64.py +++ /dev/null @@ -1,181 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* -""" - Base64 module - - Dectect Base64 and decode it -""" -import time -import os -import datetime -import redis - -from pubsublogger import publisher - -from Helper import Process -from packages import Paste - -import re -import base64 -from hashlib import sha1 -import magic -import json - -import signal - -class TimeoutException(Exception): - pass - -def timeout_handler(signum, frame): - raise TimeoutException - -signal.signal(signal.SIGALRM, timeout_handler) - - -def search_base64(content, message, date): - find = False - base64_list = re.findall(regex_base64, content) - if(len(base64_list) > 0): - - for b64 in base64_list: - if len(b64) >= 40 : - decode = base64.b64decode(b64) - print(decode) - - type = magic.from_buffer(decode, mime=True) - #print(type) - #print(decode) - - find = True - hash = sha1(decode).hexdigest() - print(message) - print(hash) - - data = {} - data['name'] = hash - data['date'] = datetime.datetime.now().strftime("%d/%m/%y") - data['origin'] = message - data['estimated type'] = type - json_data = json.dumps(data) - - date_paste = '{}/{}/{}'.format(date[0:4], date[4:6], date[6:8]) - date_key = date[0:4] + date[4:6] + date[6:8] - - serv_metadata.zincrby('base64_date:'+date_key, hash, 1) - - # first time we see this hash - if not serv_metadata.hexists('metadata_hash:'+hash, 'estimated_type'): - serv_metadata.hset('metadata_hash:'+hash, 'first_seen', date_paste) - serv_metadata.hset('metadata_hash:'+hash, 'last_seen', date_paste) - else: - serv_metadata.hset('metadata_hash:'+hash, 'last_seen', date_paste) - - # first time we see this file on this paste - if serv_metadata.zscore('base64_hash:'+hash, message) is None: - print('first') - serv_metadata.hincrby('metadata_hash:'+hash, 'nb_seen_in_all_pastes', 1) - - serv_metadata.sadd('base64_paste:'+message, hash) # paste - hash map - serv_metadata.zincrby('base64_hash:'+hash, message, 1)# hash - paste map - - # create hash metadata - serv_metadata.hset('metadata_hash:'+hash, 'estimated_type', type) - serv_metadata.sadd('hash_all_type', type) - serv_metadata.sadd('hash_base64_all_type', type) - serv_metadata.zincrby('base64_type:'+type, date_key, 1) - - save_base64_as_file(decode, type, hash, json_data, id) - print('found {} '.format(type)) - # duplicate - else: - serv_metadata.zincrby('base64_hash:'+hash, message, 1) # number of b64 on this paste - - if(find): - publisher.warning('base64 decoded') - #Send to duplicate - p.populate_set_out(message, 'Duplicate') - #send to Browse_warning_paste - msg = ('base64;{}'.format(message)) - p.populate_set_out( msg, 'alertHandler') - - msg = 'infoleak:automatic-detection="base64";{}'.format(message) - p.populate_set_out(msg, 'Tags') - -def save_base64_as_file(decode, type, hash, json_data, id): - - local_filename_b64 = os.path.join(p.config.get("Directories", "base64"), type, hash[:2], hash) - filename_b64 = os.path.join(os.environ['AIL_HOME'], local_filename_b64) - - filename_json = os.path.join(os.environ['AIL_HOME'], - p.config.get("Directories", "base64"), type, hash[:2], hash + '.json') - - dirname = os.path.dirname(filename_b64) - if not os.path.exists(dirname): - os.makedirs(dirname) - - with open(filename_b64, 'wb') as f: - f.write(decode) - - # create hash metadata - serv_metadata.hset('metadata_hash:'+hash, 'saved_path', local_filename_b64) - serv_metadata.hset('metadata_hash:'+hash, 'size', os.path.getsize(filename_b64)) - - with open(filename_json, 'w') as f: - f.write(json_data) - - - - -if __name__ == '__main__': - # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh) - # Port of the redis instance used by pubsublogger - publisher.port = 6380 - # Script is the default channel used for the modules. - publisher.channel = 'Script' - - # Section name in bin/packages/modules.cfg - config_section = 'Base64' - - # Setup the I/O queues - p = Process(config_section) - max_execution_time = p.config.getint("Base64", "max_execution_time") - - serv_metadata = redis.StrictRedis( - host=p.config.get("ARDB_Metadata", "host"), - port=p.config.getint("ARDB_Metadata", "port"), - db=p.config.getint("ARDB_Metadata", "db"), - decode_responses=True) - - # Sent to the logging a description of the module - publisher.info("Base64 started") - - regex_base64 = '(?:[A-Za-z0-9+/]{4}){2,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)' - re.compile(regex_base64) - - # Endless loop getting messages from the input queue - while True: - # Get one message from the input queue - message = p.get_from_set() - if message is None: - - publisher.debug("{} queue is empty, waiting".format(config_section)) - time.sleep(1) - continue - - filename = message - paste = Paste.Paste(filename) - - signal.alarm(max_execution_time) - try: - # Do something with the message from the queue - #print(filename) - content = paste.get_p_content() - date = str(paste._get_p_date()) - search_base64(content,message, date) - - except TimeoutException: - p.incr_module_timeout_statistic() - print ("{0} processing timeout".format(paste.p_path)) - continue - else: - signal.alarm(0) diff --git a/bin/Binary.py b/bin/Binary.py deleted file mode 100755 index 29d6f2c5..00000000 --- a/bin/Binary.py +++ /dev/null @@ -1,184 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* -""" - Binary module - - Dectect Binary and decode it -""" -import time -import os -import datetime -import redis - -from pubsublogger import publisher - -from Helper import Process -from packages import Paste - -import re -from hashlib import sha1 -import magic -import json - -import signal - -class TimeoutException(Exception): - pass - -def timeout_handler(signum, frame): - raise TimeoutException - -signal.signal(signal.SIGALRM, timeout_handler) - -def decode_binary_string(binary_string): - return ''.join(chr(int(s[i*8:i*8+8],2)) for i in range(len(s)//8)) - -def decode_binary(binary_string): - return bytes(bytearray([int(binary_string[i:i+8], 2) for i in range(0, len(binary_string), 8)])) - - -def search_binary(content, message, date): - find = False - binary_list = re.findall(regex_binary, content) - if(len(binary_list) > 0): - - for binary in binary_list: - if len(binary) >= 40 : - decode = decode_binary(binary) - print(message) - - type = magic.from_buffer(decode, mime=True) - print(type) - - find = True - hash = sha1(decode).hexdigest() - print(hash) - - data = {} - data['name'] = hash - data['date'] = datetime.datetime.now().strftime("%d/%m/%y") - data['origin'] = message - data['estimated type'] = type - json_data = json.dumps(data) - - date_paste = '{}/{}/{}'.format(date[0:4], date[4:6], date[6:8]) - date_key = date[0:4] + date[4:6] + date[6:8] - - serv_metadata.zincrby('binary_date:'+date_key, hash, 1) - - # first time we see this hash - if not serv_metadata.hexists('metadata_hash:'+hash, 'estimated_type'): - serv_metadata.hset('metadata_hash:'+hash, 'first_seen', date_paste) - serv_metadata.hset('metadata_hash:'+hash, 'last_seen', date_paste) - else: - serv_metadata.hset('metadata_hash:'+hash, 'last_seen', date_paste) - - # first time we see this file encoding on this paste - if serv_metadata.zscore('binary_hash:'+hash, message) is None: - print('first binary') - serv_metadata.hincrby('metadata_hash:'+hash, 'nb_seen_in_all_pastes', 1) - - serv_metadata.sadd('binary_paste:'+message, hash) # paste - hash map - serv_metadata.zincrby('binary_hash:'+hash, message, 1)# hash - paste map - - # create hash metadata - serv_metadata.hset('metadata_hash:'+hash, 'estimated_type', type) - serv_metadata.sadd('hash_all_type', type) - serv_metadata.sadd('hash_binary_all_type', type) - serv_metadata.zincrby('binary_type:'+type, date_key, 1) - - save_binary_as_file(decode, type, hash, json_data, id) - print('found {} '.format(type)) - # duplicate - else: - serv_metadata.zincrby('binary_hash:'+hash, message, 1) # number of b64 on this paste - - if(find): - publisher.warning('binary decoded') - #Send to duplicate - p.populate_set_out(message, 'Duplicate') - #send to Browse_warning_paste - msg = ('binary;{}'.format(message)) - p.populate_set_out( msg, 'alertHandler') - - msg = 'infoleak:automatic-detection="binary";{}'.format(message) - p.populate_set_out(msg, 'Tags') - -def save_binary_as_file(decode, type, hash, json_data, id): - - local_filename_b64 = os.path.join(p.config.get("Directories", "base64"), type, hash[:2], hash) - filename_b64 = os.path.join(os.environ['AIL_HOME'], local_filename_b64) - - filename_json = os.path.join(os.environ['AIL_HOME'], - p.config.get("Directories", "base64"), type, hash[:2], hash + '.json') - - dirname = os.path.dirname(filename_b64) - if not os.path.exists(dirname): - os.makedirs(dirname) - - with open(filename_b64, 'wb') as f: - f.write(decode) - - # create hash metadata - serv_metadata.hset('metadata_hash:'+hash, 'saved_path', local_filename_b64) - serv_metadata.hset('metadata_hash:'+hash, 'size', os.path.getsize(filename_b64)) - - with open(filename_json, 'w') as f: - f.write(json_data) - - - - -if __name__ == '__main__': - # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh) - # Port of the redis instance used by pubsublogger - publisher.port = 6380 - # Script is the default channel used for the modules. - publisher.channel = 'Script' - - # Section name in bin/packages/modules.cfg - config_section = 'Binary' - - # Setup the I/O queues - p = Process(config_section) - max_execution_time = p.config.getint("Binary", "max_execution_time") - - serv_metadata = redis.StrictRedis( - host=p.config.get("ARDB_Metadata", "host"), - port=p.config.getint("ARDB_Metadata", "port"), - db=p.config.getint("ARDB_Metadata", "db"), - decode_responses=True) - - # Sent to the logging a description of the module - publisher.info("Binary started") - - regex_binary = '[0-1]{40,}' - re.compile(regex_binary) - - # Endless loop getting messages from the input queue - while True: - # Get one message from the input queue - message = p.get_from_set() - if message is None: - - publisher.debug("{} queue is empty, waiting".format(config_section)) - time.sleep(1) - continue - - filename = message - paste = Paste.Paste(filename) - - signal.alarm(max_execution_time) - try: - # Do something with the message from the queue - #print(filename) - content = paste.get_p_content() - date = str(paste._get_p_date()) - search_binary(content,message, date) - - except TimeoutException: - p.incr_module_timeout_statistic() - print ("{0} processing timeout".format(paste.p_path)) - continue - else: - signal.alarm(0) diff --git a/bin/Credential.py b/bin/Credential.py index d1016586..7f665227 100755 --- a/bin/Credential.py +++ b/bin/Credential.py @@ -28,6 +28,7 @@ import sys from packages import Paste from pubsublogger import publisher from Helper import Process +import datetime import re import redis from pyfaup.faup import Faup @@ -58,6 +59,12 @@ if __name__ == "__main__": db=p.config.get("ARDB_TermCred", "db"), decode_responses=True) + server_statistics = redis.StrictRedis( + host=p.config.get("ARDB_Statistics", "host"), + port=p.config.getint("ARDB_Statistics", "port"), + db=p.config.getint("ARDB_Statistics", "db"), + decode_responses=True) + criticalNumberToAlert = p.config.getint("Credential", "criticalNumberToAlert") minTopPassList = p.config.getint("Credential", "minTopPassList") @@ -65,6 +72,7 @@ if __name__ == "__main__": #regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+" regex_cred = "[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+" regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:" + while True: message = p.get_from_set() if message is None: @@ -132,6 +140,13 @@ if __name__ == "__main__": if sites_set: print("=======> Probably on : {}".format(', '.join(sites_set))) + + date = datetime.datetime.now().strftime("%Y%m") + for cred in creds: + maildomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", cred.lower())[0] + faup.decode(maildomains) + tld = faup.get()['tld'] + server_statistics.hincrby('credential_by_tld:'+date, tld, 1) else: publisher.info(to_print) print('found {} credentials'.format(len(creds))) diff --git a/bin/Decoder.py b/bin/Decoder.py index af385fed..abbf760b 100755 --- a/bin/Decoder.py +++ b/bin/Decoder.py @@ -103,8 +103,8 @@ def save_hash(decoder_name, message, date, decoded): serv_metadata.sadd('hash_'+ decoder_name +'_all_type', type) # first time we see this hash today - if serv_metadata.zscore('hash_date:'+date_key, hash) is None: - serv_metadata.zincrby('hash_type:'+type, date_key, 1) + #if serv_metadata.zscore('hash_date:'+date_key, hash) is None: + # serv_metadata.zincrby('hash_type:'+type, date_key, 1) # first time we see this hash encoding today if serv_metadata.zscore(decoder_name+'_date:'+date_key, hash) is None: diff --git a/bin/Helper.py b/bin/Helper.py index d90388f5..52097ef6 100755 --- a/bin/Helper.py +++ b/bin/Helper.py @@ -111,7 +111,7 @@ class PubSub(object): class Process(object): - def __init__(self, conf_section): + def __init__(self, conf_section, module=True): configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') if not os.path.exists(configfile): raise Exception('Unable to find the configuration file. \ @@ -125,23 +125,24 @@ class Process(object): self.subscriber_name = conf_section self.pubsub = None - if self.modules.has_section(conf_section): - self.pubsub = PubSub() - else: - raise Exception('Your process has to listen to at least one feed.') - self.r_temp = redis.StrictRedis( - host=self.config.get('RedisPubSub', 'host'), - port=self.config.get('RedisPubSub', 'port'), - db=self.config.get('RedisPubSub', 'db'), - decode_responses=True) + if module: + if self.modules.has_section(conf_section): + self.pubsub = PubSub() + else: + raise Exception('Your process has to listen to at least one feed.') + self.r_temp = redis.StrictRedis( + host=self.config.get('RedisPubSub', 'host'), + port=self.config.get('RedisPubSub', 'port'), + db=self.config.get('RedisPubSub', 'db'), + decode_responses=True) - self.serv_statistics = redis.StrictRedis( - host=self.config.get('ARDB_Statistics', 'host'), - port=self.config.get('ARDB_Statistics', 'port'), - db=self.config.get('ARDB_Statistics', 'db'), - decode_responses=True) + self.serv_statistics = redis.StrictRedis( + host=self.config.get('ARDB_Statistics', 'host'), + port=self.config.get('ARDB_Statistics', 'port'), + db=self.config.get('ARDB_Statistics', 'db'), + decode_responses=True) - self.moduleNum = os.getpid() + self.moduleNum = os.getpid() def populate_set_in(self): # monoproc diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index 9da28a81..a7c0631d 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -28,6 +28,7 @@ isqueued=`screen -ls | egrep '[0-9]+.Queue_AIL' | cut -d. -f1` isscripted=`screen -ls | egrep '[0-9]+.Script_AIL' | cut -d. -f1` isflasked=`screen -ls | egrep '[0-9]+.Flask_AIL' | cut -d. -f1` iscrawler=`screen -ls | egrep '[0-9]+.Crawler_AIL' | cut -d. -f1` +isfeeded=`screen -ls | egrep '[0-9]+.Feeder_Pystemon' | cut -d. -f1` function helptext { echo -e $YELLOW" @@ -338,15 +339,28 @@ function launch_flask { fi } +function launch_feeder { + if [[ ! $isfeeded ]]; then + screen -dmS "Feeder_Pystemon" + sleep 0.1 + echo -e $GREEN"\t* Launching Pystemon feeder"$DEFAULT + screen -S "Feeder_Pystemon" -X screen -t "Pystemon_feeder" bash -c 'cd '${AIL_BIN}'; ./feeder/pystemon-feeder.py; read x' + sleep 0.1 + screen -S "Feeder_Pystemon" -X screen -t "Pystemon" bash -c 'cd '${AIL_HOME}/../pystemon'; python2 pystemon.py; read x' + else + echo -e $RED"\t* A Feeder screen is already launched"$DEFAULT + fi +} + function killall { - if [[ $isredis || $isardb || $islogged || $isqueued || $isscripted || $isflasked ]]; then + if [[ $isredis || $isardb || $islogged || $isqueued || $isscripted || $isflasked || $isfeeded ]]; then echo -e $GREEN"Gracefully closing redis servers"$DEFAULT shutting_down_redis; sleep 0.2 echo -e $GREEN"Gracefully closing ardb servers"$DEFAULT shutting_down_ardb; echo -e $GREEN"Killing all"$DEFAULT - kill $isredis $isardb $islogged $isqueued $isscripted $isflasked + kill $isredis $isardb $islogged $isqueued $isscripted $isflasked $isfeeded sleep 0.2 echo -e $ROSE`screen -ls`$DEFAULT echo -e $GREEN"\t* $isredis $isardb $islogged $isqueued $isscripted killed."$DEFAULT @@ -459,6 +473,8 @@ while [ "$1" != "" ]; do ;; -c | --crawler ) launching_crawler; ;; + -f | --launchFeeder ) launch_feeder; + ;; -h | --help ) helptext; exit ;; diff --git a/bin/LibInjection.py b/bin/LibInjection.py index 4ad388d5..283bba00 100755 --- a/bin/LibInjection.py +++ b/bin/LibInjection.py @@ -12,6 +12,8 @@ It tries to identify SQL Injections with libinjection. """ import time +import datetime +import redis import string import urllib.request import re @@ -54,6 +56,12 @@ def analyse(url, path): msg = 'infoleak:automatic-detection="sql-injection";{}'.format(path) p.populate_set_out(msg, 'Tags') + #statistics + tld = url_parsed['tld'] + if tld is not None: + date = datetime.datetime.now().strftime("%Y%m") + server_statistics.hincrby('SQLInjection_by_tld:'+date, tld, 1) + if __name__ == '__main__': # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh) # Port of the redis instance used by pubsublogger @@ -70,6 +78,12 @@ if __name__ == '__main__': # Sent to the logging a description of the module publisher.info("Try to detect SQL injection with LibInjection") + server_statistics = redis.StrictRedis( + host=p.config.get("ARDB_Statistics", "host"), + port=p.config.getint("ARDB_Statistics", "port"), + db=p.config.getint("ARDB_Statistics", "db"), + decode_responses=True) + faup = Faup() # Endless loop getting messages from the input queue diff --git a/bin/MISP_The_Hive_feeder.py b/bin/MISP_The_Hive_feeder.py index 7fa6b223..0a8f1791 100755 --- a/bin/MISP_The_Hive_feeder.py +++ b/bin/MISP_The_Hive_feeder.py @@ -96,7 +96,7 @@ if __name__ == "__main__": publisher.port = 6380 publisher.channel = "Script" - config_section = 'misp_the_hive_feeder' + config_section = 'MISP_The_hive_feeder' configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') if not os.path.exists(configfile): @@ -123,8 +123,6 @@ if __name__ == "__main__": if uuid_ail is None: uuid_ail = r_serv_db.set('ail:uuid', uuid.uuid4() ) - config_section = 'misp_the_hive_feeder' - p = Process(config_section) # create MISP connection if flag_misp: diff --git a/bin/Mail.py b/bin/Mail.py index c1d8cf70..1f682661 100755 --- a/bin/Mail.py +++ b/bin/Mail.py @@ -2,7 +2,7 @@ # -*-coding:UTF-8 -* """ -The CreditCards Module +The Mail Module ====================== This module is consuming the Redis-list created by the Categ module. @@ -12,13 +12,15 @@ It apply mail regexes on paste content and warn if above a threshold. """ import redis -import pprint import time +import datetime import dns.exception from packages import Paste from packages import lib_refine from pubsublogger import publisher +from pyfaup.faup import Faup + from Helper import Process if __name__ == "__main__": @@ -27,6 +29,8 @@ if __name__ == "__main__": config_section = 'Mail' + faup = Faup() + p = Process(config_section) addr_dns = p.config.get("Mail", "dns") @@ -36,6 +40,12 @@ if __name__ == "__main__": port=p.config.getint("Redis_Cache", "port"), db=p.config.getint("Redis_Cache", "db"), decode_responses=True) + # ARDB # + server_statistics = redis.StrictRedis( + host=p.config.get("ARDB_Statistics", "host"), + port=p.config.getint("ARDB_Statistics", "port"), + db=p.config.getint("ARDB_Statistics", "db"), + decode_responses=True) # FUNCTIONS # publisher.info("Suscribed to channel mails_categ") @@ -66,7 +76,6 @@ if __name__ == "__main__": PST.save_attribute_redis(channel, (MX_values[0], list(MX_values[1]))) - pprint.pprint(MX_values) to_print = 'Mails;{};{};{};Checked {} e-mail(s);{}'.\ format(PST.p_source, PST.p_date, PST.p_name, MX_values[0], PST.p_path) @@ -79,12 +88,22 @@ if __name__ == "__main__": msg = 'infoleak:automatic-detection="mail";{}'.format(filename) p.populate_set_out(msg, 'Tags') + #create country statistics + date = datetime.datetime.now().strftime("%Y%m") + for mail in MX_values[1]: + print('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date)) + p.populate_set_out('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date), 'ModuleStats') + + faup.decode(mail) + tld = faup.get()['tld'] + server_statistics.hincrby('mail_by_tld:'+date, tld, MX_values[1][mail]) + else: publisher.info(to_print) - #Send to ModuleStats + #create country statistics for mail in MX_values[1]: - print('mail;{};{};{}'.format(1, mail, PST.p_date)) - p.populate_set_out('mail;{};{};{}'.format(1, mail, PST.p_date), 'ModuleStats') + print('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date)) + p.populate_set_out('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date), 'ModuleStats') prec_filename = filename diff --git a/bin/Mixer.py b/bin/Mixer.py index 96f20815..e1656b8e 100755 --- a/bin/Mixer.py +++ b/bin/Mixer.py @@ -101,6 +101,8 @@ if __name__ == '__main__': #feeder_name = ( complete_paste.replace("archive/","") ).split("/")[0] feeder_name, paste_name = complete_paste.split('>>') feeder_name.replace(" ","") + if 'import_dir' in feeder_name: + feeder_name = feeder_name.split('/')[1] paste_name = complete_paste except ValueError as e: diff --git a/bin/ModulesInformationV2.py b/bin/ModulesInformationV2.py index 36b397ca..30a24f15 100755 --- a/bin/ModulesInformationV2.py +++ b/bin/ModulesInformationV2.py @@ -724,13 +724,15 @@ def format_string(tab, padding_row): text="" for ite, elem in enumerate(the_array): - if len(elem) > padding_row[ite]: - text += "*" + elem[-padding_row[ite]+6:] - padd_off = " "*5 - else: - text += elem - padd_off = " "*0 - text += (padding_row[ite] - len(elem))*" " + padd_off + if elem is not None and type(elem) is str: + if len(elem) > padding_row[ite]: + text += "*" + elem[-padding_row[ite]+6:] + padd_off = " "*5 + else: + text += elem + padd_off = " "*0 + text += (padding_row[ite] - len(elem))*" " + padd_off + printstring.append( (text, the_pid) ) return printstring diff --git a/bin/preProcessFeed.py b/bin/PreProcessFeed.py.sample similarity index 100% rename from bin/preProcessFeed.py rename to bin/PreProcessFeed.py.sample diff --git a/bin/SQLInjectionDetection.py b/bin/SQLInjectionDetection.py index 117f3dc0..f03d7555 100755 --- a/bin/SQLInjectionDetection.py +++ b/bin/SQLInjectionDetection.py @@ -12,6 +12,8 @@ It test different possibility to makes some sqlInjection. """ import time +import datetime +import redis import string import urllib.request import re @@ -85,6 +87,13 @@ def analyse(url, path): msg = 'infoleak:automatic-detection="sql-injection";{}'.format(path) p.populate_set_out(msg, 'Tags') + + #statistics + tld = url_parsed['tld'] + if tld is not None: + date = datetime.datetime.now().strftime("%Y%m") + server_statistics.hincrby('SQLInjection_by_tld:'+date, tld, 1) + else: print("Potential SQL injection:") print(urllib.request.unquote(url)) @@ -143,6 +152,12 @@ if __name__ == '__main__': # Sent to the logging a description of the module publisher.info("Try to detect SQL injection") + server_statistics = redis.StrictRedis( + host=p.config.get("ARDB_Statistics", "host"), + port=p.config.getint("ARDB_Statistics", "port"), + db=p.config.getint("ARDB_Statistics", "db"), + decode_responses=True) + faup = Faup() # Endless loop getting messages from the input queue diff --git a/bin/feeder/pystemon-feeder.py b/bin/feeder/pystemon-feeder.py index a59a0a5b..b398e650 100755 --- a/bin/feeder/pystemon-feeder.py +++ b/bin/feeder/pystemon-feeder.py @@ -54,21 +54,22 @@ r = redis.StrictRedis(host='localhost', db=10, decode_responses=True) # 101 pastes processed feed # 102 raw pastes feed +topic = '102' while True: time.sleep(base_sleeptime + sleep_inc) - topic = 101 paste = r.lpop("pastes") print(paste) if paste is None: continue - socket.send_string("%d %s" % (topic, paste)) - topic = 102 try: with open(pystemonpath+paste, 'rb') as f: #.read() messagedata = f.read() - socket.send_string("%d %s %s" % (topic, paste, base64.b64encode(messagedata).decode())) - sleep_inc = sleep_inc-0.01 if sleep_inc-0.01 > 0 else 0 + path_to_send = pystemonpath+paste + + s = b' '.join( [ topic.encode(), path_to_send.encode(), base64.b64encode(messagedata) ] ) + socket.send(s) + sleep_inc = sleep_inc-0.01 if sleep_inc-0.01 > 0 else 0 except IOError as e: # file not found, could be a buffering issue -> increase sleeping time print('IOError: Increasing sleep time') diff --git a/bin/import_dir.py b/bin/import_dir.py index d8360631..6d6e4a94 100755 --- a/bin/import_dir.py +++ b/bin/import_dir.py @@ -4,6 +4,7 @@ import zmq import base64 from io import StringIO +import datetime import gzip import argparse import os @@ -13,10 +14,10 @@ import mimetypes ''' ' ' Import content/pastes into redis. -' If content is not compressed yet, compress it. +' If content is not compressed yet, compress it (only text). ' ' /!\ WARNING /!\ - Content to be imported must be placed in a directory tree of the form + Content to be imported can be placed in a directory tree of the form root/ | +-- Year/ @@ -28,6 +29,10 @@ import mimetypes +-- Content e.g.: ~/to_import/2017/08/22/paste1.gz + + or this directory tree will be created with the current date + e.g.: + ~/to_import/paste1.gz ' ''' @@ -66,36 +71,34 @@ if __name__ == "__main__": for filename in filenames: complete_path = os.path.join(dirname, filename) - #take wanted path of the file - wanted_path = os.path.realpath(complete_path) - wanted_path = wanted_path.split('/') - wanted_path = '/'.join(wanted_path[-(4+args.hierarchy):]) - - with gzip.open(complete_path, 'rb') as f: + with open(complete_path, 'rb') as f: messagedata = f.read() - #print(type(complete_path)) - #file = open(complete_path) - #messagedata = file.read() - - #if paste do not have a 'date hierarchy' ignore it - if not is_hierachy_valid(complete_path): - print('/!\ hierarchy not valid, should have the format yyyy/mm/dd/paste.gz /!\ ') - print(complete_path) - break - #verify that the data is gzipEncoded. if not compress it if 'text' in str(mimetypes.guess_type(complete_path)[0]): - out = StringIO.StringIO() - with gzip.GzipFile(fileobj=out, mode="w") as f: - f.write(messagedata) - messagedata = out.getvalue() - wanted_path += '.gz' + messagedata = gzip.compress(messagedata) + complete_path += '.gz' - print(args.name+'>'+wanted_path) - path_to_send = args.name + '>' + wanted_path - #s = b'{} {} {}'.format(args.channel, path_to_send, base64.b64encode(messagedata)) - # use bytes object - s = b' '.join( [ args.channel.encode(), path_to_send.encode(), base64.b64encode(messagedata) ] ) - socket.send(s) - time.sleep(args.seconds) + + if complete_path[-4:] != '.gz': + + #if paste do not have a 'date hierarchy', create it + if not is_hierachy_valid(complete_path): + now = datetime.datetime.now() + paste_name = complete_path.split('/')[-1] + directory = complete_path.split('/')[-2] + wanted_path = os.path.join(directory, now.strftime("%Y"), now.strftime("%m"), now.strftime("%d"), paste_name) + else: + #take wanted path of the file + wanted_path = os.path.realpath(complete_path) + wanted_path = wanted_path.split('/') + wanted_path = '/'.join(wanted_path[-(4+args.hierarchy):]) + + path_to_send = 'import_dir/' + args.name + '>>' + wanted_path + s = b' '.join( [ args.channel.encode(), path_to_send.encode(), base64.b64encode(messagedata) ] ) + socket.send(s) + print('import_dir/' + args.name+'>>'+wanted_path) + time.sleep(args.seconds) + + else: + print('{} : incorrect type'.format(complete_path)) diff --git a/bin/packages/config.cfg.sample b/bin/packages/config.cfg.sample index 5bb83d21..64b1f7f6 100644 --- a/bin/packages/config.cfg.sample +++ b/bin/packages/config.cfg.sample @@ -34,6 +34,8 @@ sender_port = 1337 ##### Flask ##### [Flask] +#Proxying requests to the app +baseUrl = / #Number of logs to display in the dashboard max_dashboard_logs = 15 #Maximum number of character to display in the toolip diff --git a/bin/packages/lib_refine.py b/bin/packages/lib_refine.py index 83511e40..5d2af0a9 100644 --- a/bin/packages/lib_refine.py +++ b/bin/packages/lib_refine.py @@ -38,6 +38,7 @@ def checking_MX_record(r_serv, adress_set, addr_dns): score = 0 num = len(adress_set) WalidMX = set([]) + validMX = {} # Transforming the set into a string MXdomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", str(adress_set).lower()) resolver = dns.resolver.Resolver() @@ -46,20 +47,23 @@ def checking_MX_record(r_serv, adress_set, addr_dns): resolver.lifetime = 2 if MXdomains != []: - for MXdomain in set(MXdomains): + for MXdomain in MXdomains: try: + MXdomain = MXdomain[1:] # Already in Redis living. - if r_serv.exists(MXdomain[1:]): + if r_serv.exists(MXdomain): score += 1 - WalidMX.add(MXdomain[1:]) + WalidMX.add(MXdomain) + validMX[MXdomain] = validMX.get(MXdomain, 0) + 1 # Not already in Redis else: # If I'm Walid MX domain - if resolver.query(MXdomain[1:], rdtype=dns.rdatatype.MX): + if resolver.query(MXdomain, rdtype=dns.rdatatype.MX): # Gonna be added in redis. - r_serv.setex(MXdomain[1:], 1, timedelta(days=1)) + r_serv.setex(MXdomain, 1, timedelta(days=1)) score += 1 - WalidMX.add(MXdomain[1:]) + WalidMX.add(MXdomain) + validMX[MXdomain] = validMX.get(MXdomain, 0) + 1 else: pass @@ -86,13 +90,14 @@ def checking_MX_record(r_serv, adress_set, addr_dns): except dns.resolver.Timeout: print('timeout') - r_serv.setex(MXdomain[1:], 1, timedelta(days=1)) + r_serv.setex(MXdomain, 1, timedelta(days=1)) except Exception as e: print(e) publisher.debug("emails before: {0} after: {1} (valid)".format(num, score)) - return (num, WalidMX) + #return (num, WalidMX) + return (num, validMX) def checking_A_record(r_serv, domains_set): diff --git a/bin/packages/modules.cfg b/bin/packages/modules.cfg index 14605e8c..deb5a069 100644 --- a/bin/packages/modules.cfg +++ b/bin/packages/modules.cfg @@ -6,9 +6,9 @@ publish = Redis_Mixer,Redis_preProcess1 subscribe = Redis_Mixer publish = Redis_Global,Redis_ModuleStats -[PreProcessFeed] -subscribe = Redis_preProcess1 -publish = Redis_Mixer +#[PreProcessFeed] +#subscribe = Redis_preProcess1 +#publish = Redis_Mixer [Duplicates] subscribe = Redis_Duplicate @@ -92,7 +92,7 @@ subscribe = Redis_alertHandler subscribe = Redis_Tags publish = Redis_Tags_feed -[misp_the_hive_feeder] +[MISP_The_hive_feeder] subscribe = Redis_Tags_feed #[send_to_queue] diff --git a/configs/keys/virusTotalKEYS.py.sample b/configs/keys/virusTotalKEYS.py.sample new file mode 100644 index 00000000..dc2a8495 --- /dev/null +++ b/configs/keys/virusTotalKEYS.py.sample @@ -0,0 +1,4 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +vt_key = 'YOUR_API_KEY' #virus total API key diff --git a/doc/presentation/ail-training-luxembourg-2018.pdf b/doc/presentation/ail-training-luxembourg-2018.pdf new file mode 100644 index 00000000..403c89d1 Binary files /dev/null and b/doc/presentation/ail-training-luxembourg-2018.pdf differ diff --git a/doc/statistics/create_graph_by_tld.py b/doc/statistics/create_graph_by_tld.py new file mode 100755 index 00000000..3ab3e1cc --- /dev/null +++ b/doc/statistics/create_graph_by_tld.py @@ -0,0 +1,184 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +''' +Create statistics pie charts by tld + +Default tld: lu +''' + +import matplotlib +matplotlib.use('Agg') +import os +import sys +import redis +import argparse +import datetime +import heapq +import operator +import matplotlib.pyplot as plt +import numpy as np + +sys.path.append(os.environ['AIL_BIN']) + +from Helper import Process + +def create_pie_chart(country ,db_key, date, pie_title, path, save_name): + + monthly_credential_by_tld = server_statistics.hkeys(db_key + date) + + l_tld = [] + for tld in monthly_credential_by_tld: + nb_tld = server_statistics.hget(db_key + date, tld) + if nb_tld is not None: + nb_tld = int(nb_tld) + else: + nb_tld = 0 + l_tld.append( (tld, nb_tld) ) + + mail_tld_top5 = heapq.nlargest(5, l_tld, key=operator.itemgetter(1)) + + # Pie chart, where the slices will be ordered and plotted counter-clockwise: + labels = [] + sizes = [] + explode = [] # only "explode" the 2nd slice (i.e. 'Hogs') + explode_value = 0 + for tld in mail_tld_top5: + labels.append(tld[0] +' ('+str(tld[1])+')') + sizes.append(tld[1]) + explode.append(explode_value) + explode_value = explode_value +0.1 + + nb_tld = server_statistics.hget(db_key + date, country) + if nb_tld is not None: + nb_tld = int(nb_tld) + else: + nb_tld = 0 + country_label = country + ' ('+str(nb_tld)+')' + if country_label not in labels: + labels.append(country_label) + sizes.append(nb_tld) + explode.append(explode_value) + explode = tuple(explode) + + fig1, ax1 = plt.subplots() + ax1.pie(sizes, explode=explode, labels=labels, autopct='%1.1f%%', + shadow=True, startangle=90) + ax1.axis('equal') # Equal aspect ratio ensures that pie is drawn as a circle. + + ax1.set_title(pie_title) + #plt.show() + plt.savefig(os.path.join(path,save_name)) + plt.close(fig1) + +def create_donut_chart(db_key, date, pie_title, path, save_name): + + monthly_credential_by_tld = server_statistics.hkeys(db_key + date) + print() + + l_tld = [] + for tld in monthly_credential_by_tld: + nb_tld = server_statistics.hget(db_key + date, tld) + if nb_tld is not None: + nb_tld = int(nb_tld) + else: + nb_tld = 0 + l_tld.append( (tld, nb_tld) ) + + mail_tld_top5 = heapq.nlargest(5, l_tld, key=operator.itemgetter(1)) + + # Pie chart, where the slices will be ordered and plotted counter-clockwise: + recipe = [] + data = [] + for tld in mail_tld_top5: + recipe.append(tld[0]) + data.append(tld[1]) + + nb_tld = server_statistics.hget(db_key + date, country) + if nb_tld is not None: + nb_tld = int(nb_tld) + else: + nb_tld = 0 + if country not in recipe: + recipe.append(country) + data.append(nb_tld) + + fig1, ax1 = plt.subplots(figsize=(6, 3), subplot_kw=dict(aspect="equal")) + + wedges, texts = ax1.pie(data, wedgeprops=dict(width=0.5), startangle=-40) + + bbox_props = dict(boxstyle="square,pad=0.3", fc="w", ec="k", lw=0.72) + kw = dict(xycoords='data', textcoords='data', arrowprops=dict(arrowstyle="-"), + bbox=bbox_props, zorder=0, va="center") + + for i, p in enumerate(wedges): + ang = (p.theta2 - p.theta1)/2. + p.theta1 + y = np.sin(np.deg2rad(ang)) + x = np.cos(np.deg2rad(ang)) + horizontalalignment = {-1: "right", 1: "left"}[int(np.sign(x))] + connectionstyle = "angle,angleA=0,angleB={}".format(ang) + kw["arrowprops"].update({"connectionstyle": connectionstyle}) + ax1.annotate(recipe[i], xy=(x, y), xytext=(1.35*np.sign(x), 1.4*y), + horizontalalignment=horizontalalignment, **kw) + + ax1.set_title(pie_title) + #plt.show() + plt.savefig(os.path.join(path, save_name)) + plt.close(fig1) + +if __name__ == '__main__': + + parser = argparse.ArgumentParser( + description='''This script is a part of the Analysis Information Leak + framework. Create statistics pie charts".''', + epilog='Example: ./create_lu_graph.py 0 lu now, create_lu_graph.py 0 lu 201807') + + parser.add_argument('type', type=int, default=0, + help='''The graph type (default 0), + 0: all, + 1: credential_pie, + 2: mail_pie + 3: sqlinjection_pie, + 4: iban_pie,''', + choices=[0, 1, 2, 3, 4], action='store') + + parser.add_argument('country', type=str, default="lu", + help='''The country code, lu:default''', + action='store') + + parser.add_argument('date', type=str, default="now", + help='''month %Y%m, example: 201810''', action='store') + + args = parser.parse_args() + + path = os.path.join(os.environ['AIL_HOME'], 'doc', 'statistics') # save path + + config_section = 'ARDB_Statistics' + + p = Process(config_section, False) + + # ARDB # + server_statistics = redis.StrictRedis( + host=p.config.get("ARDB_Statistics", "host"), + port=p.config.getint("ARDB_Statistics", "port"), + db=p.config.getint("ARDB_Statistics", "db"), + decode_responses=True) + + if args.date == 'now' or len(args.date) != 6: + date = datetime.datetime.now().strftime("%Y%m") + else: + date = args.date + + if args.type == 0: + create_pie_chart(args.country, 'credential_by_tld:', date, "AIL: Credential leak by tld", path, 'AIL_credential_by_tld.png') + create_pie_chart(args.country, 'mail_by_tld:', date, "AIL: mail leak by tld", path, 'AIL_mail_by_tld.png') + create_pie_chart(args.country, 'SQLInjection_by_tld:', date, "AIL: SQLInjection by tld", path, 'AIL_SQLInjection_by_tld.png') + create_pie_chart(args.country.upper(), 'iban_by_country:', date, "AIL: Iban by country", path, 'AIL_iban_by_country.png') + elif args.type == 1: + create_pie_chart(args.country, 'credential_by_tld:', date, "AIL: Credential leak by tld", path, 'AIL_credential_by_tld.png') + elif args.type == 2: + create_pie_chart(args.country, 'mail_by_tld:', date, "AIL: mail leak by tld", path, 'AIL_mail_by_tld.png') + elif args.type == 3: + create_pie_chart(args.country, 'SQLInjection_by_tld:', date, "AIL: sqlInjection by tld", path, 'AIL_sqlInjectionl_by_tld.png') + elif args.type == 4: + create_pie_chart(args.country.upper(), 'iban_by_country:', date, "AIL: Iban by country", path, 'AIL_iban_by_country.png') diff --git a/docker_start.sh b/docker_start.sh index 1f80ecae..c8f06010 100755 --- a/docker_start.sh +++ b/docker_start.sh @@ -1,89 +1,20 @@ -echo "Currently unmaintained, continue at your own risk of not having a working AIL at the end :(" -exit 1 +#!/bin/bash +signalListener() { + "$@" & + pid="$!" + trap "echo 'Stopping'; kill -SIGTERM $pid" SIGINT SIGTERM + + while kill -0 $pid > /dev/null 2>&1; do + wait + done +} + source ./AILENV/bin/activate cd bin +./LAUNCH.sh -l +./LAUNCH.sh -f -export PATH=$AIL_HOME:$PATH -export PATH=$AIL_REDIS:$PATH -export PATH=$AIL_LEVELDB:$PATH -export PATH=$AIL_ARDB:$PATH -if [ -z $1 ]; then - export AILENV=/opt/AIL - else - export AILENV=$1 -fi +signalListener tail -f /dev/null $@ -conf_dir="${AIL_HOME}/configs/" - -screen -dmS "Redis" -screen -S "Redis" -X screen -t "6379" bash -c 'redis-server '$conf_dir'6379.conf ; read x' -screen -S "Redis" -X screen -t "6380" bash -c 'redis-server '$conf_dir'6380.conf ; read x' -screen -S "Redis" -X screen -t "6381" bash -c 'redis-server '$conf_dir'6381.conf ; read x' - -# For Words and curves -sleep 0.1 -screen -dmS "ARDB_AIL" -screen -S "ARDB_AIL" -X screen -t "6382" bash -c 'ardb-server '$conf_dir'6382.conf ; read x' - -#Want to launch more level_db? -lvdbhost='127.0.0.1' -lvdbdir="${AIL_HOME}/LEVEL_DB_DATA/" -db1_y='2013' -db2_y='2014' -db3_y='2016' -db4_y='2017' - -dbC_y='3016' - -nb_db=13 - -screen -dmS "LevelDB" -#Add lines here with appropriates options. -screen -S "LevelDB" -X screen -t "2013" bash -c 'redis-leveldb -H '$lvdbhost' -D '$lvdbdir'2013/ -P '$db1_y' -M '$nb_db'; read x' -screen -S "LevelDB" -X screen -t "2014" bash -c 'redis-leveldb -H '$lvdbhost' -D '$lvdbdir'2014/ -P '$db2_y' -M '$nb_db'; read x' -screen -S "LevelDB" -X screen -t "2016" bash -c 'redis-leveldb -H '$lvdbhost' -D '$lvdbdir'2016/ -P '$db3_y' -M '$nb_db'; read x' -screen -S "LevelDB" -X screen -t "2016" bash -c 'redis-leveldb -H '$lvdbhost' -D '$lvdbdir'2017/ -P '$db4_y' -M '$nb_db'; read x' - -# For Curve -screen -S "LevelDB" -X screen -t "3016" bash -c 'redis-leveldb -H '$lvdbhost' -D '$lvdbdir'3016/ -P '$dbC_y' -M '$nb_db'; read x' - - -screen -dmS "Logging" -screen -S "Logging" -X screen -t "LogQueue" bash -c 'log_subscriber -p 6380 -c Queuing -l ../logs/; read x' -screen -S "Logging" -X screen -t "LogScript" bash -c 'log_subscriber -p 6380 -c Script -l ../logs/; read x' - -screen -dmS "Queue" -screen -S "Queue" -X screen -t "Queues" bash -c './launch_queues.py; read x' - -screen -dmS "Script" -screen -S "Script" -X screen -t "ModuleInformation" bash -c './ModuleInformation.py -k 0 -c 1; read x' -screen -S "Script" -X screen -t "Mixer" bash -c './Mixer.py; read x' -screen -S "Script" -X screen -t "Global" bash -c './Global.py; read x' -screen -S "Script" -X screen -t "Duplicates" bash -c './Duplicates.py; read x' -screen -S "Script" -X screen -t "Attributes" bash -c './Attributes.py; read x' -screen -S "Script" -X screen -t "Lines" bash -c './Lines.py; read x' -screen -S "Script" -X screen -t "DomClassifier" bash -c './DomClassifier.py; read x' -screen -S "Script" -X screen -t "Categ" bash -c './Categ.py; read x' -screen -S "Script" -X screen -t "Tokenize" bash -c './Tokenize.py; read x' -screen -S "Script" -X screen -t "CreditCards" bash -c './CreditCards.py; read x' -screen -S "Script" -X screen -t "Onion" bash -c './Onion.py; read x' -screen -S "Script" -X screen -t "Mail" bash -c './Mail.py; read x' -screen -S "Script" -X screen -t "Web" bash -c './Web.py; read x' -screen -S "Script" -X screen -t "Credential" bash -c './Credential.py; read x' -screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x' -screen -S "Script" -X screen -t "CurveManageTopSets" bash -c './CurveManageTopSets.py; read x' -screen -S "Script" -X screen -t "Indexer" bash -c './Indexer.py; read x' -screen -S "Script" -X screen -t "Keys" bash -c './Keys.py; read x' -screen -S "Script" -X screen -t "Phone" bash -c './Phone.py; read x' -screen -S "Script" -X screen -t "Release" bash -c './Release.py; read x' -screen -S "Script" -X screen -t "Cve" bash -c './Cve.py; read x' -screen -S "Script" -X screen -t "WebStats" bash -c './WebStats.py; read x' -screen -S "Script" -X screen -t "ModuleStats" bash -c './ModuleStats.py; read x' -screen -S "Script" -X screen -t "SQLInjectionDetection" bash -c './SQLInjectionDetection.py; read x' -screen -S "Script" -X screen -t "BrowseWarningPaste" bash -c './BrowseWarningPaste.py; read x' -screen -S "Script" -X screen -t "SentimentAnalysis" bash -c './SentimentAnalysis.py; read x' - -cd $AILENV -cd var/www/ -python Flask_server.py +./LAUNCH.sh -k diff --git a/installing_deps.sh b/installing_deps.sh index c29ba4b9..975830f4 100755 --- a/installing_deps.sh +++ b/installing_deps.sh @@ -5,7 +5,7 @@ set -x sudo apt-get update -sudo apt-get install python3-pip python-virtualenv python3-dev libfreetype6-dev \ +sudo apt-get install python3-pip python-virtualenv python3-dev python3-tk libfreetype6-dev \ screen g++ python-tk unzip libsnappy-dev cmake -y #optional tor install diff --git a/pip3_packages_requirement.txt b/pip3_packages_requirement.txt index ddf60626..cc1d0543 100644 --- a/pip3_packages_requirement.txt +++ b/pip3_packages_requirement.txt @@ -73,3 +73,6 @@ https://github.com/saffsd/langid.py/archive/master.zip #LibInjection bindings pylibinjection + +# Graph +matplotlib diff --git a/pystemon/config.cfg b/pystemon/config.cfg new file mode 100644 index 00000000..90efee74 --- /dev/null +++ b/pystemon/config.cfg @@ -0,0 +1,219 @@ +[Directories] +bloomfilters = Blooms +dicofilters = Dicos +pastes = PASTES +base64 = BASE64 + +wordtrending_csv = var/www/static/csv/wordstrendingdata +wordsfile = files/wordfile + +protocolstrending_csv = var/www/static/csv/protocolstrendingdata +protocolsfile = files/protocolsfile + +tldstrending_csv = var/www/static/csv/tldstrendingdata +tldsfile = faup/src/data/mozilla.tlds + +domainstrending_csv = var/www/static/csv/domainstrendingdata + +pystemonpath = /opt/pystemon/ + +sentiment_lexicon_file = sentiment/vader_lexicon.zip/vader_lexicon/vader_lexicon.txt + +##### Notifications ###### +[Notifications] +ail_domain = http://localhost:7000 +sender = sender@example.com +sender_host = smtp.example.com +sender_port = 1337 + +# optional for using with authenticated SMTP over SSL +# sender_pw = securepassword + +##### Flask ##### +[Flask] +#Number of logs to display in the dashboard +max_dashboard_logs = 15 +#Maximum number of character to display in the toolip +max_preview_char = 250 +#Maximum number of character to display in the modal +max_preview_modal = 800 +#Default number of header to display in trending graphs +default_display = 10 +#Number of minutes displayed for the number of processed pastes. +minute_processed_paste = 10 +#Maximum line length authorized to make a diff between duplicates +DiffMaxLineLength = 10000 + +#### Modules #### +[BankAccount] +max_execution_time = 60 + +[Categ] +#Minimum number of match between the paste and the category file +matchingThreshold=1 + +[Credential] +#Minimum length that a credential must have to be considered as such +minimumLengthThreshold=3 +#Will be pushed as alert if the number of credentials is greater to that number +criticalNumberToAlert=8 +#Will be considered as false positive if less that X matches from the top password list +minTopPassList=5 + +[Curve] +max_execution_time = 90 + +[Base64] +path = Base64/ +max_execution_time = 60 + +[Modules_Duplicates] +#Number of month to look back +maximum_month_range = 3 +#The value where two pastes are considerate duplicate for ssdeep. +threshold_duplicate_ssdeep = 50 +#The value where two pastes are considerate duplicate for tlsh. +threshold_duplicate_tlsh = 52 +#Minimum size of the paste considered +min_paste_size = 0.3 + +[Module_ModuleInformation] +#Threshold to deduce if a module is stuck or not, in seconds. +threshold_stucked_module=600 + +[Module_Mixer] +#Define the configuration of the mixer, possible value: 1, 2 or 3 +operation_mode = 3 +#Define the time that a paste will be considerate duplicate. in seconds (1day = 86400) +ttl_duplicate = 86400 + +##### Redis ##### +[Redis_Cache] +host = localhost +port = 6379 +db = 0 + +[Redis_Log] +host = localhost +port = 6380 +db = 0 + +[Redis_Log_submit] +host = localhost +port = 6380 +db = 1 + +[Redis_Queues] +host = localhost +port = 6381 +db = 0 + +[Redis_Data_Merging] +host = localhost +port = 6379 +db = 1 + +[Redis_Paste_Name] +host = localhost +port = 6379 +db = 2 + +[Redis_Mixer_Cache] +host = localhost +port = 6381 +db = 1 + +##### ARDB ##### +[ARDB_Curve] +host = localhost +port = 6382 +db = 1 + +[ARDB_Sentiment] +host = localhost +port = 6382 +db = 4 + +[ARDB_TermFreq] +host = localhost +port = 6382 +db = 2 + +[ARDB_TermCred] +host = localhost +port = 6382 +db = 5 + +[ARDB_DB] +host = localhost +port = 6382 +db = 0 + +[ARDB_Trending] +host = localhost +port = 6382 +db = 3 + +[ARDB_Hashs] +host = localhost +db = 1 + +[ARDB_Tags] +host = localhost +port = 6382 +db = 6 + +[ARDB_Metadata] +host = localhost +port = 6382 +db = 7 + +[ARDB_Statistics] +host = localhost +port = 6382 +db = 8 + +[Url] +cc_critical = DE + +[DomClassifier] +cc = DE +cc_tld = r'\.de$' +dns = 8.8.8.8 + +[Mail] +dns = 8.8.8.8 + +# Indexer configuration +[Indexer] +type = whoosh +path = indexdir +register = indexdir/all_index.txt +#size in Mb +index_max_size = 2000 + +[ailleakObject] +maxDuplicateToPushToMISP=10 + +############################################################################### + +# For multiple feed, add them with "," without space +# e.g.: tcp://127.0.0.1:5556,tcp://127.0.0.1:5557 +[ZMQ_Global] +#address = tcp://crf.circl.lu:5556 +address = tcp://127.0.0.1:5556,tcp://crf.circl.lu:5556 +channel = 102 +bind = tcp://127.0.0.1:5556 + +[ZMQ_Url] +address = tcp://127.0.0.1:5004 +channel = urls + +[ZMQ_FetchedOnion] +address = tcp://127.0.0.1:5005 +channel = FetchedOnion + +[RedisPubSub] +host = localhost +port = 6381 +db = 0 diff --git a/pystemon/install.sh b/pystemon/install.sh new file mode 100755 index 00000000..6620dbf8 --- /dev/null +++ b/pystemon/install.sh @@ -0,0 +1,9 @@ +git clone https://github.com/cvandeplas/pystemon.git /opt/pystemon + +apt-get install -y python-pip python-requests python-yaml python-redis + +pip install beautifulsoup4 + +BASEDIR=$(dirname "$0") +cp $BASEDIR/config.cfg /opt/AIL/bin/packages/ +cp $BASEDIR/pystemon.yaml /opt/pystemon/ diff --git a/pystemon/pystemon.yaml b/pystemon/pystemon.yaml new file mode 100644 index 00000000..819a3cb8 --- /dev/null +++ b/pystemon/pystemon.yaml @@ -0,0 +1,230 @@ +#network: # Network settings +# ip: '1.1.1.1' # Specify source IP address if you want to bind on a specific one + +archive: + save: yes # Keep a copy of pasties that triggered alerts + save-all: yes # Keep a copy of all pasties + dir: "alerts" # Directory where matching pasties should be kept + dir-all: "archive" # Directory where all pasties should be kept (if save-all is set to yes) + compress: yes # Store the pasties compressed + +engine: re # Only re (default) or regex (pip install regex) are supported. +strict_regex: no # when compiling regex, hard fail or not on error + +save-thread: no # Use a separate thread to save pasties + +db: + sqlite3: # Store information about the pastie in a database + enable: no # Activate this DB engine # NOT FULLY IMPLEMENTED + file: 'db.sqlite3' # The filename of the database + lookup: no # lookup sqlite for already seen pasties + +mongo: + save: no # Keep a copy of pasties that triggered alerts + save-all: no # Keep a copy of all pasties + save-profile: # configure which data to save + content-on-miss: no # save the content even on miss + timestamp: no # add the timestamp (UTC) + url: no # add the public URL + site: no # add the site + id: no # add the per-site id + matched: no # add the matched status (usefull if content-on-miss = yes) + filename: no # add the local filename (to no store content in mongodb) + lookup: no # lookup mongodb for already seen pasties + database: "paste" + collection: "paste" + url: "mongodb://localhost" + user: + password: + +redis: + queue: yes # Keep a copy of pasties that triggered alerts + queue-all: yes # Keep a copy of all pasties + server: "localhost" + port: 6379 + database: 10 + lookup: no # lookup redisdb for already seen pasties (NOT IMPLEMENTED) + +email: + alert: no # Enable/disable email alerts + from: alert@example.com + to: alert@example.com + server: 127.0.0.1 # Address of the server (hostname or IP) + port: 25 # Outgoing SMTP port: 25, 587, ... + tls: no # Enable/disable tls support + username: '' # (optional) Username for authentication. Leave blank for no authentication. + password: '' # (optional) Password for authentication. Leave blank for no authentication. + subject: '[pystemon] - {subject}' + +##### +# Definition of regular expressions to search for in the pasties +# +search: +# - description: '' # (optional) A human readable description used in alerts. +# # If left unspecified the search regular expression +# # will be used as description. +# search: '' # The regular expression to search for. +# count: '' # (optional) How many hits should it have to be interesting? +# exclude: '' # (optional) Do not alert if this regular expression matches +# regex-flags: '' # (optional) Regular expression flags to give to the find function. +# # Default = re.IGNORECASE +# # Set to 0 to have no flags set +# # See http://docs.python.org/2/library/re.html#re.DEBUG for more info. +# # Warning: when setting this the default is overridden +# # example: 're.MULTILINE + re.DOTALL + re.IGNORECASE' +# to: '' # (optional) Additional recipients for email alert, comma separated list + + - search: '[^a-zA-Z0-9]example\.com' + - search: '[^a-zA-Z0-9]foobar\.com' + - description: 'Download (non-porn)' + search: 'download' + exclude: 'porn|sex|teen' + count: 4 + +##### +# Configuration section for the paste sites +# +threads: 1 # number of download threads per site +site: +# example.com: +# archive-url: # the url where the list of last pasties is present +# # example: 'http://pastebin.com/archive' +# archive-regex: # a regular expression to extract the pastie-id from the page. +# # do not forget the () to extract the pastie-id +# # example: '.+' +# download-url: # url for the raw pastie. +# # Should contain {id} on the place where the ID of the pastie needs to be placed +# # example: 'http://pastebin.com/raw.php?i={id}' +# public-url: # optional, defaults to be the same as download-url, so it should meet the same requirements +# # is used for display in logging and e-mail notifications +# update-max: 40 # every X seconds check for new updates to see if new pasties are available +# update-min: 30 # a random number will be chosen between these two numbers +# pastie-classname: # OPTIONAL: The name of a custom Class that inherits from Pastie +# # This is practical for sites that require custom fetchPastie() functions + + pastebin.com: + archive-url: 'https://pastebin.com/archive' + archive-regex: '.+' + download-url: 'https://pastebin.com/raw/{id}' + update-max: 50 + update-min: 40 + + # See https://pastebin.com/api_scraping_faq , you will need a pro account on pastebin + pastebin.com_pro: + archive-url: 'https://scrape.pastebin.com/api_scraping.php?limit=500' + archive-regex: '"key": "(.+)",' + download-url: 'https://scrape.pastebin.com/api_scrape_item.php?i={id}' + public-url: 'https://pastebin.com/raw/{id}' + update-max: 50 + update-min: 40 + + slexy.org: + archive-url: 'https://slexy.org/recent' + archive-regex: 'View paste' + download-url: 'https://slexy.org/view/{id}' + pastie-classname: PastieSlexyOrg + + gist.github.com: + archive-url: 'https://gist.github.com/discover' + archive-regex: '' + download-url: 'https://gist.githubusercontent.com/{id}/raw/' + + codepad.org: + archive-url: 'http://codepad.org/recent' + archive-regex: 'view' + download-url: 'http://codepad.org/{id}/raw.txt' + + kpaste.net: + archive-url: 'http://kpaste.net/' + archive-regex: '" href="/(\w+)">' + download-url: 'http://kpaste.net/{id}?raw' + + ideone.com: + archive-url: 'http://ideone.com/recent' + archive-regex: '#' + download-url: 'http://ideone.com/plain/{id}' + + pastebin.ru: + archive-url: 'http://pastebin.ru/' + archive-regex: '' + download-url: 'http://pastebin.ru/{id}' + + pastebin.fr: + archive-url: 'http://pastebin.fr' + archive-regex: '' + # download-url: 'http://pastie.org/pastes/{id}/text' + + # pastebin.ca: + # archive-url: 'http://pastebin.ca' + # archive-regex: 'rel="/preview.php\?id=(\d+)' + # download-url: 'http://pastebin.ca/{id}' + + # nopaste.me: + # archive-url: 'http://nopaste.me/recent' + # archive-regex: '' + # download-url: 'http://nopaste.me/download/{id}.txt' + + # cdv.lt: + # pastie-classname: PastieCdvLt + # archive-url: 'http://cdv.lt/snippets' + # archive-regex: '[0-9]' + # download-url: 'http://cdv.lt/api/snippet/{id}' + + # snipt.net: + # pastie-classname: PastieSniptNet + # archive-url: 'https://snipt.net/public/?rss' + # archive-regex: 'https://snipt.net/(.+)/' + # download-url: 'https://snipt.net/{id}/' + + # quickleak.se: + # archive-url: 'http://www.quickleak.se/last-pastes.html' + # archive-regex: '