diff --git a/HOWTO.md b/HOWTO.md index d4a7b962..1a66402b 100644 --- a/HOWTO.md +++ b/HOWTO.md @@ -6,7 +6,7 @@ How to feed the AIL framework For the moment, there are three different ways to feed AIL with data: -1. Be a collaborator of CIRCL and ask to access our feed. It will be sent to the static IP your are using for AIL. +1. Be a collaborator of CIRCL and ask to access our feed. It will be sent to the static IP you are using for AIL. 2. You can setup [pystemon](https://github.com/CIRCL/pystemon) and use the custom feeder provided by AIL (see below). diff --git a/OVERVIEW.md b/OVERVIEW.md index aa417ff7..b62a063f 100644 --- a/OVERVIEW.md +++ b/OVERVIEW.md @@ -20,4 +20,46 @@ Redis and ARDB overview - DB 0 - Lines duplicate - DB 1 - Hashes -To be updated + +ARDB overview +--------------------------- + +* DB 7 - Metadata: + ----------------------------------------- BASE64 ---------------------------------------- + + HSET - 'metadata_hash:'+hash 'saved_path' saved_path + 'size' size + 'first_seen' first_seen + 'last_seen' last_seen + 'estimated_type' estimated_type + 'vt_link' vt_link + 'vt_report' vt_report + 'nb_seen_in_all_pastes' nb_seen_in_all_pastes + 'base64_decoder' nb_encoded + 'binary_decoder' nb_encoded + + SET - 'all_decoder' decoder* + + SET - 'hash_all_type' hash_type * + SET - 'hash_base64_all_type' hash_type * + SET - 'hash_binary_all_type' hash_type * + + SET - 'hash_paste:'+paste hash * + SET - 'base64_paste:'+paste hash * + SET - 'binary_paste:'+paste hash * + + ZADD - 'hash_date:'+20180622 hash * nb_seen_this_day + ZADD - 'base64_date:'+20180622 hash * nb_seen_this_day + ZADD - 'binary_date:'+20180622 hash * nb_seen_this_day + + ZADD - 'nb_seen_hash:'+hash paste * nb_seen_in_paste + ZADD - 'base64_hash:'+hash paste * nb_seen_in_paste + ZADD - 'binary_hash:'+hash paste * nb_seen_in_paste + + ZADD - 'hash_type:'+type date nb_seen + ZADD - 'base64_type:'+type date nb_seen + ZADD - 'binary_type:'+type date nb_seen + + GET - 'base64_decoded:'+date 
nb_decoded + GET - 'binary_decoded:'+date nb_decoded + diff --git a/README.md b/README.md index 0c500efd..83bb0b86 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,3 @@ -[![Build Status](https://travis-ci.org/CIRCL/AIL-framework.svg?branch=master)](https://travis-ci.org/CIRCL/AIL-framework) - AIL === @@ -11,6 +9,22 @@ AIL is a modular framework to analyse potential information leaks from unstructu ![Dashboard](./doc/screenshots/dashboard.png?raw=true "AIL framework dashboard") + + + + + + + + + + + + + +
Latest ReleaseGitHub version
Contributors
License
+ + Features -------- @@ -31,14 +45,17 @@ Features * Terms, Set of terms and Regex tracking and occurrence * Many more modules for extracting phone numbers, credentials and others * Alerting to [MISP](https://github.com/MISP/MISP) to share found leaks within a threat intelligence platform using [MISP standard](https://www.misp-project.org/objects.html#_ail_leak) -* Detect and decode Base64 and store files +* Detect and decode encoded files (Base64, hex encoded or your own decoding scheme) and store files * Detect Amazon AWS and Google API keys * Detect Bitcoin address and Bitcoin private keys -* Detect private keys and certificate +* Detect private keys, certificates and keys (including SSH, OpenVPN) +* Detect IBAN bank accounts * Tagging system with [MISP Galaxy](https://github.com/MISP/misp-galaxy) and [MISP Taxonomies](https://github.com/MISP/misp-taxonomies) tags * UI paste submission * Create events on [MISP](https://github.com/MISP/MISP) and cases on [The Hive](https://github.com/TheHive-Project/TheHive) * Automatic paste export at detection on [MISP](https://github.com/MISP/MISP) (events) and [The Hive](https://github.com/TheHive-Project/TheHive) (alerts) on selected tags +* Extracted and decoded files can be searched by date range, type of file (mime-type) and encoding discovered +* Graph relationships between decoded files (hashes) Installation ------------ @@ -152,6 +169,12 @@ Trending charts ![Trending-Web](./doc/screenshots/trending-web.png?raw=true "AIL framework webtrending") ![Trending-Modules](./doc/screenshots/trending-module.png?raw=true "AIL framework modulestrending") +Extracted encoded files from pastes +----------------------------------- + +![Extracted files from pastes](./doc/screenshots/ail-hashedfiles.png?raw=true "AIL extracted decoded files statistics") +![Relationships between extracted files from encoded file in unstructured data](./doc/screenshots/hashedfile-graph.png?raw=true "Relationships between extracted files from encoded file in
unstructured data") + Browsing -------- diff --git a/bin/ApiKey.py b/bin/ApiKey.py index e7ded9b2..faf4b2d9 100755 --- a/bin/ApiKey.py +++ b/bin/ApiKey.py @@ -86,8 +86,7 @@ if __name__ == "__main__": if message is not None: - search_api_key(message) - + search_api_key(message) else: publisher.debug("Script ApiKey is Idling 10s") diff --git a/bin/Base64.py b/bin/Base64.py index 960ca6de..e8b3fbc5 100755 --- a/bin/Base64.py +++ b/bin/Base64.py @@ -8,6 +8,7 @@ import time import os import datetime +import redis from pubsublogger import publisher @@ -31,7 +32,7 @@ def timeout_handler(signum, frame): signal.signal(signal.SIGALRM, timeout_handler) -def search_base64(content, message): +def search_base64(content, message, date): find = False base64_list = re.findall(regex_base64, content) if(len(base64_list) > 0): @@ -39,6 +40,7 @@ def search_base64(content, message): for b64 in base64_list: if len(b64) >= 40 : decode = base64.b64decode(b64) + print(decode) type = magic.from_buffer(decode, mime=True) #print(type) @@ -46,6 +48,8 @@ def search_base64(content, message): find = True hash = sha1(decode).hexdigest() + print(message) + print(hash) data = {} data['name'] = hash @@ -54,8 +58,37 @@ def search_base64(content, message): data['estimated type'] = type json_data = json.dumps(data) - save_base64_as_file(decode, type, hash, json_data) - print('found {} '.format(type)) + date_paste = '{}/{}/{}'.format(date[0:4], date[4:6], date[6:8]) + date_key = date[0:4] + date[4:6] + date[6:8] + + serv_metadata.zincrby('base64_date:'+date_key, hash, 1) + + # first time we see this hash + if not serv_metadata.hexists('metadata_hash:'+hash, 'estimated_type'): + serv_metadata.hset('metadata_hash:'+hash, 'first_seen', date_paste) + serv_metadata.hset('metadata_hash:'+hash, 'last_seen', date_paste) + else: + serv_metadata.hset('metadata_hash:'+hash, 'last_seen', date_paste) + + # first time we see this file on this paste + if serv_metadata.zscore('base64_hash:'+hash, message) is None: + 
print('first') + serv_metadata.hincrby('metadata_hash:'+hash, 'nb_seen_in_all_pastes', 1) + + serv_metadata.sadd('base64_paste:'+message, hash) # paste - hash map + serv_metadata.zincrby('base64_hash:'+hash, message, 1)# hash - paste map + + # create hash metadata + serv_metadata.hset('metadata_hash:'+hash, 'estimated_type', type) + serv_metadata.sadd('hash_all_type', type) + serv_metadata.sadd('hash_base64_all_type', type) + serv_metadata.zincrby('base64_type:'+type, date_key, 1) + + save_base64_as_file(decode, type, hash, json_data, id) + print('found {} '.format(type)) + # duplicate + else: + serv_metadata.zincrby('base64_hash:'+hash, message, 1) # number of b64 on this paste if(find): publisher.warning('base64 decoded') @@ -68,10 +101,10 @@ def search_base64(content, message): msg = 'infoleak:automatic-detection="base64";{}'.format(message) p.populate_set_out(msg, 'Tags') -def save_base64_as_file(decode, type, hash, json_data): +def save_base64_as_file(decode, type, hash, json_data, id): - filename_b64 = os.path.join(os.environ['AIL_HOME'], - p.config.get("Directories", "base64"), type, hash[:2], hash) + local_filename_b64 = os.path.join(p.config.get("Directories", "base64"), type, hash[:2], hash) + filename_b64 = os.path.join(os.environ['AIL_HOME'], local_filename_b64) filename_json = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "base64"), type, hash[:2], hash + '.json') @@ -83,6 +116,10 @@ def save_base64_as_file(decode, type, hash, json_data): with open(filename_b64, 'wb') as f: f.write(decode) + # create hash metadata + serv_metadata.hset('metadata_hash:'+hash, 'saved_path', local_filename_b64) + serv_metadata.hset('metadata_hash:'+hash, 'size', os.path.getsize(filename_b64)) + with open(filename_json, 'w') as f: f.write(json_data) @@ -103,6 +140,12 @@ if __name__ == '__main__': p = Process(config_section) max_execution_time = p.config.getint("Base64", "max_execution_time") + serv_metadata = redis.StrictRedis( + 
host=p.config.get("ARDB_Metadata", "host"), + port=p.config.getint("ARDB_Metadata", "port"), + db=p.config.getint("ARDB_Metadata", "db"), + decode_responses=True) + # Sent to the logging a description of the module publisher.info("Base64 started") @@ -127,13 +170,12 @@ if __name__ == '__main__': # Do something with the message from the queue #print(filename) content = paste.get_p_content() - search_base64(content,message) - - # (Optional) Send that thing to the next queue - #p.populate_set_out(something_has_been_done) + date = str(paste._get_p_date()) + search_base64(content,message, date) except TimeoutException: - print ("{0} processing timeout".format(paste.p_path)) - continue + p.incr_module_timeout_statistic() + print ("{0} processing timeout".format(paste.p_path)) + continue else: signal.alarm(0) diff --git a/bin/Binary.py b/bin/Binary.py new file mode 100755 index 00000000..29d6f2c5 --- /dev/null +++ b/bin/Binary.py @@ -0,0 +1,184 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* +""" + Binary module + + Detect Binary and decode it +""" +import time +import os +import datetime +import redis + +from pubsublogger import publisher + +from Helper import Process +from packages import Paste + +import re +from hashlib import sha1 +import magic +import json + +import signal + +class TimeoutException(Exception): + pass + +def timeout_handler(signum, frame): + raise TimeoutException + +signal.signal(signal.SIGALRM, timeout_handler) + +def decode_binary_string(binary_string): + return ''.join(chr(int(s[i*8:i*8+8],2)) for i in range(len(s)//8)) + +def decode_binary(binary_string): + return bytes(bytearray([int(binary_string[i:i+8], 2) for i in range(0, len(binary_string), 8)])) + + +def search_binary(content, message, date): + find = False + binary_list = re.findall(regex_binary, content) + if(len(binary_list) > 0): + + for binary in binary_list: + if len(binary) >= 40 : + decode = decode_binary(binary) + print(message) + + type = magic.from_buffer(decode, mime=True) +
print(type) + + find = True + hash = sha1(decode).hexdigest() + print(hash) + + data = {} + data['name'] = hash + data['date'] = datetime.datetime.now().strftime("%d/%m/%y") + data['origin'] = message + data['estimated type'] = type + json_data = json.dumps(data) + + date_paste = '{}/{}/{}'.format(date[0:4], date[4:6], date[6:8]) + date_key = date[0:4] + date[4:6] + date[6:8] + + serv_metadata.zincrby('binary_date:'+date_key, hash, 1) + + # first time we see this hash + if not serv_metadata.hexists('metadata_hash:'+hash, 'estimated_type'): + serv_metadata.hset('metadata_hash:'+hash, 'first_seen', date_paste) + serv_metadata.hset('metadata_hash:'+hash, 'last_seen', date_paste) + else: + serv_metadata.hset('metadata_hash:'+hash, 'last_seen', date_paste) + + # first time we see this file encoding on this paste + if serv_metadata.zscore('binary_hash:'+hash, message) is None: + print('first binary') + serv_metadata.hincrby('metadata_hash:'+hash, 'nb_seen_in_all_pastes', 1) + + serv_metadata.sadd('binary_paste:'+message, hash) # paste - hash map + serv_metadata.zincrby('binary_hash:'+hash, message, 1)# hash - paste map + + # create hash metadata + serv_metadata.hset('metadata_hash:'+hash, 'estimated_type', type) + serv_metadata.sadd('hash_all_type', type) + serv_metadata.sadd('hash_binary_all_type', type) + serv_metadata.zincrby('binary_type:'+type, date_key, 1) + + save_binary_as_file(decode, type, hash, json_data, id) + print('found {} '.format(type)) + # duplicate + else: + serv_metadata.zincrby('binary_hash:'+hash, message, 1) # number of b64 on this paste + + if(find): + publisher.warning('binary decoded') + #Send to duplicate + p.populate_set_out(message, 'Duplicate') + #send to Browse_warning_paste + msg = ('binary;{}'.format(message)) + p.populate_set_out( msg, 'alertHandler') + + msg = 'infoleak:automatic-detection="binary";{}'.format(message) + p.populate_set_out(msg, 'Tags') + +def save_binary_as_file(decode, type, hash, json_data, id): + + local_filename_b64 
= os.path.join(p.config.get("Directories", "base64"), type, hash[:2], hash) + filename_b64 = os.path.join(os.environ['AIL_HOME'], local_filename_b64) + + filename_json = os.path.join(os.environ['AIL_HOME'], + p.config.get("Directories", "base64"), type, hash[:2], hash + '.json') + + dirname = os.path.dirname(filename_b64) + if not os.path.exists(dirname): + os.makedirs(dirname) + + with open(filename_b64, 'wb') as f: + f.write(decode) + + # create hash metadata + serv_metadata.hset('metadata_hash:'+hash, 'saved_path', local_filename_b64) + serv_metadata.hset('metadata_hash:'+hash, 'size', os.path.getsize(filename_b64)) + + with open(filename_json, 'w') as f: + f.write(json_data) + + + + +if __name__ == '__main__': + # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh) + # Port of the redis instance used by pubsublogger + publisher.port = 6380 + # Script is the default channel used for the modules. + publisher.channel = 'Script' + + # Section name in bin/packages/modules.cfg + config_section = 'Binary' + + # Setup the I/O queues + p = Process(config_section) + max_execution_time = p.config.getint("Binary", "max_execution_time") + + serv_metadata = redis.StrictRedis( + host=p.config.get("ARDB_Metadata", "host"), + port=p.config.getint("ARDB_Metadata", "port"), + db=p.config.getint("ARDB_Metadata", "db"), + decode_responses=True) + + # Sent to the logging a description of the module + publisher.info("Binary started") + + regex_binary = '[0-1]{40,}' + re.compile(regex_binary) + + # Endless loop getting messages from the input queue + while True: + # Get one message from the input queue + message = p.get_from_set() + if message is None: + + publisher.debug("{} queue is empty, waiting".format(config_section)) + time.sleep(1) + continue + + filename = message + paste = Paste.Paste(filename) + + signal.alarm(max_execution_time) + try: + # Do something with the message from the queue + #print(filename) + content = 
paste.get_p_content() + date = str(paste._get_p_date()) + search_binary(content,message, date) + + except TimeoutException: + p.incr_module_timeout_statistic() + print ("{0} processing timeout".format(paste.p_path)) + continue + else: + signal.alarm(0) diff --git a/bin/Decoder.py b/bin/Decoder.py new file mode 100755 index 00000000..af385fed --- /dev/null +++ b/bin/Decoder.py @@ -0,0 +1,238 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* +""" + Decoder module + + Detect encoded content (binary, hexadecimal, base64) and decode it +""" +import time +import os +import redis +import base64 +from hashlib import sha1 +import magic +import json +import datetime + +from pubsublogger import publisher + +from Helper import Process +from packages import Paste + +import re +import signal + +class TimeoutException(Exception): + pass + +def timeout_handler(signum, frame): + raise TimeoutException + +signal.signal(signal.SIGALRM, timeout_handler) + +def hex_decoder(hexStr): + #hexStr = ''.join( hex_string.split(" ") ) + return bytes(bytearray([int(hexStr[i:i+2], 16) for i in range(0, len(hexStr), 2)])) + +def binary_decoder(binary_string): + return bytes(bytearray([int(binary_string[i:i+8], 2) for i in range(0, len(binary_string), 8)])) + +def base64_decoder(base64_string): + return base64.b64decode(base64_string) + +def decode_string(content, message, date, encoded_list, decoder_name, encoded_min_size): + find = False + for encoded in encoded_list: + if len(encoded) >= encoded_min_size: + decode = decoder_function[decoder_name](encoded) + find = True + + save_hash(decoder_name, message, date, decode) + + #remove encoded from paste content + content = content.replace(encoded, '', 1) + + if(find): + set_out_paste(decoder_name, message) + + return content + +# # TODO: FIXME check db +def save_hash(decoder_name, message, date, decoded): + print(decoder_name) + type = magic.from_buffer(decoded, mime=True) + hash = sha1(decoded).hexdigest() + print(hash) + + data = {} + data['name'] = hash + data['date'] =
datetime.datetime.now().strftime("%d/%m/%y") + data['origin'] = message + data['estimated type'] = type + json_data = json.dumps(data) + + date_paste = '{}/{}/{}'.format(date[0:4], date[4:6], date[6:8]) + date_key = date[0:4] + date[4:6] + date[6:8] + + serv_metadata.incrby(decoder_name+'_decoded:'+date_key, 1) + serv_metadata.zincrby('hash_date:'+date_key, hash, 1) + serv_metadata.zincrby(decoder_name+'_date:'+date_key, hash, 1) + + # first time we see this hash + if not serv_metadata.hexists('metadata_hash:'+hash, 'estimated_type'): + serv_metadata.hset('metadata_hash:'+hash, 'first_seen', date_paste) + serv_metadata.hset('metadata_hash:'+hash, 'last_seen', date_paste) + else: + serv_metadata.hset('metadata_hash:'+hash, 'last_seen', date_paste) + + # first time we see this hash (all encoding) on this paste + if serv_metadata.zscore('nb_seen_hash:'+hash, message) is None: + serv_metadata.hincrby('metadata_hash:'+hash, 'nb_seen_in_all_pastes', 1) + serv_metadata.sadd('hash_paste:'+message, hash) # paste - hash map + # create hash metadata + serv_metadata.hset('metadata_hash:'+hash, 'estimated_type', type) + serv_metadata.sadd('hash_all_type', type) + + # first time we see this hash encoding on this paste + if serv_metadata.zscore(decoder_name+'_hash:'+hash, message) is None: + print('first '+decoder_name) + + serv_metadata.sadd(decoder_name+'_paste:'+message, hash) # paste - hash map + + # create hash metadata + serv_metadata.sadd('hash_'+ decoder_name +'_all_type', type) + + # first time we see this hash today + if serv_metadata.zscore('hash_date:'+date_key, hash) is None: + serv_metadata.zincrby('hash_type:'+type, date_key, 1) + + # first time we see this hash encoding today + if serv_metadata.zscore(decoder_name+'_date:'+date_key, hash) is None: + serv_metadata.zincrby(decoder_name+'_type:'+type, date_key, 1) + + save_hash_on_disk(decoded, type, hash, json_data) + print('found {} '.format(type)) + + serv_metadata.hincrby('metadata_hash:'+hash, 
decoder_name+'_decoder', 1) + + serv_metadata.zincrby(decoder_name+'_type:'+type, date_key, 1) + + serv_metadata.zincrby('nb_seen_hash:'+hash, message, 1)# hash - paste map + serv_metadata.zincrby(decoder_name+'_hash:'+hash, message, 1) # number of b64 on this paste + + +def save_hash_on_disk(decode, type, hash, json_data): + + local_filename_hash = os.path.join(p.config.get("Directories", "hash"), type, hash[:2], hash) + filename_hash = os.path.join(os.environ['AIL_HOME'], local_filename_hash) + + filename_json = os.path.join(os.environ['AIL_HOME'], + p.config.get("Directories", "hash"), type, hash[:2], hash + '.json') + + dirname = os.path.dirname(filename_hash) + if not os.path.exists(dirname): + os.makedirs(dirname) + + with open(filename_hash, 'wb') as f: + f.write(decode) + + # create hash metadata + serv_metadata.hset('metadata_hash:'+hash, 'saved_path', local_filename_hash) + serv_metadata.hset('metadata_hash:'+hash, 'size', os.path.getsize(filename_hash)) + + with open(filename_json, 'w') as f: + f.write(json_data) + +def set_out_paste(decoder_name, message): + publisher.warning(decoder_name+' decoded') + #Send to duplicate + p.populate_set_out(message, 'Duplicate') + #send to Browse_warning_paste + msg = (decoder_name+';{}'.format(message)) + p.populate_set_out( msg, 'alertHandler') + + msg = 'infoleak:automatic-detection="'+decoder_name+'";{}'.format(message) + p.populate_set_out(msg, 'Tags') + + +if __name__ == '__main__': + # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh) + # Port of the redis instance used by pubsublogger + publisher.port = 6380 + # Script is the default channel used for the modules. 
+ publisher.channel = 'Script' + + # Section name in bin/packages/modules.cfg + config_section = 'Decoder' + + # Setup the I/O queues + p = Process(config_section) + + serv_metadata = redis.StrictRedis( + host=p.config.get("ARDB_Metadata", "host"), + port=p.config.getint("ARDB_Metadata", "port"), + db=p.config.getint("ARDB_Metadata", "db"), + decode_responses=True) + + # Sent to the logging a description of the module + publisher.info("Decoder started") + + regex_binary = '[0-1]{40,}' + #regex_hex = '(0[xX])?[A-Fa-f0-9]{40,}' + regex_hex = '[A-Fa-f0-9]{40,}' + regex_base64 = '(?:[A-Za-z0-9+/]{4}){2,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)' + + re.compile(regex_binary) + re.compile(regex_hex) + re.compile(regex_base64) + + # map decoder function + decoder_function = {'binary':binary_decoder,'hexadecimal':hex_decoder, 'base64':base64_decoder} + + hex_max_execution_time = p.config.getint("Hex", "max_execution_time") + binary_max_execution_time = p.config.getint("Binary", "max_execution_time") + base64_max_execution_time = p.config.getint("Base64", "max_execution_time") + + # list all decoders with their regex, + decoder_binary = {'name': 'binary', 'regex': regex_binary, 'encoded_min_size': 300, 'max_execution_time': binary_max_execution_time} + decoder_hexadecimal = {'name': 'hexadecimal', 'regex': regex_hex, 'encoded_min_size': 300, 'max_execution_time': hex_max_execution_time} + decoder_base64 = {'name': 'base64', 'regex': regex_base64, 'encoded_min_size': 40, 'max_execution_time': base64_max_execution_time} + + decoder_order = [ decoder_base64, decoder_binary, decoder_hexadecimal, decoder_base64] + + for decoder in decoder_order: + serv_metadata.sadd('all_decoder', decoder['name']) + + # Endless loop getting messages from the input queue + while True: + # Get one message from the input queue + message = p.get_from_set() + if message is None: + + publisher.debug("{} queue is empty, waiting".format(config_section)) + time.sleep(1) + continue + +
filename = message + paste = Paste.Paste(filename) + + # Do something with the message from the queue + content = paste.get_p_content() + date = str(paste._get_p_date()) + + for decoder in decoder_order: # add threshold and size limit + + # max execution time on regex + signal.alarm(decoder['max_execution_time']) + try: + encoded_list = re.findall(decoder['regex'], content) + except TimeoutException: + encoded_list = [] + p.incr_module_timeout_statistic() # add encoder type + print ("{0} processing timeout".format(paste.p_path)) + continue + else: + signal.alarm(0) + + if(len(encoded_list) > 0): + content = decode_string(content, message, date, encoded_list, decoder['name'], decoder['encoded_min_size']) diff --git a/bin/Helper.py b/bin/Helper.py index c0d836e3..d90388f5 100755 --- a/bin/Helper.py +++ b/bin/Helper.py @@ -135,6 +135,12 @@ class Process(object): db=self.config.get('RedisPubSub', 'db'), decode_responses=True) + self.serv_statistics = redis.StrictRedis( + host=self.config.get('ARDB_Statistics', 'host'), + port=self.config.get('ARDB_Statistics', 'port'), + db=self.config.get('ARDB_Statistics', 'db'), + decode_responses=True) + self.moduleNum = os.getpid() def populate_set_in(self): @@ -164,36 +170,39 @@ class Process(object): return None else: - #try: - if '.gz' in message: - path = message.split(".")[-2].split("/")[-1] - #find start of path with AIL_HOME - index_s = message.find(os.environ['AIL_HOME']) - #Stop when .gz - index_e = message.find(".gz")+3 - if(index_s == -1): - complete_path = message[0:index_e] + try: + if '.gz' in message: + path = message.split(".")[-2].split("/")[-1] + #find start of path with AIL_HOME + index_s = message.find(os.environ['AIL_HOME']) + #Stop when .gz + index_e = message.find(".gz")+3 + if(index_s == -1): + complete_path = message[0:index_e] + else: + complete_path = message[index_s:index_e] + else: - complete_path = message[index_s:index_e] + path = "-" + complete_path = "?" - else: - path = "-" - complete_path = "?" 
+ value = str(timestamp) + ", " + path + self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum), value) + self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum) + "_PATH", complete_path) + self.r_temp.sadd("MODULE_TYPE_"+self.subscriber_name, str(self.moduleNum)) - value = str(timestamp) + ", " + path - self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum), value) - self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum) + "_PATH", complete_path) - self.r_temp.sadd("MODULE_TYPE_"+self.subscriber_name, str(self.moduleNum)) - return message + curr_date = datetime.date.today() + self.serv_statistics.hincrby(curr_date.strftime("%Y%m%d"),'paste_by_modules_in:'+self.subscriber_name, 1) + return message - #except: - #print('except') - #path = "?" - #value = str(timestamp) + ", " + path - #self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum), value) - #self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum) + "_PATH", "?") - #self.r_temp.sadd("MODULE_TYPE_"+self.subscriber_name, str(self.moduleNum)) - #return message + except: + print('except') + path = "?" 
+ value = str(timestamp) + ", " + path + self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum), value) + self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum) + "_PATH", "?") + self.r_temp.sadd("MODULE_TYPE_"+self.subscriber_name, str(self.moduleNum)) + return message def populate_set_out(self, msg, channel=None): # multiproc @@ -220,3 +229,7 @@ class Process(object): time.sleep(1) continue self.pubsub.publish(message) + + def incr_module_timeout_statistic(self): + curr_date = datetime.date.today() + self.serv_statistics.hincrby(curr_date.strftime("%Y%m%d"),'paste_by_modules_timeout:'+self.subscriber_name, 1) diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index 998a676a..c3bfd8cf 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -168,7 +168,7 @@ function launching_scripts { sleep 0.1 screen -S "Script_AIL" -X screen -t "Keys" bash -c 'cd '${AIL_BIN}'; ./Keys.py; read x' sleep 0.1 - screen -S "Script_AIL" -X screen -t "Base64" bash -c 'cd '${AIL_BIN}'; ./Base64.py; read x' + screen -S "Script_AIL" -X screen -t "Decoder" bash -c 'cd '${AIL_BIN}'; ./Decoder.py; read x' sleep 0.1 screen -S "Script_AIL" -X screen -t "Bitcoin" bash -c 'cd '${AIL_BIN}'; ./Bitcoin.py; read x' sleep 0.1 diff --git a/bin/MISP_The_Hive_feeder.py b/bin/MISP_The_Hive_feeder.py index d9daa299..7fa6b223 100755 --- a/bin/MISP_The_Hive_feeder.py +++ b/bin/MISP_The_Hive_feeder.py @@ -150,16 +150,18 @@ if __name__ == "__main__": if flag_the_hive: try: HiveApi = TheHiveApi(the_hive_url, the_hive_key, cert = the_hive_verifycert) - r_serv_db.set('ail:thehive', True) except: HiveApi = False flag_the_hive = False r_serv_db.set('ail:thehive', False) print('Not connected to The HIVE') + else: + HiveApi = False if HiveApi != False and flag_the_hive: try: HiveApi.get_alert(0) + r_serv_db.set('ail:thehive', True) print('Connected to The HIVE:', the_hive_url) except thehive4py.exceptions.AlertException: HiveApi = False diff --git a/bin/Release.py b/bin/Release.py index 
6e7a8277..43c84b04 100755 --- a/bin/Release.py +++ b/bin/Release.py @@ -37,6 +37,7 @@ if __name__ == "__main__": regex = '|'.join(regexs) while True: + signal.alarm(max_execution_time) filepath = p.get_from_set() if filepath is None: publisher.debug("Script Release is Idling 10s") @@ -47,7 +48,7 @@ if __name__ == "__main__": paste = Paste.Paste(filepath) content = paste.get_p_content() - signal.alarm(max_execution_time) + #signal.alarm(max_execution_time) try: releases = set(re.findall(regex, content)) if len(releases) == 0: @@ -61,7 +62,8 @@ if __name__ == "__main__": publisher.info(to_print) except TimeoutException: - print ("{0} processing timeout".format(paste.p_path)) - continue + p.incr_module_timeout_statistic() + print ("{0} processing timeout".format(paste.p_path)) + continue else: signal.alarm(0) diff --git a/bin/SentimentAnalysis.py b/bin/SentimentAnalysis.py index 34beea3f..8442befa 100755 --- a/bin/SentimentAnalysis.py +++ b/bin/SentimentAnalysis.py @@ -167,6 +167,7 @@ if __name__ == '__main__': try: Analyse(message, server) except TimeoutException: + p.incr_module_timeout_statistic() print ("{0} processing timeout".format(message)) continue else: diff --git a/bin/Tags.py b/bin/Tags.py index 15f8f837..0a178fef 100755 --- a/bin/Tags.py +++ b/bin/Tags.py @@ -11,6 +11,7 @@ This module create tags. 
import redis import time +import datetime from pubsublogger import publisher from Helper import Process @@ -41,6 +42,12 @@ if __name__ == '__main__': db=p.config.get("ARDB_Metadata", "db"), decode_responses=True) + serv_statistics = redis.StrictRedis( + host=p.config.get('ARDB_Statistics', 'host'), + port=p.config.get('ARDB_Statistics', 'port'), + db=p.config.get('ARDB_Statistics', 'db'), + decode_responses=True) + # Sent to the logging a description of the module publisher.info("Tags module started") @@ -67,4 +74,6 @@ if __name__ == '__main__': print(" tagged: {}".format(tag)) server_metadata.sadd('tag:'+path, tag) + curr_date = datetime.date.today() + serv_statistics.hincrby(curr_date.strftime("%Y%m%d"),'paste_tagged:'+tag, 1) p.populate_set_out(message, 'MISP_The_Hive_feeder') diff --git a/bin/Tokenize.py b/bin/Tokenize.py index fdefeb6a..698b4fbc 100755 --- a/bin/Tokenize.py +++ b/bin/Tokenize.py @@ -60,8 +60,9 @@ if __name__ == "__main__": msg = '{} {} {}'.format(paste.p_path, word, score) p.populate_set_out(msg) except TimeoutException: - print ("{0} processing timeout".format(paste.p_path)) - continue + p.incr_module_timeout_statistic() + print ("{0} processing timeout".format(paste.p_path)) + continue else: signal.alarm(0) else: diff --git a/bin/Web.py b/bin/Web.py index 45e5bfbe..3d53e306 100755 --- a/bin/Web.py +++ b/bin/Web.py @@ -124,6 +124,7 @@ if __name__ == "__main__": except ipaddress.AddressValueError: continue cc = getattr(l, 'cc') + asn = '' if getattr(l, 'asn') is not None: asn = getattr(l, 'asn')[2:] #remobe b' diff --git a/bin/packages/Paste.py b/bin/packages/Paste.py index 45ed1ed2..524a7665 100755 --- a/bin/packages/Paste.py +++ b/bin/packages/Paste.py @@ -52,7 +52,7 @@ class Paste(object): :Example: - PST = Paste("/home/2013/ZEeGaez5.gz") + PST = Paste("/home/2013/01/12/ZEeGaez5.gz") """ diff --git a/bin/packages/config.cfg.sample b/bin/packages/config.cfg.sample index ca86b34a..85566654 100644 --- a/bin/packages/config.cfg.sample +++ 
b/bin/packages/config.cfg.sample @@ -2,6 +2,7 @@ bloomfilters = Blooms dicofilters = Dicos pastes = PASTES +hash = HASHS base64 = BASE64 crawled = crawled crawled_screenshot = CRAWLED_SCREENSHOT @@ -69,6 +70,14 @@ max_execution_time = 90 path = Base64/ max_execution_time = 60 +[Binary] +path = Base64/ +max_execution_time = 60 + +[Hex] +path = Base64/ +max_execution_time = 60 + [Modules_Duplicates] #Number of month to look back maximum_month_range = 3 diff --git a/bin/packages/modules.cfg b/bin/packages/modules.cfg index d8acf2dc..14605e8c 100644 --- a/bin/packages/modules.cfg +++ b/bin/packages/modules.cfg @@ -125,7 +125,7 @@ publish = Redis_Duplicate,Redis_alertHandler,Redis_Tags subscribe = Redis_ApiKey publish = Redis_Duplicate,Redis_alertHandler,Redis_Tags -[Base64] +[Decoder] subscribe = Redis_Global publish = Redis_Duplicate,Redis_alertHandler,Redis_Tags diff --git a/bin/submit_paste.py b/bin/submit_paste.py index a9b5e903..a999ec39 100755 --- a/bin/submit_paste.py +++ b/bin/submit_paste.py @@ -55,6 +55,9 @@ def create_paste(uuid, paste_content, ltags, ltagsgalaxies, name): print(' {} send to Global'.format(save_path)) r_serv_log_submit.sadd(uuid + ':paste_submit_link', full_path) + curr_date = datetime.date.today() + serv_statistics.hincrby(curr_date.strftime("%Y%m%d"),'submit_paste', 1) + return 0 def addError(uuid, errorMessage): @@ -67,6 +70,8 @@ def addError(uuid, errorMessage): def abord_file_submission(uuid, errorMessage): addError(uuid, errorMessage) r_serv_log_submit.set(uuid + ':end', 1) + curr_date = datetime.date.today() + serv_statistics.hincrby(curr_date.strftime("%Y%m%d"),'submit_abord', 1) remove_submit_uuid(uuid) @@ -161,6 +166,12 @@ if __name__ == "__main__": db=cfg.getint("ARDB_Metadata", "db"), decode_responses=True) + serv_statistics = redis.StrictRedis( + host=cfg.get('ARDB_Statistics', 'host'), + port=cfg.getint('ARDB_Statistics', 'port'), + db=cfg.getint('ARDB_Statistics', 'db'), + decode_responses=True) + expire_time = 120 
MAX_FILE_SIZE = 1000000000 ALLOWED_EXTENSIONS = ['txt', 'sh', 'pdf']
diff --git a/doc/screenshots/ail-hashedfiles.png b/doc/screenshots/ail-hashedfiles.png
new file mode 100644
index 00000000..4589d429
Binary files /dev/null and b/doc/screenshots/ail-hashedfiles.png differ
diff --git a/doc/screenshots/hashedfile-graph.png b/doc/screenshots/hashedfile-graph.png
new file mode 100644
index 00000000..01f365d8
Binary files /dev/null and b/doc/screenshots/hashedfile-graph.png differ
diff --git a/var/www/modules/Flask_config.py b/var/www/modules/Flask_config.py
index 5424ccc8..34e630f2 100644
--- a/var/www/modules/Flask_config.py
+++ b/var/www/modules/Flask_config.py
@@ -153,3 +153,19 @@ UPLOAD_FOLDER = os.path.join(os.environ['AIL_FLASK'], 'submitted')
 SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "crawled_screenshot"))
 
 max_dashboard_logs = int(cfg.get("Flask", "max_dashboard_logs"))
+
+# VT
+try:
+    from virusTotalKEYS import vt_key
+except Exception:
+    # best effort: a missing or broken key file only disables VT submission
+    vt_key = ''
+
+# vt_auth is always defined (empty string when disabled) so that modules
+# reading Flask_config.vt_auth can be imported without a configured key
+vt_auth = vt_key
+vt_enabled = vt_key != ''
+if vt_enabled:
+    print('VT submission is enabled')
+else:
+    print('VT submission is disabled')
diff --git a/var/www/modules/PasteSubmit/templates/PasteSubmit.html b/var/www/modules/PasteSubmit/templates/PasteSubmit.html
index 77a0a636..a0636332 100644
--- a/var/www/modules/PasteSubmit/templates/PasteSubmit.html
+++ b/var/www/modules/PasteSubmit/templates/PasteSubmit.html
@@ -32,6 +32,8 @@
+ +
diff --git a/var/www/modules/hashDecoded/Flask_hashDecoded.py b/var/www/modules/hashDecoded/Flask_hashDecoded.py
new file mode 100644
index 00000000..a40a5a00
--- /dev/null
+++ b/var/www/modules/hashDecoded/Flask_hashDecoded.py
@@ -0,0 +1,649 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+'''
+    Flask functions and routes for the hashDecoded (decoded files) pages
+'''
+import redis
+import os
+import datetime
+import json
+import html
+from Date import Date
+
+from io import BytesIO
+import zipfile
+
+import requests
+from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, send_file
+
+# ============ VARIABLES ============
+import Flask_config
+
+app = Flask_config.app
+cfg = Flask_config.cfg
+r_serv_metadata = Flask_config.r_serv_metadata
+vt_enabled = Flask_config.vt_enabled
+# tolerate a Flask_config that does not define vt_auth (no VirusTotal key set)
+vt_auth = getattr(Flask_config, 'vt_auth', '')
+
+hashDecoded = Blueprint('hashDecoded', __name__, template_folder='templates')
+
+# ============ FUNCTIONS ============
+
+def get_date_range(num_day):
+    """Return the num_day+1 most recent days, oldest first and today last."""
+    curr_date = datetime.date.today()
+    date = Date(curr_date.strftime("%Y%m%d"))
+    date_list = [date.substract_day(i) for i in range(0, num_day+1)]
+    return list(reversed(date_list))
+
+def substract_date(date_from, date_to):
+    """Return every day between two 'YYYYMMDD' dates (inclusive) as 'YYYYMMDD' strings.
+
+    The list is empty when date_to is before date_from; an invalid date
+    raises ValueError.
+    """
+    date_from = datetime.date(int(date_from[0:4]), int(date_from[4:6]), int(date_from[6:8]))
+    date_to = datetime.date(int(date_to[0:4]), int(date_to[4:6]), int(date_to[6:8]))
+    delta = date_to - date_from # timedelta
+    return [(date_from + datetime.timedelta(i)).strftime('%Y%m%d') for i in range(delta.days + 1)]
+
+def _normalize_date(date):
+    """Convert 'YYYY-MM-DD' to 'YYYYMMDD'; an 8 character date is returned unchanged."""
+    if len(date) != 8:
+        date = date[0:4] + date[5:7] + date[8:10]
+    return date
+
+def _parse_date_range(date_from, date_to):
+    """Normalize the two request dates and expand them to a list of days.
+
+    Return (date_from, date_to, date_range). date_range is empty when a date
+    is missing or invalid, so that the caller can fall back to today.
+    """
+    if date_from is None or date_to is None:
+        return date_from, date_to, []
+    try:
+        # normalize each date on its own: they may not share the same format
+        date_from = _normalize_date(date_from)
+        date_to = _normalize_date(date_to)
+        return date_from, date_to, substract_date(date_from, date_to)
+    except (TypeError, ValueError):
+        return date_from, date_to, []
+
+def list_sparkline_values(date_range_sparkline, hash):
+    """Return how many times hash was seen on each day of date_range_sparkline (0 when unseen)."""
+    sparklines_value = []
+    for date_day in date_range_sparkline:
+        nb_seen_this_day = r_serv_metadata.zscore('hash_date:'+date_day, hash)
+        if nb_seen_this_day is None:
+            nb_seen_this_day = 0
+        sparklines_value.append(int(nb_seen_this_day))
+    return sparklines_value
+
+def _get_vt_metadata(hash):
+    """Return (b64_vt, b64_vt_link, b64_vt_report) stored in the metadata of hash.
+
+    b64_vt is True when a VirusTotal link is stored; a missing link or
+    report is returned as an empty string.
+    """
+    b64_vt_link = r_serv_metadata.hget('metadata_hash:'+hash, 'vt_link')
+    b64_vt_report = r_serv_metadata.hget('metadata_hash:'+hash, 'vt_report')
+    b64_vt = b64_vt_link is not None
+    if not b64_vt:
+        b64_vt_link = ''
+    # hash never refreshed
+    if b64_vt_report is None:
+        b64_vt_report = ''
+    return b64_vt, b64_vt_link, b64_vt_report
+
+def get_file_icon(estimated_type):
+    """Return the Font Awesome class for the main MIME type of estimated_type."""
+    # a hash without estimated type gets the default icon instead of raising
+    file_type = estimated_type.split('/')[0] if estimated_type else ''
+    # set file icon
+    if file_type == 'application':
+        file_icon = 'fa-file '
+    elif file_type == 'audio':
+        file_icon = 'fa-file-audio-o '
+    elif file_type == 'image':
+        file_icon = 'fa-file-image-o'
+    elif file_type == 'text':
+        file_icon = 'fa-file-text-o'
+    else:
+        file_icon = 'fa-file-o'
+
+    return file_icon
+
+def get_file_icon_text(estimated_type):
+    """Return the Font Awesome glyph (unicode character) for the main MIME type of estimated_type."""
+    file_type = estimated_type.split('/')[0] if estimated_type else ''
+    # set file icon
+    if file_type == 'application':
+        file_icon_text = '\uf15b'
+    elif file_type == 'audio':
+        file_icon_text = '\uf1c7'
+    elif file_type == 'image':
+        file_icon_text = '\uf1c5'
+    elif file_type == 'text':
+        file_icon_text = '\uf15c'
+    else:
+        file_icon_text = '\uf15b'
+
+    return file_icon_text
+
+def one():
+    """Return 1."""
+    return 1
+
+# ============= ROUTES ==============
+@hashDecoded.route("/hashDecoded/all_hash_search", methods=['POST'])
+def all_hash_search():
+    """Redirect the POSTed search form to the hash list page with the same filters."""
+    date_from = request.form.get('date_from')
+    date_to = request.form.get('date_to')
+    type = request.form.get('type')
+    encoding = request.form.get('encoding')
+    return redirect(url_for('hashDecoded.hashDecoded_page', date_from=date_from, date_to=date_to, type=type, encoding=encoding))
+
+@hashDecoded.route("/hashDecoded/", methods=['GET'])
+def hashDecoded_page():
+    """Render the hashes seen in a date range, optionally filtered by file type and encoding.
+
+    Invalid filters are ignored and an invalid or missing date range falls
+    back to today.
+    """
+    date_from = request.args.get('date_from')
+    date_to = request.args.get('date_to')
+    type = request.args.get('type')
+    encoding = request.args.get('encoding')
+
+    if type == 'All types':
+        type = None
+
+    if encoding == 'All encoding':
+        encoding = None
+
+    #date_from = '20180628' or date_from = '2018-06-28'
+    #date_to = '20180628' or date_to = '2018-06-28'
+
+    l_type = r_serv_metadata.smembers('hash_all_type')
+    # verify file type input
+    if type is not None:
+        #retrieve + char
+        type = type.replace(' ', '+')
+        if type not in l_type:
+            type = None
+
+    all_encoding = r_serv_metadata.smembers('all_decoder')
+    # verify encoding input
+    if encoding is not None:
+        if encoding not in all_encoding:
+            encoding = None
+
+    date_from, date_to, date_range = _parse_date_range(date_from, date_to)
+
+    if not date_range:
+        date_range.append(datetime.date.today().strftime("%Y%m%d"))
+        date_from = date_range[0][0:4] + '-' + date_range[0][4:6] + '-' + date_range[0][6:8]
+        date_to = date_from
+
+    else:
+        date_from = date_from[0:4] + '-' + date_from[4:6] + '-' + date_from[6:8]
+        date_to = date_to[0:4] + '-' + date_to[4:6] + '-' + date_to[6:8]
+
+    # display day type bar chart
+    if len(date_range) == 1 and type is None:
+        daily_type_chart = True
+        daily_date = date_range[0]
+    else:
+        daily_type_chart = False
+        daily_date = None
+
+    l_64 = set()
+    for date in date_range:
+        if encoding is None:
+            l_hash = r_serv_metadata.zrange('hash_date:' +date, 0, -1)
+        else:
+            l_hash = r_serv_metadata.zrange(encoding+'_date:' +date, 0, -1)
+        if l_hash:
+            l_64.update(l_hash)
+
+    num_day_sparkline = 6
+    date_range_sparkline = get_date_range(num_day_sparkline)
+
+    b64_metadata = []
+    for hash in l_64:
+        # select requested base 64 type
+        estimated_type = r_serv_metadata.hget('metadata_hash:'+hash, 'estimated_type')
+        if type is not None:
+            if estimated_type is not None:
+                if estimated_type != type:
+                    continue
+
+        first_seen = r_serv_metadata.hget('metadata_hash:'+hash, 'first_seen')
+        last_seen = r_serv_metadata.hget('metadata_hash:'+hash, 'last_seen')
+        nb_seen_in_paste = r_serv_metadata.hget('metadata_hash:'+hash, 'nb_seen_in_all_pastes')
+        size = r_serv_metadata.hget('metadata_hash:'+hash, 'size')
+
+        if hash is not None and first_seen is not None and \
+                          last_seen is not None and \
+                          nb_seen_in_paste is not None and \
+                          size is not None:
+
+            file_icon = get_file_icon(estimated_type)
+
+            b64_vt, b64_vt_link, b64_vt_report = _get_vt_metadata(hash)
+
+            sparklines_value = list_sparkline_values(date_range_sparkline, hash)
+
+            b64_metadata.append( (file_icon, estimated_type, hash, nb_seen_in_paste, size, first_seen, last_seen, b64_vt, b64_vt_link, b64_vt_report, sparklines_value) )
+
+    return render_template("hashDecoded.html", l_64=b64_metadata, vt_enabled=vt_enabled, l_type=l_type, type=type, daily_type_chart=daily_type_chart, daily_date=daily_date,
+                                                encoding=encoding, all_encoding=all_encoding, date_from=date_from, date_to=date_to)
+
+@hashDecoded.route('/hashDecoded/hash_by_type')
+def hash_by_type():
+    """Render the hash_type page, currently always for the 'text/plain' type."""
+    # NOTE(review): the 'type' request argument is ignored here; this looks
+    # like an unfinished placeholder -- confirm before wiring it through
+    type = 'text/plain'
+    return render_template('hash_type.html',type = type)
+
+@hashDecoded.route('/hashDecoded/hash_hash')
+def hash_hash():
+    """Render the hash_hash page (the 'hash' request argument is not used)."""
+    return render_template('hash_hash.html')
+
+@hashDecoded.route('/hashDecoded/showHash')
+def showHash():
+    """Render the detail page of a hash; unknown or missing hashes show the hash list."""
+    hash = request.args.get('hash')
+
+    # TODO FIXME show error
+    if hash is None:
+        return hashDecoded_page()
+
+    estimated_type = r_serv_metadata.hget('metadata_hash:'+hash, 'estimated_type')
+    # hash not found
+    # TODO FIXME show error
+    if estimated_type is None:
+        return hashDecoded_page()
+
+    file_icon = get_file_icon(estimated_type)
+    size = r_serv_metadata.hget('metadata_hash:'+hash, 'size')
+    first_seen = r_serv_metadata.hget('metadata_hash:'+hash, 'first_seen')
+    last_seen = r_serv_metadata.hget('metadata_hash:'+hash, 'last_seen')
+    nb_seen_in_all_pastes = r_serv_metadata.hget('metadata_hash:'+hash, 'nb_seen_in_all_pastes')
+
+    # get all encoding for this hash
+    list_hash_decoder = []
+    list_decoder = r_serv_metadata.smembers('all_decoder')
+    for decoder in list_decoder:
+        encoding = r_serv_metadata.hget('metadata_hash:'+hash, decoder+'_decoder')
+        if encoding is not None:
+            list_hash_decoder.append({'encoding': decoder, 'nb_seen': encoding})
+
+    num_day_type = 6
+    date_range_sparkline = get_date_range(num_day_type)
+    sparkline_values = list_sparkline_values(date_range_sparkline, hash)
+
+    b64_vt, b64_vt_link, b64_vt_report = _get_vt_metadata(hash)
+
+    return render_template('showHash.html', hash=hash, vt_enabled=vt_enabled, b64_vt=b64_vt, b64_vt_link=b64_vt_link,
+                            b64_vt_report=b64_vt_report,
+                            size=size, estimated_type=estimated_type, file_icon=file_icon,
+                            first_seen=first_seen, list_hash_decoder=list_hash_decoder,
+                            last_seen=last_seen, nb_seen_in_all_pastes=nb_seen_in_all_pastes, sparkline_values=sparkline_values)
+
+@app.route('/hashDecoded/downloadHash')
+def downloadHash():
+    """Send the decoded file stored for a hash as a zip attachment."""
+    # a missing parameter is handled like an unknown hash
+    hash = request.args.get('hash', '')
+    # sanitize hash
+    hash = hash.split('/')[0]
+
+    # hash exist
+    if r_serv_metadata.hget('metadata_hash:'+hash, 'estimated_type') is not None:
+
+        b64_path = r_serv_metadata.hget('metadata_hash:'+hash, 'saved_path')
+        try:
+            # built inside the try: a hash without saved_path must not raise
+            b64_full_path = os.path.join(os.environ['AIL_HOME'], b64_path)
+            with open(b64_full_path, 'rb') as f:
+                hash_content = f.read()
+
+            # zip buffer
+            result = BytesIO()
+            with zipfile.ZipFile(result, "w") as zf:
+                #zf.setpassword(b"infected")
+                zf.writestr( hash, hash_content)
+
+            filename = hash + '.zip'
+            result.seek(0)
+
+            return send_file(result, attachment_filename=filename, as_attachment=True)
+        except Exception as e:
+            print(e)
+            return 'Server Error'
+    else:
+        # the hash comes from the request: escape it before echoing it as HTML
+        return 'hash: ' + html.escape(hash) + " don't exist"
+
+@hashDecoded.route('/hashDecoded/hash_by_type_json')
+def hash_by_type_json():
+    """Return, per day and per decoder, the counter stored for a file type (JSON)."""
+    type = request.args.get('type')
+    if type is None:
+        return jsonify()
+
+    #retrieve + char
+    type = type.replace(' ', '+')
+
+    num_day_type = 30
+    date_range = get_date_range(num_day_type)
+
+    #verify input
+    if type in r_serv_metadata.smembers('hash_all_type'):
+        all_decoder = r_serv_metadata.smembers('all_decoder')
+
+        range_decoder = []
+        for date in date_range:
+            day_decoder = {}
+            day_decoder['date']= date[0:4] + '-' + date[4:6] + '-' + date[6:8]
+            for decoder in all_decoder:
+                num_day_decoder = r_serv_metadata.zscore(decoder+'_type:'+type, date)
+                if num_day_decoder is None:
+                    num_day_decoder = 0
+                day_decoder[decoder]= num_day_decoder
+            range_decoder.append(day_decoder)
+
+        return jsonify(range_decoder)
+    else:
+        return jsonify()
+
+@hashDecoded.route('/hashDecoded/decoder_type_json')
+def decoder_type_json():
+    """Return the number of decoded files per decoder over a date range (JSON), optionally for one file type."""
+    date_from = request.args.get('date_from')
+    date_to = request.args.get('date_to')
+
+    typ = request.args.get('type')
+
+    if typ == 'All types':
+        typ = None
+
+    # verify file type input
+    if typ is not None:
+        #retrieve + char
+        typ = typ.replace(' ', '+')
+        if typ not in r_serv_metadata.smembers('hash_all_type'):
+            typ = None
+
+    # sorted (ascending) so the decoder order is stable -- presumably for chart colors
+    all_decoder = sorted(r_serv_metadata.smembers('all_decoder'))
+
+    date_from, date_to, date_range = _parse_date_range(date_from, date_to)
+
+    if not date_range:
+        date_range.append(datetime.date.today().strftime("%Y%m%d"))
+
+    nb_decoded = dict.fromkeys(all_decoder, 0)
+
+    for date in date_range:
+        for decoder in all_decoder:
+            if typ is None:
+                nb_decod = r_serv_metadata.get(decoder+'_decoded:'+date)
+            else:
+                nb_decod = r_serv_metadata.zscore(decoder+'_type:'+typ, date)
+
+            if nb_decod is not None:
+                nb_decoded[decoder] = nb_decoded[decoder] + int(nb_decod)
+
+    to_json = [{'name': decoder, 'value': nb_decoded[decoder]} for decoder in all_decoder]
+    return jsonify(to_json)
+
+
+@hashDecoded.route('/hashDecoded/daily_type_json')
+def daily_type_json():
+    """Return the counter of each estimated type seen on one day (JSON)."""
+    date = request.args.get('date')
+    if date is None:
+        return jsonify([])
+
+    daily_type = set()
+    l_b64 = r_serv_metadata.zrange('hash_date:' +date, 0, -1)
+    for hash in l_b64:
+        estimated_type = r_serv_metadata.hget('metadata_hash:'+hash, 'estimated_type')
+        if estimated_type is not None:
+            daily_type.add(estimated_type)
+
+    type_value = []
+    for day_type in daily_type:
+        num_day_type = r_serv_metadata.zscore('hash_type:'+day_type, date)
+        # a type without counter for this day counts as 0
+        if num_day_type is None:
+            num_day_type = 0
+        type_value.append({ 'date' : day_type, 'value' : int( num_day_type )})
+
+    return jsonify(type_value)
+
+@hashDecoded.route('/hashDecoded/range_type_json')
+def range_type_json():
+    """Return per-type counters over a date range (JSON).
+
+    For a single requested day the result holds one entry per type with its
+    per-decoder counters, otherwise one entry per day with per-type counters.
+    """
+    date_from = request.args.get('date_from')
+    date_to = request.args.get('date_to')
+
+    date_from, date_to, date_range = _parse_date_range(date_from, date_to)
+
+    if not date_range:
+        date_range.append(datetime.date.today().strftime("%Y%m%d"))
+
+    all_type = set()
+    for date in date_range:
+        l_hash = r_serv_metadata.zrange('hash_date:' +date, 0, -1)
+        if l_hash:
+            for hash in l_hash:
+                estimated_type = r_serv_metadata.hget('metadata_hash:'+hash, 'estimated_type')
+                # skip hashes without type: None cannot be used to build a key
+                if estimated_type is not None:
+                    all_type.add(estimated_type)
+
+    range_type = []
+
+    for date in date_range:
+        if len(date_range) == 1:
+            if date==date_from and date==date_to:
+                list_decoder = r_serv_metadata.smembers('all_decoder')
+                for type in all_type:
+                    day_type = {}
+                    day_type['date']= type
+                    for decoder in list_decoder:
+                        num_day_decoder = r_serv_metadata.zscore(decoder+'_type:'+type, date)
+                        if num_day_decoder is None:
+                            num_day_decoder = 0
+                        day_type[decoder]= num_day_decoder
+                    range_type.append(day_type)
+            else:
+                range_type = ''
+        else:
+            day_type = {}
+            day_type['date']= date[0:4] + '-' + date[4:6] + '-' + date[6:8]
+            for type in all_type:
+                num_day_type = r_serv_metadata.zscore('hash_type:'+type, date)
+                if num_day_type is None:
+                    num_day_type = 0
+                day_type[type]= num_day_type
+            range_type.append(day_type)
+
+    return jsonify(range_type)
+
+@hashDecoded.route('/hashDecoded/hash_graph_line_json')
+def hash_graph_line_json():
+    """Return how many times a hash was seen on each of the last days (JSON)."""
+    hash = request.args.get('hash')
+
+    # TODO FIXME: honor the date_from/date_to request arguments, the range is fixed for now
+    nb_days_seen_in_pastes = 30
+
+    date_range_seen_in_pastes = get_date_range(nb_days_seen_in_pastes)
+
+    #verify input
+    if hash is not None and r_serv_metadata.hget('metadata_hash:'+hash, 'estimated_type') is not None:
+        json_seen_in_paste = []
+        for date in date_range_seen_in_pastes:
+            nb_seen_this_day = r_serv_metadata.zscore('hash_date:'+date, hash)
+            if nb_seen_this_day is None:
+                nb_seen_this_day = 0
+            date = date[0:4] + '-' + date[4:6] + '-' + date[6:8]
+            json_seen_in_paste.append({ 'date' : date, 'value' : int( nb_seen_this_day )})
+
+        return jsonify(json_seen_in_paste)
+    else:
+        return jsonify()
+
+
+@hashDecoded.route('/hashDecoded/hash_graph_node_json')
+def hash_graph_node_json():
+    """Return the graph (nodes and links) of a hash, its pastes and the other hashes of these pastes (JSON)."""
+    hash = request.args.get('hash')
+    # checked first: a missing hash cannot be used to build a redis key
+    if hash is None:
+        return jsonify({})
+
+    estimated_type = r_serv_metadata.hget('metadata_hash:'+hash, 'estimated_type')
+    if estimated_type is None:
+        return jsonify({})
+
+    nodes_set_hash = set()
+    nodes_set_paste = set()
+    links_set = set()
+
+    url = hash
+    first_seen = r_serv_metadata.hget('metadata_hash:'+hash, 'first_seen')
+    last_seen = r_serv_metadata.hget('metadata_hash:'+hash, 'last_seen')
+    nb_seen_in_paste = r_serv_metadata.hget('metadata_hash:'+hash, 'nb_seen_in_all_pastes')
+    size = r_serv_metadata.hget('metadata_hash:'+hash, 'size')
+
+    nodes_set_hash.add((hash, 1, first_seen, last_seen, estimated_type, nb_seen_in_paste, size, url))
+
+    #get related paste
+    l_pastes = r_serv_metadata.zrange('nb_seen_hash:'+hash, 0, -1)
+    for paste in l_pastes:
+        url = paste
+        nb_hash_in_paste = r_serv_metadata.scard('hash_paste:'+paste)
+
+        nodes_set_paste.add((paste, 2,nb_hash_in_paste,url))
+        links_set.add((hash, paste))
+
+        l_hash = r_serv_metadata.smembers('hash_paste:'+paste)
+        for child_hash in l_hash:
+            if child_hash != hash:
+                url = child_hash
+                first_seen = r_serv_metadata.hget('metadata_hash:'+child_hash, 'first_seen')
+                last_seen = r_serv_metadata.hget('metadata_hash:'+child_hash, 'last_seen')
+                nb_seen_in_paste = r_serv_metadata.hget('metadata_hash:'+child_hash, 'nb_seen_in_all_pastes')
+                size = r_serv_metadata.hget('metadata_hash:'+child_hash, 'size')
+                child_type = r_serv_metadata.hget('metadata_hash:'+child_hash, 'estimated_type')
+
+                nodes_set_hash.add((child_hash, 3, first_seen, last_seen, child_type, nb_seen_in_paste, size, url))
+                links_set.add((child_hash, paste))
+
+    nodes = []
+    for node in nodes_set_hash:
+        nodes.append({"id": node[0], "group": node[1], "first_seen": node[2], "last_seen": node[3], 'estimated_type': node[4], "nb_seen_in_paste": node[5], "size": node[6], 'icon': get_file_icon_text(node[4]),"url": url_for('hashDecoded.showHash', hash=node[7]), 'hash': True})
+    for node in nodes_set_paste:
+        nodes.append({"id": node[0], "group": node[1], "nb_seen_in_paste": node[2],"url": url_for('showsavedpastes.showsavedpaste', paste=node[3]), 'hash': False})
+    links = [{"source": link[0], "target": link[1]} for link in links_set]
+    return jsonify({"nodes": nodes, "links": links})
+
+@hashDecoded.route('/hashDecoded/hash_types')
+def hash_types():
+    """Render the hash_types page for a fixed (hard-coded) date range."""
+    date_from = 20180701
+    date_to = 20180706
+    return render_template('hash_types.html', date_from=date_from, date_to=date_to)
+
+@hashDecoded.route('/hashDecoded/send_file_to_vt_js')
+def send_file_to_vt_js():
+    """Submit the decoded file of a hash to VirusTotal and store the report link (JSON).
+
+    An empty JSON object is returned when the hash is missing or unknown, or
+    when VirusTotal does not answer with HTTP 200.
+    """
+    hash = request.args.get('hash')
+    if hash is None:
+        return jsonify()
+
+    b64_path = r_serv_metadata.hget('metadata_hash:'+hash, 'saved_path')
+    # unknown hash: nothing to send
+    if b64_path is None:
+        return jsonify()
+    b64_full_path = os.path.join(os.environ['AIL_HOME'], b64_path)
+    with open(b64_full_path, 'rb') as f:
+        b64_content = f.read()
+
+    files = {'file': (hash, b64_content)}
+    response = requests.post('https://www.virustotal.com/vtapi/v2/file/scan', files=files, params= {'apikey': vt_auth})
+    # same convention as update_vt_result: no usable answer gives an empty JSON
+    if response.status_code != 200:
+        return jsonify()
+    json_response = response.json()
+
+    vt_link = json_response['permalink'].split('analysis')[0] + 'analysis/'
+    r_serv_metadata.hset('metadata_hash:'+hash, 'vt_link', vt_link)
+    vt_report = 'Please Refresh'
+    r_serv_metadata.hset('metadata_hash:'+hash, 'vt_report', vt_report)
+
+    return jsonify({'vt_link': vt_link, 'vt_report': vt_report})
+
+
+@hashDecoded.route('/hashDecoded/update_vt_result')
+def update_vt_result():
+    """Fetch the VirusTotal report of a hash, store its summary and return it (JSON)."""
+    global vt_enabled
+
+    hash = request.args.get('hash')
+    if hash is None:
+        return jsonify()
+
+    params = {'apikey': vt_auth, 'resource': hash}
+    response = requests.get('https://www.virustotal.com/vtapi/v2/file/report',params=params)
+    if response.status_code == 200:
+        json_response = response.json()
+        response_code = json_response['response_code']
+        # report exist
+        if response_code == 1:
+            total = json_response['total']
+            positive = json_response['positives']
+
+            b64_vt_report = 'Detection {}/{}'.format(positive,total)
+        # no report found
+        elif response_code == 0:
+            b64_vt_report = 'No report found'
+        # file in queue
+        elif response_code == -2:
+            b64_vt_report = 'File in queue'
+        # unexpected code: leave the stored report untouched
+        else:
+            return jsonify()
+
+        r_serv_metadata.hset('metadata_hash:'+hash, 'vt_report', b64_vt_report)
+        return jsonify(hash=hash, report_vt=b64_vt_report)
+    elif response.status_code == 403:
+        # update the shared config and the copy this module passes to its templates
+        Flask_config.vt_enabled = False
+        vt_enabled = False
+        print('VT is disabled')
+        return jsonify()
+    else:
+        # TODO FIXME make json response
+        return jsonify()
+
+# ========= REGISTRATION =========
+app.register_blueprint(hashDecoded)
diff --git a/var/www/modules/hashDecoded/templates/hashDecoded.html b/var/www/modules/hashDecoded/templates/hashDecoded.html
new file mode 100644
index 00000000..f65f647b
--- /dev/null
+++ b/var/www/modules/hashDecoded/templates/hashDecoded.html
@@ -0,0 +1,680 @@
+ + + + + + + + HashesDecoded - AIL + + + + + + + + + + + + + + + + + + + + + + + {% include 'navbar.html' %} +
+
+
+

Hashed Files

+
+ +
+
+ + +
+
+
+
+
+ +
+
+
+ Select a date range : + +
+ + +
+
+ + +
+ Encoding : + + File Type : + +
+ + +
+
+
+
+
+
+
+ + + {% if l_64|length != 0 %} + {% if date_from|string == date_to|string %} +

{{ date_from }} Hashed files:

+ {% else %} +

{{ date_from }} to {{ date_to }} Hashed files:

+ {% endif %} + + + + + + + + + + + + + + + {% for b64 in l_64 %} + + + + + + + + + + + {% endfor %} + +
estimated typehashfirst seenlast seennb pastesizeVirus TotalSparkline
  {{ b64[1] }}{{ b64[2] }}{{ b64[5] }}{{ b64[6] }}{{ b64[3] }}{{ b64[4] }} + {% if vt_enabled %} + {% if not b64[7] %} + + + + {% else %} + VT Report + {% endif %} + + {% else %} + Virus Total submission is disabled + {% endif %} + + +
+ {% else %} + {% if date_from|string == date_to|string %} +

{{ date_from }}, No Hashes

+ {% else %} +

{{ date_from }} to {{ date_to }}, No Hashes

+ {% endif %} + {% endif %} +
+ +
+ + + + + + + + + + + + + + + + + + + diff --git a/var/www/modules/hashDecoded/templates/header_hashDecoded.html b/var/www/modules/hashDecoded/templates/header_hashDecoded.html new file mode 100644 index 00000000..69fb9da9 --- /dev/null +++ b/var/www/modules/hashDecoded/templates/header_hashDecoded.html @@ -0,0 +1 @@ +
  • hashesDecoded
  • diff --git a/var/www/modules/hashDecoded/templates/showHash.html b/var/www/modules/hashDecoded/templates/showHash.html new file mode 100644 index 00000000..458c4c92 --- /dev/null +++ b/var/www/modules/hashDecoded/templates/showHash.html @@ -0,0 +1,611 @@ + + + + + + + + Hash Information - AIL + + + + + + + + + + + + + + + + + + + + + + + {% include 'navbar.html' %} + +
    +
    + + +
    + + +
    +
    +

    {{ hash }} :

    +     + 6 / 26 +
    • + +
      +
      + + + + + + + + + + + + + + + + + + + + +
      Estimated typeFirst_seenLast_seenSize (Kb)Nb seen
        {{ estimated_type }}{{ first_seen }}{{ last_seen }}{{ size }}{{ nb_seen_in_all_pastes }}
      +
      +
      +
      +
      +
      +
    + + {% if vt_enabled %} + {% if not b64_vt %} + + + + {% else %} + VT Report + {% endif %} + + {% else %} + Virus Total submission is disabled + {% endif %} + + + + +
    + +
    +
    + +
    +
    + Graph +
    +
    +
    +
    +
    +
    +
    + +
    + +
    +
    + Encoding +
    +
    + {% for encoding in list_hash_decoder %} + + {% endfor %} +
    +
    + +
    +
    + Graph +
    +
    +
    + + + +
      +
    • +
    • +

      Double click on a node to open Hash/Paste

      + + Current Hash
      + + Hashes
      + + Pastes +

      +
    • +
    • + Hash Types: +
    • +
    • + Application
      + Audio
      + Image
      + Text
      + Other +
    • +
    +
    +
    +
    + +
    +
    + Graph +
    +
    +
    +
    +
    +
    + +
    + + + + + + + + + + + + + + + + + + diff --git a/var/www/modules/showpaste/Flask_showpaste.py b/var/www/modules/showpaste/Flask_showpaste.py index 6fa5a983..39e2283e 100644 --- a/var/www/modules/showpaste/Flask_showpaste.py +++ b/var/www/modules/showpaste/Flask_showpaste.py @@ -7,12 +7,18 @@ import redis import os import json +import os import flask +<<<<<<< HEAD from flask import Flask, render_template, jsonify, request, Blueprint, make_response, Response, send_from_directory +======= +from flask import Flask, render_template, jsonify, request, Blueprint, make_response, redirect, url_for, Response, send_from_directory +>>>>>>> master import difflib import ssdeep import Paste +import requests # ============ VARIABLES ============ import Flask_config @@ -30,14 +36,16 @@ DiffMaxLineLength = Flask_config.DiffMaxLineLength bootstrap_label = Flask_config.bootstrap_label misp_event_url = Flask_config.misp_event_url hive_case_url = Flask_config.hive_case_url +vt_enabled = Flask_config.vt_enabled SCREENSHOT_FOLDER = Flask_config.SCREENSHOT_FOLDER showsavedpastes = Blueprint('showsavedpastes', __name__, template_folder='templates') # ============ FUNCTIONS ============ -def showpaste(content_range): - requested_path = request.args.get('paste', '') +def showpaste(content_range, requested_path): + vt_enabled = Flask_config.vt_enabled + paste = Paste.Paste(requested_path) p_date = str(paste._get_p_date()) p_date = p_date[6:]+'/'+p_date[4:6]+'/'+p_date[0:4] @@ -121,7 +129,6 @@ def showpaste(content_range): else: automatic = False - tag_hash = ssdeep.hash(tag) if r_serv_statistics.sismember('tp:'+tag, requested_path): tag_status_tp = True else: @@ -133,6 +140,40 @@ def showpaste(content_range): list_tags.append( (tag, automatic, tag_status_tp, tag_status_fp) ) + l_64 = [] + # load hash files + if r_serv_metadata.scard('hash_paste:'+requested_path) > 0: + set_b64 = r_serv_metadata.smembers('hash_paste:'+requested_path) + for hash in set_b64: + nb_in_file = 
int(r_serv_metadata.zscore('nb_seen_hash:'+hash, requested_path)) + estimated_type = r_serv_metadata.hget('metadata_hash:'+hash, 'estimated_type') + file_type = estimated_type.split('/')[0] + # set file icon + if file_type == 'application': + file_icon = 'fa-file-o ' + elif file_type == 'audio': + file_icon = 'fa-file-video-o ' + elif file_type == 'image': + file_icon = 'fa-file-image-o' + elif file_type == 'text': + file_icon = 'fa-file-text-o' + else: + file_icon = 'fa-file' + saved_path = r_serv_metadata.hget('metadata_hash:'+hash, 'saved_path') + if r_serv_metadata.hexists('metadata_hash:'+hash, 'vt_link'): + b64_vt = True + b64_vt_link = r_serv_metadata.hget('metadata_hash:'+hash, 'vt_link') + b64_vt_report = r_serv_metadata.hget('metadata_hash:'+hash, 'vt_report') + else: + b64_vt = False + b64_vt_link = '' + b64_vt_report = r_serv_metadata.hget('metadata_hash:'+hash, 'vt_report') + # hash never refreshed + if b64_vt_report is None: + b64_vt_report = '' + + l_64.append( (file_icon, estimated_type, hash, saved_path, nb_in_file, b64_vt, b64_vt_link, b64_vt_report) ) + crawler_metadata = {} if 'infoleak:submission="crawler"' in l_tags: crawler_metadata['get_metadata'] = True @@ -171,13 +212,15 @@ def showpaste(content_range): return render_template("show_saved_paste.html", date=p_date, bootstrap_label=bootstrap_label, active_taxonomies=active_taxonomies, active_galaxies=active_galaxies, list_tags=list_tags, source=p_source, encoding=p_encoding, language=p_language, size=p_size, mime=p_mime, lineinfo=p_lineinfo, content=p_content, initsize=len(p_content), duplicate_list = p_duplicate_list, simil_list = p_simil_list, hashtype_list = p_hashtype_list, date_list=p_date_list, crawler_metadata=crawler_metadata, - misp=misp, hive=hive, misp_eventid=misp_eventid, misp_url=misp_url, hive_caseid=hive_caseid, hive_url=hive_url) + l_64=l_64, vt_enabled=vt_enabled, misp=misp, hive=hive, misp_eventid=misp_eventid, misp_url=misp_url, hive_caseid=hive_caseid, hive_url=hive_url) 
# ============ ROUTES ============ @showsavedpastes.route("/showsavedpaste/") #completely shows the paste in a new tab def showsavedpaste(): - return showpaste(0) + requested_path = request.args.get('paste', '') + print(requested_path) + return showpaste(0, requested_path) @showsavedpastes.route("/showsavedrawpaste/") #shows raw def showsavedrawpaste(): @@ -189,7 +232,8 @@ def showsavedrawpaste(): @showsavedpastes.route("/showpreviewpaste/") def showpreviewpaste(): num = request.args.get('num', '') - return "|num|"+num+"|num|"+showpaste(max_preview_modal) + requested_path = request.args.get('paste', '') + return "|num|"+num+"|num|"+showpaste(max_preview_modal, requested_path) @showsavedpastes.route("/getmoredata/") @@ -220,5 +264,26 @@ def showDiff(): def screenshot(filename): return send_from_directory(SCREENSHOT_FOLDER, filename+'.png', as_attachment=True) +@showsavedpastes.route('/send_file_to_vt/', methods=['POST']) +def send_file_to_vt(): + b64_path = request.form['b64_path'] + paste = request.form['paste'] + hash = request.form['hash'] + + b64_full_path = os.path.join(os.environ['AIL_HOME'], b64_path) + b64_content = '' + with open(b64_full_path, 'rb') as f: + b64_content = f.read() + + files = {'file': (hash, b64_content)} + response = requests.post('https://www.virustotal.com/vtapi/v2/file/scan', files=files, params=vt_auth) + json_response = response.json() + print(json_response) + + vt_b64_link = json_response['permalink'].split('analysis')[0] + 'analysis/' + r_serv_metadata.hset('metadata_hash:'+hash, 'vt_link', vt_b64_link) + + return redirect(url_for('showsavedpastes.showsavedpaste', paste=paste)) + # ========= REGISTRATION ========= app.register_blueprint(showsavedpastes) diff --git a/var/www/modules/showpaste/templates/show_saved_paste.html b/var/www/modules/showpaste/templates/show_saved_paste.html index 866f64c1..54ea99b5 100644 --- a/var/www/modules/showpaste/templates/show_saved_paste.html +++ 
b/var/www/modules/showpaste/templates/show_saved_paste.html @@ -28,6 +28,11 @@ overflow-x: hidden; width:100%; } + + .red_table thead{ + background: #d91f2d; + color: #fff; + } @@ -374,6 +379,47 @@ {% endif %} + {% if l_64|length != 0 %} +

    Hash files:

    + + + + + + + + + + + {% for b64 in l_64 %} + + + + + + + {% endfor %} + +
    estimated typehashsaved_pathVirus Total
      {{ b64[1] }}{{ b64[2] }} ({{ b64[4] }}){{ b64[3] }} + {% if vt_enabled %} + {% if not b64[5] %} + + + + {% else %} + VT Report + {% endif %} + + {% else %} + Virus Total submission is disabled + {% endif %} +
    + {% endif %} + {% if crawler_metadata['get_metadata'] %}
    @@ -443,9 +489,36 @@ }); $('#tableDup').DataTable(); + $('#tableb64').DataTable({ + "aLengthMenu": [[5, 10, 15, -1], [5, 10, 15, "All"]], + "iDisplayLength": 5, + "order": [[ 1, "asc" ]] + }); }); + +