From 0e67b569063b146694755d7d471d05c173ad3b44 Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Thu, 19 Jul 2018 16:52:09 +0200
Subject: [PATCH] add: Decoder module

---
 bin/Decoder.py | 230 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 230 insertions(+)
 create mode 100755 bin/Decoder.py

diff --git a/bin/Decoder.py b/bin/Decoder.py
new file mode 100755
index 00000000..b8ed92aa
--- /dev/null
+++ b/bin/Decoder.py
@@ -0,0 +1,230 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+"""
+    Decoder module
+
+    Detect encoded content (binary, hexadecimal, base64) in pastes and decode it
+"""
+import time
+import os
+import redis
+import base64
+from hashlib import sha1
+import magic
+import json
+import datetime
+
+from pubsublogger import publisher
+
+from Helper import Process
+from packages import Paste
+
+import re
+import signal
+
+class TimeoutException(Exception):
+    pass
+
+def timeout_handler(signum, frame):
+    raise TimeoutException
+
+# turn SIGALRM into a TimeoutException so long regex searches can be aborted
+signal.signal(signal.SIGALRM, timeout_handler)
+
+def hex_decoder(hexStr):
+    # decode a string of hexadecimal digits, two characters per byte
+    return bytes(bytearray([int(hexStr[i:i+2], 16) for i in range(0, len(hexStr), 2)]))
+
+def binary_decoder(binary_string):
+    # decode a string of '0'/'1' characters, eight characters per byte
+    return bytes(bytearray([int(binary_string[i:i+8], 2) for i in range(0, len(binary_string), 8)]))
+
+def base64_decoder(base64_string):
+    return base64.b64decode(base64_string)
+
+def decode_string(content, message, date, encoded_list, decoder_name, encoded_min_size):
+    find = False
+    for encoded in encoded_list:
+        if len(encoded) >= encoded_min_size:
+            decode = decoder_function[decoder_name](encoded)
+            find = True
+
+            save_hash(decoder_name, message, date, decode)
+
+            # remove the encoded blob from the paste content
+            content = content.replace(encoded, '', 1)
+
+    if find:
+        set_out_paste(decoder_name, message)
+
+    return content
+
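+# Redis/ARDB keys written by save_hash() and save_hash_on_disk() below,
+# per decoder (binary / hex / base64):
+#   <decoder>_date:<YYYYMMDD>   zset: hash -> number of hits that day
+#   <decoder>_hash:<sha1>       zset: paste path -> number of hits in that paste
+#   <decoder>_paste:<paste>     set:  sha1 hashes found in that paste
+#   <decoder>_type:<mime type>  zset: date -> number of hits for that mime type
+#   metadata_hash:<sha1>        hash: first_seen, last_seen, estimated_type,
+#                                     nb_seen_in_all_pastes, saved_path, size
+#   hash_all_type, hash_<decoder>_all_type: sets of mime types seen so far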
+# # TODO: FIXME check db
+def save_hash(decoder_name, message, date, decoded):
+    type = magic.from_buffer(decoded, mime=True)
+    hash = sha1(decoded).hexdigest()
+
+    data = {}
+    data['name'] = hash
+    data['date'] = datetime.datetime.now().strftime("%d/%m/%y")
+    data['origin'] = message
+    data['estimated type'] = type
+    json_data = json.dumps(data)
+
+    date_paste = '{}/{}/{}'.format(date[0:4], date[4:6], date[6:8])
+    date_key = date[0:4] + date[4:6] + date[6:8]
+
+    serv_metadata.zincrby(decoder_name+'_date:'+date_key, hash, 1)
+
+    # first time we see this hash
+    if not serv_metadata.hexists('metadata_hash:'+hash, 'estimated_type'):
+        serv_metadata.hset('metadata_hash:'+hash, 'first_seen', date_paste)
+        serv_metadata.hset('metadata_hash:'+hash, 'last_seen', date_paste)
+    else:
+        serv_metadata.hset('metadata_hash:'+hash, 'last_seen', date_paste)
+
+    # first time we see this hash encoded in this paste
+    if serv_metadata.zscore(decoder_name+'_hash:'+hash, message) is None:
+        print('first '+decoder_name)
+        serv_metadata.hincrby('metadata_hash:'+hash, 'nb_seen_in_all_pastes', 1)
+
+        serv_metadata.sadd(decoder_name+'_paste:'+message, hash)      # paste - hash map
+        serv_metadata.zincrby(decoder_name+'_hash:'+hash, message, 1) # hash - paste map
+
+        # create hash metadata
+        serv_metadata.hset('metadata_hash:'+hash, 'estimated_type', type)
+        serv_metadata.sadd('hash_all_type', type)
+        serv_metadata.sadd('hash_'+ decoder_name +'_all_type', type)
+        serv_metadata.zincrby(decoder_name+'_type:'+type, date_key, 1)
+
+        save_hash_on_disk(decoded, type, hash, json_data)
+        print('found {}'.format(type))
+    # duplicate
+    else:
+        serv_metadata.zincrby(decoder_name+'_hash:'+hash, message, 1) # number of occurrences in this paste
+
+
+def save_hash_on_disk(decode, type, hash, json_data):
+
+    local_filename_hash = os.path.join(p.config.get("Directories", "hash"), type, hash[:2], hash)
+    filename_hash = os.path.join(os.environ['AIL_HOME'], local_filename_hash)
+
+    filename_json = os.path.join(os.environ['AIL_HOME'],
+                                 p.config.get("Directories", "hash"), type, hash[:2], hash + '.json')
+
+    dirname = os.path.dirname(filename_hash)
+    if not os.path.exists(dirname):
+        os.makedirs(dirname)
+
+    with open(filename_hash, 'wb') as f:
+        f.write(decode)
+
+    # create hash metadata
+    serv_metadata.hset('metadata_hash:'+hash, 'saved_path', local_filename_hash)
+    serv_metadata.hset('metadata_hash:'+hash, 'size', os.path.getsize(filename_hash))
+
+    with open(filename_json, 'w') as f:
+        f.write(json_data)
+
+def set_out_paste(decoder_name, message):
+    publisher.warning(decoder_name+' decoded')
+    # send to Duplicate
+    p.populate_set_out(message, 'Duplicate')
+    # send to Browse_warning_paste
+    msg = (decoder_name+';{}'.format(message))
+    p.populate_set_out(msg, 'alertHandler')
+
+    msg = 'infoleak:automatic-detection="'+decoder_name+'";{}'.format(message)
+    p.populate_set_out(msg, 'Tags')
+
+
+if __name__ == '__main__':
+    # If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
+    # Port of the redis instance used by pubsublogger
+    publisher.port = 6380
+    # Script is the default channel used for the modules.
+    publisher.channel = 'Script'
+
+    # Section name in bin/packages/modules.cfg
+    config_section = 'Decoder'
+
+    # Setup the I/O queues
+    p = Process(config_section)
+
+    serv_metadata = redis.StrictRedis(
+        host=p.config.get("ARDB_Metadata", "host"),
+        port=p.config.getint("ARDB_Metadata", "port"),
+        db=p.config.getint("ARDB_Metadata", "db"),
+        decode_responses=True)
+
+    # Send a description of the module to the logging subscriber
+    publisher.info("Decoder started")
+
+    regex_binary = '[0-1]{40,}'
+    #regex_hex = '(0[xX])?[A-Fa-f0-9]{40,}'
+    regex_hex = '[A-Fa-f0-9]{40,}'
+    regex_base64 = '(?:[A-Za-z0-9+/]{4}){2,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)'
+
+    # map each decoder name to its decoding function
+    decoder_function = {'binary': binary_decoder, 'hex': hex_decoder, 'base64': base64_decoder}
+
+    hex_max_execution_time = p.config.getint("Hex", "max_execution_time")
+    binary_max_execution_time = p.config.getint("Binary", "max_execution_time")
+    base64_max_execution_time = p.config.getint("Base64", "max_execution_time")
+
+    # list all decoders with their regex; the list order defines the search order
+    all_decoder = [ {'name': 'binary', 'regex': regex_binary, 'encoded_min_size': 300, 'max_execution_time': binary_max_execution_time},
+                    {'name': 'hex', 'regex': regex_hex, 'encoded_min_size': 300, 'max_execution_time': hex_max_execution_time},
+                    {'name': 'base64', 'regex': regex_base64, 'encoded_min_size': 40, 'max_execution_time': base64_max_execution_time}]
+
+    for decoder in all_decoder:
+        serv_metadata.sadd('all_decoder', decoder['name'])
+
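+    # Each regex search below runs under a SIGALRM watchdog: signal.alarm() is
+    # armed with the decoder's max_execution_time before re.findall(), the
+    # timeout_handler defined above turns the alarm into a TimeoutException,
+    # and the alarm is disarmed once the search returns. A paste that times out
+    # is counted in the module timeout statistics and skipped for that decoder.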
waiting".format(config_section)) + time.sleep(1) + continue + + filename = message + paste = Paste.Paste(filename) + + # Do something with the message from the queue + content = paste.get_p_content() + date = str(paste._get_p_date()) + + for decoder in all_decoder: # add threshold and size limit + print(decoder['name']) + + # max execution time on regex + signal.alarm(decoder['max_execution_time']) + try: + print(content) + encoded_list = re.findall(decoder['regex'], content) + #encoded_list = re.findall(decoder['regex'], content) + print(decoder['regex']) + print(encoded_list) + except TimeoutException: + encoded_list = [] + p.incr_module_timeout_statistic() # add encoder type + print ("{0} processing timeout".format(paste.p_path)) + continue + else: + signal.alarm(0) + + if(len(encoded_list) > 0): + content = decode_string(content, message, date, encoded_list, decoder['name'], decoder['encoded_min_size'])