ail-framework/bin/Decoder.py

171 lines
5.7 KiB
Python
Raw Normal View History

2018-07-19 14:52:09 +00:00
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
Decoder module
Detect binary and decode it
"""
##################################
# Import External packages
##################################
2018-07-19 14:52:09 +00:00
import time
import os
import redis
import base64
from hashlib import sha1
import magic
import json
import datetime
from pubsublogger import publisher
import re
import signal
from lib import Decoded
2018-07-19 14:52:09 +00:00
##################################
# Import Project packages
##################################
from module.abstract_module import AbstractModule
2018-07-19 14:52:09 +00:00
from Helper import Process
from packages import Item
import ConfigLoader
2018-07-19 14:52:09 +00:00
class TimeoutException(Exception):
    """Signal that a time-limited operation ran past its allowed duration."""
def timeout_handler(signum, frame):
    """Signal handler that aborts the interrupted code by raising TimeoutException.

    :param signum: signal number passed by the signal machinery (unused)
    :param frame: interrupted stack frame passed by the signal machinery (unused)
    :raises TimeoutException: always
    """
    raise TimeoutException()
# Route SIGALRM to timeout_handler so that an expired signal.alarm() surfaces
# as an exception in the code that was running when the alarm fired.
signal.signal(signal.SIGALRM, timeout_handler)
class Decoder(AbstractModule):
    """
    Decoder module for AIL framework

    Looks for binary, hexadecimal and base64 encoded strings in the content of
    an item, decodes them, stores the decoded payload through ``lib.Decoded``
    and forwards the item to the 'Duplicate' and 'Tags' queues.
    """

    # TODO to lambda expr
    def hex_decoder(self, hexStr):
        """Decode a string of hexadecimal digits into bytes.

        Digits are consumed two at a time; when the input has an odd length
        the last digit is parsed on its own.

        :param hexStr: string made only of hexadecimal digits
        :return: decoded bytes
        """
        return bytes(bytearray([int(hexStr[i:i+2], 16) for i in range(0, len(hexStr), 2)]))

    # TODO to lambda expr
    def binary_decoder(self, binary_string):
        """Decode a string of '0'/'1' characters into bytes.

        Bits are consumed eight at a time; a shorter trailing group is parsed
        on its own.

        :param binary_string: string made only of '0' and '1'
        :return: decoded bytes
        """
        return bytes(bytearray([int(binary_string[i:i+8], 2) for i in range(0, len(binary_string), 8)]))

    # TODO to lambda expr
    def base64_decoder(self, base64_string):
        """Decode a base64 string into bytes.

        :param base64_string: base64 encoded string
        :return: decoded bytes
        """
        return base64.b64decode(base64_string)

    def __init__(self):
        """Compile the detection regexes, build the decoder table and register
        every decoder name in the 'all_decoder' set of the metadata database.
        """
        super(Decoder, self).__init__()

        serv_metadata = ConfigLoader.ConfigLoader().get_redis_conn("ARDB_Metadata")

        regex_binary = '[0-1]{40,}'
        #regex_hex = '(0[xX])?[A-Fa-f0-9]{40,}'
        regex_hex = '[A-Fa-f0-9]{40,}'
        regex_base64 = '(?:[A-Za-z0-9+/]{4}){2,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)'

        cmp_regex_binary = re.compile(regex_binary)
        cmp_regex_hex = re.compile(regex_hex)
        cmp_regex_base64 = re.compile(regex_base64)

        # Map each decoder name to the method that decodes it
        self.decoder_function = {
            'binary': self.binary_decoder,
            'hexadecimal': self.hex_decoder,
            'base64': self.base64_decoder,
        }

        # Per-decoder time budget (seconds, passed to signal.alarm) for the regex search.
        # NOTE(review): self.process is presumably set up by AbstractModule.__init__ - confirm.
        hex_max_execution_time = self.process.config.getint("Hex", "max_execution_time")
        binary_max_execution_time = self.process.config.getint("Binary", "max_execution_time")
        base64_max_execution_time = self.process.config.getint("Base64", "max_execution_time")

        # Describe every decoder: name, compiled regex, minimum length of an
        # encoded string worth decoding, and regex time budget
        decoder_binary = {'name': 'binary', 'regex': cmp_regex_binary, 'encoded_min_size': 300, 'max_execution_time': binary_max_execution_time}
        decoder_hexadecimal = {'name': 'hexadecimal', 'regex': cmp_regex_hex, 'encoded_min_size': 300, 'max_execution_time': hex_max_execution_time}
        decoder_base64 = {'name': 'base64', 'regex': cmp_regex_base64, 'encoded_min_size': 40, 'max_execution_time': base64_max_execution_time}

        # Order in which the decoders are applied to an item.
        # NOTE(review): base64 appears twice (first and last); presumably a second
        # pass after binary/hex strings were stripped from the content - confirm
        # this is intentional before removing the duplicate.
        self.decoder_order = [decoder_base64, decoder_binary, decoder_hexadecimal, decoder_base64]

        for decoder in self.decoder_order:
            serv_metadata.sadd('all_decoder', decoder['name'])

        # Waiting time in seconds between two processed messages
        self.pending_seconds = 1

        # Send module state to logs
        self.redis_logger.info(f'Module {self.module_name} initialized')

    def compute(self, message):
        """Run every decoder of ``self.decoder_order`` on the item named by *message*.

        Each regex search is guarded by a SIGALRM based timeout; a decoder
        whose search times out is skipped for this item. Content handed to the
        next decoder no longer contains the strings already decoded.

        :param message: queue message identifying the item to process
        """
        obj_id = Item.get_item_id(message)

        # Extract info from message
        content = Item.get_item_content(obj_id)
        date = Item.get_item_date(obj_id)

        for decoder in self.decoder_order:  # add threshold and size limit
            # max execution time on regex
            signal.alarm(decoder['max_execution_time'])
            try:
                encoded_list = decoder['regex'].findall(content)
            except TimeoutException:
                self.process.incr_module_timeout_statistic()  # add encoder type
                self.redis_logger.debug(f"{obj_id} processing timeout")
                continue
            finally:
                # Always disarm the alarm, including when an unexpected error
                # escapes the search, so a stale SIGALRM cannot fire later in
                # unrelated code.
                signal.alarm(0)

            if encoded_list:
                # NOTE(review): the raw message, not obj_id, is passed as the item id - confirm.
                content = self.decode_string(content, message, date, encoded_list, decoder['name'], decoder['encoded_min_size'])

    def decode_string(self, content, item_id, item_date, encoded_list, decoder_name, encoded_min_size):
        """Decode and store every long enough encoded string found in an item.

        :param content: item content the encoded strings were found in
        :param item_id: identifier of the item
        :param item_date: date of the item
        :param encoded_list: encoded strings matched by the decoder regex
        :param decoder_name: key of the decoder in ``self.decoder_function``
        :param encoded_min_size: minimum length an encoded string must have to be decoded
        :return: *content* with the first occurrence of each decoded string removed
        :raises Exception: if no mimetype can be determined for a decoded payload
        """
        found = False
        for encoded in encoded_list:
            if len(encoded) < encoded_min_size:
                continue

            decoded_file = self.decoder_function[decoder_name](encoded)
            found = True

            sha1_string = sha1(decoded_file).hexdigest()
            mimetype = Decoded.get_file_mimetype(decoded_file)
            if not mimetype:
                self.redis_logger.debug(item_id)
                self.redis_logger.debug(sha1_string)
                raise Exception('Invalid mimetype')

            Decoded.save_decoded_file_content(sha1_string, decoded_file, item_date, mimetype=mimetype)
            Decoded.save_item_relationship(sha1_string, item_id)
            Decoded.create_decoder_matadata(sha1_string, item_id, decoder_name)

            # Remove the encoded string from the item content
            content = content.replace(encoded, '', 1)

            self.redis_logger.debug(f'{item_id} : {decoder_name} - {mimetype}')

        if found:
            self.set_out_item(decoder_name, item_id)

        return content

    def set_out_item(self, decoder_name, item_id):
        """Log the detection and forward the item to the 'Duplicate' and 'Tags' queues.

        :param decoder_name: name of the decoder that produced a result
        :param item_id: identifier of the item
        """
        self.redis_logger.warning(f'{decoder_name} decoded')

        # Send to duplicate
        self.process.populate_set_out(item_id, 'Duplicate')

        # Send to Tags
        msg = f'infoleak:automatic-detection="{decoder_name}";{item_id}'
        self.process.populate_set_out(msg, 'Tags')
if __name__ == '__main__':
    # Script entry point: instantiate the module and hand control to run().
    Decoder().run()