ail-framework/bin/Decoder.py

#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
Decoder module
Dectect Binary and decode it
"""
import time
import os
import redis
import base64
import magic
import json
import datetime
import re
import signal
from hashlib import sha1

from pubsublogger import publisher
from Helper import Process
from packages import Item
from lib import Decoded

class TimeoutException(Exception):
    pass

def timeout_handler(signum, frame):
    raise TimeoutException

signal.signal(signal.SIGALRM, timeout_handler)
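# The handler above implements a regex timeout: signal.alarm(n) arms the timer
# before re.findall() runs, the handler raises TimeoutException if it fires, and
# signal.alarm(0) disarms it afterwards. Note that SIGALRM is Unix-only and must
# be handled in the main thread.
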
def hex_decoder(hexStr):
    #hexStr = ''.join( hex_string.split(" ") )
    return bytes(bytearray([int(hexStr[i:i+2], 16) for i in range(0, len(hexStr), 2)]))
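# e.g. hex_decoder('41494c') -> b'AIL': each pair of hex digits becomes one byte
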
def binary_decoder(binary_string):
    return bytes(bytearray([int(binary_string[i:i+8], 2) for i in range(0, len(binary_string), 8)]))
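# e.g. binary_decoder('010000010100100101001100') -> b'AIL': each 8-bit group becomes one byte
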
def base64_decoder(base64_string):
    return base64.b64decode(base64_string)
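# e.g. base64_decoder('QUlM') -> b'AIL'
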
def decode_string(content, item_id, item_date, encoded_list, decoder_name, encoded_min_size):
    """Decode every match of at least encoded_min_size characters, save the
    decoded file and its item relationship, and strip the encoded strings
    from the item content."""
    find = False
    for encoded in encoded_list:
        if len(encoded) >= encoded_min_size:
            decoded_file = decoder_function[decoder_name](encoded)
            find = True

            sha1_string = sha1(decoded_file).hexdigest()
            mimetype = Decoded.get_file_mimetype(decoded_file)
            if not mimetype:
                print(item_id)
                print(sha1_string)
                raise Exception('Invalid mimetype')
            Decoded.save_decoded_file_content(sha1_string, decoded_file, item_date, mimetype=mimetype)
            Decoded.save_item_relationship(sha1_string, item_id)
            Decoded.create_decoder_matadata(sha1_string, item_id, decoder_name)

            # remove the encoded string from the item content
            content = content.replace(encoded, '', 1)

            print('{} : {} - {}'.format(item_id, decoder_name, mimetype))

    if find:
        set_out_item(decoder_name, item_id)

    return content

def set_out_item(decoder_name, item_id):
    publisher.warning(decoder_name + ' decoded')

    # send the item to the duplicate detection queue
    p.populate_set_out(item_id, 'Duplicate')

    # tag the item with an infoleak automatic-detection tag
    msg = 'infoleak:automatic-detection="' + decoder_name + '";{}'.format(item_id)
    p.populate_set_out(msg, 'Tags')

if __name__ == '__main__':
    # If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
    # Port of the redis instance used by pubsublogger
    publisher.port = 6380
    # Script is the default channel used for the modules.
    publisher.channel = 'Script'

    # Section name in bin/packages/modules.cfg
    config_section = 'Decoder'

    # Setup the I/O queues
    p = Process(config_section)

    serv_metadata = redis.StrictRedis(
        host=p.config.get("ARDB_Metadata", "host"),
        port=p.config.getint("ARDB_Metadata", "port"),
        db=p.config.getint("ARDB_Metadata", "db"),
        decode_responses=True)
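    # in this module the metadata backend is only used to register the decoder
    # names in the 'all_decoder' set (see the loop over decoder_order below)
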
    # Send a description of the module to the logging
    publisher.info("Decoder started")

    regex_binary = '[0-1]{40,}'
    #regex_hex = '(0[xX])?[A-Fa-f0-9]{40,}'
    regex_hex = '[A-Fa-f0-9]{40,}'
    regex_base64 = '(?:[A-Za-z0-9+/]{4}){2,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)'
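    # regex_base64 only matches properly padded base64: at least two 4-character
    # groups, followed by a tail ending in '=' or '==' with valid final characters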

    # compile the regexes up front (results are cached by the re module)
    re.compile(regex_binary)
    re.compile(regex_hex)
    re.compile(regex_base64)

    # map each decoder name to its decoding function
    decoder_function = {'binary': binary_decoder, 'hexadecimal': hex_decoder, 'base64': base64_decoder}

    hex_max_execution_time = p.config.getint("Hex", "max_execution_time")
    binary_max_execution_time = p.config.getint("Binary", "max_execution_time")
    base64_max_execution_time = p.config.getint("Base64", "max_execution_time")

    # list all decoders with their regex, minimum match size and time budget
    decoder_binary = {'name': 'binary', 'regex': regex_binary, 'encoded_min_size': 300, 'max_execution_time': binary_max_execution_time}
    decoder_hexadecimal = {'name': 'hexadecimal', 'regex': regex_hex, 'encoded_min_size': 300, 'max_execution_time': hex_max_execution_time}
    decoder_base64 = {'name': 'base64', 'regex': regex_base64, 'encoded_min_size': 40, 'max_execution_time': base64_max_execution_time}

    decoder_order = [decoder_base64, decoder_binary, decoder_hexadecimal, decoder_base64]
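    # note: decoder_base64 appears twice, so base64 is tried a second time after
    # binary and hexadecimal matches have been stripped from the content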

    for decoder in decoder_order:
        serv_metadata.sadd('all_decoder', decoder['name'])

    # Endless loop getting messages from the input queue
    while True:
        # Get one message from the input queue
        message = p.get_from_set()
        if message is None:
            publisher.debug("{} queue is empty, waiting".format(config_section))
            time.sleep(1)
            continue

        obj_id = Item.get_item_id(message)

        # fetch the item content and date
        content = Item.get_item_content(obj_id)
        date = Item.get_item_date(obj_id)

        for decoder in decoder_order:  # TODO: add threshold and size limit
            # bound the regex execution time; the SIGALRM handler raises
            # TimeoutException if the search exceeds the configured budget
            signal.alarm(decoder['max_execution_time'])
            try:
                encoded_list = re.findall(decoder['regex'], content)
            except TimeoutException:
                encoded_list = []
                p.incr_module_timeout_statistic()  # TODO: add encoder type
                print("{0} processing timeout".format(obj_id))
                continue
            else:
                signal.alarm(0)

            if encoded_list:
                content = decode_string(content, message, date, encoded_list, decoder['name'], decoder['encoded_min_size'])