mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-10 08:38:28 +00:00
perf: use defined compiled regex
The re.compile(regex) definition was not used; use compiled_regex.findall() directly instead of re.findall(regex)
This commit is contained in:
parent
4d6de7a397
commit
22693dca1c
1 changed files with 111 additions and 97 deletions
210
bin/Decoder.py
210
bin/Decoder.py
|
@ -5,6 +5,10 @@
|
||||||
|
|
||||||
Detect Binary and decode it
|
Detect Binary and decode it
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
##################################
|
||||||
|
# Import External packages
|
||||||
|
##################################
|
||||||
import time
|
import time
|
||||||
import os
|
import os
|
||||||
import redis
|
import redis
|
||||||
|
@ -13,16 +17,19 @@ from hashlib import sha1
|
||||||
import magic
|
import magic
|
||||||
import json
|
import json
|
||||||
import datetime
|
import datetime
|
||||||
|
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
import re
|
||||||
|
import signal
|
||||||
|
from lib import Decoded
|
||||||
|
|
||||||
|
|
||||||
|
##################################
|
||||||
|
# Import Project packages
|
||||||
|
##################################
|
||||||
|
from module.abstract_module import AbstractModule
|
||||||
from Helper import Process
|
from Helper import Process
|
||||||
from packages import Item
|
from packages import Item
|
||||||
|
|
||||||
from lib import Decoded
|
|
||||||
|
|
||||||
import re
|
|
||||||
import signal
|
|
||||||
|
|
||||||
class TimeoutException(Exception):
    """Signals that a regex scan exceeded its allotted execution time."""
|
@ -32,28 +39,108 @@ def timeout_handler(signum, frame):
|
||||||
|
|
||||||
signal.signal(signal.SIGALRM, timeout_handler)
|
signal.signal(signal.SIGALRM, timeout_handler)
|
||||||
|
|
||||||
def hex_decoder(hexStr):
|
|
||||||
|
class Decoder(AbstractModule):
|
||||||
|
"""
|
||||||
|
Decoder module for AIL framework
|
||||||
|
"""
|
||||||
|
|
||||||
|
# TODO to lambda expr
def hex_decoder(self, hexStr):
    """Decode a hexadecimal string into raw bytes, two hex digits per byte."""
    # Walk the string in 2-character steps and turn each pair into one byte value.
    return bytes(int(hexStr[pos:pos + 2], 16) for pos in range(0, len(hexStr), 2))
||||||
|
|
||||||
# TODO to lambda expr
def binary_decoder(self, binary_string):
    """Decode a string of '0'/'1' characters into raw bytes, eight bits per byte."""
    # Walk the string in 8-character steps and turn each group into one byte value.
    return bytes(int(binary_string[pos:pos + 8], 2) for pos in range(0, len(binary_string), 8))
||||||
|
|
||||||
# TODO to lambda expr
def base64_decoder(self, base64_string):
    """Decode a Base64-encoded string into raw bytes."""
    decoded = base64.b64decode(base64_string)
    return decoded
||||||
|
|
||||||
def decode_string(content, item_id, item_date, encoded_list, decoder_name, encoded_min_size):
|
|
||||||
|
def __init__(self):
    """Initialise the Decoder module: redis handle, compiled regexes and decoder table."""
    super(Decoder, self).__init__(logger_channel="script:decoder")

    # Metadata DB, used below to register the set of available decoder names
    serv_metadata = redis.StrictRedis(
        host=self.process.config.get("ARDB_Metadata", "host"),
        port=self.process.config.getint("ARDB_Metadata", "port"),
        db=self.process.config.getint("ARDB_Metadata", "db"),
        decode_responses=True)

    # Candidate patterns for each supported encoding (40+ chars to limit false positives)
    regex_binary = '[0-1]{40,}'
    #regex_hex = '(0[xX])?[A-Fa-f0-9]{40,}'
    regex_hex = '[A-Fa-f0-9]{40,}'
    regex_base64 = '(?:[A-Za-z0-9+/]{4}){2,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)'

    # Compile once here so compute() can reuse the compiled patterns on every message
    cmp_regex_binary = re.compile(regex_binary)
    cmp_regex_hex = re.compile(regex_hex)
    cmp_regex_base64 = re.compile(regex_base64)

    # map decoder function
    self.decoder_function = {'binary':self.binary_decoder,'hexadecimal':self.hex_decoder, 'base64':self.base64_decoder}

    # Per-encoding regex time budgets (used with signal.alarm in compute())
    hex_max_execution_time = self.process.config.getint("Hex", "max_execution_time")
    binary_max_execution_time = self.process.config.getint("Binary", "max_execution_time")
    base64_max_execution_time = self.process.config.getint("Base64", "max_execution_time")

    # list all decoder with regex,
    decoder_binary = {'name': 'binary', 'regex': cmp_regex_binary, 'encoded_min_size': 300, 'max_execution_time': binary_max_execution_time}
    decoder_hexadecimal = {'name': 'hexadecimal', 'regex': cmp_regex_hex, 'encoded_min_size': 300, 'max_execution_time': hex_max_execution_time}
    decoder_base64 = {'name': 'base64', 'regex': cmp_regex_base64, 'encoded_min_size': 40, 'max_execution_time': base64_max_execution_time}

    # NOTE(review): decoder_base64 appears twice — presumably a deliberate second
    # base64 pass after binary/hex matches have been stripped from the content;
    # confirm this is intentional and not a copy-paste slip.
    self.decoder_order = [ decoder_base64, decoder_binary, decoder_hexadecimal, decoder_base64]

    for decoder in self.decoder_order:
        serv_metadata.sadd('all_decoder', decoder['name'])

    # Waiting time in seconds between two processed messages
    self.pending_seconds = 1

    # Send module state to logs
    self.redis_logger.info(f'Module {self.module_name} initialized')
|
|
||||||
|
|
||||||
|
def compute(self, message):
    """Scan one item for encoded (binary/hex/base64) strings and decode them.

    Each decoder's regex runs under a SIGALRM time budget; on timeout that
    decoder is skipped for this item and a timeout statistic is recorded.
    Matches are handed to decode_string(), which removes them from the
    content, so later decoders in self.decoder_order see the reduced text.
    """
    obj_id = Item.get_item_id(message)

    # Extract info from message
    content = Item.get_item_content(obj_id)
    date = Item.get_item_date(obj_id)

    for decoder in self.decoder_order: # add threshold and size limit
        # max execution time on regex
        signal.alarm(decoder['max_execution_time'])

        try:
            encoded_list = decoder['regex'].findall(content)
        except TimeoutException:
            encoded_list = []
            self.process.incr_module_timeout_statistic() # add encoder type
            self.redis_logger.debug(f"{obj_id} processing timeout")
            continue
        else:
            # Disarm the pending alarm once the regex finished in time
            signal.alarm(0)

        if(len(encoded_list) > 0):
            content = self.decode_string(content, message, date, encoded_list, decoder['name'], decoder['encoded_min_size'])
|
|
||||||
|
def decode_string(self, content, item_id, item_date, encoded_list, decoder_name, encoded_min_size):
|
||||||
find = False
|
find = False
|
||||||
for encoded in encoded_list:
|
for encoded in encoded_list:
|
||||||
if len(encoded) >= encoded_min_size:
|
if len(encoded) >= encoded_min_size:
|
||||||
decoded_file = decoder_function[decoder_name](encoded)
|
decoded_file = self.decoder_function[decoder_name](encoded)
|
||||||
find = True
|
find = True
|
||||||
|
|
||||||
sha1_string = sha1(decoded_file).hexdigest()
|
sha1_string = sha1(decoded_file).hexdigest()
|
||||||
mimetype = Decoded.get_file_mimetype(decoded_file)
|
mimetype = Decoded.get_file_mimetype(decoded_file)
|
||||||
if not mimetype:
|
if not mimetype:
|
||||||
print(item_id)
|
self.redis_logger.debug(item_id)
|
||||||
print(sha1_string)
|
self.redis_logger.debug(sha1_string)
|
||||||
raise Exception('Invalid mimetype')
|
raise Exception('Invalid mimetype')
|
||||||
Decoded.save_decoded_file_content(sha1_string, decoded_file, item_date, mimetype=mimetype)
|
Decoded.save_decoded_file_content(sha1_string, decoded_file, item_date, mimetype=mimetype)
|
||||||
Decoded.save_item_relationship(sha1_string, item_id)
|
Decoded.save_item_relationship(sha1_string, item_id)
|
||||||
|
@ -62,98 +149,25 @@ def decode_string(content, item_id, item_date, encoded_list, decoder_name, encod
|
||||||
#remove encoded from item content
|
#remove encoded from item content
|
||||||
content = content.replace(encoded, '', 1)
|
content = content.replace(encoded, '', 1)
|
||||||
|
|
||||||
print('{} : {} - {}'.format(item_id, decoder_name, mimetype))
|
self.redis_logger.debug(f'{item_id} : {decoder_name} - {mimetype}')
|
||||||
if(find):
|
if(find):
|
||||||
set_out_item(decoder_name, item_id)
|
self.set_out_item(decoder_name, item_id)
|
||||||
|
|
||||||
return content
|
return content
|
||||||
|
|
||||||
def set_out_item(self, decoder_name, item_id):
    """Log a successful decode and forward the item to the Duplicate and Tags queues."""
    self.redis_logger.warning(f'{decoder_name} decoded')

    # Send to duplicate
    self.process.populate_set_out(item_id, 'Duplicate')

    # Send to Tags
    msg = f'infoleak:automatic-detection="{decoder_name}";{item_id}'
    self.process.populate_set_out(msg, 'Tags')
|
||||||
if __name__ == '__main__':
    # Instantiate the Decoder module and start it (run() comes from the
    # module base class — presumably AbstractModule's message loop; confirm).
    module = Decoder()
    module.run()
||||||
|
|
||||||
# Setup the I/O queues
|
|
||||||
p = Process(config_section)
|
|
||||||
|
|
||||||
serv_metadata = redis.StrictRedis(
|
|
||||||
host=p.config.get("ARDB_Metadata", "host"),
|
|
||||||
port=p.config.getint("ARDB_Metadata", "port"),
|
|
||||||
db=p.config.getint("ARDB_Metadata", "db"),
|
|
||||||
decode_responses=True)
|
|
||||||
|
|
||||||
# Sent to the logging a description of the module
|
|
||||||
publisher.info("Decoder started")
|
|
||||||
|
|
||||||
regex_binary = '[0-1]{40,}'
|
|
||||||
#regex_hex = '(0[xX])?[A-Fa-f0-9]{40,}'
|
|
||||||
regex_hex = '[A-Fa-f0-9]{40,}'
|
|
||||||
regex_base64 = '(?:[A-Za-z0-9+/]{4}){2,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)'
|
|
||||||
|
|
||||||
re.compile(regex_binary)
|
|
||||||
re.compile(regex_hex)
|
|
||||||
re.compile(regex_base64)
|
|
||||||
|
|
||||||
# map decoder function
|
|
||||||
decoder_function = {'binary':binary_decoder,'hexadecimal':hex_decoder, 'base64':base64_decoder}
|
|
||||||
|
|
||||||
hex_max_execution_time = p.config.getint("Hex", "max_execution_time")
|
|
||||||
binary_max_execution_time = p.config.getint("Binary", "max_execution_time")
|
|
||||||
base64_max_execution_time = p.config.getint("Base64", "max_execution_time")
|
|
||||||
|
|
||||||
# list all decoders with regex,
|
|
||||||
decoder_binary = {'name': 'binary', 'regex': regex_binary, 'encoded_min_size': 300, 'max_execution_time': binary_max_execution_time}
|
|
||||||
decoder_hexadecimal = {'name': 'hexadecimal', 'regex': regex_hex, 'encoded_min_size': 300, 'max_execution_time': hex_max_execution_time}
|
|
||||||
decoder_base64 = {'name': 'base64', 'regex': regex_base64, 'encoded_min_size': 40, 'max_execution_time': base64_max_execution_time}
|
|
||||||
|
|
||||||
decoder_order = [ decoder_base64, decoder_binary, decoder_hexadecimal, decoder_base64]
|
|
||||||
|
|
||||||
for decoder in decoder_order:
|
|
||||||
serv_metadata.sadd('all_decoder', decoder['name'])
|
|
||||||
|
|
||||||
# Endless loop getting messages from the input queue
|
|
||||||
while True:
|
|
||||||
# Get one message from the input queue
|
|
||||||
message = p.get_from_set()
|
|
||||||
if message is None:
|
|
||||||
|
|
||||||
publisher.debug("{} queue is empty, waiting".format(config_section))
|
|
||||||
time.sleep(1)
|
|
||||||
continue
|
|
||||||
|
|
||||||
obj_id = Item.get_item_id(message)
|
|
||||||
|
|
||||||
# Do something with the message from the queue
|
|
||||||
content = Item.get_item_content(obj_id)
|
|
||||||
date = Item.get_item_date(obj_id)
|
|
||||||
|
|
||||||
for decoder in decoder_order: # add threshold and size limit
|
|
||||||
|
|
||||||
# max execution time on regex
|
|
||||||
signal.alarm(decoder['max_execution_time'])
|
|
||||||
try:
|
|
||||||
encoded_list = re.findall(decoder['regex'], content)
|
|
||||||
except TimeoutException:
|
|
||||||
encoded_list = []
|
|
||||||
p.incr_module_timeout_statistic() # add encoder type
|
|
||||||
print ("{0} processing timeout".format(obj_id))
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
signal.alarm(0)
|
|
||||||
|
|
||||||
if(len(encoded_list) > 0):
|
|
||||||
content = decode_string(content, message, date, encoded_list, decoder['name'], decoder['encoded_min_size'])
|
|
||||||
|
|
Loading…
Reference in a new issue