diff --git a/HOWTO.md b/HOWTO.md
index d4a7b962..1a66402b 100644
--- a/HOWTO.md
+++ b/HOWTO.md
@@ -6,7 +6,7 @@ How to feed the AIL framework
For the moment, there are three different ways to feed AIL with data:
-1. Be a collaborator of CIRCL and ask to access our feed. It will be sent to the static IP your are using for AIL.
+1. Be a collaborator of CIRCL and ask to access our feed. It will be sent to the static IP you are using for AIL.
2. You can setup [pystemon](https://github.com/CIRCL/pystemon) and use the custom feeder provided by AIL (see below).
diff --git a/OVERVIEW.md b/OVERVIEW.md
index cf40bad9..effb387d 100644
--- a/OVERVIEW.md
+++ b/OVERVIEW.md
@@ -20,6 +20,7 @@ Redis and ARDB overview
- DB 0 - Lines duplicate
- DB 1 - Hashes
+
ARDB overview
---------------------------
ARDB_DB
@@ -31,3 +32,42 @@ ARDB_DB
* DB 6 - Tags
* DB 7 - Metadata
* DB 8 - Statistics
+
+* DB 7 - Metadata:
+ ----------------------------------------- BASE64 ----------------------------------------
+
+ HSET - 'metadata_hash:'+hash 'saved_path' saved_path
+ 'size' size
+ 'first_seen' first_seen
+ 'last_seen' last_seen
+ 'estimated_type' estimated_type
+ 'vt_link' vt_link
+ 'vt_report' vt_report
+ 'nb_seen_in_all_pastes' nb_seen_in_all_pastes
+ 'base64_decoder' nb_encoded
+ 'binary_decoder' nb_encoded
+
+ SET - 'all_decoder' decoder*
+
+ SET - 'hash_all_type' hash_type *
+ SET - 'hash_base64_all_type' hash_type *
+ SET - 'hash_binary_all_type' hash_type *
+
+ SET - 'hash_paste:'+paste hash *
+ SET - 'base64_paste:'+paste hash *
+ SET - 'binary_paste:'+paste hash *
+
+ ZADD - 'hash_date:'+20180622 hash * nb_seen_this_day
+ ZADD - 'base64_date:'+20180622 hash * nb_seen_this_day
+ ZADD - 'binary_date:'+20180622 hash * nb_seen_this_day
+
+ ZADD - 'nb_seen_hash:'+hash paste * nb_seen_in_paste
+ ZADD - 'base64_hash:'+hash paste * nb_seen_in_paste
+ ZADD - 'binary_hash:'+hash paste * nb_seen_in_paste
+
+ ZADD - 'hash_type:'+type date nb_seen
+ ZADD - 'base64_type:'+type date nb_seen
+ ZADD - 'binary_type:'+type date nb_seen
+
+    GET - 'base64_decoded:'+date               nb_decoded
+    GET - 'binary_decoded:'+date               nb_decoded
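+
+    Example queries (illustrative sketch only; key names as listed above, port and
+    db number taken from the ARDB_Metadata section of bin/packages/config.cfg.sample):
+
+    redis-cli -p 6382 -n 7 HGETALL  metadata_hash:<sha1>
+    redis-cli -p 6382 -n 7 ZRANGE   base64_date:20180622 0 -1 WITHSCORES
+    redis-cli -p 6382 -n 7 SMEMBERS hash_all_type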
diff --git a/README.md b/README.md
index 0c500efd..83bb0b86 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,3 @@
-[![Build Status](https://travis-ci.org/CIRCL/AIL-framework.svg?branch=master)](https://travis-ci.org/CIRCL/AIL-framework)
-
AIL
===
@@ -11,6 +9,22 @@ AIL is a modular framework to analyse potential information leaks from unstructu
![Dashboard](./doc/screenshots/dashboard.png?raw=true "AIL framework dashboard")
+
+
+
Latest Release
+
+
+
+
Contributors
+
+
+
+
License
+
+
+
+
+
Features
--------
@@ -31,14 +45,17 @@ Features
* Terms, Set of terms and Regex tracking and occurrence
* Many more modules for extracting phone numbers, credentials and others
* Alerting to [MISP](https://github.com/MISP/MISP) to share found leaks within a threat intelligence platform using [MISP standard](https://www.misp-project.org/objects.html#_ail_leak)
-* Detect and decode Base64 and store files
+* Detect and decode encoded files (Base64, hexadecimal or your own decoding scheme) and store them
* Detect Amazon AWS and Google API keys
* Detect Bitcoin address and Bitcoin private keys
-* Detect private keys and certificate
+* Detect private keys and certificates (including SSH and OpenVPN keys)
+* Detect IBAN bank accounts
* Tagging system with [MISP Galaxy](https://github.com/MISP/misp-galaxy) and [MISP Taxonomies](https://github.com/MISP/misp-taxonomies) tags
* UI paste submission
* Create events on [MISP](https://github.com/MISP/MISP) and cases on [The Hive](https://github.com/TheHive-Project/TheHive)
* Automatic paste export at detection on [MISP](https://github.com/MISP/MISP) (events) and [The Hive](https://github.com/TheHive-Project/TheHive) (alerts) on selected tags
+* Extracted and decoded files can be searched by date range, file type (MIME type) and the encoding discovered
+* Graph relationships between decoded files (hashes)
Installation
------------
@@ -152,6 +169,12 @@ Trending charts
![Trending-Web](./doc/screenshots/trending-web.png?raw=true "AIL framework webtrending")
![Trending-Modules](./doc/screenshots/trending-module.png?raw=true "AIL framework modulestrending")
+Encoded files extracted from pastes
+-----------------------------------
+
+![Extracted files from pastes](./doc/screenshots/ail-hashedfiles.png?raw=true "AIL extracted decoded files statistics")
+![Relationships between files extracted from encoded data in pastes](./doc/screenshots/hashedfile-graph.png?raw=true "Relationships between files extracted from encoded data in pastes")
+
Browsing
--------
diff --git a/bin/ApiKey.py b/bin/ApiKey.py
index e7ded9b2..faf4b2d9 100755
--- a/bin/ApiKey.py
+++ b/bin/ApiKey.py
@@ -86,8 +86,7 @@ if __name__ == "__main__":
if message is not None:
- search_api_key(message)
-
+ search_api_key(message)
else:
publisher.debug("Script ApiKey is Idling 10s")
diff --git a/bin/Base64.py b/bin/Base64.py
index 960ca6de..e8b3fbc5 100755
--- a/bin/Base64.py
+++ b/bin/Base64.py
@@ -8,6 +8,7 @@
import time
import os
import datetime
+import redis
from pubsublogger import publisher
@@ -31,7 +32,7 @@ def timeout_handler(signum, frame):
signal.signal(signal.SIGALRM, timeout_handler)
-def search_base64(content, message):
+def search_base64(content, message, date):
find = False
base64_list = re.findall(regex_base64, content)
if(len(base64_list) > 0):
@@ -39,6 +40,7 @@ def search_base64(content, message):
for b64 in base64_list:
if len(b64) >= 40 :
decode = base64.b64decode(b64)
+ print(decode)
type = magic.from_buffer(decode, mime=True)
#print(type)
@@ -46,6 +48,8 @@ def search_base64(content, message):
find = True
hash = sha1(decode).hexdigest()
+ print(message)
+ print(hash)
data = {}
data['name'] = hash
@@ -54,8 +58,37 @@ def search_base64(content, message):
data['estimated type'] = type
json_data = json.dumps(data)
- save_base64_as_file(decode, type, hash, json_data)
- print('found {} '.format(type))
+ date_paste = '{}/{}/{}'.format(date[0:4], date[4:6], date[6:8])
+ date_key = date[0:4] + date[4:6] + date[6:8]
+
+ serv_metadata.zincrby('base64_date:'+date_key, hash, 1)
+
+ # first time we see this hash
+ if not serv_metadata.hexists('metadata_hash:'+hash, 'estimated_type'):
+ serv_metadata.hset('metadata_hash:'+hash, 'first_seen', date_paste)
+ serv_metadata.hset('metadata_hash:'+hash, 'last_seen', date_paste)
+ else:
+ serv_metadata.hset('metadata_hash:'+hash, 'last_seen', date_paste)
+
+ # first time we see this file on this paste
+ if serv_metadata.zscore('base64_hash:'+hash, message) is None:
+ print('first')
+ serv_metadata.hincrby('metadata_hash:'+hash, 'nb_seen_in_all_pastes', 1)
+
+ serv_metadata.sadd('base64_paste:'+message, hash) # paste - hash map
+ serv_metadata.zincrby('base64_hash:'+hash, message, 1)# hash - paste map
+
+ # create hash metadata
+ serv_metadata.hset('metadata_hash:'+hash, 'estimated_type', type)
+ serv_metadata.sadd('hash_all_type', type)
+ serv_metadata.sadd('hash_base64_all_type', type)
+ serv_metadata.zincrby('base64_type:'+type, date_key, 1)
+
+ save_base64_as_file(decode, type, hash, json_data, id)
+ print('found {} '.format(type))
+ # duplicate
+ else:
+ serv_metadata.zincrby('base64_hash:'+hash, message, 1) # number of b64 on this paste
if(find):
publisher.warning('base64 decoded')
@@ -68,10 +101,10 @@ def search_base64(content, message):
msg = 'infoleak:automatic-detection="base64";{}'.format(message)
p.populate_set_out(msg, 'Tags')
-def save_base64_as_file(decode, type, hash, json_data):
+def save_base64_as_file(decode, type, hash, json_data, id):
- filename_b64 = os.path.join(os.environ['AIL_HOME'],
- p.config.get("Directories", "base64"), type, hash[:2], hash)
+ local_filename_b64 = os.path.join(p.config.get("Directories", "base64"), type, hash[:2], hash)
+ filename_b64 = os.path.join(os.environ['AIL_HOME'], local_filename_b64)
filename_json = os.path.join(os.environ['AIL_HOME'],
p.config.get("Directories", "base64"), type, hash[:2], hash + '.json')
@@ -83,6 +116,10 @@ def save_base64_as_file(decode, type, hash, json_data):
with open(filename_b64, 'wb') as f:
f.write(decode)
+ # create hash metadata
+ serv_metadata.hset('metadata_hash:'+hash, 'saved_path', local_filename_b64)
+ serv_metadata.hset('metadata_hash:'+hash, 'size', os.path.getsize(filename_b64))
+
with open(filename_json, 'w') as f:
f.write(json_data)
@@ -103,6 +140,12 @@ if __name__ == '__main__':
p = Process(config_section)
max_execution_time = p.config.getint("Base64", "max_execution_time")
+ serv_metadata = redis.StrictRedis(
+ host=p.config.get("ARDB_Metadata", "host"),
+ port=p.config.getint("ARDB_Metadata", "port"),
+ db=p.config.getint("ARDB_Metadata", "db"),
+ decode_responses=True)
+
# Sent to the logging a description of the module
publisher.info("Base64 started")
@@ -127,13 +170,12 @@ if __name__ == '__main__':
# Do something with the message from the queue
#print(filename)
content = paste.get_p_content()
- search_base64(content,message)
-
- # (Optional) Send that thing to the next queue
- #p.populate_set_out(something_has_been_done)
+ date = str(paste._get_p_date())
+ search_base64(content,message, date)
except TimeoutException:
- print ("{0} processing timeout".format(paste.p_path))
- continue
+ p.incr_module_timeout_statistic()
+ print ("{0} processing timeout".format(paste.p_path))
+ continue
else:
signal.alarm(0)
diff --git a/bin/Binary.py b/bin/Binary.py
new file mode 100755
index 00000000..29d6f2c5
--- /dev/null
+++ b/bin/Binary.py
@@ -0,0 +1,184 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+"""
+ Binary module
+
+    Detect binary-encoded data and decode it
+"""
+import time
+import os
+import datetime
+import redis
+
+from pubsublogger import publisher
+
+from Helper import Process
+from packages import Paste
+
+import re
+from hashlib import sha1
+import magic
+import json
+
+import signal
+
+class TimeoutException(Exception):
+ pass
+
+def timeout_handler(signum, frame):
+ raise TimeoutException
+
+signal.signal(signal.SIGALRM, timeout_handler)
+
+def decode_binary_string(binary_string):
+    return ''.join(chr(int(binary_string[i*8:i*8+8], 2)) for i in range(len(binary_string)//8))
+
+def decode_binary(binary_string):
+ return bytes(bytearray([int(binary_string[i:i+8], 2) for i in range(0, len(binary_string), 8)]))
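+# Illustrative example (sketch, not called by the module):
+#   decode_binary('0110100001101001') == b'hi'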
+
+
+def search_binary(content, message, date):
+ find = False
+ binary_list = re.findall(regex_binary, content)
+ if(len(binary_list) > 0):
+
+ for binary in binary_list:
+ if len(binary) >= 40 :
+ decode = decode_binary(binary)
+ print(message)
+
+ type = magic.from_buffer(decode, mime=True)
+ print(type)
+
+ find = True
+ hash = sha1(decode).hexdigest()
+ print(hash)
+
+ data = {}
+ data['name'] = hash
+ data['date'] = datetime.datetime.now().strftime("%d/%m/%y")
+ data['origin'] = message
+ data['estimated type'] = type
+ json_data = json.dumps(data)
+
+ date_paste = '{}/{}/{}'.format(date[0:4], date[4:6], date[6:8])
+ date_key = date[0:4] + date[4:6] + date[6:8]
+
+ serv_metadata.zincrby('binary_date:'+date_key, hash, 1)
+
+ # first time we see this hash
+ if not serv_metadata.hexists('metadata_hash:'+hash, 'estimated_type'):
+ serv_metadata.hset('metadata_hash:'+hash, 'first_seen', date_paste)
+ serv_metadata.hset('metadata_hash:'+hash, 'last_seen', date_paste)
+ else:
+ serv_metadata.hset('metadata_hash:'+hash, 'last_seen', date_paste)
+
+ # first time we see this file encoding on this paste
+ if serv_metadata.zscore('binary_hash:'+hash, message) is None:
+ print('first binary')
+ serv_metadata.hincrby('metadata_hash:'+hash, 'nb_seen_in_all_pastes', 1)
+
+ serv_metadata.sadd('binary_paste:'+message, hash) # paste - hash map
+ serv_metadata.zincrby('binary_hash:'+hash, message, 1)# hash - paste map
+
+ # create hash metadata
+ serv_metadata.hset('metadata_hash:'+hash, 'estimated_type', type)
+ serv_metadata.sadd('hash_all_type', type)
+ serv_metadata.sadd('hash_binary_all_type', type)
+ serv_metadata.zincrby('binary_type:'+type, date_key, 1)
+
+ save_binary_as_file(decode, type, hash, json_data, id)
+ print('found {} '.format(type))
+ # duplicate
+ else:
+                    serv_metadata.zincrby('binary_hash:'+hash, message, 1) # number of times this hash is seen in this paste
+
+ if(find):
+ publisher.warning('binary decoded')
+ #Send to duplicate
+ p.populate_set_out(message, 'Duplicate')
+ #send to Browse_warning_paste
+ msg = ('binary;{}'.format(message))
+ p.populate_set_out( msg, 'alertHandler')
+
+ msg = 'infoleak:automatic-detection="binary";{}'.format(message)
+ p.populate_set_out(msg, 'Tags')
+
+def save_binary_as_file(decode, type, hash, json_data, id):
+
+ local_filename_b64 = os.path.join(p.config.get("Directories", "base64"), type, hash[:2], hash)
+ filename_b64 = os.path.join(os.environ['AIL_HOME'], local_filename_b64)
+
+ filename_json = os.path.join(os.environ['AIL_HOME'],
+ p.config.get("Directories", "base64"), type, hash[:2], hash + '.json')
+
+ dirname = os.path.dirname(filename_b64)
+ if not os.path.exists(dirname):
+ os.makedirs(dirname)
+
+ with open(filename_b64, 'wb') as f:
+ f.write(decode)
+
+ # create hash metadata
+ serv_metadata.hset('metadata_hash:'+hash, 'saved_path', local_filename_b64)
+ serv_metadata.hset('metadata_hash:'+hash, 'size', os.path.getsize(filename_b64))
+
+ with open(filename_json, 'w') as f:
+ f.write(json_data)
+
+
+
+
+if __name__ == '__main__':
+    # If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
+ # Port of the redis instance used by pubsublogger
+ publisher.port = 6380
+ # Script is the default channel used for the modules.
+ publisher.channel = 'Script'
+
+ # Section name in bin/packages/modules.cfg
+ config_section = 'Binary'
+
+ # Setup the I/O queues
+ p = Process(config_section)
+ max_execution_time = p.config.getint("Binary", "max_execution_time")
+
+ serv_metadata = redis.StrictRedis(
+ host=p.config.get("ARDB_Metadata", "host"),
+ port=p.config.getint("ARDB_Metadata", "port"),
+ db=p.config.getint("ARDB_Metadata", "db"),
+ decode_responses=True)
+
+ # Sent to the logging a description of the module
+ publisher.info("Binary started")
+
+ regex_binary = '[0-1]{40,}'
+ re.compile(regex_binary)
+
+ # Endless loop getting messages from the input queue
+ while True:
+ # Get one message from the input queue
+ message = p.get_from_set()
+ if message is None:
+
+ publisher.debug("{} queue is empty, waiting".format(config_section))
+ time.sleep(1)
+ continue
+
+ filename = message
+ paste = Paste.Paste(filename)
+
+ signal.alarm(max_execution_time)
+ try:
+ # Do something with the message from the queue
+ #print(filename)
+ content = paste.get_p_content()
+ date = str(paste._get_p_date())
+ search_binary(content,message, date)
+
+ except TimeoutException:
+ p.incr_module_timeout_statistic()
+ print ("{0} processing timeout".format(paste.p_path))
+ continue
+ else:
+ signal.alarm(0)
diff --git a/bin/Decoder.py b/bin/Decoder.py
new file mode 100755
index 00000000..af385fed
--- /dev/null
+++ b/bin/Decoder.py
@@ -0,0 +1,238 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+"""
+ Decoder module
+
+    Detect encoded data (Base64, binary, hexadecimal) and decode it
+"""
+import time
+import os
+import redis
+import base64
+from hashlib import sha1
+import magic
+import json
+import datetime
+
+from pubsublogger import publisher
+
+from Helper import Process
+from packages import Paste
+
+import re
+import signal
+
+class TimeoutException(Exception):
+ pass
+
+def timeout_handler(signum, frame):
+ raise TimeoutException
+
+signal.signal(signal.SIGALRM, timeout_handler)
+
+def hex_decoder(hexStr):
+ #hexStr = ''.join( hex_string.split(" ") )
+ return bytes(bytearray([int(hexStr[i:i+2], 16) for i in range(0, len(hexStr), 2)]))
+
+def binary_decoder(binary_string):
+ return bytes(bytearray([int(binary_string[i:i+8], 2) for i in range(0, len(binary_string), 8)]))
+
+def base64_decoder(base64_string):
+ return base64.b64decode(base64_string)
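+# Illustrative examples (sketch, not called by the module):
+#   hex_decoder('6869')                == b'hi'
+#   binary_decoder('0110100001101001') == b'hi'
+#   base64_decoder('aGk=')             == b'hi'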
+
+def decode_string(content, message, date, encoded_list, decoder_name, encoded_min_size):
+ find = False
+ for encoded in encoded_list:
+ if len(encoded) >= encoded_min_size:
+ decode = decoder_function[decoder_name](encoded)
+ find = True
+
+ save_hash(decoder_name, message, date, decode)
+
+ #remove encoded from paste content
+ content = content.replace(encoded, '', 1)
+
+ if(find):
+ set_out_paste(decoder_name, message)
+
+ return content
+
+# # TODO: FIXME check db
+def save_hash(decoder_name, message, date, decoded):
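+    # updates the per-hash / per-decoder ARDB_Metadata keys (see OVERVIEW.md), e.g.
+    #   metadata_hash:<hash>, <decoder>_decoded:<date>, hash_date:<date>, <decoder>_date:<date>,
+    #   hash_paste:<paste>, <decoder>_paste:<paste>, nb_seen_hash:<hash>, <decoder>_hash:<hash>,
+    #   hash_all_type, hash_<decoder>_all_type, hash_type:<type>, <decoder>_type:<type>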
+ print(decoder_name)
+ type = magic.from_buffer(decoded, mime=True)
+ hash = sha1(decoded).hexdigest()
+ print(hash)
+
+ data = {}
+ data['name'] = hash
+ data['date'] = datetime.datetime.now().strftime("%d/%m/%y")
+ data['origin'] = message
+ data['estimated type'] = type
+ json_data = json.dumps(data)
+
+ date_paste = '{}/{}/{}'.format(date[0:4], date[4:6], date[6:8])
+ date_key = date[0:4] + date[4:6] + date[6:8]
+
+ serv_metadata.incrby(decoder_name+'_decoded:'+date_key, 1)
+ serv_metadata.zincrby('hash_date:'+date_key, hash, 1)
+ serv_metadata.zincrby(decoder_name+'_date:'+date_key, hash, 1)
+
+ # first time we see this hash
+ if not serv_metadata.hexists('metadata_hash:'+hash, 'estimated_type'):
+ serv_metadata.hset('metadata_hash:'+hash, 'first_seen', date_paste)
+ serv_metadata.hset('metadata_hash:'+hash, 'last_seen', date_paste)
+ else:
+ serv_metadata.hset('metadata_hash:'+hash, 'last_seen', date_paste)
+
+ # first time we see this hash (all encoding) on this paste
+ if serv_metadata.zscore('nb_seen_hash:'+hash, message) is None:
+ serv_metadata.hincrby('metadata_hash:'+hash, 'nb_seen_in_all_pastes', 1)
+ serv_metadata.sadd('hash_paste:'+message, hash) # paste - hash map
+ # create hash metadata
+ serv_metadata.hset('metadata_hash:'+hash, 'estimated_type', type)
+ serv_metadata.sadd('hash_all_type', type)
+
+ # first time we see this hash encoding on this paste
+ if serv_metadata.zscore(decoder_name+'_hash:'+hash, message) is None:
+ print('first '+decoder_name)
+
+ serv_metadata.sadd(decoder_name+'_paste:'+message, hash) # paste - hash map
+
+ # create hash metadata
+ serv_metadata.sadd('hash_'+ decoder_name +'_all_type', type)
+
+ # first time we see this hash today
+ if serv_metadata.zscore('hash_date:'+date_key, hash) is None:
+ serv_metadata.zincrby('hash_type:'+type, date_key, 1)
+
+ # first time we see this hash encoding today
+ if serv_metadata.zscore(decoder_name+'_date:'+date_key, hash) is None:
+ serv_metadata.zincrby(decoder_name+'_type:'+type, date_key, 1)
+
+ save_hash_on_disk(decoded, type, hash, json_data)
+ print('found {} '.format(type))
+
+ serv_metadata.hincrby('metadata_hash:'+hash, decoder_name+'_decoder', 1)
+
+ serv_metadata.zincrby(decoder_name+'_type:'+type, date_key, 1)
+
+ serv_metadata.zincrby('nb_seen_hash:'+hash, message, 1)# hash - paste map
+    serv_metadata.zincrby(decoder_name+'_hash:'+hash, message, 1) # number of times this hash is seen in this paste for this encoding
+
+
+def save_hash_on_disk(decode, type, hash, json_data):
+
+ local_filename_hash = os.path.join(p.config.get("Directories", "hash"), type, hash[:2], hash)
+ filename_hash = os.path.join(os.environ['AIL_HOME'], local_filename_hash)
+
+ filename_json = os.path.join(os.environ['AIL_HOME'],
+ p.config.get("Directories", "hash"), type, hash[:2], hash + '.json')
+
+ dirname = os.path.dirname(filename_hash)
+ if not os.path.exists(dirname):
+ os.makedirs(dirname)
+
+ with open(filename_hash, 'wb') as f:
+ f.write(decode)
+
+ # create hash metadata
+ serv_metadata.hset('metadata_hash:'+hash, 'saved_path', local_filename_hash)
+ serv_metadata.hset('metadata_hash:'+hash, 'size', os.path.getsize(filename_hash))
+
+ with open(filename_json, 'w') as f:
+ f.write(json_data)
+
+def set_out_paste(decoder_name, message):
+ publisher.warning(decoder_name+' decoded')
+ #Send to duplicate
+ p.populate_set_out(message, 'Duplicate')
+ #send to Browse_warning_paste
+ msg = (decoder_name+';{}'.format(message))
+ p.populate_set_out( msg, 'alertHandler')
+
+ msg = 'infoleak:automatic-detection="'+decoder_name+'";{}'.format(message)
+ p.populate_set_out(msg, 'Tags')
+
+
+if __name__ == '__main__':
+    # If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
+ # Port of the redis instance used by pubsublogger
+ publisher.port = 6380
+ # Script is the default channel used for the modules.
+ publisher.channel = 'Script'
+
+ # Section name in bin/packages/modules.cfg
+ config_section = 'Decoder'
+
+ # Setup the I/O queues
+ p = Process(config_section)
+
+ serv_metadata = redis.StrictRedis(
+ host=p.config.get("ARDB_Metadata", "host"),
+ port=p.config.getint("ARDB_Metadata", "port"),
+ db=p.config.getint("ARDB_Metadata", "db"),
+ decode_responses=True)
+
+ # Sent to the logging a description of the module
+ publisher.info("Decoder started")
+
+ regex_binary = '[0-1]{40,}'
+ #regex_hex = '(0[xX])?[A-Fa-f0-9]{40,}'
+ regex_hex = '[A-Fa-f0-9]{40,}'
+ regex_base64 = '(?:[A-Za-z0-9+/]{4}){2,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)'
+
+ re.compile(regex_binary)
+ re.compile(regex_hex)
+ re.compile(regex_base64)
+
+ # map decoder function
+ decoder_function = {'binary':binary_decoder,'hexadecimal':hex_decoder, 'base64':base64_decoder}
+
+ hex_max_execution_time = p.config.getint("Hex", "max_execution_time")
+ binary_max_execution_time = p.config.getint("Binary", "max_execution_time")
+ base64_max_execution_time = p.config.getint("Base64", "max_execution_time")
+
+    # list all decoders with their regex and thresholds
+ decoder_binary = {'name': 'binary', 'regex': regex_binary, 'encoded_min_size': 300, 'max_execution_time': binary_max_execution_time}
+ decoder_hexadecimal = {'name': 'hexadecimal', 'regex': regex_hex, 'encoded_min_size': 300, 'max_execution_time': hex_max_execution_time}
+ decoder_base64 = {'name': 'base64', 'regex': regex_base64, 'encoded_min_size': 40, 'max_execution_time': base64_max_execution_time}
+
+ decoder_order = [ decoder_base64, decoder_binary, decoder_hexadecimal, decoder_base64]
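+    # decoders run in this order: each regex is evaluated under its own alarm,
+    # matches of at least encoded_min_size characters are decoded, saved under the
+    # Directories/hash folder and stripped from the content, so the next decoder
+    # does not re-match the same data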
+
+ for decoder in decoder_order:
+ serv_metadata.sadd('all_decoder', decoder['name'])
+
+ # Endless loop getting messages from the input queue
+ while True:
+ # Get one message from the input queue
+ message = p.get_from_set()
+ if message is None:
+
+ publisher.debug("{} queue is empty, waiting".format(config_section))
+ time.sleep(1)
+ continue
+
+ filename = message
+ paste = Paste.Paste(filename)
+
+ # Do something with the message from the queue
+ content = paste.get_p_content()
+ date = str(paste._get_p_date())
+
+ for decoder in decoder_order: # add threshold and size limit
+
+ # max execution time on regex
+ signal.alarm(decoder['max_execution_time'])
+ try:
+ encoded_list = re.findall(decoder['regex'], content)
+ except TimeoutException:
+ encoded_list = []
+ p.incr_module_timeout_statistic() # add encoder type
+ print ("{0} processing timeout".format(paste.p_path))
+ continue
+ else:
+ signal.alarm(0)
+
+ if(len(encoded_list) > 0):
+ content = decode_string(content, message, date, encoded_list, decoder['name'], decoder['encoded_min_size'])
diff --git a/bin/Helper.py b/bin/Helper.py
index 0bb4b410..52097ef6 100755
--- a/bin/Helper.py
+++ b/bin/Helper.py
@@ -136,6 +136,12 @@ class Process(object):
db=self.config.get('RedisPubSub', 'db'),
decode_responses=True)
+ self.serv_statistics = redis.StrictRedis(
+ host=self.config.get('ARDB_Statistics', 'host'),
+ port=self.config.get('ARDB_Statistics', 'port'),
+ db=self.config.get('ARDB_Statistics', 'db'),
+ decode_responses=True)
+
self.moduleNum = os.getpid()
def populate_set_in(self):
@@ -165,36 +171,39 @@ class Process(object):
return None
else:
- #try:
- if '.gz' in message:
- path = message.split(".")[-2].split("/")[-1]
- #find start of path with AIL_HOME
- index_s = message.find(os.environ['AIL_HOME'])
- #Stop when .gz
- index_e = message.find(".gz")+3
- if(index_s == -1):
- complete_path = message[0:index_e]
+ try:
+ if '.gz' in message:
+ path = message.split(".")[-2].split("/")[-1]
+ #find start of path with AIL_HOME
+ index_s = message.find(os.environ['AIL_HOME'])
+ #Stop when .gz
+ index_e = message.find(".gz")+3
+ if(index_s == -1):
+ complete_path = message[0:index_e]
+ else:
+ complete_path = message[index_s:index_e]
+
else:
- complete_path = message[index_s:index_e]
+ path = "-"
+ complete_path = "?"
- else:
- path = "-"
- complete_path = "?"
+ value = str(timestamp) + ", " + path
+ self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum), value)
+ self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum) + "_PATH", complete_path)
+ self.r_temp.sadd("MODULE_TYPE_"+self.subscriber_name, str(self.moduleNum))
- value = str(timestamp) + ", " + path
- self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum), value)
- self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum) + "_PATH", complete_path)
- self.r_temp.sadd("MODULE_TYPE_"+self.subscriber_name, str(self.moduleNum))
- return message
+ curr_date = datetime.date.today()
+ self.serv_statistics.hincrby(curr_date.strftime("%Y%m%d"),'paste_by_modules_in:'+self.subscriber_name, 1)
+ return message
- #except:
- #print('except')
- #path = "?"
- #value = str(timestamp) + ", " + path
- #self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum), value)
- #self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum) + "_PATH", "?")
- #self.r_temp.sadd("MODULE_TYPE_"+self.subscriber_name, str(self.moduleNum))
- #return message
+ except:
+ print('except')
+ path = "?"
+ value = str(timestamp) + ", " + path
+ self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum), value)
+ self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum) + "_PATH", "?")
+ self.r_temp.sadd("MODULE_TYPE_"+self.subscriber_name, str(self.moduleNum))
+ return message
def populate_set_out(self, msg, channel=None):
# multiproc
@@ -221,3 +230,7 @@ class Process(object):
time.sleep(1)
continue
self.pubsub.publish(message)
+
+ def incr_module_timeout_statistic(self):
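+        # per-day counter in ARDB_Statistics: HINCRBY <YYYYMMDD> paste_by_modules_timeout:<subscriber_name> 1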
+ curr_date = datetime.date.today()
+ self.serv_statistics.hincrby(curr_date.strftime("%Y%m%d"),'paste_by_modules_timeout:'+self.subscriber_name, 1)
diff --git a/bin/Keys.py b/bin/Keys.py
index 7b1ec7dc..d3c292ba 100755
--- a/bin/Keys.py
+++ b/bin/Keys.py
@@ -71,6 +71,14 @@ def search_key(paste):
p.populate_set_out(msg, 'Tags')
find = True
+ if '---- BEGIN SSH2 ENCRYPTED PRIVATE KEY ----' in content:
+ publisher.warning('{} has an ssh2 private key message'.format(paste.p_name))
+ print('SSH2 private key message found')
+
+ msg = 'infoleak:automatic-detection="private-ssh-key";{}'.format(message)
+ p.populate_set_out(msg, 'Tags')
+ find = True
+
if '-----BEGIN OpenVPN Static key V1-----' in content:
publisher.warning('{} has an openssh private key message'.format(paste.p_name))
print('OpenVPN Static key message found')
diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh
index 998a676a..c3bfd8cf 100755
--- a/bin/LAUNCH.sh
+++ b/bin/LAUNCH.sh
@@ -168,7 +168,7 @@ function launching_scripts {
sleep 0.1
screen -S "Script_AIL" -X screen -t "Keys" bash -c 'cd '${AIL_BIN}'; ./Keys.py; read x'
sleep 0.1
- screen -S "Script_AIL" -X screen -t "Base64" bash -c 'cd '${AIL_BIN}'; ./Base64.py; read x'
+ screen -S "Script_AIL" -X screen -t "Decoder" bash -c 'cd '${AIL_BIN}'; ./Decoder.py; read x'
sleep 0.1
screen -S "Script_AIL" -X screen -t "Bitcoin" bash -c 'cd '${AIL_BIN}'; ./Bitcoin.py; read x'
sleep 0.1
diff --git a/bin/MISP_The_Hive_feeder.py b/bin/MISP_The_Hive_feeder.py
index d9daa299..7fa6b223 100755
--- a/bin/MISP_The_Hive_feeder.py
+++ b/bin/MISP_The_Hive_feeder.py
@@ -150,16 +150,18 @@ if __name__ == "__main__":
if flag_the_hive:
try:
HiveApi = TheHiveApi(the_hive_url, the_hive_key, cert = the_hive_verifycert)
- r_serv_db.set('ail:thehive', True)
except:
HiveApi = False
flag_the_hive = False
r_serv_db.set('ail:thehive', False)
print('Not connected to The HIVE')
+ else:
+ HiveApi = False
if HiveApi != False and flag_the_hive:
try:
HiveApi.get_alert(0)
+ r_serv_db.set('ail:thehive', True)
print('Connected to The HIVE:', the_hive_url)
except thehive4py.exceptions.AlertException:
HiveApi = False
diff --git a/bin/Mixer.py b/bin/Mixer.py
index 98709ea5..96f20815 100755
--- a/bin/Mixer.py
+++ b/bin/Mixer.py
@@ -68,6 +68,12 @@ if __name__ == '__main__':
db=cfg.getint("Redis_Mixer_Cache", "db"),
decode_responses=True)
+ server_cache = redis.StrictRedis(
+ host=cfg.get("Redis_Log_submit", "host"),
+ port=cfg.getint("Redis_Log_submit", "port"),
+ db=cfg.getint("Redis_Log_submit", "db"),
+ decode_responses=True)
+
# LOGGING #
publisher.info("Feed Script started to receive & publish.")
@@ -184,7 +190,17 @@ if __name__ == '__main__':
publisher.debug("Empty Paste: {0} not processed".format(message))
else:
print("Empty Queues: Waiting...")
+
if int(time.time() - time_1) > refresh_time:
+ # update internal feeder
+ list_feeder = server_cache.hkeys("mixer_cache:list_feeder")
+ if list_feeder:
+ for feeder in list_feeder:
+ count = int(server_cache.hget("mixer_cache:list_feeder", feeder))
+ if count is None:
+ count = 0
+ processed_paste_per_feeder[feeder] = processed_paste_per_feeder.get(feeder, 0) + count
+ processed_paste = processed_paste + count
print(processed_paste_per_feeder)
to_print = 'Mixer; ; ; ;mixer_all All_feeders Processed {0} paste(s) in {1}sec'.format(processed_paste, refresh_time)
print(to_print)
@@ -204,5 +220,8 @@ if __name__ == '__main__':
duplicated_paste_per_feeder[feeder] = 0
time_1 = time.time()
+
+ # delete internal feeder list
+ server_cache.delete("mixer_cache:list_feeder")
time.sleep(0.5)
continue
diff --git a/bin/Release.py b/bin/Release.py
index 6e7a8277..43c84b04 100755
--- a/bin/Release.py
+++ b/bin/Release.py
@@ -37,6 +37,7 @@ if __name__ == "__main__":
regex = '|'.join(regexs)
while True:
+ signal.alarm(max_execution_time)
filepath = p.get_from_set()
if filepath is None:
publisher.debug("Script Release is Idling 10s")
@@ -47,7 +48,7 @@ if __name__ == "__main__":
paste = Paste.Paste(filepath)
content = paste.get_p_content()
- signal.alarm(max_execution_time)
+ #signal.alarm(max_execution_time)
try:
releases = set(re.findall(regex, content))
if len(releases) == 0:
@@ -61,7 +62,8 @@ if __name__ == "__main__":
publisher.info(to_print)
except TimeoutException:
- print ("{0} processing timeout".format(paste.p_path))
- continue
+ p.incr_module_timeout_statistic()
+ print ("{0} processing timeout".format(paste.p_path))
+ continue
else:
signal.alarm(0)
diff --git a/bin/SentimentAnalysis.py b/bin/SentimentAnalysis.py
index 34beea3f..8442befa 100755
--- a/bin/SentimentAnalysis.py
+++ b/bin/SentimentAnalysis.py
@@ -167,6 +167,7 @@ if __name__ == '__main__':
try:
Analyse(message, server)
except TimeoutException:
+ p.incr_module_timeout_statistic()
print ("{0} processing timeout".format(message))
continue
else:
diff --git a/bin/Tags.py b/bin/Tags.py
index 15f8f837..0a178fef 100755
--- a/bin/Tags.py
+++ b/bin/Tags.py
@@ -11,6 +11,7 @@ This module create tags.
import redis
import time
+import datetime
from pubsublogger import publisher
from Helper import Process
@@ -41,6 +42,12 @@ if __name__ == '__main__':
db=p.config.get("ARDB_Metadata", "db"),
decode_responses=True)
+ serv_statistics = redis.StrictRedis(
+ host=p.config.get('ARDB_Statistics', 'host'),
+ port=p.config.get('ARDB_Statistics', 'port'),
+ db=p.config.get('ARDB_Statistics', 'db'),
+ decode_responses=True)
+
# Sent to the logging a description of the module
publisher.info("Tags module started")
@@ -67,4 +74,6 @@ if __name__ == '__main__':
print(" tagged: {}".format(tag))
server_metadata.sadd('tag:'+path, tag)
+ curr_date = datetime.date.today()
+ serv_statistics.hincrby(curr_date.strftime("%Y%m%d"),'paste_tagged:'+tag, 1)
p.populate_set_out(message, 'MISP_The_Hive_feeder')
diff --git a/bin/Tokenize.py b/bin/Tokenize.py
index fdefeb6a..698b4fbc 100755
--- a/bin/Tokenize.py
+++ b/bin/Tokenize.py
@@ -60,8 +60,9 @@ if __name__ == "__main__":
msg = '{} {} {}'.format(paste.p_path, word, score)
p.populate_set_out(msg)
except TimeoutException:
- print ("{0} processing timeout".format(paste.p_path))
- continue
+ p.incr_module_timeout_statistic()
+ print ("{0} processing timeout".format(paste.p_path))
+ continue
else:
signal.alarm(0)
else:
diff --git a/bin/Web.py b/bin/Web.py
index 45e5bfbe..3d53e306 100755
--- a/bin/Web.py
+++ b/bin/Web.py
@@ -124,6 +124,7 @@ if __name__ == "__main__":
except ipaddress.AddressValueError:
continue
cc = getattr(l, 'cc')
+ asn = ''
if getattr(l, 'asn') is not None:
asn = getattr(l, 'asn')[2:] #remobe b'
diff --git a/bin/feeder/pystemon-feeder.py b/bin/feeder/pystemon-feeder.py
index 50ffaeba..a59a0a5b 100755
--- a/bin/feeder/pystemon-feeder.py
+++ b/bin/feeder/pystemon-feeder.py
@@ -62,12 +62,13 @@ while True:
print(paste)
if paste is None:
continue
- socket.send("%d %s" % (topic, paste))
+ socket.send_string("%d %s" % (topic, paste))
topic = 102
try:
- messagedata = open(pystemonpath+paste).read()
- socket.send("%d %s %s" % (topic, paste, base64.b64encode(messagedata)))
- sleep_inc = sleep_inc-0.01 if sleep_inc-0.01 > 0 else 0
+ with open(pystemonpath+paste, 'rb') as f: #.read()
+ messagedata = f.read()
+ socket.send_string("%d %s %s" % (topic, paste, base64.b64encode(messagedata).decode()))
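+            # single text frame: '<topic> <paste filename> <base64(paste content)>'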
+ sleep_inc = sleep_inc-0.01 if sleep_inc-0.01 > 0 else 0
except IOError as e:
# file not found, could be a buffering issue -> increase sleeping time
print('IOError: Increasing sleep time')
diff --git a/bin/packages/Paste.py b/bin/packages/Paste.py
index d1e3f0d3..6942cb31 100755
--- a/bin/packages/Paste.py
+++ b/bin/packages/Paste.py
@@ -52,7 +52,7 @@ class Paste(object):
:Example:
- PST = Paste("/home/2013/ZEeGaez5.gz")
+ PST = Paste("/home/2013/01/12/ZEeGaez5.gz")
"""
diff --git a/bin/packages/config.cfg.sample b/bin/packages/config.cfg.sample
index 2ed662c1..eead0357 100644
--- a/bin/packages/config.cfg.sample
+++ b/bin/packages/config.cfg.sample
@@ -2,6 +2,7 @@
bloomfilters = Blooms
dicofilters = Dicos
pastes = PASTES
+hash = HASHS
base64 = BASE64
wordtrending_csv = var/www/static/csv/wordstrendingdata
@@ -31,6 +32,8 @@ sender_port = 1337
##### Flask #####
[Flask]
+#Number of logs to display in the dashboard
+max_dashboard_logs = 15
#Maximum number of character to display in the toolip
max_preview_char = 250
#Maximum number of character to display in the modal
@@ -65,6 +68,14 @@ max_execution_time = 90
path = Base64/
max_execution_time = 60
+[Binary]
+path = Base64/
+max_execution_time = 60
+
+[Hex]
+path = Base64/
+max_execution_time = 60
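+# max_execution_time in [Binary] and [Hex] is used by bin/Decoder.py as the per-decoder regex timeout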
+
[Modules_Duplicates]
#Number of month to look back
maximum_month_range = 3
diff --git a/bin/packages/modules.cfg b/bin/packages/modules.cfg
index 452850f7..8cd8f570 100644
--- a/bin/packages/modules.cfg
+++ b/bin/packages/modules.cfg
@@ -125,7 +125,7 @@ publish = Redis_Duplicate,Redis_alertHandler,Redis_Tags
subscribe = Redis_ApiKey
publish = Redis_Duplicate,Redis_alertHandler,Redis_Tags
-[Base64]
+[Decoder]
subscribe = Redis_Global
publish = Redis_Duplicate,Redis_alertHandler,Redis_Tags
diff --git a/bin/submit_paste.py b/bin/submit_paste.py
index 5f18fd6a..a999ec39 100755
--- a/bin/submit_paste.py
+++ b/bin/submit_paste.py
@@ -40,6 +40,9 @@ def create_paste(uuid, paste_content, ltags, ltagsgalaxies, name):
relay_message = "{0} {1}".format(save_path, gzip64encoded)
p.populate_set_out(relay_message, 'Mixer')
+ # increase nb of paste by feeder name
+ r_serv_log_submit.hincrby("mixer_cache:list_feeder", "submitted", 1)
+
# add tags
add_tags(ltags, ltagsgalaxies, full_path)
@@ -52,6 +55,9 @@ def create_paste(uuid, paste_content, ltags, ltagsgalaxies, name):
print(' {} send to Global'.format(save_path))
r_serv_log_submit.sadd(uuid + ':paste_submit_link', full_path)
+ curr_date = datetime.date.today()
+ serv_statistics.hincrby(curr_date.strftime("%Y%m%d"),'submit_paste', 1)
+
return 0
def addError(uuid, errorMessage):
@@ -64,6 +70,8 @@ def addError(uuid, errorMessage):
def abord_file_submission(uuid, errorMessage):
addError(uuid, errorMessage)
r_serv_log_submit.set(uuid + ':end', 1)
+ curr_date = datetime.date.today()
+ serv_statistics.hincrby(curr_date.strftime("%Y%m%d"),'submit_abord', 1)
remove_submit_uuid(uuid)
@@ -158,6 +166,12 @@ if __name__ == "__main__":
db=cfg.getint("ARDB_Metadata", "db"),
decode_responses=True)
+ serv_statistics = redis.StrictRedis(
+ host=cfg.get('ARDB_Statistics', 'host'),
+ port=cfg.getint('ARDB_Statistics', 'port'),
+ db=cfg.getint('ARDB_Statistics', 'db'),
+ decode_responses=True)
+
expire_time = 120
MAX_FILE_SIZE = 1000000000
ALLOWED_EXTENSIONS = ['txt', 'sh', 'pdf']
diff --git a/doc/screenshots/ail-hashedfiles.png b/doc/screenshots/ail-hashedfiles.png
new file mode 100644
index 00000000..4589d429
Binary files /dev/null and b/doc/screenshots/ail-hashedfiles.png differ
diff --git a/doc/screenshots/hashedfile-graph.png b/doc/screenshots/hashedfile-graph.png
new file mode 100644
index 00000000..01f365d8
Binary files /dev/null and b/doc/screenshots/hashedfile-graph.png differ
diff --git a/var/www/modules/Flask_config.py b/var/www/modules/Flask_config.py
index 2c3e736a..256ea3a8 100644
--- a/var/www/modules/Flask_config.py
+++ b/var/www/modules/Flask_config.py
@@ -145,3 +145,18 @@ bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info']
UPLOAD_FOLDER = os.path.join(os.environ['AIL_FLASK'], 'submitted')
max_dashboard_logs = int(cfg.get("Flask", "max_dashboard_logs"))
+
+# VT
+try:
+ from virusTotalKEYS import vt_key
+ if vt_key != '':
+ vt_auth = vt_key
+ vt_enabled = True
+ print('VT submission is enabled')
+ else:
+ vt_enabled = False
+ print('VT submission is disabled')
+except:
+ vt_auth = {'apikey': cfg.get("Flask", "max_preview_char")}
+ vt_enabled = False
+ print('VT submission is disabled')
diff --git a/var/www/modules/PasteSubmit/templates/PasteSubmit.html b/var/www/modules/PasteSubmit/templates/PasteSubmit.html
index ce1fb29f..a0636332 100644
--- a/var/www/modules/PasteSubmit/templates/PasteSubmit.html
+++ b/var/www/modules/PasteSubmit/templates/PasteSubmit.html
@@ -5,7 +5,8 @@
- Analysis Information Leak framework Dashboard
+ Submit Paste - AIL
+
@@ -31,6 +32,8 @@