Improve Duplicate + tlsh + add timeout handler + debug + clean

Terrtia 2018-05-02 17:07:10 +02:00
parent 92977201fd
commit c52caebe7c
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
20 changed files with 133 additions and 64 deletions

View file

@@ -32,17 +32,8 @@ def search_api_key(message):
aws_secret_key = regex_aws_secret_key.findall(content)
google_api_key = regex_google_api_key.findall(content)
print(aws_access_key)
print(aws_secret_key)
print(google_api_key)
if(len(aws_access_key) > 0 or len(aws_secret_key) > 0 or len(google_api_key) > 0):
print('-------------------------------')
print(aws_access_key)
print(aws_secret_key)
print(google_api_key)
to_print = 'ApiKey;{};{};{};'.format(
paste.p_source, paste.p_date, paste.p_name)
if(len(google_api_key) > 0):
@@ -99,5 +90,4 @@ if __name__ == "__main__":
else:
publisher.debug("Script ApiKey is Idling 10s")
#print('Sleeping')
time.sleep(10)

View file

@@ -20,6 +20,16 @@ from hashlib import sha1
import magic
import json
import signal
class TimeoutException(Exception):
pass
def timeout_handler(signum, frame):
raise TimeoutException
signal.signal(signal.SIGALRM, timeout_handler)
def search_base64(content, message):
find = False
@@ -88,6 +98,7 @@ if __name__ == '__main__':
# Setup the I/O queues
p = Process(config_section)
max_execution_time = p.config.getint("Base64", "max_execution_time")
# Send a description of the module to the logger
publisher.info("Base64 started")
@@ -105,14 +116,21 @@ if __name__ == '__main__':
time.sleep(1)
continue
# Do something with the message from the queue
filename = message
paste = Paste.Paste(filename)
content = paste.get_p_content()
#print(filename)
search_base64(content,message)
signal.alarm(max_execution_time)
try:
# Do something with the message from the queue
#print(filename)
content = paste.get_p_content()
search_base64(content,message)
# (Optional) Send that thing to the next queue
#p.populate_set_out(something_has_been_done)
# (Optional) Send that thing to the next queue
#p.populate_set_out(something_has_been_done)
except TimeoutException:
print ("{0} processing timeout".format(paste.p_path))
continue
else:
signal.alarm(0)
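
The pattern added above is the standard Unix way to bound the execution time of a blocking call: register a SIGALRM handler that raises a custom exception, arm the alarm before the slow call, and disarm it afterwards. A minimal, self-contained sketch of the same idea (the wrapper function and the 60-second default are illustrative, not code from the commit):

import signal

class TimeoutException(Exception):
    pass

def timeout_handler(signum, frame):
    raise TimeoutException

# SIGALRM can only be caught in the main thread
signal.signal(signal.SIGALRM, timeout_handler)

def run_with_timeout(func, arg, max_execution_time=60):
    signal.alarm(max_execution_time)  # arm: SIGALRM fires after N seconds
    try:
        return func(arg)
    except TimeoutException:
        print("{0} processing timeout".format(arg))
        return None
    finally:
        signal.alarm(0)  # disarm so a pending alarm cannot fire later

The commit disarms the alarm in an else: branch, which only runs on success; a finally: as above also clears it on the timeout path, which avoids any stray alarm when the loop continues.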

View file

@@ -74,18 +74,9 @@ if __name__ == "__main__":
filepath, count = message.split(' ')
#if count < minTopPassList:
# Less than 5 matches from the top password list, false positive.
#print("false positive:", count)
#continue
paste = Paste.Paste(filepath)
content = paste.get_p_content()
creds = set(re.findall(regex_cred, content))
print(len(creds))
print(creds)
print(content)
print('-----')
publisher.warning('to_print')

View file

@@ -24,10 +24,11 @@ def main():
config_section = 'DomClassifier'
p = Process(config_section)
addr_dns = p.config.get("DomClassifier", "dns")
publisher.info("""ZMQ DomainClassifier is Running""")
c = DomainClassifier.domainclassifier.Extract(rawtext="")
c = DomainClassifier.domainclassifier.Extract(rawtext="", nameservers=[addr_dns])
cc = p.config.get("DomClassifier", "cc")
cc_tld = p.config.get("DomClassifier", "cc_tld")
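
DomainClassifier accepts the resolver list directly, so the module now validates domains against the DNS server taken from the config instead of the library default. A short sketch of the wiring (the IP is the illustrative default this commit adds to the config file):

import DomainClassifier.domainclassifier

addr_dns = '8.8.8.8'  # normally p.config.get("DomClassifier", "dns")
c = DomainClassifier.domainclassifier.Extract(rawtext="",
                                              nameservers=[addr_dns])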

View file

@@ -62,7 +62,7 @@ if __name__ == "__main__":
while True:
try:
hash_dico = {}
dupl = []
dupl = set()
dico_range_list = []
x = time.time()
@@ -124,6 +124,8 @@ if __name__ == "__main__":
percent = 100-ssdeep.compare(dico_hash, paste_hash)
else:
percent = tlsh.diffxlen(dico_hash, paste_hash)
if percent > 100:
percent = 100
threshold_duplicate = threshold_set[hash_type]
if percent < threshold_duplicate:
@@ -163,14 +165,16 @@ if __name__ == "__main__":
if len(hash_dico) != 0:
# paste_tuple = (hash_type, date, paste_path, percent)
for dico_hash, paste_tuple in hash_dico.items():
dupl.append(paste_tuple)
dupl.add(paste_tuple)
# Creating the object attribute and save it.
to_print = 'Duplicate;{};{};{};'.format(
PST.p_source, PST.p_date, PST.p_name)
if dupl:
dupl = list(dupl)
PST.__setattr__("p_duplicate", dupl)
PST.save_attribute_redis("p_duplicate", dupl)
PST.save_others_pastes_attribute_duplicate("p_duplicate", dupl)
publisher.info('{}Detected {};{}'.format(to_print, len(dupl), PST.p_path))
print('{}Detected {}'.format(to_print, len(dupl)))
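
Two fixes interact here. First, ssdeep.compare returns a 0-100 similarity score while tlsh.diffxlen returns an open-ended distance (0 = identical), so the tlsh value must be capped at 100 before it is treated as a percentage. Second, turning dupl into a set deduplicates paste tuples before they are saved. A sketch of the comparison step under those assumptions (package names assumed: ssdeep and python-tlsh):

import ssdeep
import tlsh

def distance_percent(hash_type, dico_hash, paste_hash):
    if hash_type == 'ssdeep':
        # ssdeep gives similarity (100 = identical); invert into a distance
        percent = 100 - ssdeep.compare(dico_hash, paste_hash)
    else:
        # tlsh gives an open-ended distance; cap it at 100
        percent = tlsh.diffxlen(dico_hash, paste_hash)
    return min(percent, 100)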

View file

@@ -29,8 +29,9 @@ from Helper import Process
import magic
import io
import gzip
#import gzip
'''
def gunzip_bytes_obj(bytes_obj):
in_ = io.BytesIO()
in_.write(bytes_obj)
@@ -38,7 +39,7 @@ def gunzip_bytes_obj(bytes_obj):
with gzip.GzipFile(fileobj=in_, mode='rb') as fo:
gunzipped_bytes_obj = fo.read()
return gunzipped_bytes_obj.decode()
return gunzipped_bytes_obj.decode()'''
if __name__ == '__main__':
publisher.port = 6380
@@ -80,7 +81,7 @@ if __name__ == '__main__':
# Creating the full filepath
filename = os.path.join(os.environ['AIL_HOME'],
p.config.get("Directories", "pastes"), paste)
dirname = os.path.dirname(filename)
if not os.path.exists(dirname):
os.makedirs(dirname)
@@ -89,7 +90,7 @@ if __name__ == '__main__':
with open(filename, 'wb') as f:
f.write(decoded)
try:
'''try:
decoded2 = gunzip_bytes_obj(decoded)
except:
decoded2 =''
@@ -101,8 +102,7 @@ if __name__ == '__main__':
print('-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------')
print(filename)
print(type)
print(decoded2)
print('-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------')
'''
p.populate_set_out(filename)
processed_paste+=1

View file

@@ -27,6 +27,7 @@ def search_key(paste):
find = False
if '-----BEGIN PGP MESSAGE-----' in content:
publisher.warning('{} has a PGP enc message'.format(paste.p_name))
find = True
if '-----BEGIN CERTIFICATE-----' in content:
@@ -35,18 +36,27 @@ def search_key(paste):
if '-----BEGIN RSA PRIVATE KEY-----' in content:
publisher.warning('{} has a RSA private key message'.format(paste.p_name))
print('rsa private key message found')
find = True
if '-----BEGIN PRIVATE KEY-----' in content:
publisher.warning('{} has a private key message'.format(paste.p_name))
print('private key message found')
find = True
if '-----BEGIN ENCRYPTED PRIVATE KEY-----' in content:
publisher.warning('{} has an encrypted private key message'.format(paste.p_name))
print('encrypted private key message found')
find = True
if '-----BEGIN OPENSSH PRIVATE KEY-----' in content:
publisher.warning('{} has an openssh private key message'.format(paste.p_name))
print('openssh private key message found')
find = True
if '-----BEGIN OpenVPN Static key V1-----' in content:
publisher.warning('{} has an OpenVPN static key message'.format(paste.p_name))
print('OpenVPN Static key message found')
find = True
if '-----BEGIN DSA PRIVATE KEY-----' in content:
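
As the growing chain of marker checks above suggests, the detection is plain substring matching on well-known PEM headers, so it could also be written table-driven. The following is an editorial sketch, not code from the commit (marker strings are taken from the diff; publisher is the pubsublogger instance the module already uses):

from pubsublogger import publisher

PEM_MARKERS = {
    '-----BEGIN PGP MESSAGE-----': 'PGP enc message',
    '-----BEGIN RSA PRIVATE KEY-----': 'RSA private key message',
    '-----BEGIN PRIVATE KEY-----': 'private key message',
    '-----BEGIN ENCRYPTED PRIVATE KEY-----': 'encrypted private key message',
    '-----BEGIN OPENSSH PRIVATE KEY-----': 'openssh private key message',
    '-----BEGIN OpenVPN Static key V1-----': 'OpenVPN static key message',
}

def search_key(paste):
    content = paste.get_p_content()
    find = False
    for marker, label in PEM_MARKERS.items():
        if marker in content:
            publisher.warning('{} has a {}'.format(paste.p_name, label))
            find = True
    return find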

View file

@@ -130,7 +130,7 @@ function launching_scripts {
sleep 0.1
screen -S "Script_AIL" -X screen -t "Lines" bash -c 'python3 Lines.py; read x'
sleep 0.1
#screen -S "Script_AIL" -X screen -t "DomClassifier" bash -c './DomClassifier.py; read x'
screen -S "Script_AIL" -X screen -t "DomClassifier" bash -c './DomClassifier.py; read x'
sleep 0.1
screen -S "Script_AIL" -X screen -t "Categ" bash -c 'python3 Categ.py; read x'
sleep 0.1

View file

@@ -28,6 +28,7 @@ if __name__ == "__main__":
config_section = 'Mail'
p = Process(config_section)
addr_dns = p.config.get("Mail", "dns")
# REDIS #
r_serv2 = redis.StrictRedis(
@@ -56,7 +57,7 @@ if __name__ == "__main__":
if prec_filename is None or filename != prec_filename:
PST = Paste.Paste(filename)
MX_values = lib_refine.checking_MX_record(
r_serv2, PST.get_regex(email_regex))
r_serv2, PST.get_regex(email_regex), addr_dns)
if MX_values[0] >= 1:

View file

@@ -29,6 +29,7 @@ def get_date_range(num_day):
def compute_most_posted(server, message):
print(message)
module, num, keyword, paste_date = message.split(';')
redis_progression_name_set = 'top_'+ module +'_set_' + paste_date

View file

@@ -617,7 +617,9 @@ def fetchQueueData():
for moduleNum in server.smembers(keySet):
moduleNum = moduleNum.decode('utf8')
value = ( server.get(key + str(moduleNum)) ).decode('utf8')
complete_paste_path = ( server.get(key + str(moduleNum) + "_PATH") ).decode('utf8')
complete_paste_path = ( server.get(key + str(moduleNum) + "_PATH") )
if(complete_paste_path is not None):
complete_paste_path = complete_paste_path.decode('utf8')
COMPLETE_PASTE_PATH_PER_PID[moduleNum] = complete_paste_path
if value is not None:
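
The fix above is the usual redis-py defensive pattern: GET returns None for a missing key, so the bytes must only be decoded when a value actually came back. The same guard as a one-expression sketch:

raw = server.get(key + str(moduleNum) + "_PATH")
complete_paste_path = raw.decode('utf8') if raw is not None else None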

View file

@@ -76,9 +76,6 @@ if __name__ == "__main__":
dico_regex, dico_regexname_to_redis = refresh_dicos()
print('dico got refreshed')
print(dico_regex)
print(dico_regexname_to_redis)
filename = message
temp = filename.split('/')
timestamp = calendar.timegm((int(temp[-4]), int(temp[-3]), int(temp[-2]), 0, 0, 0))

View file

@@ -6,6 +6,16 @@ from pubsublogger import publisher
from Helper import Process
import re
import signal
class TimeoutException(Exception):
pass
def timeout_handler(signum, frame):
raise TimeoutException
signal.signal(signal.SIGALRM, timeout_handler)
'''
This module takes its input from the global module.
It applies some regex and publish matched content
@@ -16,6 +26,7 @@ if __name__ == "__main__":
publisher.channel = "Script"
config_section = "Release"
p = Process(config_section)
max_execution_time = p.config.getint("Curve", "max_execution_time")
publisher.info("Release scripts to find release names")
movie = "[a-zA-Z0-9.]+\.[0-9]{4}.[a-zA-Z0-9.]+\-[a-zA-Z]+"
@@ -35,13 +46,22 @@ if __name__ == "__main__":
paste = Paste.Paste(filepath)
content = paste.get_p_content()
releases = set(re.findall(regex, content))
if len(releases) == 0:
continue
to_print = 'Release;{};{};{};{} releases;{}'.format(paste.p_source, paste.p_date, paste.p_name, len(releases), paste.p_path)
print(to_print)
if len(releases) > 30:
publisher.warning(to_print)
signal.alarm(max_execution_time)
try:
releases = set(re.findall(regex, content))
if len(releases) == 0:
continue
to_print = 'Release;{};{};{};{} releases;{}'.format(paste.p_source, paste.p_date, paste.p_name, len(releases), paste.p_path)
print(to_print)
if len(releases) > 30:
publisher.warning(to_print)
else:
publisher.info(to_print)
except TimeoutException:
print ("{0} processing timeout".format(paste.p_path))
continue
else:
publisher.info(to_print)
signal.alarm(0)

View file

@@ -329,6 +329,27 @@ class Paste(object):
else:
self.store.hset(self.p_path, attr_name, json.dumps(value))
def save_others_pastes_attribute_duplicate(self, attr_name, list_value):
"""
Save a new duplicate on others pastes
"""
for hash_type, path, percent, date in list_value:
print(hash_type, path, percent, date)
#get json
json_duplicate = self.store.hget(path, attr_name)
#json save on redis
if json_duplicate is not None:
list_duplicate = json.loads(json_duplicate.decode('utf8'))
# add new duplicate
list_duplicate.append([hash_type, self.p_path, percent, date])
self.store.hset(path, attr_name, json.dumps(list_duplicate))
else:
# create the new list
list_duplicate = [[hash_type, self.p_path, percent, date]]
self.store.hset(path, attr_name, json.dumps(list_duplicate))
def _get_from_redis(self, r_serv):
ans = {}
for hash_name, the_hash in self.p_hash:
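
The new save_others_pastes_attribute_duplicate makes duplicate links bidirectional: when the current paste records another paste as a duplicate, that paste's own p_duplicate field receives a back-reference. The field is a JSON-encoded list stored in a Redis hash, so the update is a read-modify-write. A standalone sketch of that pattern (connection parameters are illustrative):

import json
import redis

store = redis.StrictRedis(host='localhost', port=6379, db=0)  # illustrative

def append_duplicate(store, path, entry, attr_name='p_duplicate'):
    raw = store.hget(path, attr_name)  # JSON-encoded list, or None
    duplicates = json.loads(raw.decode('utf8')) if raw is not None else []
    duplicates.append(entry)  # e.g. [hash_type, p_path, percent, date]
    store.hset(path, attr_name, json.dumps(duplicates))

Note that this HGET/HSET read-modify-write is not atomic; if two modules updated the same paste concurrently, one write could be lost (a WATCH/MULTI transaction would close that window).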

View file

@@ -54,6 +54,10 @@ criticalNumberToAlert=8
#Will be considered as false positive if less than X matches from the top password list
minTopPassList=5
[Base64]
path = Base64/
max_execution_time = 60
[Modules_Duplicates]
#Number of month to look back
maximum_month_range = 3
@@ -145,6 +149,10 @@ cc_critical = DE
[DomClassifier]
cc = DE
cc_tld = r'\.de$'
dns = 8.8.8.8
[Mail]
dns = 8.8.8.8
# Indexer configuration
[Indexer]

View file

@@ -17,19 +17,18 @@ def is_luhn_valid(card_number):
return (sum(r[0::2]) + sum(sum(divmod(d*2, 10)) for d in r[1::2])) % 10 == 0
def checking_MX_record(r_serv, adress_set):
def checking_MX_record(r_serv, adress_set, addr_dns):
"""Check if emails MX domains are responding.
:param r_serv: -- Redis connexion database
:param adress_set: -- (set) This is a set of email addresses
:param addr_dns: -- (str) The DNS server address to query
:return: (int) Number of addresses with a responding and valid MX domain
This function will split the email addresses and try to resolve their domain
names: for example@gmail.com it will try to resolve gmail.com
"""
print('mails:')
print(adress_set)
#remove duplicate
adress_set = list(set(adress_set))
@@ -40,7 +39,7 @@ def checking_MX_record(r_serv, adress_set):
# Transforming the set into a string
MXdomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", str(adress_set).lower())
resolver = dns.resolver.Resolver()
resolver.nameservers = ['149.13.33.69']
resolver.nameservers = [addr_dns]
resolver.timeout = 5
resolver.lifetime = 2
if MXdomains != []:
@@ -64,21 +63,27 @@ def checking_MX_record(r_serv, adress_set):
except dns.resolver.NoNameservers:
publisher.debug('NoNameserver, No non-broken nameservers are available to answer the query.')
print('NoNameserver, No non-broken nameservers are available to answer the query.')
except dns.resolver.NoAnswer:
publisher.debug('NoAnswer, The response did not contain an answer to the question.')
print('NoAnswer, The response did not contain an answer to the question.')
except dns.name.EmptyLabel:
publisher.debug('SyntaxError: EmptyLabel')
print('SyntaxError: EmptyLabel')
except dns.resolver.NXDOMAIN:
r_serv.setex(MXdomain[1:], 1, timedelta(days=1))
publisher.debug('The query name does not exist.')
print('The query name does not exist.')
except dns.name.LabelTooLong:
publisher.debug('The Label is too long')
print('The Label is too long')
except dns.resolver.Timeout:
print('timeout')
r_serv.setex(MXdomain[1:], 1, timedelta(days=1))
except Exception as e:
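
Replacing the hard-coded nameserver 149.13.33.69 with the configured addr_dns makes the resolver deployment-specific. A minimal dnspython sketch of an MX lookup against an explicit nameserver, mirroring the resolver settings above (domain and server values are illustrative; the dnspython 1.x resolver.query API is assumed, as in the code of this era):

import dns.resolver

def has_mx(domain, addr_dns='8.8.8.8'):
    resolver = dns.resolver.Resolver()
    resolver.nameservers = [addr_dns]  # query this server, not the system default
    resolver.timeout = 5
    resolver.lifetime = 2
    try:
        return len(list(resolver.query(domain, 'MX'))) >= 1
    except (dns.resolver.NXDOMAIN, dns.resolver.NoAnswer,
            dns.resolver.NoNameservers, dns.resolver.Timeout):
        return False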

View file

@@ -81,7 +81,7 @@ def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month
to keep the timeline of the curve correct.
"""
threshold = 50
threshold = 30
first_day = date(year, month, 1)
last_day = date(year, month, calendar.monthrange(year, month)[1])
words = []
@@ -135,6 +135,7 @@ def create_curve_from_redis_set(server, csvfilename, set_to_plot, year, month):
redis_set_name = set_to_plot + "_set_" + str(year) + str(month).zfill(2)
words = list(server.smembers(redis_set_name))
words = [x.decode('utf-8') for x in words]
headers = ['Date'] + words
with open(csvfilename+'.csv', 'w') as f:
@@ -153,5 +154,5 @@ def create_curve_from_redis_set(server, csvfilename, set_to_plot, year, month):
row.append(0)
else:
# if the word have a value for the day
row.append(value)
row.append(value.decode('utf8'))
writer.writerow(row)

View file

@@ -62,5 +62,4 @@ r_serv_pasteName = redis.StrictRedis(
max_preview_char = int(cfg.get("Flask", "max_preview_char")) # Maximum number of character to display in the tooltip
max_preview_modal = int(cfg.get("Flask", "max_preview_modal")) # Maximum number of character to display in the modal
tlsh_to_percent = 1000.0 #Use to display the estimated percentage instead of a raw value
DiffMaxLineLength = int(cfg.get("Flask", "DiffMaxLineLength")) # Maximum line length to process when computing a diff between two pastes

View file

@@ -20,7 +20,6 @@ cfg = Flask_config.cfg
r_serv_pasteName = Flask_config.r_serv_pasteName
max_preview_char = Flask_config.max_preview_char
max_preview_modal = Flask_config.max_preview_modal
tlsh_to_percent = Flask_config.tlsh_to_percent
DiffMaxLineLength = Flask_config.DiffMaxLineLength
showsavedpastes = Blueprint('showsavedpastes', __name__, template_folder='templates')
@@ -48,8 +47,10 @@ def showpaste(content_range):
for dup_list in p_duplicate_full_list:
if dup_list[0] == "tlsh":
dup_list[2] = int(((tlsh_to_percent - float(dup_list[2])) / tlsh_to_percent)*100)
dup_list[2] = 100 - int(dup_list[2])
else:
print('dup_list')
print(dup_list)
dup_list[2] = int(dup_list[2])
#p_duplicate_full_list.sort(lambda x,y: cmp(x[2], y[2]), reverse=True)
@@ -64,12 +65,13 @@ def showpaste(content_range):
hash_types = []
comp_vals = []
for i in indices:
hash_types.append(p_duplicate_full_list[i][0].encode('utf8'))
hash_types.append(p_duplicate_full_list[i][0])
comp_vals.append(p_duplicate_full_list[i][2])
dup_list_removed.append(i)
hash_types = str(hash_types).replace("[","").replace("]","") if len(hash_types)==1 else str(hash_types)
comp_vals = str(comp_vals).replace("[","").replace("]","") if len(comp_vals)==1 else str(comp_vals)
if len(p_duplicate_full_list[dup_list_index]) > 3:
try:
date_paste = str(int(p_duplicate_full_list[dup_list_index][3]))
@@ -91,7 +93,6 @@ def showpaste(content_range):
if content_range != 0:
p_content = p_content[0:content_range]
return render_template("show_saved_paste.html", date=p_date, source=p_source, encoding=p_encoding, language=p_language, size=p_size, mime=p_mime, lineinfo=p_lineinfo, content=p_content, initsize=len(p_content), duplicate_list = p_duplicate_list, simil_list = p_simil_list, hashtype_list = p_hashtype_list, date_list=p_date_list)
# ============ ROUTES ============
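
The display change follows from the Duplicate module change above: previously the stored tlsh value was raw and the template rescaled it with tlsh_to_percent = 1000.0; now the module stores a distance already capped at 100, so the view only needs its complement. A one-line sketch of the new conversion:

def tlsh_similarity(stored_distance):
    # distance is capped at 100 by the Duplicate module, so this stays in 0-100
    return 100 - int(stored_distance)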

View file

@@ -69,13 +69,12 @@
<tbody>
{% for dup_path in duplicate_list %}
<tr>
<td>{{ hashtype_list[i] }}</td>
<td>Similarity: {{ simil_list[i] }}%</td>
<td>{{ date_list[i] }}</td>
<td>{{ hashtype_list[loop.index - 1] }}</td>
<td>Similarity: {{ simil_list[loop.index - 1] }}%</td>
<td>{{ date_list[loop.index - 1] }}</td>
<td><a target="_blank" href="{{ url_for('showsavedpastes.showsavedpaste') }}?paste={{ dup_path }}" id='dup_path'>{{ dup_path }}</a></td>
<td><a target="_blank" href="{{ url_for('showsavedpastes.showDiff') }}?s1={{ request.args.get('paste') }}&s2={{ dup_path }}" class="fa fa-columns" title="Show differences"></a></td>
</tr>
{% set i = i + 1 %}
{% endfor %}
</tbody>
</table>
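
The template fix works because Jinja2's {% set %} inside a for loop does not carry across iterations (each pass re-enters the scope), so the manual counter i never advanced and every row showed the same list entry; loop.index is the built-in 1-based iteration counter. A small rendering sketch (values are illustrative):

from jinja2 import Template

tmpl = Template(
    "{% for dup_path in duplicate_list %}"
    "{{ hashtype_list[loop.index - 1] }};"
    "{{ simil_list[loop.index - 1] }}%;"
    "{{ dup_path }}\n"
    "{% endfor %}")

print(tmpl.render(duplicate_list=['paste_a', 'paste_b'],
                  hashtype_list=['ssdeep', 'tlsh'],
                  simil_list=[85, 92]))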