mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-27 00:07:16 +00:00
Merge branch 'duplicate-module' into uptodate-duplicate-module
This commit is contained in:
commit
b685f81752
16 changed files with 420 additions and 7 deletions
|
@ -50,6 +50,8 @@ if __name__ == "__main__":
|
||||||
if len(creds) > critical:
|
if len(creds) > critical:
|
||||||
print("========> Found more than 10 credentials in this file : {}".format(filepath))
|
print("========> Found more than 10 credentials in this file : {}".format(filepath))
|
||||||
publisher.warning(to_print)
|
publisher.warning(to_print)
|
||||||
|
#Send to duplicate
|
||||||
|
p.populate_set_out(filepath)
|
||||||
if sites:
|
if sites:
|
||||||
print("=======> Probably on : {}".format(', '.join(sites)))
|
print("=======> Probably on : {}".format(', '.join(sites)))
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -65,6 +65,8 @@ if __name__ == "__main__":
|
||||||
if (len(creditcard_set) > 0):
|
if (len(creditcard_set) > 0):
|
||||||
publisher.warning('{}Checked {} valid number(s)'.format(
|
publisher.warning('{}Checked {} valid number(s)'.format(
|
||||||
to_print, len(creditcard_set)))
|
to_print, len(creditcard_set)))
|
||||||
|
#Send to duplicate
|
||||||
|
p.populate_set_out(filename)
|
||||||
else:
|
else:
|
||||||
publisher.info('{}CreditCard related'.format(to_print))
|
publisher.info('{}CreditCard related'.format(to_print))
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -74,9 +74,9 @@ if __name__ == "__main__":
|
||||||
# Creating the bloom filter name: bloomyyyymm
|
# Creating the bloom filter name: bloomyyyymm
|
||||||
filebloompath = os.path.join(bloompath, 'bloom' + PST.p_date.year +
|
filebloompath = os.path.join(bloompath, 'bloom' + PST.p_date.year +
|
||||||
PST.p_date.month)
|
PST.p_date.month)
|
||||||
|
|
||||||
if os.path.exists(filebloompath):
|
if os.path.exists(filebloompath):
|
||||||
bloom = BloomFilter.open(filebloompath)
|
bloom = BloomFilter.open(filebloompath)
|
||||||
|
bloop_path_set.add(filebloompath)
|
||||||
else:
|
else:
|
||||||
bloom = BloomFilter(100000000, 0.01, filebloompath)
|
bloom = BloomFilter(100000000, 0.01, filebloompath)
|
||||||
bloop_path_set.add(filebloompath)
|
bloop_path_set.add(filebloompath)
|
||||||
|
@ -94,7 +94,6 @@ if __name__ == "__main__":
|
||||||
for bloo in bloop_path_set:
|
for bloo in bloop_path_set:
|
||||||
# Opening blooms
|
# Opening blooms
|
||||||
opened_bloom.append(BloomFilter.open(bloo))
|
opened_bloom.append(BloomFilter.open(bloo))
|
||||||
|
|
||||||
# For each hash of the paste
|
# For each hash of the paste
|
||||||
for line_hash in PST._get_hash_lines(min=5, start=1, jump=0):
|
for line_hash in PST._get_hash_lines(min=5, start=1, jump=0):
|
||||||
nb_hash_current += 1
|
nb_hash_current += 1
|
||||||
|
@ -105,7 +104,6 @@ if __name__ == "__main__":
|
||||||
r_serv1.sadd("HASHS", line_hash)
|
r_serv1.sadd("HASHS", line_hash)
|
||||||
# Adding the hash in the bloom of the month
|
# Adding the hash in the bloom of the month
|
||||||
bloom.add(line_hash)
|
bloom.add(line_hash)
|
||||||
|
|
||||||
# Go throught the Database of the bloom filter (of the month)
|
# Go throught the Database of the bloom filter (of the month)
|
||||||
for bloo in opened_bloom:
|
for bloo in opened_bloom:
|
||||||
if line_hash in bloo:
|
if line_hash in bloo:
|
||||||
|
@ -148,6 +146,8 @@ if __name__ == "__main__":
|
||||||
percentage = round((count/float(nb_hash_current))*100, 2)
|
percentage = round((count/float(nb_hash_current))*100, 2)
|
||||||
if percentage >= 50:
|
if percentage >= 50:
|
||||||
dupl.append((paste, percentage))
|
dupl.append((paste, percentage))
|
||||||
|
else:
|
||||||
|
print 'percentage: ' + str(percentage)
|
||||||
|
|
||||||
# Creating the object attribute and save it.
|
# Creating the object attribute and save it.
|
||||||
to_print = 'Duplicate;{};{};{};'.format(
|
to_print = 'Duplicate;{};{};{};'.format(
|
||||||
|
@ -156,6 +156,7 @@ if __name__ == "__main__":
|
||||||
PST.__setattr__("p_duplicate", dupl)
|
PST.__setattr__("p_duplicate", dupl)
|
||||||
PST.save_attribute_redis("p_duplicate", dupl)
|
PST.save_attribute_redis("p_duplicate", dupl)
|
||||||
publisher.info('{}Detected {}'.format(to_print, len(dupl)))
|
publisher.info('{}Detected {}'.format(to_print, len(dupl)))
|
||||||
|
print '{}Detected {}'.format(to_print, len(dupl))
|
||||||
|
|
||||||
y = time.time()
|
y = time.time()
|
||||||
|
|
||||||
|
|
182
bin/Duplicate_ssdeep.py
Executable file
182
bin/Duplicate_ssdeep.py
Executable file
|
@ -0,0 +1,182 @@
|
||||||
|
#!/usr/bin/env python2
|
||||||
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
|
"""
|
||||||
|
The Duplicate module
|
||||||
|
====================
|
||||||
|
|
||||||
|
In short, this module checks incoming pastes for duplicates.
|
||||||
|
|
||||||
|
Requirements:
|
||||||
|
-------------
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
import redis
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
import datetime
|
||||||
|
import json
|
||||||
|
import ssdeep
|
||||||
|
from packages import Paste
|
||||||
|
from pubsublogger import publisher
|
||||||
|
|
||||||
|
from Helper import Process
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Duplicate detection daemon (ssdeep, JSON-dictionary variant).
    #
    # For every paste path received from the queue, the paste's ssdeep hash is
    # compared against the hashes kept in per-month JSON dictionaries on disk.
    # Matches above the threshold are stored on the paste as "p_duplicate".
    publisher.port = 6380
    publisher.channel = "Script"

    config_section = 'Duplicates'
    save_dico_and_reload = 1  # minutes between two dumps of the dictionary
    threshold_dup = 99  # only ssdeep scores strictly above this are kept
    time_1 = time.time()
    flag_reload_from_disk = True
    flag_write_to_disk = False

    p = Process(config_section)

    # REDIS #
    # DB OBJECT & HASHS ( DISK )
    # FIXME increase flexibility
    # One connection per "yyyymm" key: the server port is the year and the db
    # number is the month; db 0 ("yyyy00") holds the yearly index counter.
    redis_host = p.config.get("Redis_Level_DB", "host")
    dico_redis = {}
    for year in xrange(2013, datetime.date.today().year+1):
        # Months 1-12 plus the index db 0 (was 0..15; 13-15 were never used).
        for month in xrange(0, 13):
            dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis(
                host=redis_host, port=year, db=month)

    # FUNCTIONS #
    publisher.info("Script duplicate started")

    dicopath = os.path.join(os.environ['AIL_HOME'],
                            p.config.get("Directories", "dicofilters"))

    dico_path_set = set()
    while True:
        try:
            hash_dico = {}

            x = time.time()

            message = p.get_from_set()
            if message is None:
                publisher.debug("Script Attribute is idling 10s")
                time.sleep(10)
                continue
            PST = Paste.Paste(message)

            PST._set_p_hash_kind("ssdeep")

            # Select the redis connection matching the paste's month
            r_serv1 = dico_redis[PST.p_date.year + PST.p_date.month]

            # Dictionary file name: dicoyyyymm
            filedicopath = os.path.join(dicopath, 'dico' + PST.p_date.year +
                                        PST.p_date.month)
            filedicopath_today = filedicopath

            # Save I/O: only schedule a dump once the delay has elapsed
            if time.time() - time_1 > save_dico_and_reload*60:
                flag_write_to_disk = True

            if os.path.exists(filedicopath):
                if flag_reload_from_disk:
                    flag_reload_from_disk = False
                    print('Reloading')
                    with open(filedicopath, 'r') as fp:
                        today_dico = json.load(fp)
            else:
                today_dico = {}
                with open(filedicopath, 'w') as fp:
                    json.dump(today_dico, fp)

            # For now, just use monthly dico
            dico_path_set.add(filedicopath)

            # UNIQUE INDEX HASHS TABLE
            yearly_index = str(datetime.date.today().year)+'00'
            r_serv0 = dico_redis[yearly_index]
            r_serv0.incr("current_index")
            index = r_serv0.get("current_index")+str(PST.p_date)

            # Open every known dictionary; the current one is already in memory
            opened_dico = []
            for dico in dico_path_set:
                if dico == filedicopath_today:
                    opened_dico.append([dico, today_dico])
                else:
                    with open(dico, 'r') as fp:
                        opened_dico.append([dico, json.load(fp)])

            # Retrieve the hash of the paste
            paste_hash = PST._get_p_hash()

            # Go through the dictionaries and compare every stored hash
            for dico_name, dico in opened_dico:
                # The last six characters of the file name are the yyyymm key
                r_serv_dico = dico_redis[dico_name[-6:]]
                for dico_hash in dico.values():
                    percent = ssdeep.compare(dico_hash, paste_hash)
                    if percent <= threshold_dup:
                        continue

                    # hash -> index -> path of the already stored paste
                    index_current = r_serv_dico.get(dico_hash)
                    paste_path = r_serv_dico.get(index_current)
                    # Nothing to record (or print) when the path is unknown;
                    # slicing None here used to raise TypeError.
                    if paste_path is not None:
                        hash_dico[dico_hash] = (paste_path, percent)
                        print(' ' + PST.p_path[44:] + ', ' + paste_path[44:] + ', ' + str(percent))

            # Add paste in DB to prevent its analysis twice
            # HASHTABLES PER MONTH (because of r_serv1 changing db)
            r_serv1.set(index, PST.p_path)
            r_serv1.sadd("INDEX", index)
            # Adding the hash in Redis
            r_serv1.set(paste_hash, index)
            r_serv1.sadd("HASHS", paste_hash)

            ##################### Similarity found #######################
            # One (paste_path, percent) tuple per matching hash
            dupl = list(hash_dico.values())

            # Creating the object attribute and save it.
            to_print = 'Duplicate;{};{};{};'.format(
                PST.p_source, PST.p_date, PST.p_name)
            if dupl:
                PST.p_duplicate = dupl
                PST.save_attribute_redis("p_duplicate", dupl)
                publisher.info('{}Detected {}'.format(to_print, len(dupl)))
                print('{}Detected {}'.format(to_print, len(dupl)))

            y = time.time()

            publisher.debug('{}Processed in {} sec'.format(to_print, y-x))

            # Adding the hash in the dico of the month
            today_dico[index] = paste_hash

            if flag_write_to_disk:
                time_1 = time.time()
                flag_write_to_disk = False
                flag_reload_from_disk = True
                print('writing')
                with open(filedicopath, 'w') as fp:
                    json.dump(today_dico, fp)
        except IOError:
            to_print = 'Duplicate;{};{};{};'.format(
                PST.p_source, PST.p_date, PST.p_name)
            print("CRC Checksum Failed on : " + PST.p_path)
            publisher.error('{}CRC Checksum Failed'.format(to_print))
|
161
bin/Duplicate_ssdeep_v2.py
Executable file
161
bin/Duplicate_ssdeep_v2.py
Executable file
|
@ -0,0 +1,161 @@
|
||||||
|
#!/usr/bin/env python2
|
||||||
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
|
"""
|
||||||
|
The Duplicate module
|
||||||
|
====================
|
||||||
|
|
||||||
|
In short, this module checks incoming pastes for duplicates.
|
||||||
|
Its input comes from other modules, namely:
|
||||||
|
Credential, CreditCard, Keys, Mails and Phone
|
||||||
|
|
||||||
|
This version differs from v1 by using only Redis instead of JSON files stored on disk.
|
||||||
|
|
||||||
|
Requirements:
|
||||||
|
-------------
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
import redis
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
import json
|
||||||
|
import ssdeep
|
||||||
|
from packages import Paste
|
||||||
|
from pubsublogger import publisher
|
||||||
|
|
||||||
|
from Helper import Process
|
||||||
|
|
||||||
|
def _recent_month_keys(today, month_count):
    """Return the ``yyyymm`` keys of the ``month_count`` most recent months.

    The list starts with the month of ``today`` and walks back one calendar
    month at a time, e.g. ``['201601', '201512', '201511']``. Stepping by
    calendar month (rather than by a fixed number of days) guarantees that no
    month is listed twice or skipped.
    """
    # Number of months since year 0: going back is plain integer arithmetic.
    current = today.year * 12 + today.month - 1
    keys = []
    for offset in range(month_count):
        year, month0 = divmod(current - offset, 12)
        keys.append(str(year) + str(month0 + 1).zfill(2))
    return keys


if __name__ == "__main__":
    # Duplicate detection daemon (ssdeep, Redis-only variant).
    #
    # For every paste path received from the queue, the paste's ssdeep hash is
    # compared against the "HASHS" set of each month in the look-back range.
    # Matches above the threshold are stored on the paste as "p_duplicate".
    publisher.port = 6380
    publisher.channel = "Script"

    config_section = 'Duplicates'

    p = Process(config_section)

    maximum_month_range = int(p.config.get("Modules_Duplicates", "maximum_month_range"))
    threshold_duplicate = int(p.config.get("Modules_Duplicates", "threshold_duplicate"))
    min_paste_size = float(p.config.get("Modules_Duplicates", "min_paste_size"))

    # REDIS #
    # One connection per "yyyymm" key: the server port is the year and the db
    # number is the month; db 0 ("yyyy00") holds the yearly index counter.
    redis_host = p.config.get("Redis_Level_DB", "host")
    dico_redis = {}
    date_today = datetime.today()
    for year in xrange(2013, date_today.year+1):
        for month in xrange(0, 13):
            dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis(
                host=redis_host, port=year, db=month)

    # Months to look back into, most recent first. It only depends on values
    # fixed at startup, so it is computed once instead of once per paste.
    # NOTE(review): date_today is never refreshed, so a long-running process
    # keeps the range (and the set of connections) from its start date.
    dico_range_list = _recent_month_keys(date_today, maximum_month_range)

    # FUNCTIONS #
    publisher.info("Script duplicate started")

    while True:
        try:
            hash_dico = {}

            x = time.time()

            message = p.get_from_set()
            if message is None:
                publisher.debug("Script Attribute is idling 10s")
                time.sleep(10)
                continue
            PST = Paste.Paste(message)

            # the paste is too small
            if PST._get_p_size() < min_paste_size:
                continue

            PST._set_p_hash_kind("ssdeep")

            # Select the redis connection matching the paste's month
            r_serv1 = dico_redis[PST.p_date.year + PST.p_date.month]

            # UNIQUE INDEX HASHS TABLE
            yearly_index = str(date_today.year)+'00'
            r_serv0 = dico_redis[yearly_index]
            r_serv0.incr("current_index")
            index = r_serv0.get("current_index")+str(PST.p_date)

            # retrieve hash from paste
            paste_hash = PST._get_p_hash()

            # Go through the hashes stored for every month of the range
            for curr_dico_name in dico_range_list:
                r_serv_dico = dico_redis[curr_dico_name]
                for dico_hash in r_serv_dico.smembers('HASHS'):
                    try:
                        percent = ssdeep.compare(dico_hash, paste_hash)
                        if percent > threshold_duplicate:
                            # hash -> index -> path of the stored paste
                            index_current = r_serv_dico.get(dico_hash)
                            paste_path = r_serv_dico.get(index_current)
                            if paste_path is not None:
                                hash_dico[dico_hash] = (paste_path, percent)
                    except Exception:
                        # Best effort: skip this stored hash and keep going.
                        # Exception (not a bare except) so that Ctrl-C and
                        # SystemExit still stop the daemon.
                        print('ssdeep hash not comparable')
                        publisher.error('ssdeep hash not comparable')

            # Add paste in DB after checking to prevent its analysis twice
            # hash_i -> index_i AND index_i -> PST.PATH
            r_serv1.set(index, PST.p_path)
            r_serv1.sadd("INDEX", index)
            # Adding the hash in Redis
            r_serv1.set(paste_hash, index)
            r_serv1.sadd("HASHS", paste_hash)

            ##################### Similarity found #######################
            # One (paste_path, percent) tuple per matching hash
            dupl = list(hash_dico.values())

            # Creating the object attribute and save it.
            to_print = 'Duplicate;{};{};{};'.format(
                PST.p_source, PST.p_date, PST.p_name)
            if dupl:
                PST.p_duplicate = dupl
                PST.save_attribute_redis("p_duplicate", dupl)
                publisher.info('{}Detected {}'.format(to_print, len(dupl)))
                print('{}Detected {}'.format(to_print, len(dupl)))

            y = time.time()

            publisher.debug('{}Processed in {} sec'.format(to_print, y-x))

        except IOError:
            to_print = 'Duplicate;{};{};{};'.format(
                PST.p_source, PST.p_date, PST.p_name)
            print("CRC Checksum Failed on : " + PST.p_path)
            publisher.error('{}CRC Checksum Failed'.format(to_print))
|
|
@ -16,6 +16,8 @@ def search_gpg(message):
|
||||||
content = paste.get_p_content()
|
content = paste.get_p_content()
|
||||||
if '-----BEGIN PGP MESSAGE-----' in content:
|
if '-----BEGIN PGP MESSAGE-----' in content:
|
||||||
publisher.warning('{} has a PGP enc message'.format(paste.p_name))
|
publisher.warning('{} has a PGP enc message'.format(paste.p_name))
|
||||||
|
#Send to duplicate
|
||||||
|
p.populate_set_out(message)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
@ -105,7 +105,7 @@ function launching_scripts {
|
||||||
|
|
||||||
screen -S "Script" -X screen -t "Global" bash -c './Global.py; read x'
|
screen -S "Script" -X screen -t "Global" bash -c './Global.py; read x'
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script" -X screen -t "Duplicate" bash -c './Duplicate.py; read x'
|
screen -S "Script" -X screen -t "Duplicate" bash -c './Duplicate_ssdeep_v2.py; read x'
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script" -X screen -t "Attribute" bash -c './Attribute.py; read x'
|
screen -S "Script" -X screen -t "Attribute" bash -c './Attribute.py; read x'
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
|
|
|
@ -60,6 +60,8 @@ if __name__ == "__main__":
|
||||||
MX_values[0])
|
MX_values[0])
|
||||||
if MX_values[0] > is_critical:
|
if MX_values[0] > is_critical:
|
||||||
publisher.warning(to_print)
|
publisher.warning(to_print)
|
||||||
|
#Send to duplicate
|
||||||
|
p.populate_set_out(filename)
|
||||||
else:
|
else:
|
||||||
publisher.info(to_print)
|
publisher.info(to_print)
|
||||||
prec_filename = filename
|
prec_filename = filename
|
||||||
|
|
|
@ -23,6 +23,8 @@ def search_phone(message):
|
||||||
if len(results) > 4:
|
if len(results) > 4:
|
||||||
print results
|
print results
|
||||||
publisher.warning('{} contains PID (phone numbers)'.format(paste.p_name))
|
publisher.warning('{} contains PID (phone numbers)'.format(paste.p_name))
|
||||||
|
#Send to duplicate
|
||||||
|
p.populate_set_out(message)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
# If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
|
# If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
import hashlib
|
import hashlib
|
||||||
import crcmod
|
import crcmod
|
||||||
import mmh3
|
import mmh3
|
||||||
|
import ssdeep
|
||||||
|
|
||||||
|
|
||||||
class Hash(object):
|
class Hash(object):
|
||||||
|
@ -32,4 +33,7 @@ class Hash(object):
|
||||||
elif self.name == "murmur":
|
elif self.name == "murmur":
|
||||||
hash = mmh3.hash(string)
|
hash = mmh3.hash(string)
|
||||||
|
|
||||||
|
elif self.name == "ssdeep":
|
||||||
|
hash = ssdeep.hash(string)
|
||||||
|
|
||||||
return hash
|
return hash
|
||||||
|
|
|
@ -91,6 +91,7 @@ class Paste(object):
|
||||||
self.p_langage = None
|
self.p_langage = None
|
||||||
self.p_nb_lines = None
|
self.p_nb_lines = None
|
||||||
self.p_max_length_line = None
|
self.p_max_length_line = None
|
||||||
|
self.p_duplicate = None
|
||||||
|
|
||||||
def get_p_content(self):
|
def get_p_content(self):
|
||||||
"""
|
"""
|
||||||
|
@ -277,6 +278,10 @@ class Paste(object):
|
||||||
return True, var
|
return True, var
|
||||||
else:
|
else:
|
||||||
return False, var
|
return False, var
|
||||||
|
|
||||||
|
def _get_p_duplicate(self):
    """Load the stored duplicate information of this paste.

    Reads the "p_duplicate" field of the hash kept under ``self.p_path``
    through ``self.store.hget`` and remembers it on ``self.p_duplicate``.

    Returns the stored value, or an empty list when nothing is stored.
    """
    stored = self.store.hget(self.p_path, "p_duplicate")
    self.p_duplicate = stored
    if stored is None:
        return []
    return stored
|
||||||
|
|
||||||
def save_all_attributes_redis(self, key=None):
|
def save_all_attributes_redis(self, key=None):
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -25,6 +25,16 @@ max_preview_modal = 800
|
||||||
#Default number of header to display in trending graphs
|
#Default number of header to display in trending graphs
|
||||||
default_display = 10
|
default_display = 10
|
||||||
|
|
||||||
|
#### Modules ####
|
||||||
|
[Modules_Duplicates]
|
||||||
|
#Number of months to look back
|
||||||
|
maximum_month_range = 3
|
||||||
|
#Similarity score above which two pastes are considered duplicates.
|
||||||
|
threshold_duplicate = 50
|
||||||
|
#Minimum size of the paste considered
|
||||||
|
min_paste_size = 0.3
|
||||||
|
|
||||||
|
|
||||||
##### Redis #####
|
##### Redis #####
|
||||||
[Redis_Cache]
|
[Redis_Cache]
|
||||||
host = localhost
|
host = localhost
|
||||||
|
|
|
@ -3,7 +3,7 @@ subscribe = ZMQ_Global
|
||||||
publish = Redis_Global
|
publish = Redis_Global
|
||||||
|
|
||||||
[Duplicates]
|
[Duplicates]
|
||||||
subscribe = Redis_Global
|
subscribe = Redis_Duplicate
|
||||||
|
|
||||||
[Indexer]
|
[Indexer]
|
||||||
subscribe = Redis_Global
|
subscribe = Redis_Global
|
||||||
|
@ -31,9 +31,11 @@ publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Web,Redis_Credential,Re
|
||||||
|
|
||||||
[CreditCards]
|
[CreditCards]
|
||||||
subscribe = Redis_CreditCards
|
subscribe = Redis_CreditCards
|
||||||
|
publish = Redis_Duplicate
|
||||||
|
|
||||||
[Mail]
|
[Mail]
|
||||||
subscribe = Redis_Mail
|
subscribe = Redis_Mail
|
||||||
|
publish = Redis_Duplicate
|
||||||
|
|
||||||
[Onion]
|
[Onion]
|
||||||
subscribe = Redis_Onion
|
subscribe = Redis_Onion
|
||||||
|
@ -55,15 +57,18 @@ subscribe = Redis_Global
|
||||||
|
|
||||||
[Credential]
|
[Credential]
|
||||||
subscribe = Redis_Credential
|
subscribe = Redis_Credential
|
||||||
|
publish = Redis_Duplicate
|
||||||
|
|
||||||
[Cve]
|
[Cve]
|
||||||
subscribe = Redis_Cve
|
subscribe = Redis_Cve
|
||||||
|
|
||||||
[Phone]
|
[Phone]
|
||||||
subscribe = Redis_Global
|
subscribe = Redis_Global
|
||||||
|
publish = Redis_Duplicate
|
||||||
|
|
||||||
[SourceCode]
|
[SourceCode]
|
||||||
subscribe = Redis_SourceCode
|
subscribe = Redis_SourceCode
|
||||||
|
|
||||||
[Keys]
|
[Keys]
|
||||||
subscribe = Redis_Global
|
subscribe = Redis_Global
|
||||||
|
publish = Redis_Duplicate
|
||||||
|
|
|
@ -17,6 +17,7 @@ nltk
|
||||||
# Hashlib
|
# Hashlib
|
||||||
crcmod
|
crcmod
|
||||||
mmh3
|
mmh3
|
||||||
|
simhash
|
||||||
|
|
||||||
#Others
|
#Others
|
||||||
python-magic
|
python-magic
|
||||||
|
|
|
@ -58,6 +58,21 @@ def list_len(s):
|
||||||
return len(s)
|
return len(s)
|
||||||
app.jinja_env.filters['list_len'] = list_len
|
app.jinja_env.filters['list_len'] = list_len
|
||||||
|
|
||||||
|
def parseStringToList(the_string):
    """Split a bracketed, comma-separated string into a list of elements.

    The input is scanned character by character. '[', ' ' and '"' are
    dropped, every other character is buffered, and each ']' closes the
    current element: the buffer is split on ',' and then cleared.

    An element with exactly two fields is kept as is. An element with more
    fields is kept without its first field (the first field is empty for
    every element after the first one, because of the separating comma).
    Elements with a single field are ignored.

    Returns a list of lists of strings, e.g.
    '[["a", 50], ["b", 60]]' -> [['a', '50'], ['b', '60']].
    """
    ignored = ('[', ' ', '"')
    elements = []
    buffered = []
    for char in the_string:
        if char == ']':
            fields = ''.join(buffered).split(',')
            if len(fields) == 2:
                elements.append(fields)
            elif len(fields) > 1:
                elements.append(fields[1:])
            buffered = []
            continue
        if char not in ignored:
            buffered.append(char)
    return elements
|
||||||
|
|
||||||
def showpaste(content_range):
|
def showpaste(content_range):
|
||||||
requested_path = request.args.get('paste', '')
|
requested_path = request.args.get('paste', '')
|
||||||
|
@ -71,10 +86,19 @@ def showpaste(content_range):
|
||||||
p_mime = paste.p_mime
|
p_mime = paste.p_mime
|
||||||
p_lineinfo = paste.get_lines_info()
|
p_lineinfo = paste.get_lines_info()
|
||||||
p_content = paste.get_p_content().decode('utf-8', 'ignore')
|
p_content = paste.get_p_content().decode('utf-8', 'ignore')
|
||||||
|
p_duplicate_full_list = parseStringToList(paste._get_p_duplicate())
|
||||||
|
p_duplicate_list = []
|
||||||
|
p_simil_list = []
|
||||||
|
|
||||||
|
for dup_list in p_duplicate_full_list:
|
||||||
|
path, simil_percent = dup_list
|
||||||
|
p_duplicate_list.append(path)
|
||||||
|
p_simil_list.append(simil_percent)
|
||||||
|
|
||||||
if content_range != 0:
|
if content_range != 0:
|
||||||
p_content = p_content[0:content_range]
|
p_content = p_content[0:content_range]
|
||||||
|
|
||||||
return render_template("show_saved_paste.html", date=p_date, source=p_source, encoding=p_encoding, language=p_language, size=p_size, mime=p_mime, lineinfo=p_lineinfo, content=p_content, initsize=len(p_content))
|
return render_template("show_saved_paste.html", date=p_date, source=p_source, encoding=p_encoding, language=p_language, size=p_size, mime=p_mime, lineinfo=p_lineinfo, content=p_content, initsize=len(p_content), duplicate_list = p_duplicate_list, simil_list = p_simil_list)
|
||||||
|
|
||||||
|
|
||||||
@app.route("/_logs")
|
@app.route("/_logs")
|
||||||
|
|
|
@ -42,7 +42,17 @@
|
||||||
</table>
|
</table>
|
||||||
</div>
|
</div>
|
||||||
<div class="panel-body" id="panel-body">
|
<div class="panel-body" id="panel-body">
|
||||||
<h4> Content: </h4>
|
{% if duplicate_list|length == 0 %}
<h4> No Duplicate </h4>
{% else %}
<h4> Duplicate list: </h4>
{# loop.index0 replaces a manually incremented counter: a variable set
   inside a for loop does not carry over to the next iteration, so every
   row would otherwise display simil_list[0]. #}
{% for dup_path in duplicate_list %}
Similarity: {{ simil_list[loop.index0] }}% - <a target="_blank" href="{{ url_for('showsavedpaste') }}?paste={{ dup_path }}" id='dup_path'>{{ dup_path }}</a></br>
{% endfor %}
{% endif %}
|
||||||
|
<h4> Content: </h4>
|
||||||
<p data-initsize="{{ initsize }}"> <xmp id="paste-holder">{{ content }}</xmp></p>
|
<p data-initsize="{{ initsize }}"> <xmp id="paste-holder">{{ content }}</xmp></p>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
Loading…
Reference in a new issue