Mirror of https://github.com/ail-project/ail-framework.git, synced 2024-11-23 06:37:15 +00:00
fix Duplicate, save list of duplicates on disk + prevent empty hash creation

commit f66a528bc2 (parent 225fe76c96)
6 changed files with 52 additions and 31 deletions
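In data-model terms, the commit replaces the JSON list stored under the p_duplicate hash field with one Redis set per paste, keyed 'dup:' + paste path, in a new ARDB_Metadata database. A minimal sketch of that scheme; the connection values mirror the sample config added below, the paths are hypothetical, and note that redis-py stores set members as strings, which is why the web view has to re-parse them later:

import redis

# Connection values mirror the new [ARDB_Metadata] sample config section.
r = redis.StrictRedis(host='localhost', port=6382, db=7, decode_responses=True)

paste_path = 'archive/source/2018/01/01/a.gz'                    # hypothetical
duplicate = ['tlsh', 'archive/source/2018/01/01/b.gz', 30, '2018/01/01']

# Set members must be strings, so the record is flattened with str().
r.sadd('dup:' + paste_path, str(duplicate))

# Retrieval mirrors the new Paste._get_p_duplicate(): SMEMBERS yields strings.
print(r.smembers('dup:' + paste_path))
# {"['tlsh', 'archive/source/2018/01/01/b.gz', 30, '2018/01/01']"}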
@@ -158,6 +158,10 @@ if __name__ == "__main__":
             # Adding hashes in Redis
             for hash_type, paste_hash in paste_hashes.items():
                 r_serv1.set(paste_hash, index)
-                r_serv1.sadd("HASHS_"+hash_type, paste_hash)
+                #bad hash
+                if paste_hash == '':
+                    print('bad Hash: ' + hash_type)
+                else:
+                    r_serv1.sadd("HASHS_"+hash_type, paste_hash)

             ##################### Similarity found #######################

@@ -174,10 +178,11 @@ if __name__ == "__main__":
                 if dupl != []:
                     dupl = list(dupl)
                     PST.__setattr__("p_duplicate", dupl)
-                    PST.save_attribute_redis("p_duplicate", dupl)
-                    PST.save_others_pastes_attribute_duplicate("p_duplicate", dupl)
+                    PST.save_attribute_duplicate(dupl)
+                    PST.save_others_pastes_attribute_duplicate(dupl)
                     publisher.info('{}Detected {};{}'.format(to_print, len(dupl), PST.p_path))
                     print('{}Detected {}'.format(to_print, len(dupl)))
+                    print('')

                 y = time.time()
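Why the guard in the first hunk matters: the py-tlsh bindings return an empty string when a paste is too short or too uniform to fingerprint (newer releases return 'TNULL' instead), so without the check every such paste would SET an empty key and push an empty member into the HASHS_tlsh set. A hedged illustration, assuming py-tlsh:

import tlsh

paste_hash = tlsh.hash(b'short paste')   # not enough data to fingerprint -> ''
hash_type = 'tlsh'

if paste_hash == '':
    print('bad Hash: ' + hash_type)      # skip it, as the patched module now does
else:
    print('ok to index: ' + paste_hash)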
@@ -110,8 +110,6 @@ function launching_scripts {
     sleep 0.1
     screen -S "Script_AIL" -X screen -t "Duplicates" bash -c './Duplicates.py; read x'
     sleep 0.1
-    screen -S "Script_AIL" -X screen -t "Attributes" bash -c './Attributes.py; read x'
-    sleep 0.1
     screen -S "Script_AIL" -X screen -t "Lines" bash -c './Lines.py; read x'
     sleep 0.1
     screen -S "Script_AIL" -X screen -t "DomClassifier" bash -c './DomClassifier.py; read x'
@@ -76,6 +76,11 @@ class Paste(object):
             port=cfg.getint("Redis_Data_Merging", "port"),
             db=cfg.getint("Redis_Data_Merging", "db"),
             decode_responses=True)
+        self.store_duplicate = redis.StrictRedis(
+            host=cfg.get("ARDB_Metadata", "host"),
+            port=cfg.getint("ARDB_Metadata", "port"),
+            db=cfg.getint("ARDB_Metadata", "db"),
+            decode_responses=True)

         self.p_path = p_path
         self.p_name = os.path.basename(self.p_path)

@@ -272,9 +277,9 @@ class Paste(object):
         return False, var

     def _get_p_duplicate(self):
-        self.p_duplicate = self.store.hget(self.p_path, "p_duplicate")
+        self.p_duplicate = self.store_duplicate.smembers('dup:'+self.p_path)
         if self.p_duplicate is not None:
-            return self.p_duplicate
+            return list(self.p_duplicate)
         else:
             return '[]'

@@ -323,27 +328,20 @@ class Paste(object):
         else:
             self.store.hset(self.p_path, attr_name, json.dumps(value))

-    def save_others_pastes_attribute_duplicate(self, attr_name, list_value):
+    def save_attribute_duplicate(self, value):
+        """
+        Save an attribute as a field
+        """
+        for tuple in value:
+            self.store_duplicate.sadd('dup:'+self.p_path, tuple)
+
+    def save_others_pastes_attribute_duplicate(self, list_value):
         """
         Save a new duplicate on others pastes
         """
         for hash_type, path, percent, date in list_value:
-            #get json
-            json_duplicate = self.store.hget(path, attr_name)
-            #json save on redis
-            if json_duplicate is not None:
-                list_duplicate = (json.loads(json_duplicate))
-                # avoid duplicate, a paste can be send by multiples modules
-                to_add = [hash_type, self.p_path, percent, date]
-                if to_add not in list_duplicate:
-                    list_duplicate.append(to_add)
-                    self.store.hset(path, attr_name, json.dumps(list_duplicate))
-
-            else:
-                # create the new list
-                list_duplicate = [[hash_type, self.p_path, percent, date]]
-                self.store.hset(path, attr_name, json.dumps(list_duplicate))
+            to_add = [hash_type, self.p_path, percent, date]
+            self.store_duplicate.sadd('dup:'+path, to_add)

     def _get_from_redis(self, r_serv):
         ans = {}
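One design note on the Paste rewrite above: the old save_others_pastes_attribute_duplicate had to fetch the JSON blob, test membership, append, and re-serialize precisely to "avoid duplicate, a paste can be send by multiples modules". With a Redis set, SADD is idempotent and that bookkeeping disappears. A quick sketch, with hypothetical paths and connection values from the sample config below:

import redis

r = redis.StrictRedis(host='localhost', port=6382, db=7, decode_responses=True)

entry = str(['sha1', 'archive/source/2018/01/01/a.gz', 80, '2018/01/01'])
r.sadd('dup:archive/source/2018/01/01/b.gz', entry)
r.sadd('dup:archive/source/2018/01/01/b.gz', entry)   # re-adding is a no-op

print(r.scard('dup:archive/source/2018/01/01/b.gz'))  # 1: the set deduplicates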
@@ -152,6 +152,11 @@ host = localhost
 port = 6382
 db = 6

+[ARDB_Metadata]
+host = localhost
+port = 6382
+db = 7
+
 [Url]
 cc_critical = DE
@@ -65,6 +65,18 @@ r_serv_pasteName = redis.StrictRedis(
     db=cfg.getint("Redis_Paste_Name", "db"),
     decode_responses=True)

+r_serv_tags = redis.StrictRedis(
+    host=cfg.get("ARDB_Tags", "host"),
+    port=cfg.getint("ARDB_Tags", "port"),
+    db=cfg.getint("ARDB_Tags", "db"),
+    decode_responses=True)
+
+r_serv_metadata = redis.StrictRedis(
+    host=cfg.get("ARDB_Metadata", "host"),
+    port=cfg.getint("ARDB_Metadata", "port"),
+    db=cfg.getint("ARDB_Metadata", "db"),
+    decode_responses=True)
+
 # VARIABLES #
 max_preview_char = int(cfg.get("Flask", "max_preview_char")) # Maximum number of character to display in the tooltip
 max_preview_modal = int(cfg.get("Flask", "max_preview_modal")) # Maximum number of character to display in the modal
@@ -18,6 +18,7 @@ import Flask_config
 app = Flask_config.app
 cfg = Flask_config.cfg
 r_serv_pasteName = Flask_config.r_serv_pasteName
+r_serv_metadata = Flask_config.r_serv_metadata
 max_preview_char = Flask_config.max_preview_char
 max_preview_modal = Flask_config.max_preview_modal
 DiffMaxLineLength = Flask_config.DiffMaxLineLength

@@ -38,20 +39,22 @@ def showpaste(content_range):
     p_mime = paste.p_mime
     p_lineinfo = paste.get_lines_info()
     p_content = paste.get_p_content()
-    p_duplicate_full_list = json.loads(paste._get_p_duplicate())
+    p_duplicate_str_full_list = paste._get_p_duplicate()
+
+    p_duplicate_full_list = []
     p_duplicate_list = []
     p_simil_list = []
     p_date_list = []
     p_hashtype_list = []


-    for dup_list in p_duplicate_full_list:
+    for dup_list in p_duplicate_str_full_list:
+        dup_list = dup_list[1:-1].replace('\'', '').replace(' ', '').split(',')
         if dup_list[0] == "tlsh":
             dup_list[2] = 100 - int(dup_list[2])
         else:
-            print('dup_list')
-            print(dup_list)
             dup_list[2] = int(dup_list[2])
+        p_duplicate_full_list.append(dup_list)

     #p_duplicate_full_list.sort(lambda x,y: cmp(x[2], y[2]), reverse=True)

@@ -69,8 +72,8 @@ def showpaste(content_range):
             comp_vals.append(p_duplicate_full_list[i][2])
             dup_list_removed.append(i)

-        hash_types = str(hash_types).replace("[","").replace("]","") if len(hash_types)==1 else str(hash_types)
-        comp_vals = str(comp_vals).replace("[","").replace("]","") if len(comp_vals)==1 else str(comp_vals)
+        #hash_types = str(hash_types).replace("[","").replace("]","") if len(hash_types)==1 else str(hash_types)
+        #comp_vals = str(comp_vals).replace("[","").replace("]","") if len(comp_vals)==1 else str(comp_vals)

         if len(p_duplicate_full_list[dup_list_index]) > 3:
             try:

@@ -80,7 +83,7 @@ def showpaste(content_range):
                 date_paste = str(p_duplicate_full_list[dup_list_index][3])
             else:
                 date_paste = "No date available"
-            new_dup_list.append([hash_types.replace("'", ""), p_duplicate_full_list[dup_list_index][1], comp_vals, date_paste])
+            new_dup_list.append([hash_types, p_duplicate_full_list[dup_list_index][1], comp_vals, date_paste])

         # Create the list to pass to the webpage
         for dup_list in new_dup_list:
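For reference, the round trip performed by the parsing loop in the second showpaste hunk, on one hypothetical entry; the tlsh branch flips the reported distance into a similarity-style percentage, while other hashers such as ssdeep already report 0-100:

# One stringified record, as Paste.save_others_pastes_attribute_duplicate() stores it.
entry = str(['tlsh', 'archive/source/2018/01/01/b.gz', 30, '2018/01/01'])

dup_list = entry[1:-1].replace('\'', '').replace(' ', '').split(',')
if dup_list[0] == "tlsh":
    dup_list[2] = 100 - int(dup_list[2])   # tlsh scores are distances: lower = closer
else:
    dup_list[2] = int(dup_list[2])

print(dup_list)   # ['tlsh', 'archive/source/2018/01/01/b.gz', 70, '2018/01/01']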