diff --git a/bin/Duplicate_ssdeep_v2.py b/bin/Duplicate_ssdeep_v2.py index 67fc14e2..22498b90 100755 --- a/bin/Duplicate_ssdeep_v2.py +++ b/bin/Duplicate_ssdeep_v2.py @@ -11,6 +11,8 @@ Its input comes from other modules, namely: This one differ from v1 by only using redis and not json file stored on disk +Perform comparisons with ssdeep and tlsh + Requirements: ------------- @@ -130,18 +132,17 @@ if __name__ == "__main__": print '['+hash_type+'] '+'comparing: ' + str(PST.p_path[44:]) + ' and ' + str(paste_path[44:]) + ' percentage: ' + str(percent) except Exception,e: print str(e) - # ssdeep hash not comparable #print 'hash not comparable, bad hash: '+dico_hash+' , current_hash: '+paste_hash - #curr_dico_redis.srem('HASHS', dico_hash) # Add paste in DB after checking to prevent its analysis twice - # hash_i -> index_i AND index_i -> PST.PATH + # hash_type_i -> index_i AND index_i -> PST.PATH r_serv1.set(index, PST.p_path) r_serv1.sadd("INDEX", index) - # Adding the hash in Redis + # Adding hashes in Redis for hash_type, paste_hash in paste_hashes.iteritems(): r_serv1.set(paste_hash, index) r_serv1.sadd("HASHS_"+hash_type, paste_hash) + ##################### Similarity found ####################### # if there is data in this dictionnary diff --git a/bin/packages/Paste.py b/bin/packages/Paste.py index 90f7cae5..f03114f1 100755 --- a/bin/packages/Paste.py +++ b/bin/packages/Paste.py @@ -165,7 +165,7 @@ class Paste(object): """ Setting the hash of the paste as a kind of "uniq" identificator - :return: hash string (md5, sha1....) + :return: a dictionary of hash strings (md5, sha1....) 
:Example: PST._get_p_hash() diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index 11836de4..242919a2 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -14,7 +14,7 @@ import Paste from Date import Date # CONFIG # -tlsh_to_percent = 1000.0 +tlsh_to_percent = 1000.0 # Used to display the estimated percentage instead of a raw value configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') if not os.path.exists(configfile): @@ -124,6 +124,7 @@ def showpaste(content_range): p_duplicate_full_list.sort(lambda x,y: cmp(x[2], y[2]), reverse=True) + # Combine multiple duplicate paste names and format for display new_dup_list = [] dup_list_removed = [] for dup_list_index in range(0, len(p_duplicate_full_list)): @@ -141,6 +142,7 @@ def showpaste(content_range): comp_vals = str(comp_vals).replace("[","").replace("]","") if len(comp_vals)==1 else str(comp_vals) new_dup_list.append([hash_types.replace("'", ""), p_duplicate_full_list[dup_list_index][1], comp_vals]) + # Create the list to pass to the webpage for dup_list in new_dup_list: hash_type, path, simil_percent = dup_list p_duplicate_list.append(path)