ail-framework/bin/packages/lib_search.py

104 lines
3.6 KiB
Python
Raw Normal View History

import redis
import string
def create_common_hash_file(r_serv, zmin, zmax, filename):
    """Create a "top100".txt style file ranking the most common hashes.

    :param r_serv: -- connection to the redis database
    :param zmin: -- (int) Offset of the top list
    :param zmax: -- (int) Number of elements wanted in the top list.
    :param filename: -- the pathname of the file to create.

    This function writes a ranking, between zmin and zmax, of the most
    common hashes found in the "hash" sorted set, highest score first.

    Lines are written as follows in the file:
    hash:[md5hash]:[cardinality]:[line]

    Each hash represents a full line, which means it can be one char or more...
    The [line] field is the sorted list of members of the 'L:' + hash set.
    """
    # Text mode: only str is written below, and writing str to a file
    # opened with 'wb' raises TypeError on Python 3.
    with open(filename, 'w') as F:
        # Arguments after the key are passed positionally as
        # (max, min, start, num, withscores) -- this assumes the redis-py
        # zrevrangebyscore signature; confirm against the client in use.
        ranking = r_serv.zrevrangebyscore("hash", "+inf", "-inf", zmin, zmax, True)
        for h, num in ranking:
            # Sorted so the written list does not depend on set ordering.
            lines = sorted(r_serv.smembers('L:' + h))
            F.write("hash:{0}:{1}:{2}\n".format(h, num, lines))
def paste_searching(r_serv, filename, pastename, mincard, maxcard):
    """Search for pastes sharing hashes with a given paste.

    :param r_serv: -- connection to the redis database
    :param filename: -- the pathname of the report file to create.
    :param pastename: -- the name of the paste used to search in redis database.
    :param mincard: -- the minimum occurrence needed for a hash to be taken into account.
    :param maxcard: -- the maximum occurrence allowed for a hash to be taken into account.

    This function writes a text file (it returns nothing) which is a kind of
    synthesis about where (in the other pastes) the hashes of the given
    pastename have been found.

    Every member of the "hash" set is treated as the key of another set.
    A hash is kept when that set contains pastename and its size is within
    [mincard, maxcard]. The report lists the kept hashes, then the sorted
    union of the members of all kept hashes under "Similar Files".
    """
    similar = set()

    # Text mode: only str is written below, and writing str to a file
    # opened with 'wb' raises TypeError on Python 3.
    with open(filename, 'w') as F:
        F.write("Paste: {0}\nOptions used:\nMincard: {1}\nMaxcard: {2}\n\nContaining Following Hash:\n".format(pastename, mincard, maxcard))

        for h in r_serv.smembers("hash"):
            # One fetch per hash: membership and cardinality are both
            # derived from the same snapshot of the set.
            members = r_serv.smembers(h)
            if pastename in members and mincard <= len(members) <= maxcard:
                F.write(h + '\n')
                similar.update(members)

        F.write("\nSimilar Files:\n")
        # Sorted so the numbering is reproducible between runs.
        for n, s in enumerate(sorted(similar)):
            F.write(str(n) + ': ' + s + '\n')
def paste_searching2(r_serv, filename, pastename, mincard, maxcard):
    """Search for pastes sharing hashes with a given paste.
    (On another kind of redis data structure)

    :param r_serv: -- connection to the redis database
    :param filename: -- the pathname of the report file to create.
    :param pastename: -- the name of the paste used to search in redis database.
    :param mincard: -- the minimum occurrence needed for a hash to be taken into account.
    :param maxcard: -- the maximum occurrence allowed for a hash to be taken into account.

    This function writes a text file (it returns nothing) which is a kind of
    synthesis about where (in the other pastes) the hashes of the given
    pastename have been found.

    Hashes are read from the "hash" sorted set with zrange. Each one is
    treated as the key of a set; a hash is kept when that set contains
    pastename and its size is within [mincard, maxcard]. For every kept hash
    the report shows the hash, its score in "hash" and the sorted members of
    the 'L:' + hash set, followed by the sorted union of the members of all
    kept hashes under "Similar Files".
    """
    similar = set()

    # Text mode: only str is written below, and writing str to a file
    # opened with 'wb' raises TypeError on Python 3.
    with open(filename, 'w') as F:
        F.write("Paste: {0}\nOptions used:\nMincard: {1}\nMaxcard: {2}\n\n###Containing Following Hash:### ###Occur### ###### Corresponding Line ######\n".format(pastename, mincard, maxcard))

        for h in r_serv.zrange("hash", 0, -1):
            # One fetch per hash: membership and cardinality are both
            # derived from the same snapshot of the set.
            members = r_serv.smembers(h)
            if pastename in members and mincard <= len(members) <= maxcard:
                score = r_serv.zscore("hash", h)
                # Sorted so the written list does not depend on set ordering.
                lines = sorted(r_serv.smembers('L:' + h))
                F.write(h + ' -- ' + str(score) + ' -- ' + str(lines) + '\n')
                similar.update(members)

        F.write("\nSimilar Files:\n")
        # Sorted so the numbering is reproducible between runs.
        for n, s in enumerate(sorted(similar)):
            F.write(str(n) + ': ' + s + '\n')