Fix tracked-term lookup and avoid duplicate mail addresses

This commit is contained in:
Terrtia 2018-04-24 16:44:37 +02:00
parent a7bd01ed9a
commit 8738b7cf75
3 changed files with 20 additions and 4 deletions

View file

@ -49,7 +49,7 @@ top_termFreq_setName_month = ["TopTermFreq_set_month", 31]
top_termFreq_set_array = [top_termFreq_setName_day,top_termFreq_setName_week, top_termFreq_setName_month]
def check_if_tracked_term(term, path):
if term in server_term.smembers(TrackedTermsSet_Name):
if term.encode('utf8') in server_term.smembers(TrackedTermsSet_Name):
#add_paste to tracked_word_set
set_name = "tracked_" + term
server_term.sadd(set_name, path)
@ -132,7 +132,7 @@ if __name__ == "__main__":
curr_word_value_perPaste = int(server_term.hincrby("per_paste_" + str(timestamp), low_word, int(1)))
# Add in set only if term is not in the blacklist
if low_word not in server_term.smembers(BlackListTermsSet_Name):
if low_word.encode('utf8') not in server_term.smembers(BlackListTermsSet_Name):
#consider the number of occurrences of this term
server_term.zincrby(curr_set, low_word, float(score))
#1 term per paste

View file

@ -28,6 +28,12 @@ def checking_MX_record(r_serv, adress_set):
names: on example@gmail.com it will try to resolve gmail.com
"""
print('mails:')
print(adress_set)
#remove duplicate
adress_set = list(set(adress_set))
score = 0
num = len(adress_set)
WalidMX = set([])

View file

@ -158,6 +158,7 @@ def terms_management():
trackReg_list_num_of_paste = []
for tracked_regex in r_serv_term.smembers(TrackedRegexSet_Name):
tracked_regex = tracked_regex.decode('utf8')
print(tracked_regex)
notificationEMailTermMapping[tracked_regex] = "\n".join( (r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_regex)).decode('utf8') )
@ -211,6 +212,8 @@ def terms_management():
track_list_num_of_paste = []
for tracked_term in r_serv_term.smembers(TrackedTermsSet_Name):
tracked_term = tracked_term.decode('utf8')
print('tracked_term : .')
print(tracked_term)
#print(TrackedTermsNotificationEmailsPrefix_Name)
print(r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_term))
@ -226,7 +229,11 @@ def terms_management():
term_date = r_serv_term.hget(TrackedTermsDate_Name, tracked_term)
set_paste_name = "tracked_" + tracked_term
print('set_paste_name : .')
print(set_paste_name)
track_list_num_of_paste.append( r_serv_term.scard(set_paste_name) )
print('track_list_num_of_paste : .')
print(track_list_num_of_paste)
term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded"
value_range.append(term_date)
track_list_values.append(value_range)
@ -252,6 +259,8 @@ def terms_management():
@terms.route("/terms_management_query_paste/")
def terms_management_query_paste():
term = request.args.get('term')
print('term :')
print(term)
paste_info = []
# check if regex or not
@ -263,10 +272,10 @@ def terms_management_query_paste():
track_list_path = r_serv_term.smembers(set_paste_name)
else:
set_paste_name = "tracked_" + term
print(r_serv_term.smembers(set_paste_name))
track_list_path = r_serv_term.smembers(set_paste_name)
for path in track_list_path:
path = path.decode('utf8')
paste = Paste.Paste(path)
p_date = str(paste._get_p_date())
p_date = p_date[6:]+'/'+p_date[4:6]+'/'+p_date[0:4]
@ -523,6 +532,7 @@ def credentials_management_query_paste():
paste_info = []
for pathNum in allPath:
path = r_serv_cred.hget(REDIS_KEY_ALL_PATH_SET_REV, pathNum)
path = path.decode('utf8')
paste = Paste.Paste(path)
p_date = str(paste._get_p_date())
p_date = p_date[6:]+'/'+p_date[4:6]+'/'+p_date[0:4]
@ -531,7 +541,7 @@ def credentials_management_query_paste():
p_size = paste.p_size
p_mime = paste.p_mime
p_lineinfo = paste.get_lines_info()
p_content = paste.get_p_content().decode('utf-8', 'ignore')
p_content = paste.get_p_content()
if p_content != 0:
p_content = p_content[0:400]
paste_info.append({"path": path, "date": p_date, "source": p_source, "encoding": p_encoding, "size": p_size, "mime": p_mime, "lineinfo": p_lineinfo, "content": p_content})