From cc1b49baafc7d3ec26c1ad60dc41dea301607512 Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Tue, 26 Jul 2016 10:45:02 +0200 Subject: [PATCH] Fixed variable bug in moduleStats and added draft of credential stats --- bin/Credential.py | 42 +++++++++++++-------------- bin/ModuleStats.py | 10 +++---- var/www/templates/Moduletrending.html | 10 +++---- 3 files changed, 29 insertions(+), 33 deletions(-) diff --git a/bin/Credential.py b/bin/Credential.py index 84fe49ee..29a418c7 100755 --- a/bin/Credential.py +++ b/bin/Credential.py @@ -1,10 +1,12 @@ #!/usr/bin/env python2 # -*-coding:UTF-8 -* import time +import sys from packages import Paste from pubsublogger import publisher from Helper import Process import re +from pyfaup.faup import Faup if __name__ == "__main__": publisher.port = 6380 @@ -38,16 +40,12 @@ if __name__ == "__main__": if len(creds) == 0: continue - sites_for_stats = [] - for elem in re.findall(regex_site_for_stats, content): - sites.append(elem[1:-1]) - - sites = set(re.findall(regex_web, content)) - sites_for_stats = set(sites_for_stats) + sites= re.findall(regex_web, content) #Use to count occurences + sites_set = set(re.findall(regex_web, content)) message = 'Checked {} credentials found.'.format(len(creds)) - if sites: - message += ' Related websites: {}'.format(', '.join(sites)) + if sites_set: + message += ' Related websites: {}'.format(', '.join(sites_set)) to_print = 'Credential;{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, message) @@ -59,21 +57,21 @@ if __name__ == "__main__": #Send to duplicate p.populate_set_out(filepath, 'Duplicate') - #Put in form, then send to moduleStats + #Put in form, count occurences, then send to moduleStats creds_sites = {} - for cred in creds: - user_and_site, password = cred.split(':') - site = user_web.split('@')[1] - if site in sites: # if the parsing went fine - if site in creds_sites.keys(): # check if the key already exists - creds_sites[site] = creds_sites[web]+1 - else: - creds_sites[site] = 1 - for site, num in creds_sites.iteritems(): # Send for each different site to moduleStats - print 'Credential;{};{};{}'.format(num, site, paste.p_date) - #p.populate_set_out('Credential;{};{};{}'.format(num, site, paste.p_date), 'ModuleStats') + faup = Faup() + for url in sites: + faup.decode(url) + domain = faup.get()['domain'] + if domain in creds_sites.keys(): + creds_sites[domain] += 1 + else: + creds_sites[domain] = 1 - if sites: - print("=======> Probably on : {}".format(', '.join(sites))) + for site, num in creds_sites.iteritems(): # Send for each different site to moduleStats + p.populate_set_out('credential;{};{};{}'.format(num, site, paste.p_date), 'ModuleStats') + + if sites_set: + print("=======> Probably on : {}".format(', '.join(sites_set))) else: publisher.info(to_print) diff --git a/bin/ModuleStats.py b/bin/ModuleStats.py index 705d84e7..26ca5ce8 100755 --- a/bin/ModuleStats.py +++ b/bin/ModuleStats.py @@ -32,7 +32,6 @@ def compute_most_posted(server, message, num_day): module, num, keyword, paste_date = message.split(';') redis_progression_name_set = 'top_'+ module +'_set' - # Add/Update in Redis prev_score = server.hget(paste_date, module+'-'+keyword) if prev_score is not None: @@ -55,7 +54,9 @@ def compute_most_posted(server, message, num_day): #Check value for all members member_set = [] for keyw in server.smembers(redis_progression_name_set): - member_set.append((keyw, int(server.hget(paste_date, module+'-'+keyw)))) + keyw_value = server.hget(paste_date, module+'-'+keyw) + if keyw_value is not None: + member_set.append((keyw, int(keyw_value))) member_set.sort(key=lambda tup: tup[1]) if member_set[0][1] < keyword_total_sum: #remove min from set and add the new one @@ -111,10 +112,8 @@ def compute_provider_size(server, path, num_day_to_look): # for date in date_range: curr_size = server.hget(provider+'_size', paste_date) curr_num = server.hget(provider+'_num', paste_date) - print curr_size if (curr_size is not None) and (curr_num is not None): curr_avg += float(curr_size) / float(curr_num) - print str(curr_avg) member_set.append((provider, curr_avg)) member_set.sort(key=lambda tup: tup[1]) if member_set[0][1] < new_avg: @@ -156,12 +155,11 @@ if __name__ == '__main__': if message is None: publisher.debug("{} queue is empty, waiting".format(config_section)) print 'sleeping' - time.sleep(2) + time.sleep(20) continue else: # Do something with the message from the queue - print message.split(';') if len(message.split(';')) > 1: compute_most_posted(r_serv_trend, message, num_day_to_look) else: diff --git a/var/www/templates/Moduletrending.html b/var/www/templates/Moduletrending.html index 1240f5ef..bd4218e3 100644 --- a/var/www/templates/Moduletrending.html +++ b/var/www/templates/Moduletrending.html @@ -81,11 +81,11 @@ }; - moduleCharts = "size" == module_name ? "sizeCharts" : "moduleCharts"; + var moduleCharts = "size" == module_name ? "sizeCharts" : "moduleCharts"; // Graph1 $.getJSON($SCRIPT_ROOT+"/_"+moduleCharts+"?moduleName="+module_name+"&num_day="+chart_1_num_day, function(data) { - temp_data_pie = []; + var temp_data_pie = []; for(i=0; i