Fixed variable bug in moduleStats and added draft of credential stats

Mokaddem 2016-07-26 10:45:02 +02:00
parent 06be1f129a
commit cc1b49baaf
3 changed files with 29 additions and 33 deletions

View file

@@ -1,10 +1,12 @@
 #!/usr/bin/env python2
 # -*-coding:UTF-8 -*
 import time
+import sys
 from packages import Paste
 from pubsublogger import publisher
 from Helper import Process
 import re
+from pyfaup.faup import Faup
 
 if __name__ == "__main__":
     publisher.port = 6380
@@ -38,16 +40,12 @@ if __name__ == "__main__":
         if len(creds) == 0:
             continue
 
-        sites_for_stats = []
-        for elem in re.findall(regex_site_for_stats, content):
-            sites.append(elem[1:-1])
-        sites = set(re.findall(regex_web, content))
-        sites_for_stats = set(sites_for_stats)
-
+        sites= re.findall(regex_web, content) #Use to count occurences
+        sites_set = set(re.findall(regex_web, content))
 
         message = 'Checked {} credentials found.'.format(len(creds))
-        if sites:
-            message += ' Related websites: {}'.format(', '.join(sites))
+        if sites_set:
+            message += ' Related websites: {}'.format(', '.join(sites_set))
 
         to_print = 'Credential;{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, message)
 
@@ -59,21 +57,21 @@ if __name__ == "__main__":
             #Send to duplicate
             p.populate_set_out(filepath, 'Duplicate')
 
-            #Put in form, then send to moduleStats
+            #Put in form, count occurences, then send to moduleStats
             creds_sites = {}
-            for cred in creds:
-                user_and_site, password = cred.split(':')
-                site = user_web.split('@')[1]
-                if site in sites: # if the parsing went fine
-                    if site in creds_sites.keys(): # check if the key already exists
-                        creds_sites[site] = creds_sites[web]+1
-                    else:
-                        creds_sites[site] = 1
+            faup = Faup()
+            for url in sites:
+                faup.decode(url)
+                domain = faup.get()['domain']
+                if domain in creds_sites.keys():
+                    creds_sites[domain] += 1
+                else:
+                    creds_sites[domain] = 1
 
-            for site, num in creds_sites.iteritems(): # Send for each different site to moduleStats
-                print 'Credential;{};{};{}'.format(num, site, paste.p_date)
-                #p.populate_set_out('Credential;{};{};{}'.format(num, site, paste.p_date), 'ModuleStats')
-            if sites:
-                print("=======> Probably on : {}".format(', '.join(sites)))
+            for site, num in creds_sites.iteritems(): # Send for each different site to moduleStats
+                p.populate_set_out('credential;{};{};{}'.format(num, site, paste.p_date), 'ModuleStats')
+
+            if sites_set:
+                print("=======> Probably on : {}".format(', '.join(sites_set)))
         else:
             publisher.info(to_print)
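
The new draft replaces the old user@site splitting with pyfaup: every URL matched by regex_web is decoded, tallied by its parsed domain, and one credential;count;site;date message per distinct domain is sent to ModuleStats. Below is a minimal standalone sketch of that tallying step, assuming pyfaup is installed and that faup.get() exposes the parsed domain under the 'domain' key as in the diff above; the example URLs and date are made-up placeholders:

    from pyfaup.faup import Faup

    # Placeholder inputs standing in for re.findall(regex_web, content) and paste.p_date
    urls = ['http://mail.example.com/login', 'https://example.com/auth', 'http://other.org/']
    paste_date = '20160726'

    faup = Faup()
    creds_sites = {}
    for url in urls:
        faup.decode(url)                      # parse the URL
        domain = faup.get()['domain']         # parsed domain, e.g. 'example.com'
        creds_sites[domain] = creds_sites.get(domain, 0) + 1

    # One stats message per distinct site, mirroring what Credential.py now sends to ModuleStats
    for site, num in creds_sites.items():
        print('credential;{};{};{}'.format(num, site, paste_date))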

View file

@@ -32,7 +32,6 @@ def compute_most_posted(server, message, num_day):
     module, num, keyword, paste_date = message.split(';')
 
     redis_progression_name_set = 'top_'+ module +'_set'
-
     # Add/Update in Redis
     prev_score = server.hget(paste_date, module+'-'+keyword)
     if prev_score is not None:
@@ -55,7 +54,9 @@ def compute_most_posted(server, message, num_day):
         #Check value for all members
         member_set = []
         for keyw in server.smembers(redis_progression_name_set):
-            member_set.append((keyw, int(server.hget(paste_date, module+'-'+keyw))))
+            keyw_value = server.hget(paste_date, module+'-'+keyw)
+            if keyw_value is not None:
+                member_set.append((keyw, int(keyw_value)))
         member_set.sort(key=lambda tup: tup[1])
         if member_set[0][1] < keyword_total_sum:
             #remove min from set and add the new one
@@ -111,10 +112,8 @@ def compute_provider_size(server, path, num_day_to_look):
             # for date in date_range:
             curr_size = server.hget(provider+'_size', paste_date)
             curr_num = server.hget(provider+'_num', paste_date)
-            print curr_size
             if (curr_size is not None) and (curr_num is not None):
                 curr_avg += float(curr_size) / float(curr_num)
-            print str(curr_avg)
             member_set.append((provider, curr_avg))
         member_set.sort(key=lambda tup: tup[1])
         if member_set[0][1] < new_avg:
@@ -156,12 +155,11 @@ if __name__ == '__main__':
         if message is None:
             publisher.debug("{} queue is empty, waiting".format(config_section))
             print 'sleeping'
-            time.sleep(2)
+            time.sleep(20)
             continue
 
         else:
             # Do something with the message from the queue
-            print message.split(';')
             if len(message.split(';')) > 1:
                 compute_most_posted(r_serv_trend, message, num_day_to_look)
             else:
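
The variable bug referenced in the commit title appears to be the unguarded int(server.hget(...)) in compute_most_posted: hget() returns None for a keyword that has no counter on that date, and int(None) raises a TypeError. A minimal sketch of the guarded pattern, assuming redis-py and a local Redis on the default port; the module name and date below are placeholders:

    import redis

    # decode_responses=True so smembers()/hget() return strings rather than bytes
    server = redis.StrictRedis(host='localhost', port=6379, db=0, decode_responses=True)

    module = 'credential'     # placeholder module name
    paste_date = '20160726'   # placeholder date key
    redis_progression_name_set = 'top_' + module + '_set'

    member_set = []
    for keyw in server.smembers(redis_progression_name_set):
        keyw_value = server.hget(paste_date, module + '-' + keyw)
        if keyw_value is not None:            # skip keywords with no counter for this date
            member_set.append((keyw, int(keyw_value)))
    member_set.sort(key=lambda tup: tup[1])   # smallest counter first, as in the module
    print(member_set)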

View file

@@ -81,11 +81,11 @@
 
         };
 
-        moduleCharts = "size" == module_name ? "sizeCharts" : "moduleCharts";
+        var moduleCharts = "size" == module_name ? "sizeCharts" : "moduleCharts";
         // Graph1
         $.getJSON($SCRIPT_ROOT+"/_"+moduleCharts+"?moduleName="+module_name+"&num_day="+chart_1_num_day,
             function(data) {
-                temp_data_pie = [];
+                var temp_data_pie = [];
                 for(i=0; i<data.length; i++){
                     temp_data_pie.push({label: data[i][0], data: data[i][1]});
                 }
@@ -96,14 +96,14 @@
                     if (item == null)
                         return;
                     var clicked_label = item.series.label;
-                    update_bar_chart("#flot-bar-chart-"+module_name, clicked_label, item.series.color, chart_1_num_day, "%m/%d");
+                    update_bar_chart(moduleCharts, "#flot-bar-chart-"+module_name, clicked_label, item.series.color, chart_1_num_day, "%m/%d");
                 });
             }, 500);
         });
 
 
         // flot bar char
-        function update_bar_chart(chartID, involved_item, serie_color, num_day, timeformat){
+        function update_bar_chart(chartUrl, chartID, involved_item, serie_color, num_day, timeformat){
            var barOptions = {
                series: {
                    bars: { show: true, barWidth: 82800000 }
@@ -120,7 +120,7 @@
                tooltipOpts: { content: "x: %x, y: %y" }
            };
 
-            $.getJSON($SCRIPT_ROOT+"/_"+moduleCharts+"?keywordName="+involved_item+"&moduleName="+module_name+"&bar=true"+"&days="+num_day,
+            $.getJSON($SCRIPT_ROOT+"/_"+chartUrl+"?keywordName="+involved_item+"&moduleName="+module_name+"&bar=true"+"&days="+num_day,
                function(data) {
                    var temp_data_bar = []
                    for(i=0; i<data.length; i++){