Fixed variable bug in moduleStats and added draft of credential stats

This commit is contained in:
Mokaddem 2016-07-26 10:45:02 +02:00
parent 06be1f129a
commit cc1b49baaf
3 changed files with 29 additions and 33 deletions

View file

@ -1,10 +1,12 @@
#!/usr/bin/env python2
# -*-coding:UTF-8 -*
import time
import sys
from packages import Paste
from pubsublogger import publisher
from Helper import Process
import re
from pyfaup.faup import Faup
if __name__ == "__main__":
publisher.port = 6380
@ -38,16 +40,12 @@ if __name__ == "__main__":
if len(creds) == 0:
continue
sites_for_stats = []
for elem in re.findall(regex_site_for_stats, content):
sites.append(elem[1:-1])
sites = set(re.findall(regex_web, content))
sites_for_stats = set(sites_for_stats)
sites= re.findall(regex_web, content) #Use to count occurences
sites_set = set(re.findall(regex_web, content))
message = 'Checked {} credentials found.'.format(len(creds))
if sites:
message += ' Related websites: {}'.format(', '.join(sites))
if sites_set:
message += ' Related websites: {}'.format(', '.join(sites_set))
to_print = 'Credential;{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, message)
@ -59,21 +57,21 @@ if __name__ == "__main__":
#Send to duplicate
p.populate_set_out(filepath, 'Duplicate')
#Put in form, then send to moduleStats
#Put in form, count occurrences, then send to moduleStats
creds_sites = {}
for cred in creds:
user_and_site, password = cred.split(':')
site = user_web.split('@')[1]
if site in sites: # if the parsing went fine
if site in creds_sites.keys(): # check if the key already exists
creds_sites[site] = creds_sites[web]+1
faup = Faup()
for url in sites:
faup.decode(url)
domain = faup.get()['domain']
if domain in creds_sites.keys():
creds_sites[domain] += 1
else:
creds_sites[site] = 1
for site, num in creds_sites.iteritems(): # Send for each different site to moduleStats
print 'Credential;{};{};{}'.format(num, site, paste.p_date)
#p.populate_set_out('Credential;{};{};{}'.format(num, site, paste.p_date), 'ModuleStats')
creds_sites[domain] = 1
if sites:
print("=======> Probably on : {}".format(', '.join(sites)))
for site, num in creds_sites.iteritems(): # Send for each different site to moduleStats
p.populate_set_out('credential;{};{};{}'.format(num, site, paste.p_date), 'ModuleStats')
if sites_set:
print("=======> Probably on : {}".format(', '.join(sites_set)))
else:
publisher.info(to_print)

View file

@ -32,7 +32,6 @@ def compute_most_posted(server, message, num_day):
module, num, keyword, paste_date = message.split(';')
redis_progression_name_set = 'top_'+ module +'_set'
# Add/Update in Redis
prev_score = server.hget(paste_date, module+'-'+keyword)
if prev_score is not None:
@ -55,7 +54,9 @@ def compute_most_posted(server, message, num_day):
#Check value for all members
member_set = []
for keyw in server.smembers(redis_progression_name_set):
member_set.append((keyw, int(server.hget(paste_date, module+'-'+keyw))))
keyw_value = server.hget(paste_date, module+'-'+keyw)
if keyw_value is not None:
member_set.append((keyw, int(keyw_value)))
member_set.sort(key=lambda tup: tup[1])
if member_set[0][1] < keyword_total_sum:
#remove min from set and add the new one
@ -111,10 +112,8 @@ def compute_provider_size(server, path, num_day_to_look):
# for date in date_range:
curr_size = server.hget(provider+'_size', paste_date)
curr_num = server.hget(provider+'_num', paste_date)
print curr_size
if (curr_size is not None) and (curr_num is not None):
curr_avg += float(curr_size) / float(curr_num)
print str(curr_avg)
member_set.append((provider, curr_avg))
member_set.sort(key=lambda tup: tup[1])
if member_set[0][1] < new_avg:
@ -156,12 +155,11 @@ if __name__ == '__main__':
if message is None:
publisher.debug("{} queue is empty, waiting".format(config_section))
print 'sleeping'
time.sleep(2)
time.sleep(20)
continue
else:
# Do something with the message from the queue
print message.split(';')
if len(message.split(';')) > 1:
compute_most_posted(r_serv_trend, message, num_day_to_look)
else:

View file

@ -81,11 +81,11 @@
};
moduleCharts = "size" == module_name ? "sizeCharts" : "moduleCharts";
var moduleCharts = "size" == module_name ? "sizeCharts" : "moduleCharts";
// Graph1
$.getJSON($SCRIPT_ROOT+"/_"+moduleCharts+"?moduleName="+module_name+"&num_day="+chart_1_num_day,
function(data) {
temp_data_pie = [];
var temp_data_pie = [];
for(i=0; i<data.length; i++){
temp_data_pie.push({label: data[i][0], data: data[i][1]});
}
@ -96,14 +96,14 @@
if (item == null)
return;
var clicked_label = item.series.label;
update_bar_chart("#flot-bar-chart-"+module_name, clicked_label, item.series.color, chart_1_num_day, "%m/%d");
update_bar_chart(moduleCharts, "#flot-bar-chart-"+module_name, clicked_label, item.series.color, chart_1_num_day, "%m/%d");
});
}, 500);
});
// flot bar chart
function update_bar_chart(chartID, involved_item, serie_color, num_day, timeformat){
function update_bar_chart(chartUrl, chartID, involved_item, serie_color, num_day, timeformat){
var barOptions = {
series: {
bars: { show: true, barWidth: 82800000 }
@ -120,7 +120,7 @@
tooltipOpts: { content: "x: %x, y: %y" }
};
$.getJSON($SCRIPT_ROOT+"/_"+moduleCharts+"?keywordName="+involved_item+"&moduleName="+module_name+"&bar=true"+"&days="+num_day,
$.getJSON($SCRIPT_ROOT+"/_"+chartUrl+"?keywordName="+involved_item+"&moduleName="+module_name+"&bar=true"+"&days="+num_day,
function(data) {
var temp_data_bar = []
for(i=0; i<data.length; i++){