diff --git a/bin/Credential.py b/bin/Credential.py
index d81c9ff6..84fe49ee 100755
--- a/bin/Credential.py
+++ b/bin/Credential.py
@@ -17,6 +17,7 @@ if __name__ == "__main__":
     regex_web = "((?:https?:\/\/)[-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)"
     regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+"
+    regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:"
 
     while True:
         message = p.get_from_set()
         if message is None:
@@ -37,7 +38,12 @@ if __name__ == "__main__":
         if len(creds) == 0:
             continue
 
+        sites_for_stats = []
+        for elem in re.findall(regex_site_for_stats, content):
+            sites_for_stats.append(elem[1:-1])
+
         sites = set(re.findall(regex_web, content))
+        sites_for_stats = set(sites_for_stats)
 
         message = 'Checked {} credentials found.'.format(len(creds))
         if sites:
@@ -51,7 +57,22 @@ if __name__ == "__main__":
             print("========> Found more than 10 credentials in this file : {}".format(filepath))
             publisher.warning(to_print)
             #Send to duplicate
-            p.populate_set_out(filepath)
+            p.populate_set_out(filepath, 'Duplicate')
+
+            #Put in form, then send to moduleStats
+            creds_sites = {}
+            for cred in creds:
+                user_and_site, password = cred.split(':')
+                site = user_and_site.split('@')[1]
+                if site in sites_for_stats: # if the parsing went fine
+                    if site in creds_sites.keys(): # check if the key already exists
+                        creds_sites[site] = creds_sites[site]+1
+                    else:
+                        creds_sites[site] = 1
+            for site, num in creds_sites.iteritems(): # Send for each different site to moduleStats
+                print 'Credential;{};{};{}'.format(num, site, paste.p_date)
+                #p.populate_set_out('Credential;{};{};{}'.format(num, site, paste.p_date), 'ModuleStats')
+
             if sites:
                 print("=======> Probably on : {}".format(', '.join(sites)))
             else:
diff --git a/bin/CreditCard.py b/bin/CreditCard.py
index 18703f4e..de90f4d4 100755
--- a/bin/CreditCard.py
+++ b/bin/CreditCard.py
@@ -66,7 +66,7 @@ if __name__ == "__main__":
                 publisher.warning('{}Checked {} valid number(s)'.format(
                     to_print, len(creditcard_set)))
                 #Send to duplicate
-
p.populate_set_out(filename) + p.populate_set_out(filepath, 'Redis_Duplicate') else: publisher.info('{}CreditCard related'.format(to_print)) else: diff --git a/bin/Global.py b/bin/Global.py index 8b6e482f..9cacbc88 100755 --- a/bin/Global.py +++ b/bin/Global.py @@ -52,7 +52,7 @@ if __name__ == '__main__': else: # TODO Store the name of the empty paste inside a Redis-list. print "Empty Paste: not processed" - publisher.debug("Empty Paste: {0} not processed".format(paste)) + publisher.debug("Empty Paste: {0} not processed".format(message)) continue else: print "Empty Queues: Waiting..." diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index d6706e1e..5afc3c2a 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -140,6 +140,8 @@ function launching_scripts { screen -S "Script" -X screen -t "Cve" bash -c './Cve.py; read x' sleep 0.1 screen -S "Script" -X screen -t "WebStats" bash -c './WebStats.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "ModuleStats" bash -c './ModuleStats.py; read x' } #If no params, display the help diff --git a/bin/Mail.py b/bin/Mail.py index 964deb19..d3968442 100755 --- a/bin/Mail.py +++ b/bin/Mail.py @@ -61,9 +61,15 @@ if __name__ == "__main__": if MX_values[0] > is_critical: publisher.warning(to_print) #Send to duplicate - p.populate_set_out(filename) + p.populate_set_out(filename, 'Duplicate') + else: publisher.info(to_print) + #Send to ModuleStats + for mail in MX_values[1]: + print 'mail;{};{};{}'.format(1, mail, PST.p_date) + p.populate_set_out('mail;{};{};{}'.format(1, mail, PST.p_date), 'ModuleStats') + prec_filename = filename else: diff --git a/bin/ModuleStats.py b/bin/ModuleStats.py new file mode 100755 index 00000000..705d84e7 --- /dev/null +++ b/bin/ModuleStats.py @@ -0,0 +1,168 @@ +#!/usr/bin/env python2 +# -*-coding:UTF-8 -* +""" + Template for new modules +""" + +import time +import datetime +import redis +import os +from packages import lib_words +from packages.Date import Date +from pubsublogger import publisher +from Helper 
import Process
+from packages import Paste
+
+# Config Var
+max_set_cardinality = 7
+num_day_to_look = 5
+
+def get_date_range(num_day):
+    curr_date = datetime.date.today()
+    date = Date(str(curr_date.year)+str(curr_date.month).zfill(2)+str(curr_date.day).zfill(2))
+    date_list = []
+
+    for i in range(0, num_day+1):
+        date_list.append(date.substract_day(i))
+    return date_list
+
+
+def compute_most_posted(server, message, num_day):
+    module, num, keyword, paste_date = message.split(';')
+
+    redis_progression_name_set = 'top_'+ module +'_set'
+
+    # Add/Update in Redis
+    prev_score = server.hget(paste_date, module+'-'+keyword)
+    if prev_score is not None:
+        server.hset(paste_date, module+'-'+keyword, int(prev_score) + int(num))
+    else:
+        server.hset(paste_date, module+'-'+keyword, int(num))
+
+    # Compute Most Posted
+    date_range = get_date_range(num_day)
+    # check if this keyword is eligible for progression
+    keyword_total_sum = 0
+    for date in date_range:
+        curr_value = server.hget(date, module+'-'+keyword)
+        keyword_total_sum += int(curr_value) if curr_value is not None else 0
+
+    if (server.scard(redis_progression_name_set) < max_set_cardinality):
+        server.sadd(redis_progression_name_set, keyword)
+
+    else: #not in the set
+        #Check value for all members
+        member_set = []
+        for keyw in server.smembers(redis_progression_name_set):
+            member_set.append((keyw, int(server.hget(paste_date, module+'-'+keyw) or 0)))
+        member_set.sort(key=lambda tup: tup[1])
+        if member_set[0][1] < keyword_total_sum:
+            #remove min from set and add the new one
+            print module + ': adding ' +keyword+ '(' +str(keyword_total_sum)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')'
+            server.srem(redis_progression_name_set, member_set[0][0])
+            server.sadd(redis_progression_name_set, keyword)
+
+
+def compute_provider_size(server, path, num_day_to_look):
+
+    redis_progression_name_set = 'top_size_set'
+    paste = Paste.Paste(path)
+
+    paste_size = paste._get_p_size()
+    paste_provider = 
paste.p_source
+    paste_date = paste._get_p_date()
+    new_avg = paste_size
+
+    # Add/Update in Redis
+    prev_num_paste = server.hget(paste_provider+'_num', paste_date)
+    if prev_num_paste is not None:
+        server.hset(paste_provider+'_num', paste_date, int(prev_num_paste)+1)
+        prev_sum_size = server.hget(paste_provider+'_size', paste_date)
+
+        if prev_sum_size is not None:
+            server.hset(paste_provider+'_size', paste_date, float(prev_sum_size)+paste_size)
+            new_avg = (float(prev_sum_size)+paste_size) / (int(prev_num_paste)+1)
+        else:
+            server.hset(paste_provider+'_size', paste_date, paste_size)
+
+    else:
+        server.hset(paste_provider+'_num', paste_date, 1)
+
+    # Compute Most Posted
+    #date_range = get_date_range(num_day_to_look)
+    # check if this keyword is eligible for progression
+    provider_total_sum = 0
+    #for date in date_range:
+    #    curr_value = server.hget(paste_provider+'_size', date)
+    #    provider_total_sum += int(curr_value) if curr_value is not None else 0
+
+    #if paste_provider in server.smembers(redis_progression_name_set): # if it is already in the set
+    #    return
+
+    if (server.scard(redis_progression_name_set) < max_set_cardinality):
+        server.sadd(redis_progression_name_set, paste_provider)
+
+    else: #not in the set
+        #Check value for all members
+        member_set = []
+        for provider in server.smembers(redis_progression_name_set):
+            curr_avg = 0.0
+            # for date in date_range:
+            curr_size = server.hget(provider+'_size', paste_date)
+            curr_num = server.hget(provider+'_num', paste_date)
+            print curr_size
+            if (curr_size is not None) and (curr_num is not None):
+                curr_avg += float(curr_size) / float(curr_num)
+            print str(curr_avg)
+            member_set.append((provider, curr_avg))
+        member_set.sort(key=lambda tup: tup[1])
+        if member_set[0][1] < new_avg:
+            #remove min from set and add the new one
+            print 'Adding ' +paste_provider+ '(' +str(new_avg)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')'
+            server.srem(redis_progression_name_set, member_set[0][0])
+

+        server.sadd(redis_progression_name_set, paste_provider)
+
+
+
+if __name__ == '__main__':
+    # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
+    # Port of the redis instance used by pubsublogger
+    publisher.port = 6380
+    # Script is the default channel used for the modules.
+    publisher.channel = 'Script'
+
+    # Section name in bin/packages/modules.cfg
+    config_section = 'ModuleStats'
+
+    # Setup the I/O queues
+    p = Process(config_section)
+
+    # Sent to the logging a description of the module
+    publisher.info("Makes statistics about valid URL")
+
+    # REDIS #
+    r_serv_trend = redis.StrictRedis(
+        host=p.config.get("Redis_Level_DB_Trending", "host"),
+        port=p.config.get("Redis_Level_DB_Trending", "port"),
+        db=p.config.get("Redis_Level_DB_Trending", "db"))
+
+    # Endless loop getting messages from the input queue
+    while True:
+        # Get one message from the input queue
+        message = p.get_from_set()
+
+        if message is None:
+            publisher.debug("{} queue is empty, waiting".format(config_section))
+            print 'sleeping'
+            time.sleep(2)
+            continue
+
+        else:
+            # Do something with the message from the queue
+            print message.split(';')
+            if len(message.split(';')) > 1:
+                compute_most_posted(r_serv_trend, message, num_day_to_look)
+            else:
+                compute_provider_size(r_serv_trend, message, num_day_to_look)
diff --git a/bin/packages/modules.cfg b/bin/packages/modules.cfg
index 5f087427..5f8a7f31 100644
--- a/bin/packages/modules.cfg
+++ b/bin/packages/modules.cfg
@@ -1,6 +1,6 @@
 [Global]
 subscribe = ZMQ_Global
-publish = Redis_Global
+publish = Redis_Global,Redis_ModuleStats
 
 [Duplicates]
 subscribe = Redis_Duplicate
@@ -30,12 +30,12 @@ subscribe = Redis_Global
 publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Web,Redis_Credential,Redis_SourceCode,Redis_Cve
 
 [CreditCards]
-subscribe = Redis_CreditCards
-publish = Redis_Duplicate
+subscribe = Redis_CreditCards
+publish = 
Redis_Duplicate,Redis_ModuleStats [Mail] subscribe = Redis_Mail -publish = Redis_Duplicate +publish = Redis_Duplicate,Redis_ModuleStats [Onion] subscribe = Redis_Onion @@ -52,12 +52,15 @@ publish = Redis_Url,ZMQ_Url [WebStats] subscribe = Redis_Url +[ModuleStats] +subscribe = Redis_ModuleStats + [Release] subscribe = Redis_Global [Credential] subscribe = Redis_Credential -publish = Redis_Duplicate +publish = Redis_Duplicate,Redis_ModuleStats [Cve] subscribe = Redis_Cve diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index 4bdee047..00ceaf14 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -163,6 +163,72 @@ def progressionCharts(): member_set.append(("No relevant data", int(100))) return jsonify(member_set) +@app.route("/_moduleCharts", methods=['GET']) +def modulesCharts(): + #To be used later + keyword_name = request.args.get('keywordName') + module_name = request.args.get('moduleName') + bar_requested = True if request.args.get('bar') == "true" else False + + if (bar_requested): + num_day = int(request.args.get('days')) + bar_values = [] + + date_range = get_date_range(num_day) + # Retreive all data from the last num_day + for date in date_range: + curr_value = r_serv_charts.hget(date, module_name+'-'+keyword_name) + bar_values.append([date[0:4]+'/'+date[4:6]+'/'+date[6:8], int(curr_value if curr_value is not None else 0)]) + return jsonify(bar_values) + + else: + redis_progression_name_set = 'top_'+ module_name +'_set' + + member_set = [] + for keyw in r_serv_charts.smembers(redis_progression_name_set): + redis_progression_name = module_name+'-'+keyw + keyw_value = r_serv_charts.hget(get_date_range(0)[0] ,redis_progression_name) + keyw_value = keyw_value if keyw_value is not None else 0 + member_set.append((keyw, int(keyw_value))) + member_set.sort(key=lambda tup: tup[1], reverse=True) + if len(member_set) == 0: + member_set.append(("No relevant data", int(100))) + return jsonify(member_set) + + +@app.route("/_sizeCharts", 
methods=['GET']) +def sizeCharts(): + #To be used later + keyword_name = request.args.get('keywordName') + module_name = request.args.get('moduleName') + bar_requested = True if request.args.get('bar') == "true" else False + + if (bar_requested): + num_day = int(request.args.get('days')) + bar_values = [] + + date_range = get_date_range(num_day) + # Retreive all data from the last num_day + for date in date_range: + curr_value = r_serv_charts.hget(keyword_name+'_size', date) + bar_values.append([date[0:4]+'/'+date[4:6]+'/'+date[6:8], float(curr_value if curr_value is not None else 0)]) + return jsonify(bar_values) + + else: + redis_progression_name_set = 'top_size_set' + + member_set = [] + for keyw in r_serv_charts.smembers(redis_progression_name_set): + redis_progression_name = keyw+'_size' + keyw_value = r_serv_charts.hget(redis_progression_name, get_date_range(0)[0]) + keyw_value = keyw_value if keyw_value is not None else 0 + member_set.append((keyw, float(keyw_value))) + member_set.sort(key=lambda tup: tup[1], reverse=True) + if len(member_set) == 0: + member_set.append(("No relevant data", float(100))) + return jsonify(member_set) + + @app.route("/search", methods=['POST']) def search(): @@ -227,6 +293,11 @@ def trending(): return render_template("Trending.html", default_display = default_display) +@app.route("/moduletrending/") +def moduletrending(): + return render_template("Moduletrending.html") + + @app.route("/showsavedpaste/") #completely shows the paste in a new tab def showsavedpaste(): return showpaste(0) diff --git a/var/www/templates/Moduletrending.html b/var/www/templates/Moduletrending.html new file mode 100644 index 00000000..1240f5ef --- /dev/null +++ b/var/www/templates/Moduletrending.html @@ -0,0 +1,145 @@ + + + +
+ + + +