This commit is contained in:
Raphaël Vinot 2016-07-20 14:12:18 +02:00
parent 34e23998b1
commit 62eef44ca8

View file

@ -6,22 +6,21 @@
import time import time
import datetime import datetime
import re
import redis import redis
import os import os
from packages import lib_words from packages import lib_words
from packages.Date import Date from packages.Date import Date
from pubsublogger import publisher from pubsublogger import publisher
from packages import Paste
from Helper import Process from Helper import Process
from pyfaup.faup import Faup from pyfaup.faup import Faup
# Config Var # Config Var
threshold_need_to_look = 50 threshold_need_to_look = 50
range_to_look = 10 range_to_look = 10
threshold_to_plot = 1 #500% threshold_to_plot = 1 # 500%
to_plot = set() to_plot = set()
clean_frequency = 10 #minutes clean_frequency = 10 # minutes
def analyse(server, field_name): def analyse(server, field_name):
field = url_parsed[field_name] field = url_parsed[field_name]
@ -32,6 +31,7 @@ def analyse(server, field_name):
else: else:
server.hset(field, date, 1) server.hset(field, date, 1)
def analyse_and_progression(server, field_name): def analyse_and_progression(server, field_name):
field = url_parsed[field_name] field = url_parsed[field_name]
if field is not None: if field is not None:
@ -39,43 +39,34 @@ def analyse_and_progression(server, field_name):
if prev_score is not None: if prev_score is not None:
print field + ' prev_score:' + prev_score print field + ' prev_score:' + prev_score
server.hset(field, date, int(prev_score) + 1) server.hset(field, date, int(prev_score) + 1)
if int(prev_score) + 1 > threshold_need_to_look: #threshold for false possitive if int(prev_score) + 1 > threshold_need_to_look: # threshold for false possitive
if(check_for_progression(server, field, date)): if(check_for_progression(server, field, date)):
to_plot.add(field) to_plot.add(field)
else: else:
server.hset(field, date, 1) server.hset(field, date, 1)
def check_for_progression(server, field, date): def check_for_progression(server, field, date):
previous_data = set() previous_data = set()
tot_sum = 0 tot_sum = 0
for i in range(0, range_to_look): for i in range(0, range_to_look):
curr_value = server.hget(field, Date(date).substract_day(i)) curr_value = server.hget(field, Date(date).substract_day(i))
if curr_value is None: #no further data if curr_value is None: # no further data
break break
else: else:
curr_value = int(curr_value) curr_value = int(curr_value)
previous_data.add(curr_value) previous_data.add(curr_value)
tot_sum += curr_value tot_sum += curr_value
if i == 0: if i == 0:
today_val = curr_value today_val = curr_value
print 'totsum=' + str(tot_sum)
print 'totsum='+str(tot_sum) print 'div=' + str(tot_sum / today_val)
print 'div='+str(tot_sum/today_val) if tot_sum / today_val >= threshold_to_plot:
if tot_sum/today_val >= threshold_to_plot:
return True return True
else: else:
return False return False
def clean_to_plot():
temp_to_plot = set()
curr_date = datetime.date.today()
date = Date(str(curr_date.year)+str(curr_date.month)+str(curr_date.day))
for elem in to_plot:
if(check_for_progression(field, date)):
temp_to_plot.add(elem)
to_plot = temp_to_plot
if __name__ == '__main__': if __name__ == '__main__':
# If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh) # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
@ -98,7 +89,7 @@ if __name__ == '__main__':
host=p.config.get("Redis_Level_DB", "host"), host=p.config.get("Redis_Level_DB", "host"),
port=p.config.get("Redis_Level_DB", "port"), port=p.config.get("Redis_Level_DB", "port"),
db=p.config.get("Redis_Level_DB", "db")) db=p.config.get("Redis_Level_DB", "db"))
r_serv2 = redis.StrictRedis( r_serv2 = redis.StrictRedis(
host=p.config.get("Redis_Level_DB_Domain", "host"), host=p.config.get("Redis_Level_DB_Domain", "host"),
port=p.config.get("Redis_Level_DB_Domain", "port"), port=p.config.get("Redis_Level_DB_Domain", "port"),
@ -106,18 +97,17 @@ if __name__ == '__main__':
# FILE CURVE SECTION # # FILE CURVE SECTION #
csv_path_proto = os.path.join(os.environ['AIL_HOME'], csv_path_proto = os.path.join(os.environ['AIL_HOME'],
p.config.get("Directories", "protocolstrending_csv")) p.config.get("Directories", "protocolstrending_csv"))
protocolsfile_path = os.path.join(os.environ['AIL_HOME'], protocolsfile_path = os.path.join(os.environ['AIL_HOME'],
p.config.get("Directories", "protocolsfile")) p.config.get("Directories", "protocolsfile"))
csv_path_tld = os.path.join(os.environ['AIL_HOME'], csv_path_tld = os.path.join(os.environ['AIL_HOME'],
p.config.get("Directories", "tldstrending_csv")) p.config.get("Directories", "tldstrending_csv"))
tldsfile_path = os.path.join(os.environ['AIL_HOME'], tldsfile_path = os.path.join(os.environ['AIL_HOME'],
p.config.get("Directories", "tldsfile")) p.config.get("Directories", "tldsfile"))
csv_path_domain = os.path.join(os.environ['AIL_HOME'], csv_path_domain = os.path.join(os.environ['AIL_HOME'],
p.config.get("Directories", "domainstrending_csv")) p.config.get("Directories", "domainstrending_csv"))
faup = Faup() faup = Faup()
generate_new_graph = False generate_new_graph = False
@ -125,7 +115,7 @@ if __name__ == '__main__':
while True: while True:
# Get one message from the input queue # Get one message from the input queue
message = p.get_from_set() message = p.get_from_set()
if message is None: if message is None:
if generate_new_graph: if generate_new_graph:
generate_new_graph = False generate_new_graph = False
@ -143,8 +133,7 @@ if __name__ == '__main__':
month) month)
lib_words.create_curve_with_list(r_serv2, csv_path_domain, lib_words.create_curve_with_list(r_serv2, csv_path_domain,
to_plot, year, to_plot, year, month)
month)
print 'end building' print 'end building'
publisher.debug("{} queue is empty, waiting".format(config_section)) publisher.debug("{} queue is empty, waiting".format(config_section))
@ -152,13 +141,13 @@ if __name__ == '__main__':
time.sleep(5) time.sleep(5)
continue continue
else: else:
generate_new_graph = True generate_new_graph = True
# Do something with the message from the queue # Do something with the message from the queue
url, date = message.split() url, date = message.split()
faup.decode(url) faup.decode(url)
url_parsed = faup.get() url_parsed = faup.get()
analyse(r_serv1, 'scheme') #Scheme analysis analyse(r_serv1, 'scheme') # Scheme analysis
analyse(r_serv1, 'tld') #Tld analysis analyse(r_serv1, 'tld') # Tld analysis
analyse_and_progression(r_serv2, 'domain') #Domain analysis analyse_and_progression(r_serv2, 'domain') # Domain analysis