#!/usr/bin/env python2
# -*-coding:UTF-8 -*
"""
The WebStats Module

Makes daily statistics about the valid URLs received from the input
queue: schemes (protocols), TLDs and domains.
"""

import time
import datetime
import os

import redis
from pubsublogger import publisher
from pyfaup.faup import Faup

from packages import lib_words
from packages.Date import Date
from Helper import Process

# Config Var
threshold_need_to_look = 50
range_to_look = 10
threshold_to_plot = 1  # 500%
to_plot = set()
clean_frequency = 10  # minutes


def analyse(server, field_name):
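    """Increment the daily counter of url_parsed[field_name] in `server`.

    Note: relies on the globals `url_parsed` and `date` set in the main
    loop below.
    """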
    field = url_parsed[field_name]
    if field is not None:
        prev_score = server.hget(field, date)
        if prev_score is not None:
            server.hset(field, date, int(prev_score) + 1)
        else:
            server.hset(field, date, 1)


def analyse_and_progression(server, field_name):
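    """Like analyse(), but also watch the field for a progression.

    Once a field has been seen more than `threshold_need_to_look` times
    on a given day, check_for_progression() decides whether it belongs
    in the `to_plot` set. Relies on the globals `url_parsed` and `date`
    set in the main loop below.
    """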
    field = url_parsed[field_name]
    if field is not None:
        prev_score = server.hget(field, date)
        if prev_score is not None:
            print field + ' prev_score:' + prev_score
            server.hset(field, date, int(prev_score) + 1)
            if int(prev_score) + 1 > threshold_need_to_look:  # threshold to avoid false positives
                if check_for_progression(server, field, date):
                    to_plot.add(field)
        else:
            server.hset(field, date, 1)


def check_for_progression(server, field, date):
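    """Return True when `field` is trending over the last `range_to_look` days.

    Sums the daily counters of `field` going back from `date` and compares
    the ratio tot_sum / today_val against `threshold_to_plot`.
    """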
    previous_data = set()
    tot_sum = 0
    today_val = 0
    for i in range(0, range_to_look):
        curr_value = server.hget(field, Date(date).substract_day(i))
        if curr_value is None:  # no further data
            break
        else:
            curr_value = int(curr_value)
            previous_data.add(curr_value)
            tot_sum += curr_value
            if i == 0:
                today_val = curr_value

    if today_val == 0:  # guard against an unset counter and division by zero
        return False
    print 'totsum=' + str(tot_sum)
    print 'div=' + str(tot_sum / today_val)
    return tot_sum / today_val >= threshold_to_plot


if __name__ == '__main__':
    # If you wish to use another port or channel, do not forget to run a
    # subscriber accordingly (see launch_logs.sh)

    # Port of the redis instance used by pubsublogger
    publisher.port = 6380
    # Script is the default channel used for the modules.
    publisher.channel = 'Script'

    # Section name in bin/packages/modules.cfg
    config_section = 'WebStats'

    # Setup the I/O queues
    p = Process(config_section)

    # Send a description of the module to the logger
    publisher.info("Makes statistics about valid URLs")

    # REDIS #
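    # r_serv1 holds the daily scheme and TLD counters, r_serv2 the
    # per-domain counters used for trend detection.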
    r_serv1 = redis.StrictRedis(
        host=p.config.get("Redis_Level_DB", "host"),
        port=p.config.get("Redis_Level_DB", "port"),
        db=p.config.get("Redis_Level_DB", "db"))

    r_serv2 = redis.StrictRedis(
        host=p.config.get("Redis_Level_DB_Domain", "host"),
        port=p.config.get("Redis_Level_DB_Domain", "port"),
        db=p.config.get("Redis_Level_DB_Domain", "db"))

    # FILE CURVE SECTION #
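    # CSV files receiving the daily counts plotted as trending curves;
    # all paths are resolved relative to $AIL_HOME.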
    csv_path_proto = os.path.join(os.environ['AIL_HOME'],
                                  p.config.get("Directories", "protocolstrending_csv"))
    protocolsfile_path = os.path.join(os.environ['AIL_HOME'],
                                      p.config.get("Directories", "protocolsfile"))

    csv_path_tld = os.path.join(os.environ['AIL_HOME'],
                                p.config.get("Directories", "tldstrending_csv"))
    tldsfile_path = os.path.join(os.environ['AIL_HOME'],
                                 p.config.get("Directories", "tldsfile"))

    csv_path_domain = os.path.join(os.environ['AIL_HOME'],
                                   p.config.get("Directories", "domainstrending_csv"))

    faup = Faup()
    generate_new_graph = False

    # Endless loop getting messages from the input queue
    while True:
        # Get one message from the input queue
        message = p.get_from_set()

        if message is None:
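            # The queue is empty: rebuild the CSV curves once if new data
            # arrived since the last build, then sleep before polling again.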
            if generate_new_graph:
                generate_new_graph = False
                print 'Building graph'
                today = datetime.date.today()
                year = today.year
                month = today.month

                lib_words.create_curve_with_word_file(r_serv1, csv_path_proto,
                                                      protocolsfile_path, year,
                                                      month)

                lib_words.create_curve_with_word_file(r_serv1, csv_path_tld,
                                                      tldsfile_path, year,
                                                      month)

                lib_words.create_curve_with_list(r_serv2, csv_path_domain,
                                                 to_plot, year, month)
                print 'end building'

            publisher.debug("{} queue is empty, waiting".format(config_section))
            print 'sleeping'
            time.sleep(5)
            continue
        else:
            generate_new_graph = True
            # Parse the URL carried by the message (format: '<url> <date>')
            url, date = message.split()
            faup.decode(url)
            url_parsed = faup.get()
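            # url_parsed is a dict of URL components ('scheme', 'tld',
            # 'domain', ...) read by the analyse* functions above.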

            analyse(r_serv1, 'scheme')  # Scheme analysis
            analyse(r_serv1, 'tld')  # Tld analysis
            analyse_and_progression(r_serv2, 'domain')  # Domain analysis