From 5dcb4ebb582ace37f41266042a83b09488cdf671 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Mon, 30 Jul 2018 16:36:34 +0200 Subject: [PATCH] chg: [statistics] add script to generate pie chart png --- bin/Credential.py | 15 ++-- bin/Helper.py | 23 ++++--- bin/Mail.py | 21 +++--- doc/api/create_lu_graph.py | 137 +++++++++++++++++++++++++++++++++++++ 4 files changed, 169 insertions(+), 27 deletions(-) create mode 100755 doc/api/create_lu_graph.py diff --git a/bin/Credential.py b/bin/Credential.py index bc4ca9fe..8dd91c41 100755 --- a/bin/Credential.py +++ b/bin/Credential.py @@ -72,6 +72,7 @@ if __name__ == "__main__": #regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+" regex_cred = "[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+" regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:" + while True: message = p.get_from_set() if message is None: @@ -141,20 +142,20 @@ if __name__ == "__main__": if sites_set: print("=======> Probably on : {}".format(', '.join(sites_set))) + + date = datetime.datetime.now().strftime("%Y%m") + for cred in creds: + maildomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", cred.lower())[0] + faup.decode(maildomains) + tld = faup.get()['tld'] + server_statistics.hincrby('credential_by_tld:'+date, tld, 1) else: publisher.info(to_print) print('found {} credentials'.format(len(creds))) #for searching credential in termFreq - date = datetime.datetime.now().strftime("%Y%m") for cred in creds: - mail = cred.split('@')[-1].split()[0] - faup.decode(mail) - tld = faup.get()['tld'] - print(tld) - server_statistics.hincrby('credential_by_tld:'+date, tld, 1) - cred = cred.split('@')[0] #Split to ignore mail address #unique number attached to unique path diff --git a/bin/Helper.py b/bin/Helper.py index c0d836e3..0bb4b410 100755 --- a/bin/Helper.py +++ b/bin/Helper.py @@ -111,7 +111,7 @@ class PubSub(object): class Process(object): - def __init__(self, conf_section): + def __init__(self, conf_section, module=True): configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') if not os.path.exists(configfile): raise Exception('Unable to find the configuration file. \ @@ -125,17 +125,18 @@ class Process(object): self.subscriber_name = conf_section self.pubsub = None - if self.modules.has_section(conf_section): - self.pubsub = PubSub() - else: - raise Exception('Your process has to listen to at least one feed.') - self.r_temp = redis.StrictRedis( - host=self.config.get('RedisPubSub', 'host'), - port=self.config.get('RedisPubSub', 'port'), - db=self.config.get('RedisPubSub', 'db'), - decode_responses=True) + if module: + if self.modules.has_section(conf_section): + self.pubsub = PubSub() + else: + raise Exception('Your process has to listen to at least one feed.') + self.r_temp = redis.StrictRedis( + host=self.config.get('RedisPubSub', 'host'), + port=self.config.get('RedisPubSub', 'port'), + db=self.config.get('RedisPubSub', 'db'), + decode_responses=True) - self.moduleNum = os.getpid() + self.moduleNum = os.getpid() def populate_set_in(self): # monoproc diff --git a/bin/Mail.py b/bin/Mail.py index 2cb9b769..1f682661 100755 --- a/bin/Mail.py +++ b/bin/Mail.py @@ -2,7 +2,7 @@ # -*-coding:UTF-8 -* """ -The CreditCards Module +The Mail Module ====================== This module is consuming the Redis-list created by the Categ module. @@ -12,7 +12,6 @@ It apply mail regexes on paste content and warn if above a threshold. """ import redis -import pprint import time import datetime import dns.exception @@ -89,19 +88,23 @@ if __name__ == "__main__": msg = 'infoleak:automatic-detection="mail";{}'.format(filename) p.populate_set_out(msg, 'Tags') + #create country statistics + date = datetime.datetime.now().strftime("%Y%m") + for mail in MX_values[1]: + print('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date)) + p.populate_set_out('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date), 'ModuleStats') + + faup.decode(mail) + tld = faup.get()['tld'] + server_statistics.hincrby('mail_by_tld:'+date, tld, MX_values[1][mail]) + else: publisher.info(to_print) - #Send to ModuleStats and create country statistics - date = datetime.datetime.now().strftime("%Y%m") + #create country statistics for mail in MX_values[1]: print('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date)) p.populate_set_out('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date), 'ModuleStats') - faup.decode(mail) - tld = faup.get()['tld'] - print(tld) - server_statistics.hincrby('mail_by_tld:'+date, tld, MX_values[1][mail]) - prec_filename = filename else: diff --git a/doc/api/create_lu_graph.py b/doc/api/create_lu_graph.py new file mode 100755 index 00000000..4bcff2a6 --- /dev/null +++ b/doc/api/create_lu_graph.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +''' +lu +''' + +import os +import sys +import redis +import datetime +import heapq +import operator +import matplotlib.pyplot as plt +import numpy as np + +sys.path.append(os.environ['AIL_BIN']) + +from Helper import Process + +def create_pie_chart(db_key, date, pie_title, path, save_name): + + monthly_credential_by_tld = server_statistics.hkeys(db_key + date) + + l_tld = [] + for tld in monthly_credential_by_tld: + nb_tld = server_statistics.hget(db_key + date, tld) + if nb_tld is not None: + nb_tld = int(nb_tld) + else: + nb_tld = 0 + l_tld.append( (tld, nb_tld) ) + + mail_tld_top5 = heapq.nlargest(5, l_tld, key=operator.itemgetter(1)) + + # Pie chart, where the slices will be ordered and plotted counter-clockwise: + labels = [] + sizes = [] + explode = [] # only "explode" the 2nd slice (i.e. 'Hogs') + for tld in mail_tld_top5: + labels.append(tld[0]) + sizes.append(tld[1]) + explode.append(0) + + nb_tld = server_statistics.hget(db_key + date, 'lu') + if nb_tld is not None: + nb_tld = int(nb_tld) + else: + nb_tld = 0 + labels.append('lu') + sizes.append(nb_tld) + explode.append(0.3) # only "explode" lu slice + explode = tuple(explode) + + fig1, ax1 = plt.subplots() + ax1.pie(sizes, explode=explode, labels=labels, autopct='%1.1f%%', + shadow=True, startangle=90) + ax1.axis('equal') # Equal aspect ratio ensures that pie is drawn as a circle. + + ax1.set_title(pie_title) + #plt.show() + plt.savefig(os.path.join(path, save_name)) + plt.close(fig1) + +def create_donut_chart(db_key, date, pie_title, path, save_name): + + monthly_credential_by_tld = server_statistics.hkeys(db_key + date) + print() + + l_tld = [] + for tld in monthly_credential_by_tld: + nb_tld = server_statistics.hget(db_key + date, tld) + if nb_tld is not None: + nb_tld = int(nb_tld) + else: + nb_tld = 0 + l_tld.append( (tld, nb_tld) ) + + mail_tld_top5 = heapq.nlargest(5, l_tld, key=operator.itemgetter(1)) + + # Pie chart, where the slices will be ordered and plotted counter-clockwise: + recipe = [] + data = [] + for tld in mail_tld_top5: + recipe.append(tld[0]) + data.append(tld[1]) + + nb_tld = server_statistics.hget(db_key + date, 'lu') + if nb_tld is not None: + nb_tld = int(nb_tld) + else: + nb_tld = 0 + recipe.append('lu') + data.append(nb_tld) + + fig1, ax1 = plt.subplots(figsize=(6, 3), subplot_kw=dict(aspect="equal")) + + wedges, texts = ax1.pie(data, wedgeprops=dict(width=0.5), startangle=-40) + + bbox_props = dict(boxstyle="square,pad=0.3", fc="w", ec="k", lw=0.72) + kw = dict(xycoords='data', textcoords='data', arrowprops=dict(arrowstyle="-"), + bbox=bbox_props, zorder=0, va="center") + + for i, p in enumerate(wedges): + ang = (p.theta2 - p.theta1)/2. + p.theta1 + y = np.sin(np.deg2rad(ang)) + x = np.cos(np.deg2rad(ang)) + horizontalalignment = {-1: "right", 1: "left"}[int(np.sign(x))] + connectionstyle = "angle,angleA=0,angleB={}".format(ang) + kw["arrowprops"].update({"connectionstyle": connectionstyle}) + ax1.annotate(recipe[i], xy=(x, y), xytext=(1.35*np.sign(x), 1.4*y), + horizontalalignment=horizontalalignment, **kw) + + ax1.set_title(pie_title) + #plt.show() + plt.savefig(os.path.join(path, save_name)) + plt.close(fig1) + +if __name__ == '__main__': + + path = os.path.join(os.environ['AIL_HOME'], 'doc') # path to module config file + + config_section = 'ARDB_Statistics' + + p = Process(config_section, False) + + # ARDB # + server_statistics = redis.StrictRedis( + host=p.config.get("ARDB_Statistics", "host"), + port=p.config.getint("ARDB_Statistics", "port"), + db=p.config.getint("ARDB_Statistics", "db"), + decode_responses=True) + + date = datetime.datetime.now().strftime("%Y%m") + create_pie_chart('credential_by_tld:', date, "AIL: Credential leak by tld", path, 'AIL_credential_by_tld.png') + create_pie_chart('mail_by_tld:', date, "AIL: mail leak by tld", path, 'AIL_mail_by_tld.png') + create_pie_chart('SQLInjection_by_tld:', date, "AIL: sqlInjection by tld", path, 'AIL_sqlInjectionl_by_tld.png')