diff --git a/.gitignore b/.gitignore index e74906ae..c4bd48c9 100644 --- a/.gitignore +++ b/.gitignore @@ -31,6 +31,7 @@ var/www/submitted # Local config bin/packages/config.cfg +bin/packages/config.cfg.backup configs/keys # installed files @@ -39,3 +40,4 @@ doc/all_modules.txt # auto generated doc/module-data-flow.png doc/data-flow.png +doc/statistics diff --git a/OVERVIEW.md b/OVERVIEW.md index b62a063f..effb387d 100644 --- a/OVERVIEW.md +++ b/OVERVIEW.md @@ -23,6 +23,15 @@ Redis and ARDB overview ARDB overview --------------------------- +ARDB_DB +* DB 1 - Curve +* DB 2 - TermFreq +* DB 3 - Trending +* DB 4 - Sentiment +* DB 5 - TermCred +* DB 6 - Tags +* DB 7 - Metadata +* DB 8 - Statistics * DB 7 - Metadata: ----------------------------------------- BASE64 ---------------------------------------- @@ -40,7 +49,7 @@ ARDB overview SET - 'all_decoder' decoder* - SET - 'hash_all_type' hash_type * + SET - 'hash_all_type' hash_type * SET - 'hash_base64_all_type' hash_type * SET - 'hash_binary_all_type' hash_type * @@ -62,4 +71,3 @@ ARDB overview GET - 'base64_decoded:'+date nd_decoded GET - 'binary_decoded:'+date nd_decoded - diff --git a/bin/BankAccount.py b/bin/BankAccount.py index 58fa3e64..06e86d06 100755 --- a/bin/BankAccount.py +++ b/bin/BankAccount.py @@ -11,6 +11,8 @@ It apply IBAN regexes on paste content and warn if above a threshold. 
import redis import time +import redis +import datetime import re import string from itertools import chain @@ -54,11 +56,13 @@ def check_all_iban(l_iban, paste, filename): iban = ''.join(e for e in iban if e.isalnum()) #iban = iban.upper() res = iban_regex_verify.findall(iban) + date = datetime.datetime.now().strftime("%Y%m") if res: print('checking '+iban) if is_valid_iban(iban): print('------') nb_valid_iban = nb_valid_iban + 1 + server_statistics.hincrby('iban_by_country:'+date, iban[0:2], 1) if(nb_valid_iban > 0): to_print = 'Iban;{};{};{};'.format(paste.p_source, paste.p_date, paste.p_name) @@ -79,6 +83,13 @@ if __name__ == "__main__": p = Process(config_section) max_execution_time = p.config.getint("BankAccount", "max_execution_time") + # ARDB # + server_statistics = redis.StrictRedis( + host=p.config.get("ARDB_Statistics", "host"), + port=p.config.getint("ARDB_Statistics", "port"), + db=p.config.getint("ARDB_Statistics", "db"), + decode_responses=True) + publisher.info("BankAccount started") message = p.get_from_set() diff --git a/bin/Credential.py b/bin/Credential.py index d1016586..7f665227 100755 --- a/bin/Credential.py +++ b/bin/Credential.py @@ -28,6 +28,7 @@ import sys from packages import Paste from pubsublogger import publisher from Helper import Process +import datetime import re import redis from pyfaup.faup import Faup @@ -58,6 +59,12 @@ if __name__ == "__main__": db=p.config.get("ARDB_TermCred", "db"), decode_responses=True) + server_statistics = redis.StrictRedis( + host=p.config.get("ARDB_Statistics", "host"), + port=p.config.getint("ARDB_Statistics", "port"), + db=p.config.getint("ARDB_Statistics", "db"), + decode_responses=True) + criticalNumberToAlert = p.config.getint("Credential", "criticalNumberToAlert") minTopPassList = p.config.getint("Credential", "minTopPassList") @@ -65,6 +72,7 @@ if __name__ == "__main__": #regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+" regex_cred = 
"[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+" regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:" + while True: message = p.get_from_set() if message is None: @@ -132,6 +140,13 @@ if __name__ == "__main__": if sites_set: print("=======> Probably on : {}".format(', '.join(sites_set))) + + date = datetime.datetime.now().strftime("%Y%m") + for cred in creds: + maildomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", cred.lower())[0] + faup.decode(maildomains) + tld = faup.get()['tld'] + server_statistics.hincrby('credential_by_tld:'+date, tld, 1) else: publisher.info(to_print) print('found {} credentials'.format(len(creds))) diff --git a/bin/Helper.py b/bin/Helper.py index d90388f5..52097ef6 100755 --- a/bin/Helper.py +++ b/bin/Helper.py @@ -111,7 +111,7 @@ class PubSub(object): class Process(object): - def __init__(self, conf_section): + def __init__(self, conf_section, module=True): configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') if not os.path.exists(configfile): raise Exception('Unable to find the configuration file. 
\ @@ -125,23 +125,24 @@ class Process(object): self.subscriber_name = conf_section self.pubsub = None - if self.modules.has_section(conf_section): - self.pubsub = PubSub() - else: - raise Exception('Your process has to listen to at least one feed.') - self.r_temp = redis.StrictRedis( - host=self.config.get('RedisPubSub', 'host'), - port=self.config.get('RedisPubSub', 'port'), - db=self.config.get('RedisPubSub', 'db'), - decode_responses=True) + if module: + if self.modules.has_section(conf_section): + self.pubsub = PubSub() + else: + raise Exception('Your process has to listen to at least one feed.') + self.r_temp = redis.StrictRedis( + host=self.config.get('RedisPubSub', 'host'), + port=self.config.get('RedisPubSub', 'port'), + db=self.config.get('RedisPubSub', 'db'), + decode_responses=True) - self.serv_statistics = redis.StrictRedis( - host=self.config.get('ARDB_Statistics', 'host'), - port=self.config.get('ARDB_Statistics', 'port'), - db=self.config.get('ARDB_Statistics', 'db'), - decode_responses=True) + self.serv_statistics = redis.StrictRedis( + host=self.config.get('ARDB_Statistics', 'host'), + port=self.config.get('ARDB_Statistics', 'port'), + db=self.config.get('ARDB_Statistics', 'db'), + decode_responses=True) - self.moduleNum = os.getpid() + self.moduleNum = os.getpid() def populate_set_in(self): # monoproc diff --git a/bin/LibInjection.py b/bin/LibInjection.py index 4ad388d5..283bba00 100755 --- a/bin/LibInjection.py +++ b/bin/LibInjection.py @@ -12,6 +12,8 @@ It tries to identify SQL Injections with libinjection. 
""" import time +import datetime +import redis import string import urllib.request import re @@ -54,6 +56,12 @@ def analyse(url, path): msg = 'infoleak:automatic-detection="sql-injection";{}'.format(path) p.populate_set_out(msg, 'Tags') + #statistics + tld = url_parsed['tld'] + if tld is not None: + date = datetime.datetime.now().strftime("%Y%m") + server_statistics.hincrby('SQLInjection_by_tld:'+date, tld, 1) + if __name__ == '__main__': # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh) # Port of the redis instance used by pubsublogger @@ -70,6 +78,12 @@ if __name__ == '__main__': # Sent to the logging a description of the module publisher.info("Try to detect SQL injection with LibInjection") + server_statistics = redis.StrictRedis( + host=p.config.get("ARDB_Statistics", "host"), + port=p.config.getint("ARDB_Statistics", "port"), + db=p.config.getint("ARDB_Statistics", "db"), + decode_responses=True) + faup = Faup() # Endless loop getting messages from the input queue diff --git a/bin/Mail.py b/bin/Mail.py index c1d8cf70..1f682661 100755 --- a/bin/Mail.py +++ b/bin/Mail.py @@ -2,7 +2,7 @@ # -*-coding:UTF-8 -* """ -The CreditCards Module +The Mail Module ====================== This module is consuming the Redis-list created by the Categ module. @@ -12,13 +12,15 @@ It apply mail regexes on paste content and warn if above a threshold. 
""" import redis -import pprint import time +import datetime import dns.exception from packages import Paste from packages import lib_refine from pubsublogger import publisher +from pyfaup.faup import Faup + from Helper import Process if __name__ == "__main__": @@ -27,6 +29,8 @@ if __name__ == "__main__": config_section = 'Mail' + faup = Faup() + p = Process(config_section) addr_dns = p.config.get("Mail", "dns") @@ -36,6 +40,12 @@ if __name__ == "__main__": port=p.config.getint("Redis_Cache", "port"), db=p.config.getint("Redis_Cache", "db"), decode_responses=True) + # ARDB # + server_statistics = redis.StrictRedis( + host=p.config.get("ARDB_Statistics", "host"), + port=p.config.getint("ARDB_Statistics", "port"), + db=p.config.getint("ARDB_Statistics", "db"), + decode_responses=True) # FUNCTIONS # publisher.info("Suscribed to channel mails_categ") @@ -66,7 +76,6 @@ if __name__ == "__main__": PST.save_attribute_redis(channel, (MX_values[0], list(MX_values[1]))) - pprint.pprint(MX_values) to_print = 'Mails;{};{};{};Checked {} e-mail(s);{}'.\ format(PST.p_source, PST.p_date, PST.p_name, MX_values[0], PST.p_path) @@ -79,12 +88,22 @@ if __name__ == "__main__": msg = 'infoleak:automatic-detection="mail";{}'.format(filename) p.populate_set_out(msg, 'Tags') + #create country statistics + date = datetime.datetime.now().strftime("%Y%m") + for mail in MX_values[1]: + print('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date)) + p.populate_set_out('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date), 'ModuleStats') + + faup.decode(mail) + tld = faup.get()['tld'] + server_statistics.hincrby('mail_by_tld:'+date, tld, MX_values[1][mail]) + else: publisher.info(to_print) - #Send to ModuleStats + #create country statistics for mail in MX_values[1]: - print('mail;{};{};{}'.format(1, mail, PST.p_date)) - p.populate_set_out('mail;{};{};{}'.format(1, mail, PST.p_date), 'ModuleStats') + print('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date)) + 
p.populate_set_out('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date), 'ModuleStats') prec_filename = filename diff --git a/bin/SQLInjectionDetection.py b/bin/SQLInjectionDetection.py index 117f3dc0..f03d7555 100755 --- a/bin/SQLInjectionDetection.py +++ b/bin/SQLInjectionDetection.py @@ -12,6 +12,8 @@ It test different possibility to makes some sqlInjection. """ import time +import datetime +import redis import string import urllib.request import re @@ -85,6 +87,13 @@ def analyse(url, path): msg = 'infoleak:automatic-detection="sql-injection";{}'.format(path) p.populate_set_out(msg, 'Tags') + + #statistics + tld = url_parsed['tld'] + if tld is not None: + date = datetime.datetime.now().strftime("%Y%m") + server_statistics.hincrby('SQLInjection_by_tld:'+date, tld, 1) + else: print("Potential SQL injection:") print(urllib.request.unquote(url)) @@ -143,6 +152,12 @@ if __name__ == '__main__': # Sent to the logging a description of the module publisher.info("Try to detect SQL injection") + server_statistics = redis.StrictRedis( + host=p.config.get("ARDB_Statistics", "host"), + port=p.config.getint("ARDB_Statistics", "port"), + db=p.config.getint("ARDB_Statistics", "db"), + decode_responses=True) + faup = Faup() # Endless loop getting messages from the input queue diff --git a/bin/packages/lib_refine.py b/bin/packages/lib_refine.py index 83511e40..5d2af0a9 100644 --- a/bin/packages/lib_refine.py +++ b/bin/packages/lib_refine.py @@ -38,6 +38,7 @@ def checking_MX_record(r_serv, adress_set, addr_dns): score = 0 num = len(adress_set) WalidMX = set([]) + validMX = {} # Transforming the set into a string MXdomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", str(adress_set).lower()) resolver = dns.resolver.Resolver() @@ -46,20 +47,23 @@ def checking_MX_record(r_serv, adress_set, addr_dns): resolver.lifetime = 2 if MXdomains != []: - for MXdomain in set(MXdomains): + for MXdomain in MXdomains: try: + MXdomain = MXdomain[1:] # Already in Redis living. 
-                    if r_serv.exists(MXdomain[1:]):
+                    if r_serv.exists(MXdomain):
                         score += 1
-                        WalidMX.add(MXdomain[1:])
+                        WalidMX.add(MXdomain)
+                        validMX[MXdomain] = validMX.get(MXdomain, 0) + 1
                     # Not already in Redis
                     else:
                         # If I'm Walid MX domain
-                        if resolver.query(MXdomain[1:], rdtype=dns.rdatatype.MX):
+                        if resolver.query(MXdomain, rdtype=dns.rdatatype.MX):
                             # Gonna be added in redis.
-                            r_serv.setex(MXdomain[1:], 1, timedelta(days=1))
+                            r_serv.setex(MXdomain, 1, timedelta(days=1))
                             score += 1
-                            WalidMX.add(MXdomain[1:])
+                            WalidMX.add(MXdomain)
+                            validMX[MXdomain] = validMX.get(MXdomain, 0) + 1
                         else:
                             pass
 
@@ -86,13 +90,14 @@ def checking_MX_record(r_serv, adress_set, addr_dns):
 
                 except dns.resolver.Timeout:
                     print('timeout')
-                    r_serv.setex(MXdomain[1:], 1, timedelta(days=1))
+                    r_serv.setex(MXdomain, 1, timedelta(days=1))
 
                 except Exception as e:
                     print(e)
 
     publisher.debug("emails before: {0} after: {1} (valid)".format(num, score))
-    return (num, WalidMX)
+    #return (num, WalidMX)
+    return (num, validMX)
 
 
 def checking_A_record(r_serv, domains_set):
diff --git a/doc/statistics/create_graph_by_tld.py b/doc/statistics/create_graph_by_tld.py
new file mode 100755
index 00000000..3ab3e1cc
--- /dev/null
+++ b/doc/statistics/create_graph_by_tld.py
@@ -0,0 +1,185 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+'''
+Create statistics pie charts by tld
+
+Default tld: lu
+'''
+
+import matplotlib
+matplotlib.use('Agg')
+import os
+import sys
+import redis
+import argparse
+import datetime
+import heapq
+import operator
+import matplotlib.pyplot as plt
+import numpy as np
+
+sys.path.append(os.environ['AIL_BIN'])
+
+from Helper import Process
+
+
+def _get_counter(db_key, date, field):
+    '''Return the counter stored under field in the hash db_key + date (0 if unset).'''
+    nb = server_statistics.hget(db_key + date, field)
+    return int(nb) if nb is not None else 0
+
+
+def _get_top_counters(db_key, date, nb_top=5):
+    '''Return the nb_top biggest (field, counter) pairs of the hash db_key + date.'''
+    # a single HGETALL replaces one HKEYS plus one HGET per field
+    counters = server_statistics.hgetall(db_key + date)
+    l_tld = [(tld, int(nb_tld)) for tld, nb_tld in counters.items()]
+    return heapq.nlargest(nb_top, l_tld, key=operator.itemgetter(1))
+
+
+def create_pie_chart(country, db_key, date, pie_title, path, save_name):
+    '''
+    Save a pie chart of the five biggest counters of a monthly statistics hash.
+
+    The counter of country is always displayed: it is added as an extra slice
+    when it is not already part of the top five.
+
+    :param country: hash field to always display (tld or IBAN country code)
+    :param db_key: prefix of the hash key, for example 'mail_by_tld:'
+    :param date: month appended to db_key, format %Y%m
+    :param pie_title: title of the chart
+    :param path: directory where the image is saved
+    :param save_name: file name of the image
+    '''
+    labels = []
+    sizes = []
+    explode = []
+    # every slice is pushed out 0.1 further than the previous one
+    explode_value = 0
+    for tld, nb_tld in _get_top_counters(db_key, date):
+        labels.append('{} ({})'.format(tld, nb_tld))
+        sizes.append(nb_tld)
+        explode.append(explode_value)
+        explode_value = explode_value + 0.1
+
+    nb_country = _get_counter(db_key, date, country)
+    country_label = '{} ({})'.format(country, nb_country)
+    if country_label not in labels:
+        labels.append(country_label)
+        sizes.append(nb_country)
+        explode.append(explode_value)
+
+    fig1, ax1 = plt.subplots()
+    ax1.pie(sizes, explode=tuple(explode), labels=labels, autopct='%1.1f%%',
+            shadow=True, startangle=90)
+    ax1.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
+
+    ax1.set_title(pie_title)
+    plt.savefig(os.path.join(path, save_name))
+    plt.close(fig1)
+
+
+def create_donut_chart(db_key, date, pie_title, path, save_name, country='lu'):
+    '''
+    Save a donut chart of the five biggest counters of a monthly statistics hash.
+
+    :param db_key: prefix of the hash key, for example 'mail_by_tld:'
+    :param date: month appended to db_key, format %Y%m
+    :param pie_title: title of the chart
+    :param path: directory where the image is saved
+    :param save_name: file name of the image
+    :param country: hash field to always display, added as an extra wedge
+                    when it is not part of the top five (default: lu)
+    '''
+    recipe = []
+    data = []
+    for tld, nb_tld in _get_top_counters(db_key, date):
+        recipe.append(tld)
+        data.append(nb_tld)
+
+    if country not in recipe:
+        recipe.append(country)
+        data.append(_get_counter(db_key, date, country))
+
+    fig1, ax1 = plt.subplots(figsize=(6, 3), subplot_kw=dict(aspect="equal"))
+
+    wedges, texts = ax1.pie(data, wedgeprops=dict(width=0.5), startangle=-40)
+
+    bbox_props = dict(boxstyle="square,pad=0.3", fc="w", ec="k", lw=0.72)
+    kw = dict(xycoords='data', textcoords='data', arrowprops=dict(arrowstyle="-"),
+              bbox=bbox_props, zorder=0, va="center")
+
+    # label every wedge with an arrow starting from the middle of its arc
+    for i, p in enumerate(wedges):
+        ang = (p.theta2 - p.theta1)/2. + p.theta1
+        y = np.sin(np.deg2rad(ang))
+        x = np.cos(np.deg2rad(ang))
+        horizontalalignment = {-1: "right", 1: "left"}[int(np.sign(x))]
+        connectionstyle = "angle,angleA=0,angleB={}".format(ang)
+        kw["arrowprops"].update({"connectionstyle": connectionstyle})
+        ax1.annotate(recipe[i], xy=(x, y), xytext=(1.35*np.sign(x), 1.4*y),
+                     horizontalalignment=horizontalalignment, **kw)
+
+    ax1.set_title(pie_title)
+    plt.savefig(os.path.join(path, save_name))
+    plt.close(fig1)
+
+
+if __name__ == '__main__':
+
+    parser = argparse.ArgumentParser(
+        description='''This script is a part of the Analysis Information Leak
+        framework. Create statistics pie charts.''',
+        epilog='Example: ./create_graph_by_tld.py 0 lu now, create_graph_by_tld.py 0 lu 201807')
+
+    # nargs='?' makes the positional arguments optional, so the defaults apply
+    parser.add_argument('type', type=int, default=0, nargs='?',
+                        help='''The graph type (default 0),
+                        0: all,
+                        1: credential_pie,
+                        2: mail_pie
+                        3: sqlinjection_pie,
+                        4: iban_pie,''',
+                        choices=[0, 1, 2, 3, 4], action='store')
+
+    parser.add_argument('country', type=str, default="lu", nargs='?',
+                        help='''The country code, lu:default''',
+                        action='store')
+
+    # argparse %-formats help strings: a literal % has to be written %%
+    parser.add_argument('date', type=str, default="now", nargs='?',
+                        help='''month %%Y%%m, example: 201810''', action='store')
+
+    args = parser.parse_args()
+
+    path = os.path.join(os.environ['AIL_HOME'], 'doc', 'statistics') # save path
+
+    config_section = 'ARDB_Statistics'
+
+    p = Process(config_section, False)
+
+    # ARDB #
+    server_statistics = redis.StrictRedis(
+        host=p.config.get("ARDB_Statistics", "host"),
+        port=p.config.getint("ARDB_Statistics", "port"),
+        db=p.config.getint("ARDB_Statistics", "db"),
+        decode_responses=True)
+
+    if args.date == 'now' or len(args.date) != 6:
+        date = datetime.datetime.now().strftime("%Y%m")
+    else:
+        date = args.date
+
+    # graph type -> (hash field to highlight, hash key prefix, title, file name)
+    # the iban hash is queried with the upper-cased country code
+    graphs = {
+        1: (args.country, 'credential_by_tld:', "AIL: Credential leak by tld", 'AIL_credential_by_tld.png'),
+        2: (args.country, 'mail_by_tld:', "AIL: mail leak by tld", 'AIL_mail_by_tld.png'),
+        3: (args.country, 'SQLInjection_by_tld:', "AIL: SQLInjection by tld", 'AIL_SQLInjection_by_tld.png'),
+        4: (args.country.upper(), 'iban_by_country:', "AIL: Iban by country", 'AIL_iban_by_country.png'),
+    }
+    # type 0 draws every graph, any other type draws a single one
+    for graph_type in (sorted(graphs) if args.type == 0 else [args.type]):
+        field, db_key, pie_title, save_name = graphs[graph_type]
+        create_pie_chart(field, db_key, date, pie_title, path, save_name)
diff --git a/installing_deps.sh b/installing_deps.sh
index c29ba4b9..975830f4 100755
--- a/installing_deps.sh
+++ b/installing_deps.sh
@@ -5,7 +5,7 @@ set -x
 
 sudo apt-get update
 
-sudo apt-get install python3-pip python-virtualenv python3-dev libfreetype6-dev \
+sudo apt-get install python3-pip python-virtualenv python3-dev python3-tk libfreetype6-dev \
     screen g++ python-tk unzip libsnappy-dev cmake -y
 
 #optional tor install
diff --git a/pip3_packages_requirement.txt b/pip3_packages_requirement.txt
index 53ec97e7..dd447d5c 100644
--- a/pip3_packages_requirement.txt
+++ b/pip3_packages_requirement.txt
@@ -70,3 +70,6 @@ https://github.com/saffsd/langid.py/archive/master.zip
 
 #LibInjection bindings
 pylibinjection
+
+# Graph
+matplotlib