From 39f54d44202cd57a79e82c2605c88f6dfe0dd278 Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Mon, 30 Jul 2018 09:21:22 +0200
Subject: [PATCH 01/10] chg: [Mail] add country statistic

---
 .gitignore                 |  1 +
 OVERVIEW.md                | 12 +++++++++++-
 bin/Mail.py                | 18 ++++++++++++++----
 bin/packages/lib_refine.py | 21 +++++++++++++--------
 4 files changed, 39 insertions(+), 13 deletions(-)

diff --git a/.gitignore b/.gitignore
index e74906ae..11c01083 100644
--- a/.gitignore
+++ b/.gitignore
@@ -31,6 +31,7 @@ var/www/submitted

 # Local config
 bin/packages/config.cfg
+bin/packages/config.cfg.backup
 configs/keys

 # installed files

diff --git a/OVERVIEW.md b/OVERVIEW.md
index aa417ff7..cf40bad9 100644
--- a/OVERVIEW.md
+++ b/OVERVIEW.md
@@ -20,4 +20,14 @@ Redis and ARDB overview
 - DB 0 - Lines duplicate
 - DB 1 - Hashes

-To be updated
+ARDB overview
+---------------------------
+ARDB_DB
+* DB 1 - Curve
+* DB 2 - TermFreq
+* DB 3 - Trending
+* DB 4 - Sentiment
+* DB 5 - TermCred
+* DB 6 - Tags
+* DB 7 - Metadata
+* DB 8 - Statistics

diff --git a/bin/Mail.py b/bin/Mail.py
index c1d8cf70..13a675ea 100755
--- a/bin/Mail.py
+++ b/bin/Mail.py
@@ -14,6 +14,7 @@ It apply mail regexes on paste content and warn if above a threshold.

 import redis
 import pprint
 import time
+import datetime
 import dns.exception
 from packages import Paste
 from packages import lib_refine
@@ -36,6 +37,12 @@ if __name__ == "__main__":
         port=p.config.getint("Redis_Cache", "port"),
         db=p.config.getint("Redis_Cache", "db"),
         decode_responses=True)

+    # ARDB #
+    server_statistics = redis.StrictRedis(
+        host=p.config.get("ARDB_Statistics", "host"),
+        port=p.config.getint("ARDB_Statistics", "port"),
+        db=p.config.getint("ARDB_Statistics", "db"),
+        decode_responses=True)

     # FUNCTIONS #
     publisher.info("Suscribed to channel mails_categ")
@@ -66,7 +73,6 @@ if __name__ == "__main__":
                     PST.save_attribute_redis(channel, (MX_values[0],
                                              list(MX_values[1])))

-                    pprint.pprint(MX_values)
                     to_print = 'Mails;{};{};{};Checked {} e-mail(s);{}'.\
                         format(PST.p_source, PST.p_date, PST.p_name,
                                MX_values[0], PST.p_path)
@@ -81,10 +87,14 @@ if __name__ == "__main__":
                     else:
                         publisher.info(to_print)

-                    #Send to ModuleStats
+                    #Send to ModuleStats and create country statistics
+                    date = datetime.datetime.now().strftime("%Y%m")
                     for mail in MX_values[1]:
-                        print('mail;{};{};{}'.format(1, mail, PST.p_date))
-                        p.populate_set_out('mail;{};{};{}'.format(1, mail, PST.p_date), 'ModuleStats')
+                        print('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date))
+                        p.populate_set_out('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date), 'ModuleStats')
+
+                        country = mail.split('.')[-1]
+                        server_statistics.hincrby('mail_by_country:'+date, country, MX_values[1][mail])

                     prec_filename = filename

diff --git a/bin/packages/lib_refine.py b/bin/packages/lib_refine.py
index 83511e40..5d2af0a9 100644
--- a/bin/packages/lib_refine.py
+++ b/bin/packages/lib_refine.py
@@ -38,6 +38,7 @@ def checking_MX_record(r_serv, adress_set, addr_dns):
     score = 0
     num = len(adress_set)
     WalidMX = set([])
+    validMX = {}
     # Transforming the set into a string
     MXdomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", str(adress_set).lower())
     resolver = dns.resolver.Resolver()
@@ -46,20 +47,23 @@ def checking_MX_record(r_serv, adress_set, addr_dns):
     resolver.lifetime = 2

     if MXdomains != []:
-        for MXdomain in set(MXdomains):
+        for MXdomain in MXdomains:
             try:
+                MXdomain = MXdomain[1:]
                 # Already in Redis living.
-                if r_serv.exists(MXdomain[1:]):
+                if r_serv.exists(MXdomain):
                     score += 1
-                    WalidMX.add(MXdomain[1:])
+                    WalidMX.add(MXdomain)
+                    validMX[MXdomain] = validMX.get(MXdomain, 0) + 1
                 # Not already in Redis
                 else:
                     # If I'm Walid MX domain
-                    if resolver.query(MXdomain[1:], rdtype=dns.rdatatype.MX):
+                    if resolver.query(MXdomain, rdtype=dns.rdatatype.MX):
                         # Gonna be added in redis.
-                        r_serv.setex(MXdomain[1:], 1, timedelta(days=1))
+                        r_serv.setex(MXdomain, 1, timedelta(days=1))
                         score += 1
-                        WalidMX.add(MXdomain[1:])
+                        WalidMX.add(MXdomain)
+                        validMX[MXdomain] = validMX.get(MXdomain, 0) + 1
                     else:
                         pass
@@ -86,13 +90,14 @@ def checking_MX_record(r_serv, adress_set, addr_dns):

         except dns.resolver.Timeout:
             print('timeout')
-            r_serv.setex(MXdomain[1:], 1, timedelta(days=1))
+            r_serv.setex(MXdomain, 1, timedelta(days=1))
         except Exception as e:
             print(e)

     publisher.debug("emails before: {0} after: {1} (valid)".format(num, score))
-    return (num, WalidMX)
+    #return (num, WalidMX)
+    return (num, validMX)

 def checking_A_record(r_serv, domains_set):
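The hash written above is keyed per month, one field per address suffix. Note that mail.split('.')[-1] yields whatever follows the last dot, so generic TLDs such as com land in the "country" hash alongside real country codes; patch 02 below replaces this heuristic with faup's TLD extraction. A minimal sketch of reading the counters back -- the connection parameters here are assumptions, take the real ones from bin/packages/config.cfg (OVERVIEW.md above assigns statistics to ARDB DB 8):

    import datetime
    import redis

    # ARDB speaks the Redis protocol; host/port/db are placeholders.
    server_statistics = redis.StrictRedis(host='localhost', port=6382, db=8,
                                          decode_responses=True)

    date = datetime.datetime.now().strftime("%Y%m")          # e.g. '201807'
    stats = server_statistics.hgetall('mail_by_country:' + date)
    # e.g. {'lu': '42', 'com': '1337', ...} -- field = suffix, value = count
    for country, count in sorted(stats.items(), key=lambda kv: int(kv[1]), reverse=True):
        print(country, count)
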
From c20e7d5ab448ff3ef4c1ee2763668e60a4499500 Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Mon, 30 Jul 2018 10:19:26 +0200
Subject: [PATCH 02/10] chg: [Mail Credential] add tld statistic

---
 bin/Credential.py |  7 +++++++
 bin/Mail.py       | 12 +++++++++---
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/bin/Credential.py b/bin/Credential.py
index 5112f534..219a86f3 100755
--- a/bin/Credential.py
+++ b/bin/Credential.py
@@ -28,6 +28,7 @@ import sys
 from packages import Paste
 from pubsublogger import publisher
 from Helper import Process
+import datetime
 import re
 import redis
 from pyfaup.faup import Faup
@@ -140,7 +141,13 @@ if __name__ == "__main__":

         #for searching credential in termFreq
+        date = datetime.datetime.now().strftime("%Y%m")
         for cred in creds:
+            mail = cred.split('@')[-1]
+            tld = faup.get()['tld']
+            print(tld)
+            server_statistics.hincrby('credential_by_tld:'+date, tld, MX_values[1][mail])
+
             cred = cred.split('@')[0] #Split to ignore mail address

             #unique number attached to unique path

diff --git a/bin/Mail.py b/bin/Mail.py
index 13a675ea..2cb9b769 100755
--- a/bin/Mail.py
+++ b/bin/Mail.py
@@ -20,6 +20,8 @@ from packages import Paste
 from packages import lib_refine
 from pubsublogger import publisher

+from pyfaup.faup import Faup
+
 from Helper import Process

 if __name__ == "__main__":
@@ -28,6 +30,8 @@ if __name__ == "__main__":

     config_section = 'Mail'

+    faup = Faup()
+
     p = Process(config_section)
     addr_dns = p.config.get("Mail", "dns")

@@ -92,9 +96,11 @@ if __name__ == "__main__":
                     for mail in MX_values[1]:
                         print('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date))
                         p.populate_set_out('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date), 'ModuleStats')
-
-                        country = mail.split('.')[-1]
-                        server_statistics.hincrby('mail_by_country:'+date, country, MX_values[1][mail])
+
+                        faup.decode(mail)
+                        tld = faup.get()['tld']
+                        print(tld)
+                        server_statistics.hincrby('mail_by_tld:'+date, tld, MX_values[1][mail])

                     prec_filename = filename
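Patch 02 swaps the last-dot heuristic for faup, which knows the public-suffix list, so the statistic becomes a genuine by-TLD count. Two things in the Credential.py hunk are worth flagging: faup.get() is read without a prior faup.decode(mail), and MX_values does not exist in Credential.py at all -- patch 03 below adds the missing decode call and replaces the lookup with a plain increment of 1. A short sketch of the call sequence the modules rely on (pyfaup must be installed; the exact field set returned by get() can vary between faup versions, so treat the dict keys other than 'tld' as assumptions):

    from pyfaup.faup import Faup

    faup = Faup()
    faup.decode('john.doe@mail.example.lu')   # Mail.py passes the raw address
    parsed = faup.get()                       # dict with 'tld', 'domain', 'host', ...
    print(parsed['tld'])                      # expected: 'lu'
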
From 09fbc363f11ec4b8eb15af210a2f358191418cdb Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Mon, 30 Jul 2018 11:56:50 +0200
Subject: [PATCH 03/10] chg: [sqlinjection Phone] add tld statistic, fix phone regex

---
 bin/Credential.py            | 11 +++++++++--
 bin/LibInjection.py          | 14 ++++++++++++++
 bin/Phone.py                 | 23 ++++++++++++++++++++---
 bin/SQLInjectionDetection.py | 15 +++++++++++++++
 4 files changed, 58 insertions(+), 5 deletions(-)

diff --git a/bin/Credential.py b/bin/Credential.py
index 219a86f3..bc4ca9fe 100755
--- a/bin/Credential.py
+++ b/bin/Credential.py
@@ -59,6 +59,12 @@ if __name__ == "__main__":
         db=p.config.get("ARDB_TermCred", "db"),
         decode_responses=True)

+    server_statistics = redis.StrictRedis(
+        host=p.config.get("ARDB_Statistics", "host"),
+        port=p.config.getint("ARDB_Statistics", "port"),
+        db=p.config.getint("ARDB_Statistics", "db"),
+        decode_responses=True)
+
     criticalNumberToAlert = p.config.getint("Credential", "criticalNumberToAlert")
     minTopPassList = p.config.getint("Credential", "minTopPassList")

@@ -143,10 +149,11 @@ if __name__ == "__main__":
         #for searching credential in termFreq
         date = datetime.datetime.now().strftime("%Y%m")
         for cred in creds:
-            mail = cred.split('@')[-1]
+            mail = cred.split('@')[-1].split()[0]
+            faup.decode(mail)
             tld = faup.get()['tld']
             print(tld)
-            server_statistics.hincrby('credential_by_tld:'+date, tld, MX_values[1][mail])
+            server_statistics.hincrby('credential_by_tld:'+date, tld, 1)

             cred = cred.split('@')[0] #Split to ignore mail address

diff --git a/bin/LibInjection.py b/bin/LibInjection.py
index 4ad388d5..283bba00 100755
--- a/bin/LibInjection.py
+++ b/bin/LibInjection.py
@@ -12,6 +12,8 @@ It tries to identify SQL Injections with libinjection.
"""

 import time
+import datetime
+import redis
 import string
 import urllib.request
 import re
@@ -54,6 +56,12 @@ def analyse(url, path):
         msg = 'infoleak:automatic-detection="sql-injection";{}'.format(path)
         p.populate_set_out(msg, 'Tags')

+        #statistics
+        tld = url_parsed['tld']
+        if tld is not None:
+            date = datetime.datetime.now().strftime("%Y%m")
+            server_statistics.hincrby('SQLInjection_by_tld:'+date, tld, 1)
+
 if __name__ == '__main__':
     # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
     # Port of the redis instance used by pubsublogger
@@ -70,6 +78,12 @@ if __name__ == '__main__':
     # Sent to the logging a description of the module
     publisher.info("Try to detect SQL injection with LibInjection")

+    server_statistics = redis.StrictRedis(
+        host=p.config.get("ARDB_Statistics", "host"),
+        port=p.config.getint("ARDB_Statistics", "port"),
+        db=p.config.getint("ARDB_Statistics", "db"),
+        decode_responses=True)
+
     faup = Faup()

     # Endless loop getting messages from the input queue

diff --git a/bin/Phone.py b/bin/Phone.py
index 213db2b3..3d579f4e 100755
--- a/bin/Phone.py
+++ b/bin/Phone.py
@@ -11,7 +11,9 @@ It apply phone number regexes on paste content and warn if above a threshold.

"""

+import datetime
 import time
+import redis
 import re
 import phonenumbers
 from packages import Paste
@@ -23,8 +25,10 @@ def search_phone(message):
     paste = Paste.Paste(message)
     content = paste.get_p_content()
     # regex to find phone numbers, may raise many false positives (shalt thou seek optimization, upgrading is required)
-    reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})')
-    reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\(?\d{2,4}\)?){3,4})')
+    #reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})')
+    #reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\(?\d{2,4}\)?){3,4})')
+    # use non capturing group
+    reg_phone = re.compile(r'(?:\+\d{1,4}(?:\(\d\))?\d?|0\d?)(?:\d{6,8}|(?:[-/\. ]{1}\(?\d{2,4}\)?){3,4})')
     # list of the regex results in the Paste, may be null
     results = reg_phone.findall(content)
@@ -45,17 +49,23 @@ def search_phone(message):
         for phone_number in results:
             try:
                 x = phonenumbers.parse(phone_number, None)
+                print(x)
                 country_code = x.country_code
                 if stats.get(country_code) is None:
                     stats[country_code] = 1
                 else:
                     stats[country_code] = stats[country_code] + 1
-            except:
+            except Exception as e:
+                #print(e)
                 pass
+
+        date = datetime.datetime.now().strftime("%Y%m")
         for country_code in stats:
+            print(country_code)
             if stats[country_code] > 4:
                 publisher.warning('{} contains Phone numbers with country code {}'.format(paste.p_name, country_code))
+
 if __name__ == '__main__':
     # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
     # Port of the redis instance used by pubsublogger
@@ -72,6 +82,13 @@ if __name__ == '__main__':
     # Sent to the logging a description of the module
     publisher.info("Run Phone module")

+    # ARDB #
+    server_statistics = redis.StrictRedis(
+        host=p.config.get("ARDB_Statistics", "host"),
+        port=p.config.getint("ARDB_Statistics", "port"),
+        db=p.config.getint("ARDB_Statistics", "db"),
+        decode_responses=True)
+
     # Endless loop getting messages from the input queue
     while True:
         # Get one message from the input queue

diff --git a/bin/SQLInjectionDetection.py b/bin/SQLInjectionDetection.py
index 117f3dc0..f03d7555 100755
--- a/bin/SQLInjectionDetection.py
+++ b/bin/SQLInjectionDetection.py
@@ -12,6 +12,8 @@ It test different possibility to makes some sqlInjection.
"""

 import time
+import datetime
+import redis
 import string
 import urllib.request
 import re
@@ -85,6 +87,13 @@ def analyse(url, path):

         msg = 'infoleak:automatic-detection="sql-injection";{}'.format(path)
         p.populate_set_out(msg, 'Tags')
+
+        #statistics
+        tld = url_parsed['tld']
+        if tld is not None:
+            date = datetime.datetime.now().strftime("%Y%m")
+            server_statistics.hincrby('SQLInjection_by_tld:'+date, tld, 1)
+
     else:
         print("Potential SQL injection:")
         print(urllib.request.unquote(url))
@@ -143,6 +152,12 @@ if __name__ == '__main__':
     # Sent to the logging a description of the module
     publisher.info("Try to detect SQL injection")

+    server_statistics = redis.StrictRedis(
+        host=p.config.get("ARDB_Statistics", "host"),
+        port=p.config.getint("ARDB_Statistics", "port"),
+        db=p.config.getint("ARDB_Statistics", "db"),
+        decode_responses=True)
+
     faup = Faup()

     # Endless loop getting messages from the input queue
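The Phone.py regex change above is more than cosmetic. With capturing groups, re.findall returns a tuple of the groups for every match instead of the matched text itself, so each "phone number" handed to phonenumbers.parse() was a tuple of fragments and parsing always failed into the bare except -- which would explain why the country statistics never accumulated. Non-capturing (?:...) groups make findall return whole matches. A self-contained illustration (the sample text is invented):

    import re

    text = 'reach us at +352 621 123 456'

    capturing = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\(?\d{2,4}\)?){3,4})')
    non_capturing = re.compile(r'(?:\+\d{1,4}(?:\(\d\))?\d?|0\d?)(?:\d{6,8}|(?:[-/\. ]{1}\(?\d{2,4}\)?){3,4})')

    print(capturing.findall(text))
    # [('+352', '', ' 621 123 456', ' 456')] -- one tuple of groups per match
    print(non_capturing.findall(text))
    # ['+352 621 123 456'] -- the full match, usable as a string
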
From 5dcb4ebb582ace37f41266042a83b09488cdf671 Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Mon, 30 Jul 2018 16:36:34 +0200
Subject: [PATCH 04/10] chg: [statistics] add script to generate pie chart png

---
 bin/Credential.py          |  15 ++--
 bin/Helper.py              |  23 ++++---
 bin/Mail.py                |  21 +++---
 doc/api/create_lu_graph.py | 137 +++++++++++++++++++++++++++++++++++++
 4 files changed, 169 insertions(+), 27 deletions(-)
 create mode 100755 doc/api/create_lu_graph.py

diff --git a/bin/Credential.py b/bin/Credential.py
index bc4ca9fe..8dd91c41 100755
--- a/bin/Credential.py
+++ b/bin/Credential.py
@@ -72,6 +72,7 @@ if __name__ == "__main__":
     #regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+"
     regex_cred = "[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+"
     regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:"
+
     while True:
         message = p.get_from_set()
         if message is None:
@@ -141,20 +142,20 @@ if __name__ == "__main__":

             if sites_set:
                 print("=======> Probably on : {}".format(', '.join(sites_set)))
+
+            date = datetime.datetime.now().strftime("%Y%m")
+            for cred in creds:
+                maildomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", cred.lower())[0]
+                faup.decode(maildomains)
+                tld = faup.get()['tld']
+                server_statistics.hincrby('credential_by_tld:'+date, tld, 1)
         else:
             publisher.info(to_print)
             print('found {} credentials'.format(len(creds)))

         #for searching credential in termFreq
-        date = datetime.datetime.now().strftime("%Y%m")
         for cred in creds:
-            mail = cred.split('@')[-1].split()[0]
-            faup.decode(mail)
-            tld = faup.get()['tld']
-            print(tld)
-            server_statistics.hincrby('credential_by_tld:'+date, tld, 1)
-
             cred = cred.split('@')[0] #Split to ignore mail address

             #unique number attached to unique path

diff --git a/bin/Helper.py b/bin/Helper.py
index c0d836e3..0bb4b410 100755
--- a/bin/Helper.py
+++ b/bin/Helper.py
@@ -111,7 +111,7 @@ class PubSub(object):

 class Process(object):

-    def __init__(self, conf_section):
+    def __init__(self, conf_section, module=True):
         configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
         if not os.path.exists(configfile):
             raise Exception('Unable to find the configuration file. \
@@ -125,17 +125,18 @@ class Process(object):
         self.subscriber_name = conf_section

         self.pubsub = None
-        if self.modules.has_section(conf_section):
-            self.pubsub = PubSub()
-        else:
-            raise Exception('Your process has to listen to at least one feed.')
-        self.r_temp = redis.StrictRedis(
-            host=self.config.get('RedisPubSub', 'host'),
-            port=self.config.get('RedisPubSub', 'port'),
-            db=self.config.get('RedisPubSub', 'db'),
-            decode_responses=True)
+        if module:
+            if self.modules.has_section(conf_section):
+                self.pubsub = PubSub()
+            else:
+                raise Exception('Your process has to listen to at least one feed.')
+            self.r_temp = redis.StrictRedis(
+                host=self.config.get('RedisPubSub', 'host'),
+                port=self.config.get('RedisPubSub', 'port'),
+                db=self.config.get('RedisPubSub', 'db'),
+                decode_responses=True)

-        self.moduleNum = os.getpid()
+            self.moduleNum = os.getpid()

     def populate_set_in(self):
         # monoproc

diff --git a/bin/Mail.py b/bin/Mail.py
index 2cb9b769..1f682661 100755
--- a/bin/Mail.py
+++ b/bin/Mail.py
@@ -2,7 +2,7 @@
 # -*-coding:UTF-8 -*

 """
-The CreditCards Module
+The Mail Module
 ======================

 This module is consuming the Redis-list created by the Categ module.
@@ -12,7 +12,6 @@ It apply mail regexes on paste content and warn if above a threshold.

 """

 import redis
-import pprint
 import time
 import datetime
 import dns.exception
@@ -89,19 +88,23 @@ if __name__ == "__main__":
                         msg = 'infoleak:automatic-detection="mail";{}'.format(filename)
                         p.populate_set_out(msg, 'Tags')

+                        #create country statistics
+                        date = datetime.datetime.now().strftime("%Y%m")
+                        for mail in MX_values[1]:
+                            print('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date))
+                            p.populate_set_out('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date), 'ModuleStats')
+
+                            faup.decode(mail)
+                            tld = faup.get()['tld']
+                            server_statistics.hincrby('mail_by_tld:'+date, tld, MX_values[1][mail])
+
                     else:
                         publisher.info(to_print)

-                    #Send to ModuleStats and create country statistics
-                    date = datetime.datetime.now().strftime("%Y%m")
+                    #create country statistics
                     for mail in MX_values[1]:
                         print('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date))
                         p.populate_set_out('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date), 'ModuleStats')

-                        faup.decode(mail)
-                        tld = faup.get()['tld']
-                        print(tld)
-                        server_statistics.hincrby('mail_by_tld:'+date, tld, MX_values[1][mail])
-
                     prec_filename = filename

                 else:

diff --git a/doc/api/create_lu_graph.py b/doc/api/create_lu_graph.py
new file mode 100755
index 00000000..4bcff2a6
--- /dev/null
+++ b/doc/api/create_lu_graph.py
@@ -0,0 +1,137 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+'''
+lu
+'''
+
+import os
+import sys
+import redis
+import datetime
+import heapq
+import operator
+import matplotlib.pyplot as plt
+import numpy as np
+
+sys.path.append(os.environ['AIL_BIN'])
+
+from Helper import Process
+
+def create_pie_chart(db_key, date, pie_title, path, save_name):
+
+    monthly_credential_by_tld = server_statistics.hkeys(db_key + date)
+
+    l_tld = []
+    for tld in monthly_credential_by_tld:
+        nb_tld = server_statistics.hget(db_key + date, tld)
+        if nb_tld is not None:
+            nb_tld = int(nb_tld)
+        else:
+            nb_tld = 0
+        l_tld.append( (tld, nb_tld) )
+
+    mail_tld_top5 = heapq.nlargest(5, l_tld, key=operator.itemgetter(1))
+
+    # Pie chart, where the slices will be ordered and plotted counter-clockwise:
+    labels = []
+    sizes = []
+    explode = []  # only "explode" the 2nd slice (i.e. 'Hogs')
+    for tld in mail_tld_top5:
+        labels.append(tld[0])
+        sizes.append(tld[1])
+        explode.append(0)
+
+    nb_tld = server_statistics.hget(db_key + date, 'lu')
+    if nb_tld is not None:
+        nb_tld = int(nb_tld)
+    else:
+        nb_tld = 0
+    labels.append('lu')
+    sizes.append(nb_tld)
+    explode.append(0.3)  # only "explode" lu slice
+    explode = tuple(explode)
+
+    fig1, ax1 = plt.subplots()
+    ax1.pie(sizes, explode=explode, labels=labels, autopct='%1.1f%%',
+            shadow=True, startangle=90)
+    ax1.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
+
+    ax1.set_title(pie_title)
+    #plt.show()
+    plt.savefig(os.path.join(path, save_name))
+    plt.close(fig1)
+
+def create_donut_chart(db_key, date, pie_title, path, save_name):
+
+    monthly_credential_by_tld = server_statistics.hkeys(db_key + date)
+    print()
+
+    l_tld = []
+    for tld in monthly_credential_by_tld:
+        nb_tld = server_statistics.hget(db_key + date, tld)
+        if nb_tld is not None:
+            nb_tld = int(nb_tld)
+        else:
+            nb_tld = 0
+        l_tld.append( (tld, nb_tld) )
+
+    mail_tld_top5 = heapq.nlargest(5, l_tld, key=operator.itemgetter(1))
+
+    # Pie chart, where the slices will be ordered and plotted counter-clockwise:
+    recipe = []
+    data = []
+    for tld in mail_tld_top5:
+        recipe.append(tld[0])
+        data.append(tld[1])
+
+    nb_tld = server_statistics.hget(db_key + date, 'lu')
+    if nb_tld is not None:
+        nb_tld = int(nb_tld)
+    else:
+        nb_tld = 0
+    recipe.append('lu')
+    data.append(nb_tld)
+
+    fig1, ax1 = plt.subplots(figsize=(6, 3), subplot_kw=dict(aspect="equal"))
+
+    wedges, texts = ax1.pie(data, wedgeprops=dict(width=0.5), startangle=-40)
+
+    bbox_props = dict(boxstyle="square,pad=0.3", fc="w", ec="k", lw=0.72)
+    kw = dict(xycoords='data', textcoords='data', arrowprops=dict(arrowstyle="-"),
+              bbox=bbox_props, zorder=0, va="center")
+
+    for i, p in enumerate(wedges):
+        ang = (p.theta2 - p.theta1)/2. + p.theta1
+        y = np.sin(np.deg2rad(ang))
+        x = np.cos(np.deg2rad(ang))
+        horizontalalignment = {-1: "right", 1: "left"}[int(np.sign(x))]
+        connectionstyle = "angle,angleA=0,angleB={}".format(ang)
+        kw["arrowprops"].update({"connectionstyle": connectionstyle})
+        ax1.annotate(recipe[i], xy=(x, y), xytext=(1.35*np.sign(x), 1.4*y),
+                     horizontalalignment=horizontalalignment, **kw)
+
+    ax1.set_title(pie_title)
+    #plt.show()
+    plt.savefig(os.path.join(path, save_name))
+    plt.close(fig1)
+
+if __name__ == '__main__':
+
+    path = os.path.join(os.environ['AIL_HOME'], 'doc')  # path to module config file
+
+    config_section = 'ARDB_Statistics'
+
+    p = Process(config_section, False)
+
+    # ARDB #
+    server_statistics = redis.StrictRedis(
+        host=p.config.get("ARDB_Statistics", "host"),
+        port=p.config.getint("ARDB_Statistics", "port"),
+        db=p.config.getint("ARDB_Statistics", "db"),
+        decode_responses=True)
+
+    date = datetime.datetime.now().strftime("%Y%m")
+    create_pie_chart('credential_by_tld:', date, "AIL: Credential leak by tld", path, 'AIL_credential_by_tld.png')
+    create_pie_chart('mail_by_tld:', date, "AIL: mail leak by tld", path, 'AIL_mail_by_tld.png')
+    create_pie_chart('SQLInjection_by_tld:', date, "AIL: sqlInjection by tld", path, 'AIL_sqlInjectionl_by_tld.png')
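The Helper.py change is what lets a one-shot script reuse the AIL configuration machinery: with module=False, Process skips the modules.cfg lookup, the PubSub setup and the RedisPubSub connection, and only exposes the parsed config. A sketch of the pattern create_lu_graph.py relies on above (the section name is the one the script itself uses):

    from Helper import Process

    p = Process('ARDB_Statistics', module=False)     # config only, no queues
    host = p.config.get('ARDB_Statistics', 'host')   # read any config value
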
""" import time +import datetime +import redis from packages import Paste from pubsublogger import publisher @@ -26,6 +28,13 @@ def main(): p = Process(config_section) addr_dns = p.config.get("DomClassifier", "dns") + # ARDB # + server_statistics = redis.StrictRedis( + host=p.config.get("ARDB_Statistics", "host"), + port=p.config.getint("ARDB_Statistics", "port"), + db=p.config.getint("ARDB_Statistics", "db"), + decode_responses=True) + publisher.info("""ZMQ DomainClassifier is Running""") c = DomainClassifier.domainclassifier.Extract(rawtext="", nameservers=[addr_dns]) @@ -46,20 +55,31 @@ def main(): paste = PST.get_p_content() mimetype = PST._get_p_encoding() + nb_domain = 0 + nb_tld_domain = 0 + if mimetype == "text/plain": c.text(rawtext=paste) c.potentialdomain() - c.validdomain(rtype=['A'], extended=True) - localizeddomains = c.include(expression=cc_tld) - if localizeddomains: - print(localizeddomains) - publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format( - PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld, PST.p_path)) - localizeddomains = c.localizedomain(cc=cc) - if localizeddomains: - print(localizeddomains) - publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format( - PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_path)) + valid = c.validdomain(rtype=['A'], extended=True) + nb_domain = len(set(valid)) + if nb_domain > 0: + localizeddomains = c.include(expression=cc_tld) + if localizeddomains: + nb_tld_domain = len(set(localizeddomains)) + publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format( + PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld, PST.p_path)) + + localizeddomains = c.localizedomain(cc=cc) + if localizeddomains: + nb_tld_domain = nb_tld_domain + len(set(localizeddomains)) + publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format( + PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_path)) + + date = datetime.datetime.now().strftime("%Y%m") + server_statistics.hincrby('domain_by_tld:'+date, 'ALL', nb_domain) + if nb_tld_domain > 0: + server_statistics.hincrby('domain_by_tld:'+date, cc, nb_tld_domain) except IOError: print("CRC Checksum Failed on :", PST.p_path) publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format( diff --git a/doc/api/create_lu_graph.py b/doc/api/create_lu_graph.py index 4bcff2a6..1c9440ef 100755 --- a/doc/api/create_lu_graph.py +++ b/doc/api/create_lu_graph.py @@ -8,6 +8,7 @@ lu import os import sys import redis +import argparse import datetime import heapq import operator @@ -18,7 +19,7 @@ sys.path.append(os.environ['AIL_BIN']) from Helper import Process -def create_pie_chart(db_key, date, pie_title, path, save_name): +def create_pie_chart(country ,db_key, date, pie_title, path, save_name): monthly_credential_by_tld = server_statistics.hkeys(db_key + date) @@ -37,19 +38,23 @@ def create_pie_chart(db_key, date, pie_title, path, save_name): labels = [] sizes = [] explode = [] # only "explode" the 2nd slice (i.e. 
'Hogs') + explode_value = 0 for tld in mail_tld_top5: - labels.append(tld[0]) + labels.append(tld[0] +' ('+str(tld[1])+')') sizes.append(tld[1]) - explode.append(0) + explode.append(explode_value) + explode_value = explode_value +0.1 - nb_tld = server_statistics.hget(db_key + date, 'lu') + nb_tld = server_statistics.hget(db_key + date, country) if nb_tld is not None: nb_tld = int(nb_tld) else: nb_tld = 0 - labels.append('lu') - sizes.append(nb_tld) - explode.append(0.3) # only "explode" lu slice + country_label = country + ' ('+str(nb_tld)+')' + if country_label not in labels: + labels.append(country_label) + sizes.append(nb_tld) + explode.append(explode_value) explode = tuple(explode) fig1, ax1 = plt.subplots() @@ -85,13 +90,14 @@ def create_donut_chart(db_key, date, pie_title, path, save_name): recipe.append(tld[0]) data.append(tld[1]) - nb_tld = server_statistics.hget(db_key + date, 'lu') + nb_tld = server_statistics.hget(db_key + date, country) if nb_tld is not None: nb_tld = int(nb_tld) else: nb_tld = 0 - recipe.append('lu') - data.append(nb_tld) + if country not in recipe: + recipe.append(country) + data.append(nb_tld) fig1, ax1 = plt.subplots(figsize=(6, 3), subplot_kw=dict(aspect="equal")) @@ -118,6 +124,29 @@ def create_donut_chart(db_key, date, pie_title, path, save_name): if __name__ == '__main__': + parser = argparse.ArgumentParser( + description='''This script is a part of the Analysis Information Leak + framework. It create pie charts on a country statistics".''', + epilog='Example: ./create_lu_graph.py 0 lu now, create_lu_graph.py 0 lu 201807') + + parser.add_argument('type', type=int, default=0, + help='''The graph type (default 0), + 0: all, + 1: credential_pie, + 2: mail_pie + 3: sqlinjection_pie, + 4: domain_pie,''', + choices=[0, 1, 2, 3, 4], action='store') + + parser.add_argument('country', type=str, default="de", + help='''The country code, de:default''', + action='store') + + parser.add_argument('date', type=str, default="now", + help='''month %Y%m, example: 201810''', action='store') + + args = parser.parse_args() + path = os.path.join(os.environ['AIL_HOME'], 'doc') # path to module config file config_section = 'ARDB_Statistics' @@ -131,7 +160,21 @@ if __name__ == '__main__': db=p.config.getint("ARDB_Statistics", "db"), decode_responses=True) - date = datetime.datetime.now().strftime("%Y%m") - create_pie_chart('credential_by_tld:', date, "AIL: Credential leak by tld", path, 'AIL_credential_by_tld.png') - create_pie_chart('mail_by_tld:', date, "AIL: mail leak by tld", path, 'AIL_mail_by_tld.png') - create_pie_chart('SQLInjection_by_tld:', date, "AIL: sqlInjection by tld", path, 'AIL_sqlInjectionl_by_tld.png') + if args.date == 'now' or len(args.date) != 6: + date = datetime.datetime.now().strftime("%Y%m") + else: + date = args.date + + if args.type == 0: + create_pie_chart(args.country, 'credential_by_tld:', date, "AIL: Credential leak by tld", path, 'AIL_credential_by_tld.png') + create_pie_chart(args.country, 'mail_by_tld:', date, "AIL: mail leak by tld", path, 'AIL_mail_by_tld.png') + create_pie_chart(args.country, 'SQLInjection_by_tld:', date, "AIL: SQLInjection by tld", path, 'AIL_SQLInjection_by_tld.png') + create_pie_chart(args.country.upper(), 'domain_by_tld:', date, "AIL: Domain by tld", path, 'AIL_domain_by_tld.png') + elif args.type == 1: + create_pie_chart(args.country, 'credential_by_tld:', date, "AIL: Credential leak by tld", path, 'AIL_credential_by_tld.png') + elif args.type == 2: + create_pie_chart(args.country, 'mail_by_tld:', date, "AIL: mail leak 
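DomClassifier now writes two fields per month: 'ALL' counts every distinct validated domain, while the configured country code (cc, e.g. 'DE') counts the localized subset, so a country's share is just the ratio of two hash fields. A sketch of computing it -- the connection settings and the 'DE' field name are assumptions; the field is whatever cc is set to in the DomClassifier config:

    import datetime
    import redis

    # placeholders; use the ARDB_Statistics values from config.cfg
    r = redis.StrictRedis(host='localhost', port=6382, db=8, decode_responses=True)

    date = datetime.datetime.now().strftime("%Y%m")
    total = int(r.hget('domain_by_tld:' + date, 'ALL') or 0)
    local = int(r.hget('domain_by_tld:' + date, 'DE') or 0)   # 'DE' is assumed
    print('{:.1%} of validated domains are localized'.format(local / total if total else 0.0))
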
by tld", path, 'AIL_mail_by_tld.png') + elif args.type == 3: + create_pie_chart(args.country, 'SQLInjection_by_tld:', date, "AIL: sqlInjection by tld", path, 'AIL_sqlInjectionl_by_tld.png') + elif args.type == 4: + create_pie_chart(args.country.upper(), 'domain_by_tld:', date, "AIL: Domain by tld", path, 'AIL_domain_by_tld.png') From d0135c248d9c91e6978e96aa2ea2e3c888f3079a Mon Sep 17 00:00:00 2001 From: Terrtia Date: Wed, 1 Aug 2018 15:26:29 +0200 Subject: [PATCH 06/10] add iban country statistic --- bin/BankAccount.py | 11 +++++++++++ doc/api/create_lu_graph.py | 8 ++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/bin/BankAccount.py b/bin/BankAccount.py index 58fa3e64..42baf535 100755 --- a/bin/BankAccount.py +++ b/bin/BankAccount.py @@ -11,6 +11,8 @@ It apply IBAN regexes on paste content and warn if above a threshold. import redis import time +import redis +import datetime import re import string from itertools import chain @@ -54,11 +56,13 @@ def check_all_iban(l_iban, paste, filename): iban = ''.join(e for e in iban if e.isalnum()) #iban = iban.upper() res = iban_regex_verify.findall(iban) + date = datetime.datetime.now().strftime("%Y%m") if res: print('checking '+iban) if is_valid_iban(iban): print('------') nb_valid_iban = nb_valid_iban + 1 + server_statistics.hincrby('iban_by_tld:'+date, iban[0:2], 1) if(nb_valid_iban > 0): to_print = 'Iban;{};{};{};'.format(paste.p_source, paste.p_date, paste.p_name) @@ -79,6 +83,13 @@ if __name__ == "__main__": p = Process(config_section) max_execution_time = p.config.getint("BankAccount", "max_execution_time") + # ARDB # + server_statistics = redis.StrictRedis( + host=p.config.get("ARDB_Statistics", "host"), + port=p.config.getint("ARDB_Statistics", "port"), + db=p.config.getint("ARDB_Statistics", "db"), + decode_responses=True) + publisher.info("BankAccount started") message = p.get_from_set() diff --git a/doc/api/create_lu_graph.py b/doc/api/create_lu_graph.py index 1c9440ef..c2a66769 100755 --- a/doc/api/create_lu_graph.py +++ b/doc/api/create_lu_graph.py @@ -135,8 +135,9 @@ if __name__ == '__main__': 1: credential_pie, 2: mail_pie 3: sqlinjection_pie, - 4: domain_pie,''', - choices=[0, 1, 2, 3, 4], action='store') + 4: domain_pie, + 5: iban_pie,''', + choices=[0, 1, 2, 3, 4, 5], action='store') parser.add_argument('country', type=str, default="de", help='''The country code, de:default''', @@ -170,6 +171,7 @@ if __name__ == '__main__': create_pie_chart(args.country, 'mail_by_tld:', date, "AIL: mail leak by tld", path, 'AIL_mail_by_tld.png') create_pie_chart(args.country, 'SQLInjection_by_tld:', date, "AIL: SQLInjection by tld", path, 'AIL_SQLInjection_by_tld.png') create_pie_chart(args.country.upper(), 'domain_by_tld:', date, "AIL: Domain by tld", path, 'AIL_domain_by_tld.png') + create_pie_chart(args.country.upper(), 'iban_by_tld:', date, "AIL: Iban by tld", path, 'AIL_iban_by_tld.png') elif args.type == 1: create_pie_chart(args.country, 'credential_by_tld:', date, "AIL: Credential leak by tld", path, 'AIL_credential_by_tld.png') elif args.type == 2: @@ -178,3 +180,5 @@ if __name__ == '__main__': create_pie_chart(args.country, 'SQLInjection_by_tld:', date, "AIL: sqlInjection by tld", path, 'AIL_sqlInjectionl_by_tld.png') elif args.type == 4: create_pie_chart(args.country.upper(), 'domain_by_tld:', date, "AIL: Domain by tld", path, 'AIL_domain_by_tld.png') + elif args.type == 5: + create_pie_chart(args.country.upper(), 'iban_by_tld:', date, "AIL: Iban by tld", path, 'AIL_iban_by_tld.png') From 
From 204e996fc3e24187154a44e5ca0a29487e5324b7 Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Wed, 12 Sep 2018 11:21:11 +0200
Subject: [PATCH 07/10] chg: [statistics] clean scripts

---
 .gitignore                                         |  1 +
 bin/BankAccount.py                                 |  2 +-
 bin/DomClassifier.py                               | 42 +++--------
 bin/DomainSubject.py                               | 70 +++++++++++++++++++
 bin/Phone.py                                       | 23 +-----
 .../create_graph_by_tld.py}                        | 25 ++++---
 6 files changed, 98 insertions(+), 65 deletions(-)
 create mode 100755 bin/DomainSubject.py
 rename doc/{api/create_lu_graph.py => statistics/create_graph_by_tld.py} (87%)

diff --git a/.gitignore b/.gitignore
index 11c01083..c4bd48c9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -40,3 +40,4 @@ doc/all_modules.txt
 # auto generated
 doc/module-data-flow.png
 doc/data-flow.png
+doc/statistics

diff --git a/bin/BankAccount.py b/bin/BankAccount.py
index 42baf535..06e86d06 100755
--- a/bin/BankAccount.py
+++ b/bin/BankAccount.py
@@ -62,7 +62,7 @@ def check_all_iban(l_iban, paste, filename):
             if is_valid_iban(iban):
                 print('------')
                 nb_valid_iban = nb_valid_iban + 1
-                server_statistics.hincrby('iban_by_tld:'+date, iban[0:2], 1)
+                server_statistics.hincrby('iban_by_country:'+date, iban[0:2], 1)

     if(nb_valid_iban > 0):
         to_print = 'Iban;{};{};{};'.format(paste.p_source, paste.p_date, paste.p_name)

diff --git a/bin/DomClassifier.py b/bin/DomClassifier.py
index 268607e8..aed87a55 100755
--- a/bin/DomClassifier.py
+++ b/bin/DomClassifier.py
@@ -10,8 +10,6 @@ the out output of the Global module.
 """

 import time
-import datetime
-import redis

 from packages import Paste
 from pubsublogger import publisher
@@ -26,13 +26,6 @@ def main():
     p = Process(config_section)
     addr_dns = p.config.get("DomClassifier", "dns")

-    # ARDB #
-    server_statistics = redis.StrictRedis(
-        host=p.config.get("ARDB_Statistics", "host"),
-        port=p.config.getint("ARDB_Statistics", "port"),
-        db=p.config.getint("ARDB_Statistics", "db"),
-        decode_responses=True)
-
     publisher.info("""ZMQ DomainClassifier is Running""")

     c = DomainClassifier.domainclassifier.Extract(rawtext="", nameservers=[addr_dns])
@@ -55,31 +46,20 @@ def main():
             paste = PST.get_p_content()
             mimetype = PST._get_p_encoding()

-            nb_domain = 0
-            nb_tld_domain = 0
-
             if mimetype == "text/plain":
                 c.text(rawtext=paste)
                 c.potentialdomain()
-                valid = c.validdomain(rtype=['A'], extended=True)
-                nb_domain = len(set(valid))
-                if nb_domain > 0:
-                    localizeddomains = c.include(expression=cc_tld)
-                    if localizeddomains:
-                        nb_tld_domain = len(set(localizeddomains))
-                        publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
-                            PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld, PST.p_path))
-
-                    localizeddomains = c.localizedomain(cc=cc)
-                    if localizeddomains:
-                        nb_tld_domain = nb_tld_domain + len(set(localizeddomains))
-                        publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
-                            PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_path))
-
-            date = datetime.datetime.now().strftime("%Y%m")
-            server_statistics.hincrby('domain_by_tld:'+date, 'ALL', nb_domain)
-            if nb_tld_domain > 0:
-                server_statistics.hincrby('domain_by_tld:'+date, cc, nb_tld_domain)
+                c.validdomain(rtype=['A'], extended=True)
+                localizeddomains = c.include(expression=cc_tld)
+                if localizeddomains:
+                    print(localizeddomains)
+                    publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
+                        PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld, PST.p_path))
+                localizeddomains = c.localizedomain(cc=cc)
+                if localizeddomains:
+                    print(localizeddomains)
+                    publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
+                        PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_path))
         except IOError:
             print("CRC Checksum Failed on :", PST.p_path)
             publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(

diff --git a/bin/DomainSubject.py b/bin/DomainSubject.py
new file mode 100755
index 00000000..6db110da
--- /dev/null
+++ b/bin/DomainSubject.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+
+from packages import Paste
+from Helper import Process
+from pubsublogger import publisher
+
+import time
+import redis
+import newspaper
+
+from collections import defaultdict
+
+from newspaper import fulltext
+
+if __name__ == '__main__':
+
+    publisher.port = 6380
+    publisher.channel = "Script"
+
+    publisher.info("Script DomainSubject started")
+
+    config_section = 'DomainSubject'
+    p = Process(config_section)
+
+    r_onion = redis.StrictRedis(
+        host=p.config.get("ARDB_Onion", "host"),
+        port=p.config.getint("ARDB_Onion", "port"),
+        db=p.config.getint("ARDB_Onion", "db"),
+        decode_responses=True)
+
+
+    while True:
+
+        # format:
+        domain = p.get_from_set()
+        domain = 'easycoinsayj7p5l.onion'
+
+        if domain is not None:
+
+            #retrieve all crawled pastes
+            set_crawles_pastes = r_onion.smembers('temp:crawled_domain_pastes:{}'.format(domain))
+            if set_crawles_pastes:
+                dict_keyword = defaultdict(int)
+
+                for paste_path in set_crawles_pastes:
+
+                    paste = Paste.Paste(paste_path)
+                    content = paste.get_p_content()
+
+                    article = newspaper.Article(url='')
+                    article.set_html(content)
+                    article.parse()
+                    article.nlp()
+
+                    for keyword in article.keywords:
+                        dict_keyword[keyword] += 1
+
+
+                if dict_keyword:
+                    res = [(k, dict_keyword[k]) for k in sorted(dict_keyword, key=dict_keyword.get, reverse=True)]
+                    for item in res:
+                        print(item)
+                else:
+                    print('no keywords found')
+                time.sleep(60)
+
+        else:
+            time.sleep(5)

diff --git a/bin/Phone.py b/bin/Phone.py
index 3d579f4e..213db2b3 100755
--- a/bin/Phone.py
+++ b/bin/Phone.py
@@ -11,9 +11,7 @@ It apply phone number regexes on paste content and warn if above a threshold.

 """

-import datetime
 import time
-import redis
 import re
 import phonenumbers
 from packages import Paste
@@ -25,10 +23,8 @@ def search_phone(message):
     paste = Paste.Paste(message)
     content = paste.get_p_content()
     # regex to find phone numbers, may raise many false positives (shalt thou seek optimization, upgrading is required)
-    #reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})')
-    #reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\(?\d{2,4}\)?){3,4})')
-    # use non capturing group
-    reg_phone = re.compile(r'(?:\+\d{1,4}(?:\(\d\))?\d?|0\d?)(?:\d{6,8}|(?:[-/\. ]{1}\(?\d{2,4}\)?){3,4})')
+    reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})')
+    reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\(?\d{2,4}\)?){3,4})')
     # list of the regex results in the Paste, may be null
     results = reg_phone.findall(content)
@@ -49,23 +45,17 @@ def search_phone(message):
         for phone_number in results:
             try:
                 x = phonenumbers.parse(phone_number, None)
-                print(x)
                 country_code = x.country_code
                 if stats.get(country_code) is None:
                     stats[country_code] = 1
                 else:
                     stats[country_code] = stats[country_code] + 1
-            except Exception as e:
-                #print(e)
+            except:
                 pass
-
-        date = datetime.datetime.now().strftime("%Y%m")
         for country_code in stats:
-            print(country_code)
             if stats[country_code] > 4:
                 publisher.warning('{} contains Phone numbers with country code {}'.format(paste.p_name, country_code))
-
 if __name__ == '__main__':
     # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
     # Port of the redis instance used by pubsublogger
@@ -82,13 +72,6 @@ if __name__ == '__main__':
     # Sent to the logging a description of the module
     publisher.info("Run Phone module")

-    # ARDB #
-    server_statistics = redis.StrictRedis(
-        host=p.config.get("ARDB_Statistics", "host"),
-        port=p.config.getint("ARDB_Statistics", "port"),
-        db=p.config.getint("ARDB_Statistics", "db"),
-        decode_responses=True)
-
     # Endless loop getting messages from the input queue
     while True:
         # Get one message from the input queue

diff --git a/doc/api/create_lu_graph.py b/doc/statistics/create_graph_by_tld.py
similarity index 87%
rename from doc/api/create_lu_graph.py
rename to doc/statistics/create_graph_by_tld.py
index c2a66769..e1fbbe97 100755
--- a/doc/api/create_lu_graph.py
+++ b/doc/statistics/create_graph_by_tld.py
@@ -2,7 +2,9 @@
 # -*-coding:UTF-8 -*

 '''
-lu
+Create statistics pie charts by tld
+
+Default tld: lu
 '''

 import os
@@ -64,7 +66,7 @@ def create_pie_chart(country ,db_key, date, pie_title, path, save_name):

     ax1.set_title(pie_title)
     #plt.show()
-    plt.savefig(os.path.join(path, save_name))
+    plt.savefig(os.path.join(path,save_name))
     plt.close(fig1)

 def create_donut_chart(db_key, date, pie_title, path, save_name):
@@ -126,7 +128,7 @@ if __name__ == '__main__':

     parser = argparse.ArgumentParser(
         description='''This script is a part of the Analysis Information Leak
-        framework. It create pie charts on a country statistics".''',
+        framework. Create statistics pie charts".''',
         epilog='Example: ./create_lu_graph.py 0 lu now, create_lu_graph.py 0 lu 201807')

     parser.add_argument('type', type=int, default=0,
@@ -135,12 +137,11 @@ if __name__ == '__main__':
                         1: credential_pie,
                         2: mail_pie
                         3: sqlinjection_pie,
-                        4: domain_pie,
-                        5: iban_pie,''',
-                        choices=[0, 1, 2, 3, 4, 5], action='store')
+                        4: iban_pie,''',
+                        choices=[0, 1, 2, 3, 4], action='store')

-    parser.add_argument('country', type=str, default="de",
-                        help='''The country code, de:default''',
+    parser.add_argument('country', type=str, default="lu",
+                        help='''The country code, lu:default''',
                         action='store')

     parser.add_argument('date', type=str, default="now",
@@ -148,7 +149,7 @@ if __name__ == '__main__':

     args = parser.parse_args()

-    path = os.path.join(os.environ['AIL_HOME'], 'doc')  # path to module config file
+    path = os.path.join(os.environ['AIL_HOME'], 'doc', 'statistics')  # save path

     config_section = 'ARDB_Statistics'

@@ -171,7 +172,7 @@ if __name__ == '__main__':
         create_pie_chart(args.country, 'mail_by_tld:', date, "AIL: mail leak by tld", path, 'AIL_mail_by_tld.png')
         create_pie_chart(args.country, 'SQLInjection_by_tld:', date, "AIL: SQLInjection by tld", path, 'AIL_SQLInjection_by_tld.png')
         create_pie_chart(args.country.upper(), 'domain_by_tld:', date, "AIL: Domain by tld", path, 'AIL_domain_by_tld.png')
-        create_pie_chart(args.country.upper(), 'iban_by_tld:', date, "AIL: Iban by tld", path, 'AIL_iban_by_tld.png')
+        create_pie_chart(args.country.upper(), 'iban_by_country:', date, "AIL: Iban by country", path, 'AIL_iban_by_country.png')
     elif args.type == 1:
         create_pie_chart(args.country, 'credential_by_tld:', date, "AIL: Credential leak by tld", path, 'AIL_credential_by_tld.png')
     elif args.type == 2:
@@ -179,6 +181,4 @@ if __name__ == '__main__':
         create_pie_chart(args.country, 'SQLInjection_by_tld:', date, "AIL: sqlInjection by tld", path, 'AIL_sqlInjectionl_by_tld.png')
     elif args.type == 4:
-        create_pie_chart(args.country.upper(), 'domain_by_tld:', date, "AIL: Domain by tld", path, 'AIL_domain_by_tld.png')
-    elif args.type == 5:
-        create_pie_chart(args.country.upper(), 'iban_by_tld:', date, "AIL: Iban by tld", path, 'AIL_iban_by_tld.png')
+        create_pie_chart(args.country.upper(), 'iban_by_country:', date, "AIL: Iban by country", path, 'AIL_iban_by_country.png')
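DomainSubject.py, added above and dropped again in patch 09 below, leans entirely on newspaper3k's keyword extraction. Note that its loop overwrites the queued domain with a hard-coded test value, so as committed the script only ever processes that one onion domain. The core call sequence, reduced to a sketch (newspaper and its nltk corpora must be installed; the HTML string here is invented for illustration):

    import newspaper

    article = newspaper.Article(url='')   # no URL: the HTML is supplied directly
    article.set_html('<html><body><p>Buy and sell bitcoin, fast coin exchange.</p></body></html>')
    article.parse()
    article.nlp()                         # populates article.keywords / article.summary
    print(article.keywords)
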
From c12df315333ceac4529c450e2537a8921ecddf06 Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Wed, 12 Sep 2018 14:02:01 +0200
Subject: [PATCH 08/10] fix: [statistics] remove unused graph

---
 doc/statistics/create_graph_by_tld.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/doc/statistics/create_graph_by_tld.py b/doc/statistics/create_graph_by_tld.py
index e1fbbe97..e21ac736 100755
--- a/doc/statistics/create_graph_by_tld.py
+++ b/doc/statistics/create_graph_by_tld.py
@@ -171,7 +171,6 @@ if __name__ == '__main__':
         create_pie_chart(args.country, 'credential_by_tld:', date, "AIL: Credential leak by tld", path, 'AIL_credential_by_tld.png')
         create_pie_chart(args.country, 'mail_by_tld:', date, "AIL: mail leak by tld", path, 'AIL_mail_by_tld.png')
         create_pie_chart(args.country, 'SQLInjection_by_tld:', date, "AIL: SQLInjection by tld", path, 'AIL_SQLInjection_by_tld.png')
-        create_pie_chart(args.country.upper(), 'domain_by_tld:', date, "AIL: Domain by tld", path, 'AIL_domain_by_tld.png')
         create_pie_chart(args.country.upper(), 'iban_by_country:', date, "AIL: Iban by country", path, 'AIL_iban_by_country.png')
     elif args.type == 1:
        create_pie_chart(args.country, 'credential_by_tld:', date, "AIL: Credential leak by tld", path, 'AIL_credential_by_tld.png')
From a94a0fe1a74da3c111223bb753da430a0010c68e Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Wed, 12 Sep 2018 19:10:49 +0200
Subject: [PATCH 09/10] fix: [Install] add python requirements

---
 bin/DomainSubject.py          | 70 -----------------------------------
 pip3_packages_requirement.txt |  3 ++
 2 files changed, 3 insertions(+), 70 deletions(-)
 delete mode 100755 bin/DomainSubject.py

diff --git a/bin/DomainSubject.py b/bin/DomainSubject.py
deleted file mode 100755
index 6db110da..00000000
--- a/bin/DomainSubject.py
+++ /dev/null
@@ -1,70 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-
-from packages import Paste
-from Helper import Process
-from pubsublogger import publisher
-
-import time
-import redis
-import newspaper
-
-from collections import defaultdict
-
-from newspaper import fulltext
-
-if __name__ == '__main__':
-
-    publisher.port = 6380
-    publisher.channel = "Script"
-
-    publisher.info("Script DomainSubject started")
-
-    config_section = 'DomainSubject'
-    p = Process(config_section)
-
-    r_onion = redis.StrictRedis(
-        host=p.config.get("ARDB_Onion", "host"),
-        port=p.config.getint("ARDB_Onion", "port"),
-        db=p.config.getint("ARDB_Onion", "db"),
-        decode_responses=True)
-
-
-    while True:
-
-        # format:
-        domain = p.get_from_set()
-        domain = 'easycoinsayj7p5l.onion'
-
-        if domain is not None:
-
-            #retrieve all crawled pastes
-            set_crawles_pastes = r_onion.smembers('temp:crawled_domain_pastes:{}'.format(domain))
-            if set_crawles_pastes:
-                dict_keyword = defaultdict(int)
-
-                for paste_path in set_crawles_pastes:
-
-                    paste = Paste.Paste(paste_path)
-                    content = paste.get_p_content()
-
-                    article = newspaper.Article(url='')
-                    article.set_html(content)
-                    article.parse()
-                    article.nlp()
-
-                    for keyword in article.keywords:
-                        dict_keyword[keyword] += 1
-
-
-                if dict_keyword:
-                    res = [(k, dict_keyword[k]) for k in sorted(dict_keyword, key=dict_keyword.get, reverse=True)]
-                    for item in res:
-                        print(item)
-                else:
-                    print('no keywords found')
-                time.sleep(60)
-
-        else:
-            time.sleep(5)

diff --git a/pip3_packages_requirement.txt b/pip3_packages_requirement.txt
index 53ec97e7..dd447d5c 100644
--- a/pip3_packages_requirement.txt
+++ b/pip3_packages_requirement.txt
@@ -70,3 +70,6 @@ https://github.com/saffsd/langid.py/archive/master.zip

 #LibInjection bindings
 pylibinjection
+
+# Graph
+matplotlib

From 86cacbc51fe806e093b9e56ab1e39e94d028a396 Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Wed, 12 Sep 2018 19:30:00 +0200
Subject: [PATCH 10/10] fix: [Install] add python and package requirements

---
 doc/statistics/create_graph_by_tld.py | 2 ++
 installing_deps.sh                    | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/doc/statistics/create_graph_by_tld.py b/doc/statistics/create_graph_by_tld.py
index e21ac736..3ab3e1cc 100755
--- a/doc/statistics/create_graph_by_tld.py
+++ b/doc/statistics/create_graph_by_tld.py
@@ -7,6 +7,8 @@ Create statistics pie charts by tld
 Default tld: lu
 '''

+import matplotlib
+matplotlib.use('Agg')
 import os
 import sys
 import redis

diff --git a/installing_deps.sh b/installing_deps.sh
index c29ba4b9..975830f4 100755
--- a/installing_deps.sh
+++ b/installing_deps.sh
@@ -5,7 +5,7 @@ set -x

 sudo apt-get update

-sudo apt-get install python3-pip python-virtualenv python3-dev libfreetype6-dev \
+sudo apt-get install python3-pip python-virtualenv python3-dev python3-tk libfreetype6-dev \
 	screen g++ python-tk unzip libsnappy-dev cmake -y

 #optional tor install
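Patch 10's two-line addition is order-sensitive: the backend has to be selected before matplotlib.pyplot is imported anywhere in the process, because importing pyplot locks in a backend. Agg renders straight to raster files and therefore works on a headless server without an X display, which is why the chart script gains these lines while the interactive TkAgg path gets python3-tk installed separately. A minimal sketch:

    import matplotlib
    matplotlib.use('Agg')            # must precede the pyplot import
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots()
    ax.pie([3, 1], labels=['lu', 'other'], autopct='%1.1f%%')
    ax.set_title('example pie')
    plt.savefig('example_pie.png')   # renders to file, no display needed
    plt.close(fig)
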