chg: [statistics] clean scripts

This commit is contained in:
Terrtia 2018-09-12 11:21:11 +02:00
parent ea3d2c1977
commit 204e996fc3
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
6 changed files with 98 additions and 65 deletions

1
.gitignore vendored
View file

@ -40,3 +40,4 @@ doc/all_modules.txt
# auto generated
doc/module-data-flow.png
doc/data-flow.png
doc/statistics

View file

@ -62,7 +62,7 @@ def check_all_iban(l_iban, paste, filename):
if is_valid_iban(iban):
print('------')
nb_valid_iban = nb_valid_iban + 1
server_statistics.hincrby('iban_by_tld:'+date, iban[0:2], 1)
server_statistics.hincrby('iban_by_country:'+date, iban[0:2], 1)
if(nb_valid_iban > 0):
to_print = 'Iban;{};{};{};'.format(paste.p_source, paste.p_date, paste.p_name)

View file

@ -10,8 +10,6 @@ the out output of the Global module.
"""
import time
import datetime
import redis
from packages import Paste
from pubsublogger import publisher
@ -28,13 +26,6 @@ def main():
p = Process(config_section)
addr_dns = p.config.get("DomClassifier", "dns")
# ARDB #
server_statistics = redis.StrictRedis(
host=p.config.get("ARDB_Statistics", "host"),
port=p.config.getint("ARDB_Statistics", "port"),
db=p.config.getint("ARDB_Statistics", "db"),
decode_responses=True)
publisher.info("""ZMQ DomainClassifier is Running""")
c = DomainClassifier.domainclassifier.Extract(rawtext="", nameservers=[addr_dns])
@ -55,31 +46,20 @@ def main():
paste = PST.get_p_content()
mimetype = PST._get_p_encoding()
nb_domain = 0
nb_tld_domain = 0
if mimetype == "text/plain":
c.text(rawtext=paste)
c.potentialdomain()
valid = c.validdomain(rtype=['A'], extended=True)
nb_domain = len(set(valid))
if nb_domain > 0:
localizeddomains = c.include(expression=cc_tld)
if localizeddomains:
nb_tld_domain = len(set(localizeddomains))
publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld, PST.p_path))
localizeddomains = c.localizedomain(cc=cc)
if localizeddomains:
nb_tld_domain = nb_tld_domain + len(set(localizeddomains))
publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_path))
date = datetime.datetime.now().strftime("%Y%m")
server_statistics.hincrby('domain_by_tld:'+date, 'ALL', nb_domain)
if nb_tld_domain > 0:
server_statistics.hincrby('domain_by_tld:'+date, cc, nb_tld_domain)
c.validdomain(rtype=['A'], extended=True)
localizeddomains = c.include(expression=cc_tld)
if localizeddomains:
print(localizeddomains)
publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld, PST.p_path))
localizeddomains = c.localizedomain(cc=cc)
if localizeddomains:
print(localizeddomains)
publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_path))
except IOError:
print("CRC Checksum Failed on :", PST.p_path)
publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(

70
bin/DomainSubject.py Executable file
View file

@ -0,0 +1,70 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
from packages import Paste
from Helper import Process
from pubsublogger import publisher
import time
import redis
import newspaper
from collections import defaultdict
from newspaper import fulltext
if __name__ == '__main__':
    # Script entry point: for every domain taken from this module's input
    # queue, load the crawled pastes recorded for that domain, extract
    # keywords from each paste with newspaper, and print the keywords
    # ordered by how many pastes they appear in (most frequent first).

    # Port and channel of the redis instance used by pubsublogger
    publisher.port = 6380
    publisher.channel = "Script"

    publisher.info("Script DomainSubject started")

    config_section = 'DomainSubject'
    p = Process(config_section)

    # Connection settings are read from the "ARDB_Onion" config section
    r_onion = redis.StrictRedis(
        host=p.config.get("ARDB_Onion", "host"),
        port=p.config.getint("ARDB_Onion", "port"),
        db=p.config.getint("ARDB_Onion", "db"),
        decode_responses=True)

    while True:

        # format: <domain>
        domain = p.get_from_set()
        # The value read from the queue is used as-is. A hard-coded debug
        # domain used to overwrite it here, which discarded every queue item
        # and made the idle branch below unreachable.

        if domain is None:
            # Nothing to process: wait before polling the queue again
            time.sleep(5)
            continue

        # Retrieve all crawled pastes recorded for this domain
        crawled_pastes = r_onion.smembers('temp:crawled_domain_pastes:{}'.format(domain))
        if crawled_pastes:
            # keyword -> number of pastes whose keyword list contains it
            keyword_counts = defaultdict(int)

            for paste_path in crawled_pastes:
                paste = Paste.Paste(paste_path)
                content = paste.get_p_content()

                # The paste content is handed to newspaper as HTML; no URL is
                # fetched (url is left empty and the HTML is set by hand)
                article = newspaper.Article(url='')
                article.set_html(content)
                article.parse()
                article.nlp()

                for keyword in article.keywords:
                    keyword_counts[keyword] += 1

            if keyword_counts:
                # Most frequent keywords first; the sort is stable, so ties
                # keep their insertion order
                ranked = sorted(keyword_counts.items(), key=lambda kv: kv[1], reverse=True)
                for item in ranked:
                    print(item)
            else:
                print('no keywords found')

        # NOTE(review): the original indentation was lost; this pause is
        # assumed to follow every processed domain -- confirm.
        time.sleep(60)

View file

@ -11,9 +11,7 @@ It apply phone number regexes on paste content and warn if above a threshold.
"""
import datetime
import time
import redis
import re
import phonenumbers
from packages import Paste
@ -25,10 +23,8 @@ def search_phone(message):
paste = Paste.Paste(message)
content = paste.get_p_content()
# regex to find phone numbers, may raise many false positives (shalt thou seek optimization, upgrading is required)
#reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})')
#reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\(?\d{2,4}\)?){3,4})')
# use non capturing group
reg_phone = re.compile(r'(?:\+\d{1,4}(?:\(\d\))?\d?|0\d?)(?:\d{6,8}|(?:[-/\. ]{1}\(?\d{2,4}\)?){3,4})')
reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})')
reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\(?\d{2,4}\)?){3,4})')
# list of the regex results in the Paste, may be null
results = reg_phone.findall(content)
@ -49,23 +45,17 @@ def search_phone(message):
for phone_number in results:
try:
x = phonenumbers.parse(phone_number, None)
print(x)
country_code = x.country_code
if stats.get(country_code) is None:
stats[country_code] = 1
else:
stats[country_code] = stats[country_code] + 1
except Exception as e:
#print(e)
except:
pass
date = datetime.datetime.now().strftime("%Y%m")
for country_code in stats:
print(country_code)
if stats[country_code] > 4:
publisher.warning('{} contains Phone numbers with country code {}'.format(paste.p_name, country_code))
if __name__ == '__main__':
# If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
# Port of the redis instance used by pubsublogger
@ -82,13 +72,6 @@ if __name__ == '__main__':
# Send a description of the module to the logger
publisher.info("Run Phone module")
# ARDB #
server_statistics = redis.StrictRedis(
host=p.config.get("ARDB_Statistics", "host"),
port=p.config.getint("ARDB_Statistics", "port"),
db=p.config.getint("ARDB_Statistics", "db"),
decode_responses=True)
# Endless loop getting messages from the input queue
while True:
# Get one message from the input queue

View file

@ -2,7 +2,9 @@
# -*-coding:UTF-8 -*
'''
lu
Create statistics pie charts by tld
Default tld: lu
'''
import os
@ -64,7 +66,7 @@ def create_pie_chart(country ,db_key, date, pie_title, path, save_name):
ax1.set_title(pie_title)
#plt.show()
plt.savefig(os.path.join(path, save_name))
plt.savefig(os.path.join(path,save_name))
plt.close(fig1)
def create_donut_chart(db_key, date, pie_title, path, save_name):
@ -126,7 +128,7 @@ if __name__ == '__main__':
parser = argparse.ArgumentParser(
description='''This script is a part of the Analysis Information Leak
framework. It create pie charts on a country statistics".''',
framework. Create statistics pie charts".''',
epilog='Example: ./create_lu_graph.py 0 lu now, create_lu_graph.py 0 lu 201807')
parser.add_argument('type', type=int, default=0,
@ -135,12 +137,11 @@ if __name__ == '__main__':
1: credential_pie,
2: mail_pie
3: sqlinjection_pie,
4: domain_pie,
5: iban_pie,''',
choices=[0, 1, 2, 3, 4, 5], action='store')
4: iban_pie,''',
choices=[0, 1, 2, 3, 4], action='store')
parser.add_argument('country', type=str, default="de",
help='''The country code, de:default''',
parser.add_argument('country', type=str, default="lu",
help='''The country code, lu:default''',
action='store')
parser.add_argument('date', type=str, default="now",
@ -148,7 +149,7 @@ if __name__ == '__main__':
args = parser.parse_args()
path = os.path.join(os.environ['AIL_HOME'], 'doc') # path to module config file
path = os.path.join(os.environ['AIL_HOME'], 'doc', 'statistics') # save path
config_section = 'ARDB_Statistics'
@ -171,7 +172,7 @@ if __name__ == '__main__':
create_pie_chart(args.country, 'mail_by_tld:', date, "AIL: mail leak by tld", path, 'AIL_mail_by_tld.png')
create_pie_chart(args.country, 'SQLInjection_by_tld:', date, "AIL: SQLInjection by tld", path, 'AIL_SQLInjection_by_tld.png')
create_pie_chart(args.country.upper(), 'domain_by_tld:', date, "AIL: Domain by tld", path, 'AIL_domain_by_tld.png')
create_pie_chart(args.country.upper(), 'iban_by_tld:', date, "AIL: Iban by tld", path, 'AIL_iban_by_tld.png')
create_pie_chart(args.country.upper(), 'iban_by_country:', date, "AIL: Iban by country", path, 'AIL_iban_by_country.png')
elif args.type == 1:
create_pie_chart(args.country, 'credential_by_tld:', date, "AIL: Credential leak by tld", path, 'AIL_credential_by_tld.png')
elif args.type == 2:
@ -179,6 +180,4 @@ if __name__ == '__main__':
elif args.type == 3:
create_pie_chart(args.country, 'SQLInjection_by_tld:', date, "AIL: sqlInjection by tld", path, 'AIL_sqlInjectionl_by_tld.png')
elif args.type == 4:
create_pie_chart(args.country.upper(), 'domain_by_tld:', date, "AIL: Domain by tld", path, 'AIL_domain_by_tld.png')
elif args.type == 5:
create_pie_chart(args.country.upper(), 'iban_by_tld:', date, "AIL: Iban by tld", path, 'AIL_iban_by_tld.png')
create_pie_chart(args.country.upper(), 'iban_by_country:', date, "AIL: Iban by country", path, 'AIL_iban_by_country.png')