mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-10 00:28:22 +00:00
chg: [statistics] clean scripts
This commit is contained in:
parent
ea3d2c1977
commit
204e996fc3
6 changed files with 98 additions and 65 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -40,3 +40,4 @@ doc/all_modules.txt
|
|||
# auto generated
|
||||
doc/module-data-flow.png
|
||||
doc/data-flow.png
|
||||
doc/statistics
|
||||
|
|
|
@ -62,7 +62,7 @@ def check_all_iban(l_iban, paste, filename):
|
|||
if is_valid_iban(iban):
|
||||
print('------')
|
||||
nb_valid_iban = nb_valid_iban + 1
|
||||
server_statistics.hincrby('iban_by_tld:'+date, iban[0:2], 1)
|
||||
server_statistics.hincrby('iban_by_country:'+date, iban[0:2], 1)
|
||||
|
||||
if(nb_valid_iban > 0):
|
||||
to_print = 'Iban;{};{};{};'.format(paste.p_source, paste.p_date, paste.p_name)
|
||||
|
|
|
@ -10,8 +10,6 @@ the out output of the Global module.
|
|||
|
||||
"""
|
||||
import time
|
||||
import datetime
|
||||
import redis
|
||||
from packages import Paste
|
||||
from pubsublogger import publisher
|
||||
|
||||
|
@ -28,13 +26,6 @@ def main():
|
|||
p = Process(config_section)
|
||||
addr_dns = p.config.get("DomClassifier", "dns")
|
||||
|
||||
# ARDB #
|
||||
server_statistics = redis.StrictRedis(
|
||||
host=p.config.get("ARDB_Statistics", "host"),
|
||||
port=p.config.getint("ARDB_Statistics", "port"),
|
||||
db=p.config.getint("ARDB_Statistics", "db"),
|
||||
decode_responses=True)
|
||||
|
||||
publisher.info("""ZMQ DomainClassifier is Running""")
|
||||
|
||||
c = DomainClassifier.domainclassifier.Extract(rawtext="", nameservers=[addr_dns])
|
||||
|
@ -55,31 +46,20 @@ def main():
|
|||
paste = PST.get_p_content()
|
||||
mimetype = PST._get_p_encoding()
|
||||
|
||||
nb_domain = 0
|
||||
nb_tld_domain = 0
|
||||
|
||||
if mimetype == "text/plain":
|
||||
c.text(rawtext=paste)
|
||||
c.potentialdomain()
|
||||
valid = c.validdomain(rtype=['A'], extended=True)
|
||||
nb_domain = len(set(valid))
|
||||
if nb_domain > 0:
|
||||
localizeddomains = c.include(expression=cc_tld)
|
||||
if localizeddomains:
|
||||
nb_tld_domain = len(set(localizeddomains))
|
||||
publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
|
||||
PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld, PST.p_path))
|
||||
|
||||
localizeddomains = c.localizedomain(cc=cc)
|
||||
if localizeddomains:
|
||||
nb_tld_domain = nb_tld_domain + len(set(localizeddomains))
|
||||
publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
|
||||
PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_path))
|
||||
|
||||
date = datetime.datetime.now().strftime("%Y%m")
|
||||
server_statistics.hincrby('domain_by_tld:'+date, 'ALL', nb_domain)
|
||||
if nb_tld_domain > 0:
|
||||
server_statistics.hincrby('domain_by_tld:'+date, cc, nb_tld_domain)
|
||||
c.validdomain(rtype=['A'], extended=True)
|
||||
localizeddomains = c.include(expression=cc_tld)
|
||||
if localizeddomains:
|
||||
print(localizeddomains)
|
||||
publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
|
||||
PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld, PST.p_path))
|
||||
localizeddomains = c.localizedomain(cc=cc)
|
||||
if localizeddomains:
|
||||
print(localizeddomains)
|
||||
publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
|
||||
PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_path))
|
||||
except IOError:
|
||||
print("CRC Checksum Failed on :", PST.p_path)
|
||||
publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
|
||||
|
|
70
bin/DomainSubject.py
Executable file
70
bin/DomainSubject.py
Executable file
|
@ -0,0 +1,70 @@
|
|||
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
DomainSubject module.

Consumes crawled onion domains from the module queue, gathers every paste
crawled for that domain, runs newspaper's NLP keyword extraction over each
paste's HTML content, and prints the aggregated keywords ordered by frequency.
"""

from packages import Paste
from Helper import Process
from pubsublogger import publisher

import time
import redis
import newspaper

from collections import defaultdict

from newspaper import fulltext

if __name__ == '__main__':

    # Port/channel of the redis instance used by pubsublogger.
    publisher.port = 6380
    publisher.channel = "Script"

    publisher.info("Script DomainSubject started")

    config_section = 'DomainSubject'
    p = Process(config_section)

    # ARDB Onion DB: holds the set of paste paths crawled per domain.
    r_onion = redis.StrictRedis(
        host=p.config.get("ARDB_Onion", "host"),
        port=p.config.getint("ARDB_Onion", "port"),
        db=p.config.getint("ARDB_Onion", "db"),
        decode_responses=True)

    while True:

        # message format: <domain>
        domain = p.get_from_set()
        # NOTE: removed leftover debug override that replaced every queued
        # message with the hard-coded domain 'easycoinsayj7p5l.onion',
        # which made the queue (and the None guard below) ineffective.

        if domain is not None:

            # Retrieve all pastes crawled for this domain.
            set_crawles_pastes = r_onion.smembers('temp:crawled_domain_pastes:{}'.format(domain))
            if set_crawles_pastes:
                # keyword -> number of pastes mentioning it
                dict_keyword = defaultdict(int)

                for paste_path in set_crawles_pastes:

                    paste = Paste.Paste(paste_path)
                    content = paste.get_p_content()

                    # Parse the raw crawled HTML and extract keywords via NLP.
                    article = newspaper.Article(url='')
                    article.set_html(content)
                    article.parse()
                    article.nlp()

                    for keyword in article.keywords:
                        dict_keyword[keyword] += 1

                if dict_keyword:
                    # Sort keywords by descending occurrence count.
                    res = [(k, dict_keyword[k]) for k in sorted(dict_keyword, key=dict_keyword.get, reverse=True)]
                    for item in res:
                        print(item)
                else:
                    print('no keywords found')

        else:
            # Queue empty: back off before polling again.
            time.sleep(5)
|
23
bin/Phone.py
23
bin/Phone.py
|
@ -11,9 +11,7 @@ It apply phone number regexes on paste content and warn if above a threshold.
|
|||
|
||||
"""
|
||||
|
||||
import datetime
|
||||
import time
|
||||
import redis
|
||||
import re
|
||||
import phonenumbers
|
||||
from packages import Paste
|
||||
|
@ -25,10 +23,8 @@ def search_phone(message):
|
|||
paste = Paste.Paste(message)
|
||||
content = paste.get_p_content()
|
||||
# regex to find phone numbers, may raise many false positives (shalt thou seek optimization, upgrading is required)
|
||||
#reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})')
|
||||
#reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\(?\d{2,4}\)?){3,4})')
|
||||
# use non capturing group
|
||||
reg_phone = re.compile(r'(?:\+\d{1,4}(?:\(\d\))?\d?|0\d?)(?:\d{6,8}|(?:[-/\. ]{1}\(?\d{2,4}\)?){3,4})')
|
||||
reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})')
|
||||
reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\(?\d{2,4}\)?){3,4})')
|
||||
# list of the regex results in the Paste, may be null
|
||||
results = reg_phone.findall(content)
|
||||
|
||||
|
@ -49,23 +45,17 @@ def search_phone(message):
|
|||
for phone_number in results:
|
||||
try:
|
||||
x = phonenumbers.parse(phone_number, None)
|
||||
print(x)
|
||||
country_code = x.country_code
|
||||
if stats.get(country_code) is None:
|
||||
stats[country_code] = 1
|
||||
else:
|
||||
stats[country_code] = stats[country_code] + 1
|
||||
except Exception as e:
|
||||
#print(e)
|
||||
except:
|
||||
pass
|
||||
|
||||
date = datetime.datetime.now().strftime("%Y%m")
|
||||
for country_code in stats:
|
||||
print(country_code)
|
||||
if stats[country_code] > 4:
|
||||
publisher.warning('{} contains Phone numbers with country code {}'.format(paste.p_name, country_code))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
|
||||
# Port of the redis instance used by pubsublogger
|
||||
|
@ -82,13 +72,6 @@ if __name__ == '__main__':
|
|||
# Sent to the logging a description of the module
|
||||
publisher.info("Run Phone module")
|
||||
|
||||
# ARDB #
|
||||
server_statistics = redis.StrictRedis(
|
||||
host=p.config.get("ARDB_Statistics", "host"),
|
||||
port=p.config.getint("ARDB_Statistics", "port"),
|
||||
db=p.config.getint("ARDB_Statistics", "db"),
|
||||
decode_responses=True)
|
||||
|
||||
# Endless loop getting messages from the input queue
|
||||
while True:
|
||||
# Get one message from the input queue
|
||||
|
|
|
@ -2,7 +2,9 @@
|
|||
# -*-coding:UTF-8 -*
|
||||
|
||||
'''
|
||||
lu
|
||||
Create statistics pie charts by tld
|
||||
|
||||
Default tld: lu
|
||||
'''
|
||||
|
||||
import os
|
||||
|
@ -64,7 +66,7 @@ def create_pie_chart(country ,db_key, date, pie_title, path, save_name):
|
|||
|
||||
ax1.set_title(pie_title)
|
||||
#plt.show()
|
||||
plt.savefig(os.path.join(path, save_name))
|
||||
plt.savefig(os.path.join(path,save_name))
|
||||
plt.close(fig1)
|
||||
|
||||
def create_donut_chart(db_key, date, pie_title, path, save_name):
|
||||
|
@ -126,7 +128,7 @@ if __name__ == '__main__':
|
|||
|
||||
parser = argparse.ArgumentParser(
|
||||
description='''This script is a part of the Analysis Information Leak
|
||||
framework. It create pie charts on a country statistics".''',
|
||||
framework. Create statistics pie charts".''',
|
||||
epilog='Example: ./create_lu_graph.py 0 lu now, create_lu_graph.py 0 lu 201807')
|
||||
|
||||
parser.add_argument('type', type=int, default=0,
|
||||
|
@ -135,12 +137,11 @@ if __name__ == '__main__':
|
|||
1: credential_pie,
|
||||
2: mail_pie
|
||||
3: sqlinjection_pie,
|
||||
4: domain_pie,
|
||||
5: iban_pie,''',
|
||||
choices=[0, 1, 2, 3, 4, 5], action='store')
|
||||
4: iban_pie,''',
|
||||
choices=[0, 1, 2, 3, 4], action='store')
|
||||
|
||||
parser.add_argument('country', type=str, default="de",
|
||||
help='''The country code, de:default''',
|
||||
parser.add_argument('country', type=str, default="lu",
|
||||
help='''The country code, lu:default''',
|
||||
action='store')
|
||||
|
||||
parser.add_argument('date', type=str, default="now",
|
||||
|
@ -148,7 +149,7 @@ if __name__ == '__main__':
|
|||
|
||||
args = parser.parse_args()
|
||||
|
||||
path = os.path.join(os.environ['AIL_HOME'], 'doc') # path to module config file
|
||||
path = os.path.join(os.environ['AIL_HOME'], 'doc', 'statistics') # save path
|
||||
|
||||
config_section = 'ARDB_Statistics'
|
||||
|
||||
|
@ -171,7 +172,7 @@ if __name__ == '__main__':
|
|||
create_pie_chart(args.country, 'mail_by_tld:', date, "AIL: mail leak by tld", path, 'AIL_mail_by_tld.png')
|
||||
create_pie_chart(args.country, 'SQLInjection_by_tld:', date, "AIL: SQLInjection by tld", path, 'AIL_SQLInjection_by_tld.png')
|
||||
create_pie_chart(args.country.upper(), 'domain_by_tld:', date, "AIL: Domain by tld", path, 'AIL_domain_by_tld.png')
|
||||
create_pie_chart(args.country.upper(), 'iban_by_tld:', date, "AIL: Iban by tld", path, 'AIL_iban_by_tld.png')
|
||||
create_pie_chart(args.country.upper(), 'iban_by_country:', date, "AIL: Iban by country", path, 'AIL_iban_by_country.png')
|
||||
elif args.type == 1:
|
||||
create_pie_chart(args.country, 'credential_by_tld:', date, "AIL: Credential leak by tld", path, 'AIL_credential_by_tld.png')
|
||||
elif args.type == 2:
|
||||
|
@ -179,6 +180,4 @@ if __name__ == '__main__':
|
|||
elif args.type == 3:
|
||||
create_pie_chart(args.country, 'SQLInjection_by_tld:', date, "AIL: sqlInjection by tld", path, 'AIL_sqlInjectionl_by_tld.png')
|
||||
elif args.type == 4:
|
||||
create_pie_chart(args.country.upper(), 'domain_by_tld:', date, "AIL: Domain by tld", path, 'AIL_domain_by_tld.png')
|
||||
elif args.type == 5:
|
||||
create_pie_chart(args.country.upper(), 'iban_by_tld:', date, "AIL: Iban by tld", path, 'AIL_iban_by_tld.png')
|
||||
create_pie_chart(args.country.upper(), 'iban_by_country:', date, "AIL: Iban by country", path, 'AIL_iban_by_country.png')
|
Loading…
Reference in a new issue