mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-10 08:38:28 +00:00
chg: [statistics] ARDB migration
This commit is contained in:
parent
d27d47dc70
commit
aa6ba61050
13 changed files with 482 additions and 420 deletions
|
@ -16,6 +16,13 @@ import re
|
|||
import string
|
||||
from itertools import chain
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages #
|
||||
##################################
|
||||
from lib import Statistics
|
||||
|
||||
|
||||
from packages import Item
|
||||
from pubsublogger import publisher
|
||||
|
||||
|
@ -48,6 +55,7 @@ def is_valid_iban(iban):
|
|||
return True
|
||||
return False
|
||||
|
||||
# # TODO: SET
|
||||
def check_all_iban(l_iban, obj_id):
|
||||
nb_valid_iban = 0
|
||||
for iban in l_iban:
|
||||
|
@ -61,7 +69,8 @@ def check_all_iban(l_iban, obj_id):
|
|||
if is_valid_iban(iban):
|
||||
print('------')
|
||||
nb_valid_iban = nb_valid_iban + 1
|
||||
server_statistics.hincrby('iban_by_country:'+date, iban[0:2], 1)
|
||||
Statistics.add_iban_country_stats_by_date(date, iban[0:2], 1)
|
||||
|
||||
|
||||
if(nb_valid_iban > 0):
|
||||
to_print = 'Iban;{};{};{};'.format(Item.get_source(obj_id), Item.get_item_date(obj_id), Item.get_basename(obj_id))
|
||||
|
@ -70,9 +79,6 @@ def check_all_iban(l_iban, obj_id):
|
|||
msg = 'infoleak:automatic-detection="iban";{}'.format(obj_id)
|
||||
p.populate_set_out(msg, 'Tags')
|
||||
|
||||
#Send to duplicate
|
||||
p.populate_set_out(obj_id, 'Duplicate')
|
||||
|
||||
if __name__ == "__main__":
|
||||
publisher.port = 6380
|
||||
publisher.channel = "Script"
|
||||
|
@ -82,13 +88,6 @@ if __name__ == "__main__":
|
|||
p = Process(config_section)
|
||||
max_execution_time = p.config.getint("BankAccount", "max_execution_time")
|
||||
|
||||
# ARDB #
|
||||
server_statistics = redis.StrictRedis(
|
||||
host=p.config.get("ARDB_Statistics", "host"),
|
||||
port=p.config.getint("ARDB_Statistics", "port"),
|
||||
db=p.config.getint("ARDB_Statistics", "db"),
|
||||
decode_responses=True)
|
||||
|
||||
publisher.info("BankAccount started")
|
||||
|
||||
#iban_regex = re.compile(r'\b[A-Za-z]{2}[0-9]{2}(?:[ ]?[0-9]{4}){4}(?:[ ]?[0-9]{1,2})?\b')
|
||||
|
|
|
@ -314,14 +314,6 @@ if __name__ == '__main__':
|
|||
|
||||
print('splash url: {}'.format(splash_url))
|
||||
|
||||
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes"))
|
||||
|
||||
r_serv_metadata = redis.StrictRedis(
|
||||
host=p.config.get("ARDB_Metadata", "host"),
|
||||
port=p.config.getint("ARDB_Metadata", "port"),
|
||||
db=p.config.getint("ARDB_Metadata", "db"),
|
||||
decode_responses=True)
|
||||
|
||||
r_cache = redis.StrictRedis(
|
||||
host=p.config.get("Redis_Cache", "host"),
|
||||
port=p.config.getint("Redis_Cache", "port"),
|
||||
|
|
|
@ -15,6 +15,7 @@ sys.path.append(os.environ['AIL_BIN'])
|
|||
# Import Project packages
|
||||
##################################
|
||||
from lib.ConfigLoader import ConfigLoader
|
||||
from lib import Statistics
|
||||
from lib import Tag
|
||||
from lib import Users
|
||||
from lib.objects import Decodeds
|
||||
|
@ -35,6 +36,8 @@ r_serv_tracker = config_loader.get_redis_conn("ARDB_Tracker")
|
|||
r_serv_tags = config_loader.get_redis_conn("ARDB_Tags")
|
||||
r_crawler = config_loader.get_redis_conn("ARDB_Onion")
|
||||
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
|
||||
r_serv_trend = config_loader.get_redis_conn("ARDB_Trending")
|
||||
r_statistics = config_loader.get_redis_conn("ARDB_Statistics")
|
||||
config_loader = None
|
||||
# # - - CONFIGS - - # #
|
||||
|
||||
|
@ -358,9 +361,16 @@ def items_migration():
|
|||
def get_last_crawled_domains(domain_type):
|
||||
return r_crawler.lrange(f'last_{domain_type}', 0 ,-1)
|
||||
|
||||
def get_domains_blacklist(domain_type):
|
||||
return r_crawler.smembers(f'blacklist_{domain_type}')
|
||||
|
||||
def crawler_migration():
|
||||
print('CRAWLER MIGRATION...')
|
||||
|
||||
for domain_type in ['onion', 'regular']:
|
||||
for domain in get_domains_blacklist(domain_type):
|
||||
crawlers.add_domain_blacklist(domain_type, domain)
|
||||
|
||||
# for domain_type in ['onion', 'regular']:
|
||||
# for row in get_last_crawled_domains(domain_type):
|
||||
# dom_row, epoch = row.rsplit(';', 1)
|
||||
|
@ -368,19 +378,18 @@ def crawler_migration():
|
|||
# print(domain, port, epoch)
|
||||
# #crawlers.add_last_crawled_domain(domain_type, domain, port, epoch)
|
||||
|
||||
for cookiejar_uuid in old_crawlers.get_all_cookiejar():
|
||||
meta = old_crawlers.get_cookiejar_metadata(cookiejar_uuid, level=True)
|
||||
#print(meta)
|
||||
#crawlers.create_cookiejar(meta['user_id'], level=meta['level'], description=meta['description'], cookiejar_uuid=cookiejar_uuid)
|
||||
#crawlers._set_cookiejar_date(meta['date'])
|
||||
|
||||
for meta_cookie, cookie_uuid in old_crawlers.get_cookiejar_cookies_list(cookiejar_uuid, add_cookie_uuid=True):
|
||||
print(cookie_uuid)
|
||||
#crawlers.add_cookie_to_cookiejar(cookiejar_uuid, meta_cookie, cookie_uuid=cookie_uuid)
|
||||
# for cookiejar_uuid in old_crawlers.get_all_cookiejar():
|
||||
# meta = old_crawlers.get_cookiejar_metadata(cookiejar_uuid, level=True)
|
||||
# #print(meta)
|
||||
# crawlers.create_cookiejar(meta['user_id'], level=meta['level'], description=meta['description'], cookiejar_uuid=cookiejar_uuid)
|
||||
# crawlers._set_cookiejar_date(meta['date'])
|
||||
#
|
||||
# for meta_cookie, cookie_uuid in old_crawlers.get_cookiejar_cookies_list(cookiejar_uuid, add_cookie_uuid=True):
|
||||
# print(cookie_uuid)
|
||||
# crawlers.add_cookie_to_cookiejar(cookiejar_uuid, meta_cookie, cookie_uuid=cookie_uuid)
|
||||
|
||||
# TODO: auto crawler -> to Fix / change
|
||||
|
||||
|
||||
# TODO: crawlers queues
|
||||
|
||||
###############################
|
||||
|
@ -689,12 +698,89 @@ def subtypes_obj_migration():
|
|||
#
|
||||
# Credential:
|
||||
# HSET 'credential_by_tld:'+date, tld, 1
|
||||
|
||||
def get_all_provider():
|
||||
return r_serv_trend.smembers('all_provider_set')
|
||||
|
||||
def get_item_source_stats_by_date(date, source):
|
||||
stats = {}
|
||||
stats['num'] = r_serv_trend.hget(f'{source}_num', date)
|
||||
stats['size'] = r_serv_trend.hget(f'{source}_size', date)
|
||||
stats['avg'] = r_serv_trend.hget(f'{source}_avg', date)
|
||||
return stats
|
||||
|
||||
def get_item_stats_size_avg_by_date(date):
|
||||
return r_serv_trend.zrange(f'top_avg_size_set_{date}', 0, -1, withscores=True)
|
||||
|
||||
def get_item_stats_nb_by_date(date):
|
||||
return r_serv_trend.zrange(f'providers_set_{date}', 0, -1, withscores=True)
|
||||
|
||||
def get_top_stats_module(module_name, date):
|
||||
return r_serv_trend.zrange(f'top_{module_name}_set_{date}', 0, -1, withscores=True)
|
||||
|
||||
def get_module_tld_stats_by_date(module, date):
|
||||
return r_statistics.hgetall(f'{module}_by_tld:{date}')
|
||||
|
||||
def statistics_migration():
|
||||
# paste_by_modules_timeout
|
||||
|
||||
# Date full history => lot of keys
|
||||
|
||||
|
||||
# top_size_set_{date}
|
||||
# top_avg_size_set_{date}
|
||||
|
||||
# 'providers_set_{date}
|
||||
|
||||
|
||||
|
||||
sources = get_all_provider()
|
||||
for date in Date.get_date_range_today('20180101'):
|
||||
|
||||
size_avg = get_item_stats_size_avg_by_date(date)
|
||||
|
||||
nb_items = get_item_stats_nb_by_date(date)
|
||||
|
||||
# top_size_set_{date}
|
||||
# top_avg_size_set_{date}
|
||||
|
||||
# 'providers_set_{date}
|
||||
|
||||
# ITEM STATS
|
||||
for source in sources:
|
||||
source_stat = get_item_source_stats_by_date(date, source)
|
||||
Statistics._create_item_stats_size_nb(date, source, source_stat['num'], source_stat['size'], source_stat['avg'])
|
||||
|
||||
|
||||
|
||||
# # MODULE STATS
|
||||
# for module in ['credential', 'mail', 'SQLInjection']:
|
||||
# stats = get_module_tld_stats_by_date(module, date)
|
||||
# for tld in stats:
|
||||
# if tld:
|
||||
# print(module, date, tld, stats[tld])
|
||||
# Statistics.add_module_tld_stats_by_date(module, date, tld, stats[tld])
|
||||
# for module in ['credential']:
|
||||
# # TOP STATS
|
||||
# top_module = get_top_stats_module(module, date)
|
||||
# for keyword, total_sum in top_module:
|
||||
# print(date, module, keyword, total_sum)
|
||||
# #Statistics._add_module_stats(module, total_sum, keyword, date)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
pass
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
core_migration()
|
||||
#core_migration()
|
||||
# user_migration()
|
||||
# tags_migration()
|
||||
#items_migration()
|
||||
|
@ -706,6 +792,7 @@ if __name__ == '__main__':
|
|||
# ail_2_ail_migration()
|
||||
# trackers_migration()
|
||||
# investigations_migration()
|
||||
statistics_migration()
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -217,8 +217,12 @@ function launching_scripts {
|
|||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "Onion" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Onion.py; read x"
|
||||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "Mail" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Mail.py; read x"
|
||||
sleep 0.1
|
||||
# screen -S "Script_AIL" -X screen -t "SentimentAnalysis" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./SentimentAnalysis.py; read x"
|
||||
# sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "ModuleStats" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./ModuleStats.py; read x"
|
||||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "Telegram" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Telegram.py; read x"
|
||||
sleep 0.1
|
||||
|
||||
|
@ -267,8 +271,6 @@ function launching_scripts {
|
|||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "BankAccount" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./BankAccount.py; read x"
|
||||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "Mail" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Mail.py; read x"
|
||||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "PgpDump" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./PgpDump.py; read x"
|
||||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "Cryptocurrency" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Cryptocurrencies.py; read x"
|
||||
|
@ -277,8 +279,6 @@ function launching_scripts {
|
|||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "Cve" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Cve.py; read x"
|
||||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "ModuleStats" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./ModuleStats.py; read x"
|
||||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "MISPtheHIVEfeeder" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./MISP_The_Hive_feeder.py; read x"
|
||||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "Languages" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Languages.py; read x"
|
||||
|
|
220
bin/Mail.py
220
bin/Mail.py
|
@ -1,220 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
"""
|
||||
The Mails Module
|
||||
======================
|
||||
|
||||
This module is consuming the Redis-list created by the Categ module.
|
||||
|
||||
It apply mail regexes on item content and warn if above a threshold.
|
||||
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import uuid
|
||||
import redis
|
||||
import time
|
||||
import datetime
|
||||
|
||||
import dns.resolver
|
||||
import dns.exception
|
||||
|
||||
from multiprocessing import Process as Proc
|
||||
|
||||
from pubsublogger import publisher
|
||||
from Helper import Process
|
||||
|
||||
from pyfaup.faup import Faup
|
||||
|
||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
|
||||
import Item
|
||||
|
||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
|
||||
import ConfigLoader
|
||||
|
||||
## LOAD CONFIG ##
|
||||
config_loader = ConfigLoader.ConfigLoader()
|
||||
server_statistics = config_loader.get_redis_conn("ARDB_Statistics")
|
||||
r_serv_cache = config_loader.get_redis_conn("Redis_Cache")
|
||||
|
||||
dns_server = config_loader.get_config_str('Mail', 'dns')
|
||||
|
||||
config_loader = None
|
||||
## -- ##
|
||||
|
||||
def is_mxdomain_in_cache(mxdomain):
|
||||
return r_serv_cache.exists('mxdomain:{}'.format(mxdomain))
|
||||
|
||||
def save_mxdomain_in_cache(mxdomain):
|
||||
r_serv_cache.setex('mxdomain:{}'.format(mxdomain), 1, datetime.timedelta(days=1))
|
||||
|
||||
def check_mx_record(set_mxdomains, dns_server):
|
||||
"""Check if emails MX domains are responding.
|
||||
|
||||
:param adress_set: -- (set) This is a set of emails domains
|
||||
:return: (int) Number of adress with a responding and valid MX domains
|
||||
|
||||
"""
|
||||
resolver = dns.resolver.Resolver()
|
||||
resolver.nameservers = [dns_server]
|
||||
resolver.timeout = 5.0
|
||||
resolver.lifetime = 2.0
|
||||
|
||||
valid_mxdomain = []
|
||||
for mxdomain in set_mxdomains:
|
||||
|
||||
# check if is in cache
|
||||
# # TODO:
|
||||
if is_mxdomain_in_cache(mxdomain):
|
||||
valid_mxdomain.append(mxdomain)
|
||||
else:
|
||||
|
||||
# DNS resolution
|
||||
try:
|
||||
answers = resolver.query(mxdomain, rdtype=dns.rdatatype.MX)
|
||||
if answers:
|
||||
save_mxdomain_in_cache(mxdomain)
|
||||
valid_mxdomain.append(mxdomain)
|
||||
# DEBUG
|
||||
# print('---')
|
||||
# print(answers.response)
|
||||
# print(answers.qname)
|
||||
# print(answers.rdtype)
|
||||
# print(answers.rdclass)
|
||||
# print(answers.nameserver)
|
||||
# print()
|
||||
|
||||
except dns.resolver.NoNameservers:
|
||||
publisher.debug('NoNameserver, No non-broken nameservers are available to answer the query.')
|
||||
print('NoNameserver, No non-broken nameservers are available to answer the query.')
|
||||
except dns.resolver.NoAnswer:
|
||||
publisher.debug('NoAnswer, The response did not contain an answer to the question.')
|
||||
print('NoAnswer, The response did not contain an answer to the question.')
|
||||
except dns.name.EmptyLabel:
|
||||
publisher.debug('SyntaxError: EmptyLabel')
|
||||
print('SyntaxError: EmptyLabel')
|
||||
except dns.resolver.NXDOMAIN:
|
||||
#save_mxdomain_in_cache(mxdomain)
|
||||
publisher.debug('The query name does not exist.')
|
||||
print('The query name does not exist.')
|
||||
except dns.name.LabelTooLong:
|
||||
publisher.debug('The Label is too long')
|
||||
print('The Label is too long')
|
||||
except dns.exception.Timeout:
|
||||
print('dns timeout')
|
||||
#save_mxdomain_in_cache(mxdomain)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
return valid_mxdomain
|
||||
|
||||
def extract_all_emails(redis_key, item_content):
|
||||
all_emails = re.findall(email_regex, item_content)
|
||||
if len(all_emails) > 1:
|
||||
r_serv_cache.sadd(redis_key, *all_emails)
|
||||
r_serv_cache.expire(redis_key, 360)
|
||||
elif all_emails:
|
||||
r_serv_cache.sadd(redis_key, all_emails[0])
|
||||
r_serv_cache.expire(redis_key, 360)
|
||||
|
||||
if __name__ == "__main__":
|
||||
publisher.port = 6380
|
||||
publisher.channel = "Script"
|
||||
|
||||
config_section = 'Mail'
|
||||
|
||||
faup = Faup()
|
||||
|
||||
p = Process(config_section)
|
||||
|
||||
publisher.info("Mails module started")
|
||||
|
||||
# Numbers of Mails needed to Tags
|
||||
mail_threshold = 10
|
||||
|
||||
max_execution_time = 30
|
||||
|
||||
email_regex = "[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}"
|
||||
|
||||
redis_key = 'mail_extracted:{}'.format(str(uuid.uuid4()))
|
||||
|
||||
while True:
|
||||
message = p.get_from_set()
|
||||
|
||||
if message is not None:
|
||||
item_id, score = message.split()
|
||||
|
||||
print(item_id)
|
||||
|
||||
item_content = Item.get_item_content(item_id)
|
||||
|
||||
proc = Proc(target=extract_all_emails, args=(redis_key, item_content, ))
|
||||
try:
|
||||
proc.start()
|
||||
proc.join(max_execution_time)
|
||||
if proc.is_alive():
|
||||
proc.terminate()
|
||||
p.incr_module_timeout_statistic()
|
||||
err_mess = "Mails: processing timeout: {}".format(item_id)
|
||||
print(err_mess)
|
||||
publisher.info(err_mess)
|
||||
continue
|
||||
else:
|
||||
all_emails = r_serv_cache.smembers(redis_key)
|
||||
r_serv_cache.delete(redis_key)
|
||||
proc.terminate()
|
||||
except KeyboardInterrupt:
|
||||
print("Caught KeyboardInterrupt, terminating workers")
|
||||
proc.terminate()
|
||||
sys.exit(0)
|
||||
|
||||
# get MXdomains
|
||||
set_mxdomains = set()
|
||||
dict_mxdomains_email = {}
|
||||
for email in all_emails:
|
||||
mxdomain = email.split('@')[1].lower()
|
||||
if not mxdomain in dict_mxdomains_email:
|
||||
dict_mxdomains_email[mxdomain] = []
|
||||
set_mxdomains.add(mxdomain)
|
||||
dict_mxdomains_email[mxdomain].append(email)
|
||||
|
||||
## TODO: add MAIL trackers
|
||||
|
||||
valid_mx = check_mx_record(set_mxdomains, dns_server)
|
||||
|
||||
item_date = Item.get_item_date(item_id)
|
||||
|
||||
num_valid_email = 0
|
||||
for domain_mx in valid_mx:
|
||||
num_valid_email += len(dict_mxdomains_email[domain_mx])
|
||||
|
||||
for email in dict_mxdomains_email[domain_mx]:
|
||||
msg = 'mail;{};{};{}'.format(1, email, item_date)
|
||||
p.populate_set_out(msg, 'ModuleStats')
|
||||
|
||||
# Create country stats
|
||||
faup.decode(email)
|
||||
tld = faup.get()['tld']
|
||||
try:
|
||||
tld = tld.decode()
|
||||
except:
|
||||
pass
|
||||
server_statistics.hincrby('mail_by_tld:{}'.format(item_date), tld, 1)
|
||||
|
||||
msg = 'Mails;{};{};{};Checked {} e-mail(s);{}'.format(Item.get_source(item_id), item_date, Item.get_item_basename(item_id), num_valid_email, item_id)
|
||||
|
||||
if num_valid_email > mail_threshold:
|
||||
print('{} Checked {} e-mail(s)'.format(item_id, num_valid_email))
|
||||
publisher.warning(msg)
|
||||
#Send to duplicate
|
||||
p.populate_set_out(item_id, 'Duplicate')
|
||||
#tags
|
||||
msg = 'infoleak:automatic-detection="mail";{}'.format(item_id)
|
||||
p.populate_set_out(msg, 'Tags')
|
||||
else:
|
||||
publisher.info(msg)
|
||||
|
||||
else:
|
||||
time.sleep(10)
|
|
@ -1,156 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
"""
|
||||
This module makes statistics for some modules and providers
|
||||
|
||||
"""
|
||||
|
||||
##################################
|
||||
# Import External packages #
|
||||
##################################
|
||||
import time
|
||||
import datetime
|
||||
import redis
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages #
|
||||
##################################
|
||||
from modules.abstract_module import AbstractModule
|
||||
from packages.Date import Date
|
||||
from packages import Paste
|
||||
import ConfigLoader
|
||||
|
||||
|
||||
class ModuleStats(AbstractModule):
|
||||
"""
|
||||
Module Statistics module for AIL framework
|
||||
"""
|
||||
|
||||
# Config Var
|
||||
MAX_SET_CARDINALITY = 8
|
||||
|
||||
|
||||
def __init__(self):
|
||||
|
||||
super(ModuleStats, self).__init__()
|
||||
|
||||
# Waiting time in secondes between to message proccessed
|
||||
self.pending_seconds = 20
|
||||
|
||||
# Sent to the logging a description of the module
|
||||
self.redis_logger.info("Makes statistics about valid URL")
|
||||
|
||||
# REDIS #
|
||||
self.r_serv_trend = ConfigLoader.ConfigLoader().get_redis_conn("ARDB_Trending")
|
||||
|
||||
def compute(self, message):
|
||||
|
||||
if len(message.split(';')) > 1:
|
||||
self.compute_most_posted(message)
|
||||
else:
|
||||
self.compute_provider_info(message)
|
||||
|
||||
|
||||
def get_date_range(self, num_day):
|
||||
curr_date = datetime.date.today()
|
||||
date = Date(str(curr_date.year)+str(curr_date.month).zfill(2)+str(curr_date.day).zfill(2))
|
||||
date_list = []
|
||||
|
||||
for i in range(0, num_day+1):
|
||||
date_list.append(date.substract_day(i))
|
||||
return date_list
|
||||
|
||||
|
||||
def compute_most_posted(self, message):
|
||||
module, num, keyword, paste_date = message.split(';')
|
||||
|
||||
redis_progression_name_set = 'top_'+ module +'_set_' + paste_date
|
||||
|
||||
# Add/Update in Redis
|
||||
self.r_serv_trend.hincrby(paste_date, module+'-'+keyword, int(num))
|
||||
|
||||
# Compute Most Posted
|
||||
date = self.get_date_range(0)[0]
|
||||
# check if this keyword is eligible for progression
|
||||
keyword_total_sum = 0
|
||||
|
||||
curr_value = self.r_serv_trend.hget(date, module+'-'+keyword)
|
||||
keyword_total_sum += int(curr_value) if curr_value is not None else 0
|
||||
|
||||
if self.r_serv_trend.zcard(redis_progression_name_set) < self.MAX_SET_CARDINALITY:
|
||||
self.r_serv_trend.zadd(redis_progression_name_set, float(keyword_total_sum), keyword)
|
||||
|
||||
else: # not in set
|
||||
member_set = self.r_serv_trend.zrangebyscore(redis_progression_name_set, '-inf', '+inf', withscores=True, start=0, num=1)
|
||||
# Member set is a list of (value, score) pairs
|
||||
if int(member_set[0][1]) < keyword_total_sum:
|
||||
#remove min from set and add the new one
|
||||
self.redis_logger.debug(module + ': adding ' +keyword+ '(' +str(keyword_total_sum)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')')
|
||||
self.r_serv_trend.zrem(redis_progression_name_set, member_set[0][0])
|
||||
self.r_serv_trend.zadd(redis_progression_name_set, float(keyword_total_sum), keyword)
|
||||
self.redis_logger.debug(redis_progression_name_set)
|
||||
|
||||
|
||||
def compute_provider_info(self, message):
|
||||
redis_all_provider = 'all_provider_set'
|
||||
|
||||
paste = Paste.Paste(message)
|
||||
|
||||
paste_baseName = paste.p_name.split('.')[0]
|
||||
paste_size = paste._get_p_size()
|
||||
paste_provider = paste.p_source
|
||||
paste_date = str(paste._get_p_date())
|
||||
redis_sum_size_set = 'top_size_set_' + paste_date
|
||||
redis_avg_size_name_set = 'top_avg_size_set_' + paste_date
|
||||
redis_providers_name_set = 'providers_set_' + paste_date
|
||||
|
||||
# Add/Update in Redis
|
||||
self.r_serv_trend.sadd(redis_all_provider, paste_provider)
|
||||
|
||||
num_paste = int(self.r_serv_trend.hincrby(paste_provider+'_num', paste_date, 1))
|
||||
sum_size = float(self.r_serv_trend.hincrbyfloat(paste_provider+'_size', paste_date, paste_size))
|
||||
new_avg = float(sum_size) / float(num_paste)
|
||||
self.r_serv_trend.hset(paste_provider +'_avg', paste_date, new_avg)
|
||||
|
||||
|
||||
#
|
||||
# Compute Most Posted
|
||||
#
|
||||
|
||||
# Size
|
||||
if self.r_serv_trend.zcard(redis_sum_size_set) < self.MAX_SET_CARDINALITY or self.r_serv_trend.zscore(redis_sum_size_set, paste_provider) != "nil":
|
||||
self.r_serv_trend.zadd(redis_sum_size_set, float(num_paste), paste_provider)
|
||||
self.r_serv_trend.zadd(redis_avg_size_name_set, float(new_avg), paste_provider)
|
||||
else: #set full capacity
|
||||
member_set = self.r_serv_trend.zrangebyscore(redis_sum_size_set, '-inf', '+inf', withscores=True, start=0, num=1)
|
||||
# Member set is a list of (value, score) pairs
|
||||
if float(member_set[0][1]) < new_avg:
|
||||
#remove min from set and add the new one
|
||||
self.redis_logger.debug('Size - adding ' +paste_provider+ '(' +str(new_avg)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')')
|
||||
self.r_serv_trend.zrem(redis_sum_size_set, member_set[0][0])
|
||||
self.r_serv_trend.zadd(redis_sum_size_set, float(sum_size), paste_provider)
|
||||
self.r_serv_trend.zrem(redis_avg_size_name_set, member_set[0][0])
|
||||
self.r_serv_trend.zadd(redis_avg_size_name_set, float(new_avg), paste_provider)
|
||||
|
||||
|
||||
# Num
|
||||
# if set not full or provider already present
|
||||
if self.r_serv_trend.zcard(redis_providers_name_set) < self.MAX_SET_CARDINALITY or self.r_serv_trend.zscore(redis_providers_name_set, paste_provider) != "nil":
|
||||
self.r_serv_trend.zadd(redis_providers_name_set, float(num_paste), paste_provider)
|
||||
else: #set at full capacity
|
||||
member_set = self.r_serv_trend.zrangebyscore(redis_providers_name_set, '-inf', '+inf', withscores=True, start=0, num=1)
|
||||
# Member set is a list of (value, score) pairs
|
||||
if int(member_set[0][1]) < num_paste:
|
||||
#remove min from set and add the new one
|
||||
self.redis_logger.debug('Num - adding ' +paste_provider+ '(' +str(num_paste)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')')
|
||||
self.r_serv_trend.zrem(member_set[0][0])
|
||||
self.r_serv_trend.zadd(redis_providers_name_set, float(num_paste), paste_provider)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
module = ModuleStats()
|
||||
module.run()
|
|
@ -10,9 +10,133 @@ sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
|
|||
import ConfigLoader
|
||||
|
||||
config_loader = ConfigLoader.ConfigLoader()
|
||||
r_serv_statistics = config_loader.get_redis_conn("ARDB_Statistics")
|
||||
r_statistics = config_loader.get_redis_conn("ARDB_Statistics")
|
||||
#r_serv_trend = ConfigLoader().get_redis_conn("ARDB_Trending")
|
||||
config_loader = None
|
||||
|
||||
PIE_CHART_MAX_CARDINALITY = 8
|
||||
|
||||
def incr_module_timeout_statistic(module_name):
|
||||
curr_date = datetime.date.today()
|
||||
r_serv_statistics.hincrby(curr_date.strftime("%Y%m%d"), 'paste_by_modules_timeout:{}'.format(module_name), 1)
|
||||
r_statistics.hincrby(curr_date.strftime("%Y%m%d"), 'paste_by_modules_timeout:{}'.format(module_name), 1)
|
||||
|
||||
def create_item_statistics(item_id, source, size):
|
||||
pass
|
||||
|
||||
def get_item_sources():
|
||||
return r_statistics.smembers('all_provider_set')
|
||||
|
||||
def get_nb_items_processed_by_day_and_source(date, source):
|
||||
nb_items = r_statistics.hget(f'{source}_num', date)
|
||||
if not nb_items:
|
||||
nb_items = 0
|
||||
return int(nb_items)
|
||||
|
||||
def get_items_total_size_by_day_and_source(date, source):
|
||||
total_size = r_statistics.hget(f'{source}_size', date)
|
||||
if not total_size:
|
||||
total_size = 0
|
||||
return float(total_size)
|
||||
|
||||
def get_items_av_size_by_day_and_source(date, source):
|
||||
av_size = r_statistics.hget(f'{source}_avg', date)
|
||||
if not av_size:
|
||||
av_size = 0
|
||||
return float(av_size)
|
||||
|
||||
def _create_item_stats_size_nb(date, source, num, size, avg):
|
||||
r_statistics.hset(f'{source}_num', date, num)
|
||||
r_statistics.hset(f'{source}_size', date, size)
|
||||
r_statistics.hset(f'{source}_avg', date, avg)
|
||||
|
||||
def get_item_stats_size_avg_by_date():
|
||||
return r_statistics.zrange(f'top_avg_size_set_{date}', 0, -1, withscores=True)
|
||||
|
||||
def get_item_stats_nb_by_date():
|
||||
return r_statistics.zrange(f'providers_set_{date}', 0, -1, withscores=True)
|
||||
|
||||
def _set_item_stats_nb_by_date(date, source):
|
||||
return r_statistics.zrange(f'providers_set_{date}', )
|
||||
|
||||
|
||||
# # TODO: load ZSET IN CACHE => FAST UPDATE
|
||||
def update_item_stats_size_nb(item_id, source, size, date):
|
||||
# Add/Update in Redis
|
||||
r_statistics.sadd('all_provider_set', source)
|
||||
|
||||
nb_items = int(r_statistics.hincrby(f'{source}_num', date, 1))
|
||||
sum_size = float(r_statistics.hincrbyfloat(f'{source}_size', date, size))
|
||||
new_avg = sum_size / nb_items
|
||||
r_statistics.hset(f'{source}_avg', date, new_avg)
|
||||
|
||||
# TOP Items Size
|
||||
if r_statistics.zcard(f'top_size_set_{date}') < PIE_CHART_MAX_CARDINALITY:
|
||||
r_statistics.zadd(f'top_avg_size_set_{date}', new_avg, source)
|
||||
else:
|
||||
member_set = r_statistics.zrangebyscore(f'top_avg_size_set_{date}', '-inf', '+inf', withscores=True, start=0, num=1)
|
||||
# Member set is a list of (value, score) pairs
|
||||
if float(member_set[0][1]) < new_avg:
|
||||
# remove min from set and add the new one
|
||||
r_statistics.zrem(f'top_avg_size_set_{date}', member_set[0][0])
|
||||
r_statistics.zadd(f'top_avg_size_set_{date}', new_avg, source)
|
||||
|
||||
# TOP Nb Items
|
||||
if r_statistics.zcard(f'providers_set_{date}') < PIE_CHART_MAX_CARDINALITY or r_statistics.zscore(f'providers_set_{date}', source) != None:
|
||||
r_statistics.zadd(f'providers_set_{date}', float(nb_items), source)
|
||||
else: # zset at full capacity
|
||||
member_set = r_statistics.zrangebyscore(f'providers_set_{date}', '-inf', '+inf', withscores=True, start=0, num=1)
|
||||
# Member set is a list of (value, score) pairs
|
||||
if int(member_set[0][1]) < nb_items:
|
||||
# remove min from set and add the new one
|
||||
r_statistics.zrem(member_set[0][0])
|
||||
r_statistics.zadd(f'providers_set_{date}', float(nb_items), source)
|
||||
|
||||
# keyword num
|
||||
|
||||
def _add_module_stats(module_name, total_sum, keyword, date):
|
||||
r_statistics.zadd(f'top_{module_name}_set_{date}', float(total_sum), keyword)
|
||||
|
||||
# # TODO: ONE HSET BY MODULE / CUSTOM STATS
|
||||
def update_module_stats(module_name, num, keyword, date):
|
||||
|
||||
# Add/Update in Redis
|
||||
r_statistics.hincrby(date, f'{module_name}-{keyword}', int(num)) # # TODO: RENAME ME !!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
# Compute Most Posted
|
||||
# check if this keyword is eligible for progression
|
||||
keyword_total_sum = 0
|
||||
|
||||
curr_value = r_statistics.hget(date, module+'-'+keyword)
|
||||
keyword_total_sum += int(curr_value) if curr_value is not None else 0
|
||||
|
||||
if r_statistics.zcard(f'top_{module_name}_set_{date}') < PIE_CHART_MAX_CARDINALITY:
|
||||
r_statistics.zadd(f'top_{module_name}_set_{date}', float(keyword_total_sum), keyword)
|
||||
else: # zset at full capacity
|
||||
member_set = r_statistics.zrangebyscore(f'top_{module_name}_set_{date}', '-inf', '+inf', withscores=True, start=0, num=1)
|
||||
# Member set is a list of (value, score) pairs
|
||||
if int(member_set[0][1]) < keyword_total_sum:
|
||||
#remove min from set and add the new one
|
||||
r_statistics.zrem(f'top_{module_name}_set_{date}', member_set[0][0])
|
||||
r_statistics.zadd(f'top_{module_name}_set_{date}', float(keyword_total_sum), keyword)
|
||||
|
||||
def get_module_tld_stats_by_tld_date(date, tld):
|
||||
nb_tld = r_statistics.hget(f'credential_by_tld:{date}', tld)
|
||||
if not nb_tld:
|
||||
nb_tld = 0
|
||||
return int(nb_tld)
|
||||
|
||||
def get_module_tld_stats_by_date(module, date):
|
||||
return r_statistics.hgetall(f'{module}_by_tld:{date}')
|
||||
|
||||
def add_module_tld_stats_by_date(module, date, tld, nb):
|
||||
r_statistics.hincrby(f'{module}_by_tld:{date}', tld, int(nb))
|
||||
|
||||
|
||||
def get_iban_country_stats_by_date(date):
|
||||
return r_statistics.hgetall(f'iban_by_country:{date}')
|
||||
|
||||
def add_iban_country_stats_by_date(date, tld, nb):
|
||||
r_statistics.hincrby(f'iban_by_country:{date}', tld, int(nb))
|
||||
|
||||
# r_stats.zincrby('module:Global:incomplete_file', datetime.datetime.now().strftime('%Y%m%d'), 1)
|
||||
# r_stats.zincrby('module:Global:invalid_file', datetime.datetime.now().strftime('%Y%m%d'), 1)
|
||||
|
|
|
@ -616,6 +616,12 @@ def api_set_nb_crawlers_to_launch(dict_splash_name):
|
|||
else:
|
||||
return ({'error':'invalid input'}, 400)
|
||||
|
||||
def get_domains_blacklist(domain_type):
|
||||
return r_serv_onion.smembers(f'blacklist_{domain_type}')
|
||||
|
||||
def add_domain_blacklist(domain_type, domain):
|
||||
r_serv_onion.sadd(f'blacklist_{domain_type}', domain)
|
||||
|
||||
##-- CRAWLER GLOBAL --##
|
||||
|
||||
#### AUTOMATIC CRAWLER ####
|
||||
|
|
|
@ -42,6 +42,7 @@ from modules.abstract_module import AbstractModule
|
|||
from packages.Item import Item
|
||||
from lib import ConfigLoader
|
||||
from lib import regex_helper
|
||||
from lib import Statistics
|
||||
|
||||
|
||||
class Credential(AbstractModule):
|
||||
|
@ -96,6 +97,7 @@ class Credential(AbstractModule):
|
|||
|
||||
item_content = item.get_content()
|
||||
|
||||
# TODO: USE SETS
|
||||
# Extract all credentials
|
||||
all_credentials = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_cred, item.get_id(), item_content, max_time=self.max_execution_time)
|
||||
|
||||
|
@ -117,9 +119,6 @@ class Credential(AbstractModule):
|
|||
print(f"========> Found more than 10 credentials in this file : {item.get_id()}")
|
||||
self.redis_logger.warning(to_print)
|
||||
|
||||
# Send to duplicate
|
||||
self.send_message_to_queue(item.get_id(), 'Duplicate')
|
||||
|
||||
msg = f'infoleak:automatic-detection="credential";{item.get_id()}'
|
||||
self.send_message_to_queue(msg, 'Tags')
|
||||
|
||||
|
@ -158,6 +157,7 @@ class Credential(AbstractModule):
|
|||
print(f"=======> Probably on : {discovered_sites}")
|
||||
|
||||
date = datetime.now().strftime("%Y%m")
|
||||
nb_tlds = {}
|
||||
for cred in all_credentials:
|
||||
maildomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", cred.lower())[0]
|
||||
self.faup.decode(maildomains)
|
||||
|
@ -167,7 +167,9 @@ class Credential(AbstractModule):
|
|||
tld = tld.decode()
|
||||
except:
|
||||
pass
|
||||
self.server_statistics.hincrby('credential_by_tld:'+date, tld, 1)
|
||||
nb_tlds[tld] = nb_tlds.get(tld, 0) + 1
|
||||
for tld in nb_tlds:
|
||||
Statistics.add_module_tld_stats_by_date('credential', date, tld, nb_tlds[tld])
|
||||
else:
|
||||
self.redis_logger.info(to_print)
|
||||
print(f'found {nb_cred} credentials')
|
||||
|
|
|
@ -75,9 +75,6 @@ class CreditCards(AbstractModule):
|
|||
if (len(creditcard_set) > 0):
|
||||
self.redis_logger.warning(f'{to_print}Checked {len(creditcard_set)} valid number(s);{item.get_id()}')
|
||||
|
||||
#Send to duplicate
|
||||
self.send_message_to_queue(item.get_id(), 'Duplicate')
|
||||
|
||||
msg = f'infoleak:automatic-detection="credit-card";{item.get_id()}'
|
||||
self.send_message_to_queue(msg, 'Tags')
|
||||
|
||||
|
|
177
bin/modules/Mail.py
Executable file
177
bin/modules/Mail.py
Executable file
|
@ -0,0 +1,177 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
"""
|
||||
The Mails Module
|
||||
======================
|
||||
|
||||
This module is consuming the Redis-list created by the Categ module.
|
||||
|
||||
It apply mail regexes on item content and warn if above a threshold.
|
||||
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import redis
|
||||
import sys
|
||||
import time
|
||||
import datetime
|
||||
|
||||
import dns.resolver
|
||||
import dns.exception
|
||||
|
||||
from pyfaup.faup import Faup
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages #
|
||||
##################################
|
||||
from modules.abstract_module import AbstractModule
|
||||
from lib.objects.Items import Item
|
||||
from lib.ConfigLoader import ConfigLoader
|
||||
from lib import Statistics
|
||||
|
||||
|
||||
class Mail(AbstractModule):
    """
    Mail module for the AIL framework.

    Extracts e-mail addresses from item content, validates their domains by
    resolving MX records (with a one-day Redis cache), feeds per-domain and
    per-TLD statistics, and tags items that contain more than
    ``mail_threshold`` valid addresses.
    """

    def __init__(self):
        super(Mail, self).__init__()

        config_loader = ConfigLoader()
        # Short-lived cache remembering recently validated MX domains.
        self.r_cache = config_loader.get_redis_conn("Redis_Cache")

        # DNS server used for the MX lookups.
        self.dns_server = config_loader.get_config_str('Mail', 'dns')

        self.faup = Faup()

        # Numbers of Mails needed to Tags
        self.mail_threshold = 10

        self.regex_timeout = 30
        # Raw string: identical pattern value, avoids the '\.' invalid-escape warning.
        self.email_regex = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}"
        re.compile(self.email_regex)

    def is_mxdomain_in_cache(self, mxdomain):
        """Return True if ``mxdomain`` was validated within the last day."""
        return self.r_cache.exists(f'mxdomain:{mxdomain}')

    def save_mxdomain_in_cache(self, mxdomain):
        """Cache ``mxdomain`` as valid for one day.

        Fix: redis-py ``setex`` takes (name, time, value); the previous call
        passed (name, 1, timedelta(days=1)), so entries expired after one
        second with the timedelta stored as the value.
        """
        self.r_cache.setex(f'mxdomain:{mxdomain}', datetime.timedelta(days=1), 1)

    def check_mx_record(self, set_mxdomains):
        """Check if emails MX domains are responding.

        :param set_mxdomains: -- (iterable) e-mail domains to check
        :return: (list) domains with a responding and valid MX record
        """
        resolver = dns.resolver.Resolver()
        resolver.nameservers = [self.dns_server]
        resolver.timeout = 5.0
        # NOTE(review): lifetime (2s) < timeout (5s) means the whole query is
        # aborted before a single try could finish waiting — confirm intended.
        resolver.lifetime = 2.0

        valid_mxdomain = []
        for mxdomain in set_mxdomains:
            # Cache hit: skip the DNS query entirely.
            if self.is_mxdomain_in_cache(mxdomain):
                valid_mxdomain.append(mxdomain)
            else:
                # DNS resolution; every failure mode is logged and the domain
                # simply left out of the result (best-effort by design).
                try:
                    answers = resolver.query(mxdomain, rdtype=dns.rdatatype.MX)
                    if answers:
                        self.save_mxdomain_in_cache(mxdomain)
                        valid_mxdomain.append(mxdomain)
                except dns.resolver.NoNameservers:
                    self.redis_logger.debug('NoNameserver, No non-broken nameservers are available to answer the query.')
                    print('NoNameserver, No non-broken nameservers are available to answer the query.')
                except dns.resolver.NoAnswer:
                    self.redis_logger.debug('NoAnswer, The response did not contain an answer to the question.')
                    print('NoAnswer, The response did not contain an answer to the question.')
                except dns.name.EmptyLabel:
                    self.redis_logger.debug('SyntaxError: EmptyLabel')
                    print('SyntaxError: EmptyLabel')
                except dns.resolver.NXDOMAIN:
                    #save_mxdomain_in_cache(mxdomain)
                    self.redis_logger.debug('The query name does not exist.')
                    print('The query name does not exist.')
                except dns.name.LabelTooLong:
                    self.redis_logger.debug('The Label is too long')
                    print('The Label is too long')
                except dns.exception.Timeout:
                    print('dns timeout')
                    #save_mxdomain_in_cache(mxdomain)
                except Exception as e:
                    print(e)
        return valid_mxdomain

    # # TODO: sanityze mails
    def compute(self, message):
        """Extract mails from an item, validate MX domains, update stats and tags.

        ``message`` format: '<item_id> <score>'.
        """
        item_id, score = message.split()
        item = Item(item_id)
        item_date = item.get_date()

        # Group extracted addresses by their (lower-cased) domain.
        mails = self.regex_findall(self.email_regex, item_id, item.get_content())
        mxdomains_email = {}
        for mail in mails:
            mxdomain = mail.rsplit('@', 1)[1].lower()
            if not mxdomain in mxdomains_email:
                mxdomains_email[mxdomain] = set()
            mxdomains_email[mxdomain].add(mail)

        ## TODO: add MAIL trackers

        valid_mx = self.check_mx_record(mxdomains_email.keys())
        print(f'valid_mx: {valid_mx}')
        mx_tlds = {}
        num_valid_email = 0
        for domain_mx in valid_mx:
            nb_mails = len(mxdomains_email[domain_mx])
            num_valid_email += nb_mails

            # Create domain_mail stats
            msg = f'mail;{nb_mails};{domain_mx};{item_date}'
            self.send_message_to_queue(msg, 'ModuleStats')

            # Create country stats
            self.faup.decode(domain_mx)
            tld = self.faup.get()['tld']
            try:
                tld = tld.decode()
            except:
                pass
            mx_tlds[tld] = mx_tlds.get(tld, 0) + nb_mails
        for tld in mx_tlds:
            Statistics.add_module_tld_stats_by_date('mail', item_date, tld, mx_tlds[tld])

        # Fix: build the summary once so the 'else' branch no longer logs an
        # undefined or stale 'msg' (previously it reused the last ModuleStats
        # queue message, or raised NameError when no valid mail was found).
        summary = f'Mails;{item.get_source()};{item_date};{item.get_basename()};Checked {num_valid_email} e-mail(s);{item_id}'
        if num_valid_email > self.mail_threshold:
            print(f'{item_id} Checked {num_valid_email} e-mail(s)')
            self.redis_logger.warning(summary)
            # Tags
            msg = f'infoleak:automatic-detection="mail";{item_id}'
            self.send_message_to_queue(msg, 'Tags')
        else:
            self.redis_logger.info(summary)
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Entry point: instantiate the module and start its consume loop.
    mail_module = Mail()
    # mail_module.compute('tests/2021/01/01/mails.gz 50')  # manual debug call
    mail_module.run()
|
54
bin/modules/ModuleStats.py
Executable file
54
bin/modules/ModuleStats.py
Executable file
|
@ -0,0 +1,54 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
"""
|
||||
This module makes statistics for some modules and providers
|
||||
|
||||
"""
|
||||
|
||||
##################################
|
||||
# Import External packages #
|
||||
##################################
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages #
|
||||
##################################
|
||||
from modules.abstract_module import AbstractModule
|
||||
from lib.objects.Items import Item
|
||||
from lib import Statistics
|
||||
|
||||
|
||||
class ModuleStats(AbstractModule):
    """
    Module Statistics module for AIL framework

    Consumes two message shapes:
      - 'module_name;num;keyword;date'  -> per-module keyword stats
      - '<item_id>'                     -> per-item size/provider stats
    """

    def __init__(self):

        super(ModuleStats, self).__init__()

        # Waiting time in seconds between two processed messages
        self.pending_seconds = 20

    def compute(self, message):

        # MODULE STATS: 'module_name;num;keyword;date'
        if len(message.split(';')) > 1:
            module_name, num, keyword, date = message.split(';')
            # Fix: was 'Statisticsupdate_module_stats' (missing dot -> NameError).
            Statistics.update_module_stats(module_name, num, keyword, date)
        # ITEM STATS: the message is the item id itself
        else:
            # Fix: 'item_id' was used without being defined in this branch.
            item_id = message
            item = Item(item_id)
            source = item.get_source()
            date = item.get_date()
            size = item.get_size()
            Statistics.update_item_stats_size_nb(item_id, source, size, date)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Entry point: instantiate the module and start its consume loop.
    stats_module = ModuleStats()
    stats_module.run()
|
|
@ -66,7 +66,7 @@ publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Urls,Redis_Credential,R
|
|||
|
||||
[CreditCards]
|
||||
subscribe = Redis_CreditCards
|
||||
publish = Redis_ModuleStats,Redis_Tags
|
||||
publish = Redis_Tags
|
||||
|
||||
[BankAccount]
|
||||
subscribe = Redis_Global
|
||||
|
|
Loading…
Reference in a new issue