mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-10 08:38:28 +00:00
feat: module factorization
This commit is contained in:
parent
99e5b3d4f1
commit
96a30170e3
11 changed files with 1076 additions and 841 deletions
108
bin/Categ.py
108
bin/Categ.py
|
@ -36,68 +36,96 @@ Requirements
|
||||||
*Need the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly.
|
*Need the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
##################################
|
||||||
|
# Import External packages
|
||||||
|
##################################
|
||||||
import os
|
import os
|
||||||
import argparse
|
import argparse
|
||||||
import time
|
import time
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
##################################
|
||||||
|
# Import Project packages
|
||||||
|
##################################
|
||||||
|
from module.abstract_module import AbstractModule
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
from packages import Paste
|
from packages import Paste
|
||||||
|
|
||||||
from Helper import Process
|
from Helper import Process
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
publisher.port = 6380
|
|
||||||
publisher.channel = "Script"
|
|
||||||
|
|
||||||
config_section = 'Categ'
|
class Categ(AbstractModule):
|
||||||
|
"""
|
||||||
|
Categ module for AIL framework
|
||||||
|
"""
|
||||||
|
|
||||||
p = Process(config_section)
|
def __init__(self):
|
||||||
matchingThreshold = p.config.getint("Categ", "matchingThreshold")
|
"""
|
||||||
|
Init Categ
|
||||||
|
"""
|
||||||
|
super(Categ, self).__init__()
|
||||||
|
|
||||||
# SCRIPT PARSER #
|
self.matchingThreshold = self.process.config.getint("Categ", "matchingThreshold")
|
||||||
parser = argparse.ArgumentParser(description='Start Categ module on files.')
|
|
||||||
|
|
||||||
parser.add_argument(
|
# SCRIPT PARSER #
|
||||||
'-d', type=str, default="../files/",
|
parser = argparse.ArgumentParser(description='Start Categ module on files.')
|
||||||
help='Path to the directory containing the category files.',
|
|
||||||
action='store')
|
|
||||||
|
|
||||||
args = parser.parse_args()
|
parser.add_argument(
|
||||||
|
'-d', type=str, default="../files/",
|
||||||
|
help='Path to the directory containing the category files.',
|
||||||
|
action='store')
|
||||||
|
|
||||||
# FUNCTIONS #
|
args = parser.parse_args()
|
||||||
publisher.info("Script Categ started")
|
|
||||||
|
|
||||||
categories = ['CreditCards', 'Mail', 'Onion', 'Web', 'Credential', 'Cve', 'ApiKey']
|
self.redis_logger.info("Script Categ started")
|
||||||
tmp_dict = {}
|
|
||||||
for filename in categories:
|
|
||||||
bname = os.path.basename(filename)
|
|
||||||
tmp_dict[bname] = []
|
|
||||||
with open(os.path.join(args.d, filename), 'r') as f:
|
|
||||||
patterns = [r'%s' % ( re.escape(s.strip()) ) for s in f]
|
|
||||||
tmp_dict[bname] = re.compile('|'.join(patterns), re.IGNORECASE)
|
|
||||||
|
|
||||||
prec_filename = None
|
categories = ['CreditCards', 'Mail', 'Onion', 'Web', 'Credential', 'Cve', 'ApiKey']
|
||||||
|
tmp_dict = {}
|
||||||
|
for filename in categories:
|
||||||
|
bname = os.path.basename(filename)
|
||||||
|
tmp_dict[bname] = []
|
||||||
|
with open(os.path.join(args.d, filename), 'r') as f:
|
||||||
|
patterns = [r'%s' % ( re.escape(s.strip()) ) for s in f]
|
||||||
|
tmp_dict[bname] = re.compile('|'.join(patterns), re.IGNORECASE)
|
||||||
|
|
||||||
while True:
|
self.categ_items = tmp_dict.items()
|
||||||
filename = p.get_from_set()
|
|
||||||
if filename is None:
|
|
||||||
publisher.debug("Script Categ is Idling 10s")
|
|
||||||
print('Sleeping')
|
|
||||||
time.sleep(10)
|
|
||||||
continue
|
|
||||||
|
|
||||||
paste = Paste.Paste(filename)
|
prec_filename = None
|
||||||
|
|
||||||
|
|
||||||
|
def compute(self, message):
|
||||||
|
# Cast message as paste
|
||||||
|
paste = Paste.Paste(message)
|
||||||
|
# Get paste content
|
||||||
content = paste.get_p_content()
|
content = paste.get_p_content()
|
||||||
|
|
||||||
for categ, pattern in tmp_dict.items():
|
# init categories found
|
||||||
|
is_categ_found = False
|
||||||
|
|
||||||
|
# Search for pattern categories in paste content
|
||||||
|
for categ, pattern in self.categ_items:
|
||||||
|
|
||||||
found = set(re.findall(pattern, content))
|
found = set(re.findall(pattern, content))
|
||||||
if len(found) >= matchingThreshold:
|
lenfound = len(found)
|
||||||
msg = '{} {}'.format(paste.p_rel_path, len(found))
|
if lenfound >= self.matchingThreshold:
|
||||||
|
is_categ_found = True
|
||||||
|
msg = '{} {}'.format(paste.p_rel_path, lenfound)
|
||||||
|
|
||||||
print(msg, categ)
|
self.redis_logger.debug('%s;%s %s'%(self.module_name, msg, categ))
|
||||||
p.populate_set_out(msg, categ)
|
|
||||||
|
# Export message to categ queue
|
||||||
|
self.process.populate_set_out(msg, categ)
|
||||||
|
|
||||||
publisher.info(
|
self.redis_logger.info(
|
||||||
'Categ;{};{};{};Detected {} as {};{}'.format(
|
'Categ;{};{};{};Detected {} as {};{}'.format(
|
||||||
paste.p_source, paste.p_date, paste.p_name,
|
paste.p_source, paste.p_date, paste.p_name,
|
||||||
len(found), categ, paste.p_rel_path))
|
lenfound, categ, paste.p_rel_path))
|
||||||
|
|
||||||
|
if not is_categ_found:
|
||||||
|
self.redis_logger.debug('No %s found in this paste: %s'%(self.module_name, paste.p_name))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
|
||||||
|
module = Categ()
|
||||||
|
module.run()
|
||||||
|
|
223
bin/Indexer.py
223
bin/Indexer.py
|
@ -9,131 +9,148 @@ The ZMQ_Sub_Indexer modules is fetching the list of files to be processed
|
||||||
and index each file with a full-text indexer (Whoosh until now).
|
and index each file with a full-text indexer (Whoosh until now).
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
##################################
|
||||||
|
# Import External packages
|
||||||
|
##################################
|
||||||
import time
|
import time
|
||||||
from packages import Paste
|
|
||||||
from pubsublogger import publisher
|
|
||||||
|
|
||||||
from whoosh.index import create_in, exists_in, open_dir
|
|
||||||
from whoosh.fields import Schema, TEXT, ID
|
|
||||||
import shutil
|
import shutil
|
||||||
import os
|
import os
|
||||||
from os.path import join, getsize
|
from os.path import join, getsize
|
||||||
|
from whoosh.index import create_in, exists_in, open_dir
|
||||||
|
from whoosh.fields import Schema, TEXT, ID
|
||||||
|
|
||||||
|
|
||||||
|
##################################
|
||||||
|
# Import Project packages
|
||||||
|
##################################
|
||||||
|
from module.abstract_module import AbstractModule
|
||||||
|
from packages import Paste
|
||||||
from Helper import Process
|
from Helper import Process
|
||||||
|
|
||||||
# Config variable
|
|
||||||
TIME_WAIT = 60*15 #sec
|
|
||||||
|
|
||||||
# return in bytes
|
class Indexer(AbstractModule):
|
||||||
def check_index_size(baseindexpath, indexname):
|
"""
|
||||||
the_index_name = join(baseindexpath, indexname)
|
Indexer module for AIL framework
|
||||||
cur_sum = 0
|
"""
|
||||||
for root, dirs, files in os.walk(the_index_name):
|
|
||||||
cur_sum += sum(getsize(join(root, name)) for name in files)
|
|
||||||
return cur_sum
|
|
||||||
|
|
||||||
def move_index_into_old_index_folder(baseindexpath):
|
# Time to wait in seconds between two index's size variable compute
|
||||||
for cur_file in os.listdir(baseindexpath):
|
TIME_WAIT = 60*15 # sec
|
||||||
if not cur_file == "old_index":
|
|
||||||
shutil.move(join(baseindexpath, cur_file), join(join(baseindexpath, "old_index"), cur_file))
|
def __init__(self):
|
||||||
|
"""
|
||||||
|
Init Instance
|
||||||
|
"""
|
||||||
|
super(Indexer, self).__init__()
|
||||||
|
|
||||||
|
# Indexer configuration - index dir and schema setup
|
||||||
|
self.baseindexpath = join(os.environ['AIL_HOME'],
|
||||||
|
self.process.config.get("Indexer", "path"))
|
||||||
|
self.indexRegister_path = join(os.environ['AIL_HOME'],
|
||||||
|
self.process.config.get("Indexer", "register"))
|
||||||
|
self.indexertype = self.process.config.get("Indexer", "type")
|
||||||
|
self.INDEX_SIZE_THRESHOLD = self.process.config.getint(
|
||||||
|
"Indexer", "index_max_size")
|
||||||
|
|
||||||
|
self.indexname = None
|
||||||
|
self.schema = None
|
||||||
|
self.ix = None
|
||||||
|
|
||||||
|
if self.indexertype == "whoosh":
|
||||||
|
self.schema = Schema(title=TEXT(stored=True), path=ID(stored=True,
|
||||||
|
unique=True),
|
||||||
|
content=TEXT)
|
||||||
|
if not os.path.exists(self.baseindexpath):
|
||||||
|
os.mkdir(self.baseindexpath)
|
||||||
|
|
||||||
|
# create the index register if not present
|
||||||
|
time_now = int(time.time())
|
||||||
|
if not os.path.isfile(self.indexRegister_path): # index are not organised
|
||||||
|
self.redis_logger.debug("Indexes are not organized")
|
||||||
|
self.redis_logger.debug(
|
||||||
|
"moving all files in folder 'old_index' ")
|
||||||
|
# move all files to old_index folder
|
||||||
|
self.move_index_into_old_index_folder()
|
||||||
|
self.redis_logger.debug("Creating new index")
|
||||||
|
# create all_index.txt
|
||||||
|
with open(self.indexRegister_path, 'w') as f:
|
||||||
|
f.write(str(time_now))
|
||||||
|
# create dir
|
||||||
|
os.mkdir(join(self.baseindexpath, str(time_now)))
|
||||||
|
|
||||||
|
with open(self.indexRegister_path, "r") as f:
|
||||||
|
allIndex = f.read()
|
||||||
|
allIndex = allIndex.split() # format [time1\ntime2]
|
||||||
|
allIndex.sort()
|
||||||
|
|
||||||
|
try:
|
||||||
|
self.indexname = allIndex[-1].strip('\n\r')
|
||||||
|
except IndexError as e:
|
||||||
|
self.indexname = time_now
|
||||||
|
|
||||||
|
self.indexpath = join(self.baseindexpath, str(self.indexname))
|
||||||
|
if not exists_in(self.indexpath):
|
||||||
|
self.ix = create_in(self.indexpath, self.schema)
|
||||||
|
else:
|
||||||
|
self.ix = open_dir(self.indexpath)
|
||||||
|
|
||||||
|
self.last_refresh = time_now
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
def compute(self, message):
|
||||||
publisher.port = 6380
|
|
||||||
publisher.channel = "Script"
|
|
||||||
|
|
||||||
config_section = 'Indexer'
|
|
||||||
|
|
||||||
p = Process(config_section)
|
|
||||||
|
|
||||||
# Indexer configuration - index dir and schema setup
|
|
||||||
baseindexpath = join(os.environ['AIL_HOME'],
|
|
||||||
p.config.get("Indexer", "path"))
|
|
||||||
indexRegister_path = join(os.environ['AIL_HOME'],
|
|
||||||
p.config.get("Indexer", "register"))
|
|
||||||
indexertype = p.config.get("Indexer", "type")
|
|
||||||
INDEX_SIZE_THRESHOLD = int(p.config.get("Indexer", "index_max_size"))
|
|
||||||
if indexertype == "whoosh":
|
|
||||||
schema = Schema(title=TEXT(stored=True), path=ID(stored=True,
|
|
||||||
unique=True),
|
|
||||||
content=TEXT)
|
|
||||||
if not os.path.exists(baseindexpath):
|
|
||||||
os.mkdir(baseindexpath)
|
|
||||||
|
|
||||||
# create the index register if not present
|
|
||||||
time_now = int(time.time())
|
|
||||||
if not os.path.isfile(indexRegister_path): #index are not organised
|
|
||||||
print("Indexes are not organized")
|
|
||||||
print("moving all files in folder 'old_index' ")
|
|
||||||
#move all files to old_index folder
|
|
||||||
move_index_into_old_index_folder(baseindexpath)
|
|
||||||
print("Creating new index")
|
|
||||||
#create all_index.txt
|
|
||||||
with open(indexRegister_path, 'w') as f:
|
|
||||||
f.write(str(time_now))
|
|
||||||
#create dir
|
|
||||||
os.mkdir(join(baseindexpath, str(time_now)))
|
|
||||||
|
|
||||||
with open(indexRegister_path, "r") as f:
|
|
||||||
allIndex = f.read()
|
|
||||||
allIndex = allIndex.split() # format [time1\ntime2]
|
|
||||||
allIndex.sort()
|
|
||||||
|
|
||||||
try:
|
|
||||||
indexname = allIndex[-1].strip('\n\r')
|
|
||||||
except IndexError as e:
|
|
||||||
indexname = time_now
|
|
||||||
|
|
||||||
indexpath = join(baseindexpath, str(indexname))
|
|
||||||
if not exists_in(indexpath):
|
|
||||||
ix = create_in(indexpath, schema)
|
|
||||||
else:
|
|
||||||
ix = open_dir(indexpath)
|
|
||||||
|
|
||||||
last_refresh = time_now
|
|
||||||
|
|
||||||
# LOGGING #
|
|
||||||
publisher.info("ZMQ Indexer is Running")
|
|
||||||
|
|
||||||
while True:
|
|
||||||
try:
|
try:
|
||||||
message = p.get_from_set()
|
PST = Paste.Paste(message)
|
||||||
|
|
||||||
if message is not None:
|
|
||||||
PST = Paste.Paste(message)
|
|
||||||
else:
|
|
||||||
publisher.debug("Script Indexer is idling 1s")
|
|
||||||
time.sleep(1)
|
|
||||||
continue
|
|
||||||
docpath = message.split(" ", -1)[-1]
|
docpath = message.split(" ", -1)[-1]
|
||||||
paste = PST.get_p_content()
|
paste = PST.get_p_content()
|
||||||
print("Indexing - " + indexname + " :", docpath)
|
self.redis_logger.debug("Indexing - " + self.indexname + " :", docpath)
|
||||||
|
|
||||||
|
# Avoid calculating the index's size at each message
|
||||||
#avoid calculating the index's size at each message
|
if(time.time() - self.last_refresh > self.TIME_WAIT):
|
||||||
if( time.time() - last_refresh > TIME_WAIT):
|
self.last_refresh = time.time()
|
||||||
last_refresh = time.time()
|
if self.check_index_size() >= self.INDEX_SIZE_THRESHOLD*(1000*1000):
|
||||||
if check_index_size(baseindexpath, indexname) >= INDEX_SIZE_THRESHOLD*(1000*1000):
|
|
||||||
timestamp = int(time.time())
|
timestamp = int(time.time())
|
||||||
print("Creating new index", timestamp)
|
self.redis_logger.debug("Creating new index", timestamp)
|
||||||
indexpath = join(baseindexpath, str(timestamp))
|
self.indexpath = join(self.baseindexpath, str(timestamp))
|
||||||
indexname = str(timestamp)
|
self.indexname = str(timestamp)
|
||||||
#update all_index
|
# update all_index
|
||||||
with open(indexRegister_path, "a") as f:
|
with open(self.indexRegister_path, "a") as f:
|
||||||
f.write('\n'+str(timestamp))
|
f.write('\n'+str(timestamp))
|
||||||
#create new dir
|
# create new dir
|
||||||
os.mkdir(indexpath)
|
os.mkdir(self.indexpath)
|
||||||
ix = create_in(indexpath, schema)
|
self.ix = create_in(self.indexpath, self.schema)
|
||||||
|
|
||||||
|
if self.indexertype == "whoosh":
|
||||||
if indexertype == "whoosh":
|
indexwriter = self.ix.writer()
|
||||||
indexwriter = ix.writer()
|
|
||||||
indexwriter.update_document(
|
indexwriter.update_document(
|
||||||
title=docpath,
|
title=docpath,
|
||||||
path=docpath,
|
path=docpath,
|
||||||
content=paste)
|
content=paste)
|
||||||
indexwriter.commit()
|
indexwriter.commit()
|
||||||
|
|
||||||
except IOError:
|
except IOError:
|
||||||
print("CRC Checksum Failed on :", PST.p_path)
|
self.redis_logger.debug("CRC Checksum Failed on :", PST.p_path)
|
||||||
publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
|
self.redis_logger.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
|
||||||
PST.p_source, PST.p_date, PST.p_name))
|
PST.p_source, PST.p_date, PST.p_name))
|
||||||
|
|
||||||
|
def check_index_size(self):
|
||||||
|
"""
|
||||||
|
return in bytes
|
||||||
|
"""
|
||||||
|
the_index_name = join(self.baseindexpath, self.indexname)
|
||||||
|
cur_sum = 0
|
||||||
|
for root, dirs, files in os.walk(the_index_name):
|
||||||
|
cur_sum += sum(getsize(join(root, name)) for name in files)
|
||||||
|
|
||||||
|
return cur_sum
|
||||||
|
|
||||||
|
|
||||||
|
def move_index_into_old_index_folder(self):
|
||||||
|
for cur_file in os.listdir(self.baseindexpath):
|
||||||
|
if not cur_file == "old_index":
|
||||||
|
shutil.move(join(self.baseindexpath, cur_file), join(
|
||||||
|
join(self.baseindexpath, "old_index"), cur_file))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
|
||||||
|
module = Indexer()
|
||||||
|
module.run()
|
||||||
|
|
250
bin/Keys.py
250
bin/Keys.py
|
@ -12,160 +12,170 @@ RSA private key, certificate messages
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
##################################
|
||||||
|
# Import External packages
|
||||||
|
##################################
|
||||||
import time
|
import time
|
||||||
|
from enum import Enum
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
#from bin.packages import Paste
|
|
||||||
#from bin.Helper import Process
|
|
||||||
|
|
||||||
|
##################################
|
||||||
|
# Import Project packages
|
||||||
|
##################################
|
||||||
|
from module.abstract_module import AbstractModule
|
||||||
from packages import Paste
|
from packages import Paste
|
||||||
from Helper import Process
|
from Helper import Process
|
||||||
|
|
||||||
|
|
||||||
def search_key(paste):
|
class KeyEnum(Enum):
|
||||||
content = paste.get_p_content()
|
PGP_MESSAGE = '-----BEGIN PGP MESSAGE-----'
|
||||||
find = False
|
PGP_PUBLIC_KEY_BLOCK = '-----BEGIN PGP PUBLIC KEY BLOCK-----'
|
||||||
get_pgp_content = False
|
PGP_PRIVATE_KEY_BLOCK = '-----BEGIN PGP PRIVATE KEY BLOCK-----'
|
||||||
if '-----BEGIN PGP MESSAGE-----' in content:
|
PGP_SIGNATURE = '-----BEGIN PGP SIGNATURE-----'
|
||||||
publisher.warning('{} has a PGP enc message'.format(paste.p_name))
|
CERTIFICATE = '-----BEGIN CERTIFICATE-----'
|
||||||
|
PUBLIC_KEY = '-----BEGIN PUBLIC KEY-----'
|
||||||
msg = 'infoleak:automatic-detection="pgp-message";{}'.format(message)
|
PRIVATE_KEY = '-----BEGIN PRIVATE KEY-----'
|
||||||
p.populate_set_out(msg, 'Tags')
|
ENCRYPTED_PRIVATE_KEY = '-----BEGIN ENCRYPTED PRIVATE KEY-----'
|
||||||
get_pgp_content = True
|
OPENSSH_PRIVATE_KEY = '-----BEGIN OPENSSH PRIVATE KEY-----'
|
||||||
find = True
|
SSH2_ENCRYPTED_PRIVATE_KEY = '---- BEGIN SSH2 ENCRYPTED PRIVATE KEY ----'
|
||||||
|
OPENVPN_STATIC_KEY_V1 = '-----BEGIN OpenVPN Static key V1-----'
|
||||||
if '-----BEGIN PGP PUBLIC KEY BLOCK-----' in content:
|
RSA_PRIVATE_KEY = '-----BEGIN RSA PRIVATE KEY-----'
|
||||||
msg = 'infoleak:automatic-detection="pgp-public-key-block";{}'.format(message)
|
DSA_PRIVATE_KEY = '-----BEGIN DSA PRIVATE KEY-----'
|
||||||
p.populate_set_out(msg, 'Tags')
|
EC_PRIVATE_KEY = '-----BEGIN EC PRIVATE KEY-----'
|
||||||
get_pgp_content = True
|
|
||||||
|
|
||||||
if '-----BEGIN PGP SIGNATURE-----' in content:
|
|
||||||
msg = 'infoleak:automatic-detection="pgp-signature";{}'.format(message)
|
|
||||||
p.populate_set_out(msg, 'Tags')
|
|
||||||
get_pgp_content = True
|
|
||||||
|
|
||||||
|
|
||||||
if '-----BEGIN CERTIFICATE-----' in content:
|
class Keys(AbstractModule):
|
||||||
publisher.warning('{} has a certificate message'.format(paste.p_name))
|
"""
|
||||||
|
Keys module for AIL framework
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
super(Keys, self).__init__()
|
||||||
|
|
||||||
msg = 'infoleak:automatic-detection="certificate";{}'.format(message)
|
# Waiting time in secondes between to message proccessed
|
||||||
p.populate_set_out(msg, 'Tags')
|
self.pending_seconds = 1
|
||||||
find = True
|
|
||||||
|
|
||||||
if '-----BEGIN RSA PRIVATE KEY-----' in content:
|
|
||||||
publisher.warning('{} has a RSA private key message'.format(paste.p_name))
|
|
||||||
print('rsa private key message found')
|
|
||||||
|
|
||||||
msg = 'infoleak:automatic-detection="rsa-private-key";{}'.format(message)
|
def compute(self, message):
|
||||||
p.populate_set_out(msg, 'Tags')
|
paste = Paste.Paste(message)
|
||||||
find = True
|
content = paste.get_p_content()
|
||||||
|
|
||||||
if '-----BEGIN PRIVATE KEY-----' in content:
|
find = False
|
||||||
publisher.warning('{} has a private key message'.format(paste.p_name))
|
get_pgp_content = False
|
||||||
print('private key message found')
|
|
||||||
|
|
||||||
msg = 'infoleak:automatic-detection="private-key";{}'.format(message)
|
if KeyEnum.PGP_MESSAGE.value in content:
|
||||||
p.populate_set_out(msg, 'Tags')
|
self.redis_logger.warning('{} has a PGP enc message'.format(paste.p_name))
|
||||||
find = True
|
|
||||||
|
|
||||||
if '-----BEGIN ENCRYPTED PRIVATE KEY-----' in content:
|
msg = 'infoleak:automatic-detection="pgp-message";{}'.format(message)
|
||||||
publisher.warning('{} has an encrypted private key message'.format(paste.p_name))
|
self.process.populate_set_out(msg, 'Tags')
|
||||||
print('encrypted private key message found')
|
get_pgp_content = True
|
||||||
|
find = True
|
||||||
|
|
||||||
msg = 'infoleak:automatic-detection="encrypted-private-key";{}'.format(message)
|
if KeyEnum.PGP_PUBLIC_KEY_BLOCK.value in content:
|
||||||
p.populate_set_out(msg, 'Tags')
|
msg = 'infoleak:automatic-detection="pgp-public-key-block";{}'.format(message)
|
||||||
find = True
|
self.process.populate_set_out(msg, 'Tags')
|
||||||
|
get_pgp_content = True
|
||||||
|
|
||||||
if '-----BEGIN OPENSSH PRIVATE KEY-----' in content:
|
if KeyEnum.PGP_SIGNATURE.value in content:
|
||||||
publisher.warning('{} has an openssh private key message'.format(paste.p_name))
|
msg = 'infoleak:automatic-detection="pgp-signature";{}'.format(message)
|
||||||
print('openssh private key message found')
|
self.process.populate_set_out(msg, 'Tags')
|
||||||
|
get_pgp_content = True
|
||||||
|
|
||||||
msg = 'infoleak:automatic-detection="private-ssh-key";{}'.format(message)
|
if KeyEnum.CERTIFICATE.value in content:
|
||||||
p.populate_set_out(msg, 'Tags')
|
self.redis_logger.warning('{} has a certificate message'.format(paste.p_name))
|
||||||
find = True
|
|
||||||
|
|
||||||
if '---- BEGIN SSH2 ENCRYPTED PRIVATE KEY ----' in content:
|
msg = 'infoleak:automatic-detection="certificate";{}'.format(message)
|
||||||
publisher.warning('{} has an ssh2 private key message'.format(paste.p_name))
|
self.process.populate_set_out(msg, 'Tags')
|
||||||
print('SSH2 private key message found')
|
find = True
|
||||||
|
|
||||||
msg = 'infoleak:automatic-detection="private-ssh-key";{}'.format(message)
|
if KeyEnum.RSA_PRIVATE_KEY.value in content:
|
||||||
p.populate_set_out(msg, 'Tags')
|
self.redis_logger.warning('{} has a RSA private key message'.format(paste.p_name))
|
||||||
find = True
|
print('rsa private key message found')
|
||||||
|
|
||||||
if '-----BEGIN OpenVPN Static key V1-----' in content:
|
msg = 'infoleak:automatic-detection="rsa-private-key";{}'.format(message)
|
||||||
publisher.warning('{} has an openssh private key message'.format(paste.p_name))
|
self.process.populate_set_out(msg, 'Tags')
|
||||||
print('OpenVPN Static key message found')
|
find = True
|
||||||
|
|
||||||
msg = 'infoleak:automatic-detection="vpn-static-key";{}'.format(message)
|
if KeyEnum.PRIVATE_KEY.value in content:
|
||||||
p.populate_set_out(msg, 'Tags')
|
self.redis_logger.warning('{} has a private key message'.format(paste.p_name))
|
||||||
find = True
|
print('private key message found')
|
||||||
|
|
||||||
if '-----BEGIN DSA PRIVATE KEY-----' in content:
|
msg = 'infoleak:automatic-detection="private-key";{}'.format(message)
|
||||||
publisher.warning('{} has a dsa private key message'.format(paste.p_name))
|
self.process.populate_set_out(msg, 'Tags')
|
||||||
|
find = True
|
||||||
|
|
||||||
msg = 'infoleak:automatic-detection="dsa-private-key";{}'.format(message)
|
if KeyEnum.ENCRYPTED_PRIVATE_KEY.value in content:
|
||||||
p.populate_set_out(msg, 'Tags')
|
self.redis_logger.warning('{} has an encrypted private key message'.format(paste.p_name))
|
||||||
find = True
|
print('encrypted private key message found')
|
||||||
|
|
||||||
if '-----BEGIN EC PRIVATE KEY-----' in content:
|
msg = 'infoleak:automatic-detection="encrypted-private-key";{}'.format(message)
|
||||||
publisher.warning('{} has an ec private key message'.format(paste.p_name))
|
self.process.populate_set_out(msg, 'Tags')
|
||||||
|
find = True
|
||||||
|
|
||||||
msg = 'infoleak:automatic-detection="ec-private-key";{}'.format(message)
|
if KeyEnum.OPENSSH_PRIVATE_KEY.value in content:
|
||||||
p.populate_set_out(msg, 'Tags')
|
self.redis_logger.warning('{} has an openssh private key message'.format(paste.p_name))
|
||||||
find = True
|
print('openssh private key message found')
|
||||||
|
|
||||||
if '-----BEGIN PGP PRIVATE KEY BLOCK-----' in content:
|
msg = 'infoleak:automatic-detection="private-ssh-key";{}'.format(message)
|
||||||
publisher.warning('{} has a pgp private key block message'.format(paste.p_name))
|
self.process.populate_set_out(msg, 'Tags')
|
||||||
|
find = True
|
||||||
|
|
||||||
msg = 'infoleak:automatic-detection="pgp-private-key";{}'.format(message)
|
if KeyEnum.SSH2_ENCRYPTED_PRIVATE_KEY.value in content:
|
||||||
p.populate_set_out(msg, 'Tags')
|
self.redis_logger.warning('{} has an ssh2 private key message'.format(paste.p_name))
|
||||||
find = True
|
print('SSH2 private key message found')
|
||||||
|
|
||||||
if '-----BEGIN PUBLIC KEY-----' in content:
|
msg = 'infoleak:automatic-detection="private-ssh-key";{}'.format(message)
|
||||||
publisher.warning('{} has a public key message'.format(paste.p_name))
|
self.process.populate_set_out(msg, 'Tags')
|
||||||
|
find = True
|
||||||
|
|
||||||
msg = 'infoleak:automatic-detection="public-key";{}'.format(message)
|
if KeyEnum.OPENVPN_STATIC_KEY_V1.value in content:
|
||||||
p.populate_set_out(msg, 'Tags')
|
self.redis_logger.warning('{} has an openssh private key message'.format(paste.p_name))
|
||||||
find = True
|
print('OpenVPN Static key message found')
|
||||||
|
|
||||||
# pgp content
|
msg = 'infoleak:automatic-detection="vpn-static-key";{}'.format(message)
|
||||||
if get_pgp_content:
|
self.process.populate_set_out(msg, 'Tags')
|
||||||
p.populate_set_out(message, 'PgpDump')
|
find = True
|
||||||
|
|
||||||
if find :
|
if KeyEnum.DSA_PRIVATE_KEY.value in content:
|
||||||
|
self.redis_logger.warning('{} has a dsa private key message'.format(paste.p_name))
|
||||||
|
|
||||||
#Send to duplicate
|
msg = 'infoleak:automatic-detection="dsa-private-key";{}'.format(message)
|
||||||
p.populate_set_out(message, 'Duplicate')
|
self.process.populate_set_out(msg, 'Tags')
|
||||||
print(message)
|
find = True
|
||||||
|
|
||||||
|
if KeyEnum.EC_PRIVATE_KEY.value in content:
|
||||||
|
self.redis_logger.warning('{} has an ec private key message'.format(paste.p_name))
|
||||||
|
|
||||||
|
msg = 'infoleak:automatic-detection="ec-private-key";{}'.format(message)
|
||||||
|
self.process.populate_set_out(msg, 'Tags')
|
||||||
|
find = True
|
||||||
|
|
||||||
|
if KeyEnum.PGP_PRIVATE_KEY_BLOCK.value in content:
|
||||||
|
self.redis_logger.warning('{} has a pgp private key block message'.format(paste.p_name))
|
||||||
|
|
||||||
|
msg = 'infoleak:automatic-detection="pgp-private-key";{}'.format(message)
|
||||||
|
self.process.populate_set_out(msg, 'Tags')
|
||||||
|
find = True
|
||||||
|
|
||||||
|
if KeyEnum.PUBLIC_KEY.value in content:
|
||||||
|
self.redis_logger.warning('{} has a public key message'.format(paste.p_name))
|
||||||
|
|
||||||
|
msg = 'infoleak:automatic-detection="public-key";{}'.format(message)
|
||||||
|
self.process.populate_set_out(msg, 'Tags')
|
||||||
|
find = True
|
||||||
|
|
||||||
|
# pgp content
|
||||||
|
if get_pgp_content:
|
||||||
|
self.process.populate_set_out(message, 'PgpDump')
|
||||||
|
|
||||||
|
if find :
|
||||||
|
#Send to duplicate
|
||||||
|
self.process.populate_set_out(message, 'Duplicate')
|
||||||
|
self.redis_logger.debug(message)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
# If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
|
|
||||||
# Port of the redis instance used by pubsublogger
|
module = Keys()
|
||||||
publisher.port = 6380
|
module.run()
|
||||||
# Script is the default channel used for the modules.
|
|
||||||
publisher.channel = 'Script'
|
|
||||||
|
|
||||||
# Section name in bin/packages/modules.cfg
|
|
||||||
config_section = 'Keys'
|
|
||||||
|
|
||||||
# Setup the I/O queues
|
|
||||||
p = Process(config_section)
|
|
||||||
|
|
||||||
# Sent to the logging a description of the module
|
|
||||||
publisher.info("Run Keys module ")
|
|
||||||
|
|
||||||
# Endless loop getting messages from the input queue
|
|
||||||
while True:
|
|
||||||
# Get one message from the input queue
|
|
||||||
message = p.get_from_set()
|
|
||||||
if message is None:
|
|
||||||
publisher.debug("{} queue is empty, waiting".format(config_section))
|
|
||||||
time.sleep(1)
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Do something with the message from the queue
|
|
||||||
paste = Paste.Paste(message)
|
|
||||||
search_key(paste)
|
|
||||||
|
|
||||||
# (Optional) Send that thing to the next queue
|
|
||||||
|
|
|
@ -5,151 +5,157 @@
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
##################################
|
||||||
|
# Import External packages
|
||||||
|
##################################
|
||||||
import time
|
import time
|
||||||
import datetime
|
import datetime
|
||||||
import redis
|
import redis
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
##################################
|
||||||
|
# Import Project packages
|
||||||
|
##################################
|
||||||
|
from module.abstract_module import AbstractModule
|
||||||
from packages.Date import Date
|
from packages.Date import Date
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
from Helper import Process
|
from Helper import Process
|
||||||
from packages import Paste
|
from packages import Paste
|
||||||
|
import ConfigLoader
|
||||||
# Config Var
|
|
||||||
max_set_cardinality = 8
|
|
||||||
|
|
||||||
def get_date_range(num_day):
|
|
||||||
curr_date = datetime.date.today()
|
|
||||||
date = Date(str(curr_date.year)+str(curr_date.month).zfill(2)+str(curr_date.day).zfill(2))
|
|
||||||
date_list = []
|
|
||||||
|
|
||||||
for i in range(0, num_day+1):
|
|
||||||
date_list.append(date.substract_day(i))
|
|
||||||
return date_list
|
|
||||||
|
|
||||||
|
|
||||||
def compute_most_posted(server, message):
|
class ModuleStats(AbstractModule):
|
||||||
module, num, keyword, paste_date = message.split(';')
|
"""
|
||||||
|
Module Statistics module for AIL framework
|
||||||
|
"""
|
||||||
|
|
||||||
redis_progression_name_set = 'top_'+ module +'_set_' + paste_date
|
# Config Var
|
||||||
|
MAX_SET_CARDINALITY = 8
|
||||||
# Add/Update in Redis
|
|
||||||
server.hincrby(paste_date, module+'-'+keyword, int(num))
|
|
||||||
|
|
||||||
# Compute Most Posted
|
|
||||||
date = get_date_range(0)[0]
|
|
||||||
# check if this keyword is eligible for progression
|
|
||||||
keyword_total_sum = 0
|
|
||||||
|
|
||||||
curr_value = server.hget(date, module+'-'+keyword)
|
|
||||||
keyword_total_sum += int(curr_value) if curr_value is not None else 0
|
|
||||||
|
|
||||||
if server.zcard(redis_progression_name_set) < max_set_cardinality:
|
|
||||||
server.zadd(redis_progression_name_set, float(keyword_total_sum), keyword)
|
|
||||||
|
|
||||||
else: # not in set
|
|
||||||
member_set = server.zrangebyscore(redis_progression_name_set, '-inf', '+inf', withscores=True, start=0, num=1)
|
|
||||||
# Member set is a list of (value, score) pairs
|
|
||||||
if int(member_set[0][1]) < keyword_total_sum:
|
|
||||||
#remove min from set and add the new one
|
|
||||||
print(module + ': adding ' +keyword+ '(' +str(keyword_total_sum)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')')
|
|
||||||
server.zrem(redis_progression_name_set, member_set[0][0])
|
|
||||||
server.zadd(redis_progression_name_set, float(keyword_total_sum), keyword)
|
|
||||||
print(redis_progression_name_set)
|
|
||||||
|
|
||||||
|
|
||||||
def compute_provider_info(server_trend, path):
|
def __init__(self):
|
||||||
redis_all_provider = 'all_provider_set'
|
|
||||||
|
|
||||||
paste = Paste.Paste(path)
|
super(ModuleStats, self).__init__()
|
||||||
|
|
||||||
paste_baseName = paste.p_name.split('.')[0]
|
# Waiting time in secondes between to message proccessed
|
||||||
paste_size = paste._get_p_size()
|
self.pending_seconds = 20
|
||||||
paste_provider = paste.p_source
|
|
||||||
paste_date = str(paste._get_p_date())
|
|
||||||
redis_sum_size_set = 'top_size_set_' + paste_date
|
|
||||||
redis_avg_size_name_set = 'top_avg_size_set_' + paste_date
|
|
||||||
redis_providers_name_set = 'providers_set_' + paste_date
|
|
||||||
|
|
||||||
# Add/Update in Redis
|
# Sent to the logging a description of the module
|
||||||
server_trend.sadd(redis_all_provider, paste_provider)
|
self.redis_logger.info("Makes statistics about valid URL")
|
||||||
|
|
||||||
num_paste = int(server_trend.hincrby(paste_provider+'_num', paste_date, 1))
|
# REDIS #
|
||||||
sum_size = float(server_trend.hincrbyfloat(paste_provider+'_size', paste_date, paste_size))
|
self.r_serv_trend = ConfigLoader.ConfigLoader().get_redis_conn("ARDB_Trending")
|
||||||
new_avg = float(sum_size) / float(num_paste)
|
|
||||||
server_trend.hset(paste_provider +'_avg', paste_date, new_avg)
|
self.r_serv_pasteName = ConfigLoader.ConfigLoader().get_redis_conn("Redis_Paste_Name")
|
||||||
|
|
||||||
|
|
||||||
#
|
def compute(self, message):
|
||||||
# Compute Most Posted
|
|
||||||
#
|
|
||||||
|
|
||||||
# Size
|
if len(message.split(';')) > 1:
|
||||||
if server_trend.zcard(redis_sum_size_set) < max_set_cardinality or server_trend.zscore(redis_sum_size_set, paste_provider) != "nil":
|
self.compute_most_posted(message)
|
||||||
server_trend.zadd(redis_sum_size_set, float(num_paste), paste_provider)
|
else:
|
||||||
server_trend.zadd(redis_avg_size_name_set, float(new_avg), paste_provider)
|
self.compute_provider_info(message)
|
||||||
else: #set full capacity
|
|
||||||
member_set = server_trend.zrangebyscore(redis_sum_size_set, '-inf', '+inf', withscores=True, start=0, num=1)
|
|
||||||
# Member set is a list of (value, score) pairs
|
|
||||||
if float(member_set[0][1]) < new_avg:
|
|
||||||
#remove min from set and add the new one
|
|
||||||
print('Size - adding ' +paste_provider+ '(' +str(new_avg)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')')
|
|
||||||
server_trend.zrem(redis_sum_size_set, member_set[0][0])
|
|
||||||
server_trend.zadd(redis_sum_size_set, float(sum_size), paste_provider)
|
|
||||||
server_trend.zrem(redis_avg_size_name_set, member_set[0][0])
|
|
||||||
server_trend.zadd(redis_avg_size_name_set, float(new_avg), paste_provider)
|
|
||||||
|
|
||||||
|
|
||||||
# Num
|
def get_date_range(self, num_day):
|
||||||
# if set not full or provider already present
|
curr_date = datetime.date.today()
|
||||||
if server_trend.zcard(redis_providers_name_set) < max_set_cardinality or server_trend.zscore(redis_providers_name_set, paste_provider) != "nil":
|
date = Date(str(curr_date.year)+str(curr_date.month).zfill(2)+str(curr_date.day).zfill(2))
|
||||||
server_trend.zadd(redis_providers_name_set, float(num_paste), paste_provider)
|
date_list = []
|
||||||
else: #set at full capacity
|
|
||||||
member_set = server_trend.zrangebyscore(redis_providers_name_set, '-inf', '+inf', withscores=True, start=0, num=1)
|
for i in range(0, num_day+1):
|
||||||
# Member set is a list of (value, score) pairs
|
date_list.append(date.substract_day(i))
|
||||||
if int(member_set[0][1]) < num_paste:
|
return date_list
|
||||||
#remove min from set and add the new one
|
|
||||||
print('Num - adding ' +paste_provider+ '(' +str(num_paste)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')')
|
|
||||||
server_trend.zrem(member_set[0][0])
|
def compute_most_posted(self, message):
|
||||||
server_trend.zadd(redis_providers_name_set, float(num_paste), paste_provider)
|
module, num, keyword, paste_date = message.split(';')
|
||||||
|
|
||||||
|
redis_progression_name_set = 'top_'+ module +'_set_' + paste_date
|
||||||
|
|
||||||
|
# Add/Update in Redis
|
||||||
|
self.r_serv_trend.hincrby(paste_date, module+'-'+keyword, int(num))
|
||||||
|
|
||||||
|
# Compute Most Posted
|
||||||
|
date = self.get_date_range(0)[0]
|
||||||
|
# check if this keyword is eligible for progression
|
||||||
|
keyword_total_sum = 0
|
||||||
|
|
||||||
|
curr_value = self.r_serv_trend.hget(date, module+'-'+keyword)
|
||||||
|
keyword_total_sum += int(curr_value) if curr_value is not None else 0
|
||||||
|
|
||||||
|
if self.r_serv_trend.zcard(redis_progression_name_set) < self.MAX_SET_CARDINALITY:
|
||||||
|
self.r_serv_trend.zadd(redis_progression_name_set, float(keyword_total_sum), keyword)
|
||||||
|
|
||||||
|
else: # not in set
|
||||||
|
member_set = self.r_serv_trend.zrangebyscore(redis_progression_name_set, '-inf', '+inf', withscores=True, start=0, num=1)
|
||||||
|
# Member set is a list of (value, score) pairs
|
||||||
|
if int(member_set[0][1]) < keyword_total_sum:
|
||||||
|
#remove min from set and add the new one
|
||||||
|
self.redis_logger.debug(module + ': adding ' +keyword+ '(' +str(keyword_total_sum)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')')
|
||||||
|
self.r_serv_trend.zrem(redis_progression_name_set, member_set[0][0])
|
||||||
|
self.r_serv_trend.zadd(redis_progression_name_set, float(keyword_total_sum), keyword)
|
||||||
|
self.redis_logger.debug(redis_progression_name_set)
|
||||||
|
|
||||||
|
|
||||||
|
def compute_provider_info(self, message):
|
||||||
|
redis_all_provider = 'all_provider_set'
|
||||||
|
|
||||||
|
paste = Paste.Paste(message)
|
||||||
|
|
||||||
|
paste_baseName = paste.p_name.split('.')[0]
|
||||||
|
paste_size = paste._get_p_size()
|
||||||
|
paste_provider = paste.p_source
|
||||||
|
paste_date = str(paste._get_p_date())
|
||||||
|
redis_sum_size_set = 'top_size_set_' + paste_date
|
||||||
|
redis_avg_size_name_set = 'top_avg_size_set_' + paste_date
|
||||||
|
redis_providers_name_set = 'providers_set_' + paste_date
|
||||||
|
|
||||||
|
# Add/Update in Redis
|
||||||
|
self.r_serv_pasteName.sadd(paste_baseName, message)
|
||||||
|
self.r_serv_trend.sadd(redis_all_provider, paste_provider)
|
||||||
|
|
||||||
|
num_paste = int(self.r_serv_trend.hincrby(paste_provider+'_num', paste_date, 1))
|
||||||
|
sum_size = float(self.r_serv_trend.hincrbyfloat(paste_provider+'_size', paste_date, paste_size))
|
||||||
|
new_avg = float(sum_size) / float(num_paste)
|
||||||
|
self.r_serv_trend.hset(paste_provider +'_avg', paste_date, new_avg)
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# Compute Most Posted
|
||||||
|
#
|
||||||
|
|
||||||
|
# Size
|
||||||
|
if self.r_serv_trend.zcard(redis_sum_size_set) < self.MAX_SET_CARDINALITY or self.r_serv_trend.zscore(redis_sum_size_set, paste_provider) != "nil":
|
||||||
|
self.r_serv_trend.zadd(redis_sum_size_set, float(num_paste), paste_provider)
|
||||||
|
self.r_serv_trend.zadd(redis_avg_size_name_set, float(new_avg), paste_provider)
|
||||||
|
else: #set full capacity
|
||||||
|
member_set = self.r_serv_trend.zrangebyscore(redis_sum_size_set, '-inf', '+inf', withscores=True, start=0, num=1)
|
||||||
|
# Member set is a list of (value, score) pairs
|
||||||
|
if float(member_set[0][1]) < new_avg:
|
||||||
|
#remove min from set and add the new one
|
||||||
|
self.redis_logger.debug('Size - adding ' +paste_provider+ '(' +str(new_avg)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')')
|
||||||
|
self.r_serv_trend.zrem(redis_sum_size_set, member_set[0][0])
|
||||||
|
self.r_serv_trend.zadd(redis_sum_size_set, float(sum_size), paste_provider)
|
||||||
|
self.r_serv_trend.zrem(redis_avg_size_name_set, member_set[0][0])
|
||||||
|
self.r_serv_trend.zadd(redis_avg_size_name_set, float(new_avg), paste_provider)
|
||||||
|
|
||||||
|
|
||||||
|
# Num
|
||||||
|
# if set not full or provider already present
|
||||||
|
if self.r_serv_trend.zcard(redis_providers_name_set) < self.MAX_SET_CARDINALITY or self.r_serv_trend.zscore(redis_providers_name_set, paste_provider) != "nil":
|
||||||
|
self.r_serv_trend.zadd(redis_providers_name_set, float(num_paste), paste_provider)
|
||||||
|
else: #set at full capacity
|
||||||
|
member_set = self.r_serv_trend.zrangebyscore(redis_providers_name_set, '-inf', '+inf', withscores=True, start=0, num=1)
|
||||||
|
# Member set is a list of (value, score) pairs
|
||||||
|
if int(member_set[0][1]) < num_paste:
|
||||||
|
#remove min from set and add the new one
|
||||||
|
self.redis_logger.debug('Num - adding ' +paste_provider+ '(' +str(num_paste)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')')
|
||||||
|
self.r_serv_trend.zrem(member_set[0][0])
|
||||||
|
self.r_serv_trend.zadd(redis_providers_name_set, float(num_paste), paste_provider)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
# If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
|
|
||||||
# Port of the redis instance used by pubsublogger
|
|
||||||
publisher.port = 6380
|
|
||||||
# Script is the default channel used for the modules.
|
|
||||||
publisher.channel = 'Script'
|
|
||||||
|
|
||||||
# Section name in bin/packages/modules.cfg
|
module = ModuleStats()
|
||||||
config_section = 'ModuleStats'
|
module.run()
|
||||||
|
|
||||||
# Setup the I/O queues
|
|
||||||
p = Process(config_section)
|
|
||||||
|
|
||||||
# Sent to the logging a description of the module
|
|
||||||
publisher.info("Makes statistics about valid URL")
|
|
||||||
|
|
||||||
# REDIS #
|
|
||||||
r_serv_trend = redis.StrictRedis(
|
|
||||||
host=p.config.get("ARDB_Trending", "host"),
|
|
||||||
port=p.config.get("ARDB_Trending", "port"),
|
|
||||||
db=p.config.get("ARDB_Trending", "db"),
|
|
||||||
decode_responses=True)
|
|
||||||
|
|
||||||
# Endless loop getting messages from the input queue
|
|
||||||
while True:
|
|
||||||
# Get one message from the input queue
|
|
||||||
message = p.get_from_set()
|
|
||||||
|
|
||||||
if message is None:
|
|
||||||
publisher.debug("{} queue is empty, waiting".format(config_section))
|
|
||||||
print('sleeping')
|
|
||||||
time.sleep(20)
|
|
||||||
continue
|
|
||||||
|
|
||||||
else:
|
|
||||||
# Do something with the message from the queue
|
|
||||||
if len(message.split(';')) > 1:
|
|
||||||
compute_most_posted(r_serv_trend, message)
|
|
||||||
else:
|
|
||||||
compute_provider_info(r_serv_trend, message)
|
|
||||||
|
|
112
bin/Phone.py
112
bin/Phone.py
|
@ -11,72 +11,74 @@ It apply phone number regexes on paste content and warn if above a threshold.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
##################################
|
||||||
|
# Import External packages
|
||||||
|
##################################
|
||||||
import time
|
import time
|
||||||
import re
|
import re
|
||||||
import phonenumbers
|
import phonenumbers
|
||||||
|
|
||||||
|
|
||||||
|
##################################
|
||||||
|
# Import Project packages
|
||||||
|
##################################
|
||||||
|
from module.abstract_module import AbstractModule
|
||||||
from packages import Paste
|
from packages import Paste
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
from Helper import Process
|
from Helper import Process
|
||||||
|
|
||||||
|
|
||||||
def search_phone(message):
|
class Phone(AbstractModule):
|
||||||
paste = Paste.Paste(message)
|
"""
|
||||||
content = paste.get_p_content()
|
Phone module for AIL framework
|
||||||
|
"""
|
||||||
|
|
||||||
# regex to find phone numbers, may raise many false positives (shalt thou seek optimization, upgrading is required)
|
# regex to find phone numbers, may raise many false positives (shalt thou seek optimization, upgrading is required)
|
||||||
reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})')
|
# reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})')
|
||||||
reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\(?\d{2,4}\)?){3,4})')
|
REG_PHONE = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\(?\d{2,4}\)?){3,4})')
|
||||||
# list of the regex results in the Paste, may be null
|
|
||||||
results = reg_phone.findall(content)
|
|
||||||
|
|
||||||
# if the list is greater than 4, we consider the Paste may contain a list of phone numbers
|
|
||||||
if len(results) > 4:
|
|
||||||
print(results)
|
|
||||||
publisher.warning('{} contains PID (phone numbers)'.format(paste.p_name))
|
|
||||||
|
|
||||||
msg = 'infoleak:automatic-detection="phone-number";{}'.format(message)
|
def __init__(self):
|
||||||
p.populate_set_out(msg, 'Tags')
|
super(Phone, self).__init__()
|
||||||
|
|
||||||
|
# Waiting time in secondes between to message proccessed
|
||||||
|
self.pending_seconds = 1
|
||||||
|
|
||||||
|
|
||||||
|
def compute(self, message):
|
||||||
|
paste = Paste.Paste(message)
|
||||||
|
content = paste.get_p_content()
|
||||||
|
# List of the regex results in the Paste, may be null
|
||||||
|
results = self.REG_PHONE.findall(content)
|
||||||
|
|
||||||
|
# If the list is greater than 4, we consider the Paste may contain a list of phone numbers
|
||||||
|
if len(results) > 4:
|
||||||
|
self.redis_logger.debug(results)
|
||||||
|
self.redis_logger.warning('{} contains PID (phone numbers)'.format(paste.p_name))
|
||||||
|
|
||||||
|
msg = 'infoleak:automatic-detection="phone-number";{}'.format(message)
|
||||||
|
self.process.populate_set_out(msg, 'Tags')
|
||||||
|
|
||||||
|
# Send to duplicate
|
||||||
|
self.process.populate_set_out(message, 'Duplicate')
|
||||||
|
|
||||||
|
stats = {}
|
||||||
|
for phone_number in results:
|
||||||
|
try:
|
||||||
|
x = phonenumbers.parse(phone_number, None)
|
||||||
|
country_code = x.country_code
|
||||||
|
if stats.get(country_code) is None:
|
||||||
|
stats[country_code] = 1
|
||||||
|
else:
|
||||||
|
stats[country_code] = stats[country_code] + 1
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
for country_code in stats:
|
||||||
|
if stats[country_code] > 4:
|
||||||
|
self.redis_logger.warning('{} contains Phone numbers with country code {}'.format(paste.p_name, country_code))
|
||||||
|
|
||||||
#Send to duplicate
|
|
||||||
p.populate_set_out(message, 'Duplicate')
|
|
||||||
stats = {}
|
|
||||||
for phone_number in results:
|
|
||||||
try:
|
|
||||||
x = phonenumbers.parse(phone_number, None)
|
|
||||||
country_code = x.country_code
|
|
||||||
if stats.get(country_code) is None:
|
|
||||||
stats[country_code] = 1
|
|
||||||
else:
|
|
||||||
stats[country_code] = stats[country_code] + 1
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
for country_code in stats:
|
|
||||||
if stats[country_code] > 4:
|
|
||||||
publisher.warning('{} contains Phone numbers with country code {}'.format(paste.p_name, country_code))
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
# If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
|
|
||||||
# Port of the redis instance used by pubsublogger
|
module = Phone()
|
||||||
publisher.port = 6380
|
module.run()
|
||||||
# Script is the default channel used for the modules.
|
|
||||||
publisher.channel = 'Script'
|
|
||||||
|
|
||||||
# Section name in bin/packages/modules.cfg
|
|
||||||
config_section = 'Phone'
|
|
||||||
|
|
||||||
# Setup the I/O queues
|
|
||||||
p = Process(config_section)
|
|
||||||
|
|
||||||
# Sent to the logging a description of the module
|
|
||||||
publisher.info("Run Phone module")
|
|
||||||
|
|
||||||
# Endless loop getting messages from the input queue
|
|
||||||
while True:
|
|
||||||
# Get one message from the input queue
|
|
||||||
message = p.get_from_set()
|
|
||||||
if message is None:
|
|
||||||
publisher.debug("{} queue is empty, waiting".format(config_section))
|
|
||||||
time.sleep(1)
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Do something with the message from the queue
|
|
||||||
search_phone(message)
|
|
||||||
|
|
|
@ -5,30 +5,26 @@ The TermTracker Module
|
||||||
===================
|
===================
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
##################################
|
||||||
|
# Import External packages
|
||||||
|
##################################
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
import signal
|
import signal
|
||||||
|
|
||||||
|
##################################
|
||||||
|
# Import Project packages
|
||||||
|
##################################
|
||||||
from Helper import Process
|
from Helper import Process
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
from module.abstract_module import AbstractModule
|
||||||
import NotificationHelper
|
import NotificationHelper
|
||||||
|
|
||||||
from packages import Item
|
from packages import Item
|
||||||
from packages import Term
|
from packages import Term
|
||||||
|
|
||||||
from lib import Tracker
|
from lib import Tracker
|
||||||
|
|
||||||
full_item_url = "/object/item?id="
|
|
||||||
|
|
||||||
mail_body_template = "AIL Framework,\nNew occurrence for term tracked term: {}\nitem id: {}\nurl: {}{}"
|
|
||||||
|
|
||||||
# loads tracked words
|
|
||||||
list_tracked_words = Term.get_tracked_words_list()
|
|
||||||
last_refresh_word = time.time()
|
|
||||||
set_tracked_words_list = Term.get_set_tracked_words_list()
|
|
||||||
last_refresh_set = time.time()
|
|
||||||
|
|
||||||
class TimeoutException(Exception):
|
class TimeoutException(Exception):
|
||||||
pass
|
pass
|
||||||
|
@ -36,89 +32,105 @@ def timeout_handler(signum, frame):
|
||||||
raise TimeoutException
|
raise TimeoutException
|
||||||
signal.signal(signal.SIGALRM, timeout_handler)
|
signal.signal(signal.SIGALRM, timeout_handler)
|
||||||
|
|
||||||
def new_term_found(term, term_type, item_id, item_date):
|
|
||||||
uuid_list = Term.get_term_uuid_list(term, term_type)
|
|
||||||
print('new tracked term found: {} in {}'.format(term, item_id))
|
|
||||||
|
|
||||||
for term_uuid in uuid_list:
|
class TermTrackerMod(AbstractModule):
|
||||||
Term.add_tracked_item(term_uuid, item_id, item_date)
|
|
||||||
|
|
||||||
tags_to_add = Term.get_term_tags(term_uuid)
|
mail_body_template = "AIL Framework,\nNew occurrence for term tracked term: {}\nitem id: {}\nurl: {}{}"
|
||||||
for tag in tags_to_add:
|
|
||||||
msg = '{};{}'.format(tag, item_id)
|
|
||||||
p.populate_set_out(msg, 'Tags')
|
|
||||||
|
|
||||||
mail_to_notify = Term.get_term_mails(term_uuid)
|
"""
|
||||||
if mail_to_notify:
|
TermTrackerMod module for AIL framework
|
||||||
mail_subject = Tracker.get_email_subject(term_uuid)
|
"""
|
||||||
mail_body = mail_body_template.format(term, item_id, full_item_url, item_id)
|
def __init__(self):
|
||||||
for mail in mail_to_notify:
|
super(TermTrackerMod, self).__init__()
|
||||||
NotificationHelper.sendEmailNotification(mail, mail_subject, mail_body)
|
|
||||||
|
self.pending_seconds = 5
|
||||||
|
|
||||||
|
self.max_execution_time = self.process.config.getint('TermTrackerMod', "max_execution_time")
|
||||||
|
|
||||||
|
self.full_item_url = self.process.config.get("Notifications", "ail_domain") + "/object/item?id="
|
||||||
|
|
||||||
|
# loads tracked words
|
||||||
|
self.list_tracked_words = Term.get_tracked_words_list()
|
||||||
|
self.last_refresh_word = time.time()
|
||||||
|
self.set_tracked_words_list = Term.get_set_tracked_words_list()
|
||||||
|
self.last_refresh_set = time.time()
|
||||||
|
|
||||||
|
# Send module state to logs
|
||||||
|
self.redis_logger.info("Module %s initialized"%(self._module_name()))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
def compute(self, item_id):
|
||||||
|
# Cast message as Item
|
||||||
|
item_date = Item.get_item_date(item_id)
|
||||||
|
item_content = Item.get_item_content(item_id)
|
||||||
|
|
||||||
publisher.port = 6380
|
signal.alarm(self.max_execution_time)
|
||||||
publisher.channel = "Script"
|
|
||||||
publisher.info("Script TermTrackerMod started")
|
|
||||||
|
|
||||||
config_section = 'TermTrackerMod'
|
dict_words_freq = None
|
||||||
p = Process(config_section)
|
try:
|
||||||
max_execution_time = p.config.getint(config_section, "max_execution_time")
|
dict_words_freq = Term.get_text_word_frequency(item_content)
|
||||||
|
except TimeoutException:
|
||||||
full_item_url = p.config.get("Notifications", "ail_domain") + full_item_url
|
self.redis_logger.warning("{0} processing timeout".format(item_id))
|
||||||
|
else:
|
||||||
while True:
|
signal.alarm(0)
|
||||||
|
|
||||||
item_id = p.get_from_set()
|
|
||||||
|
|
||||||
if item_id is not None:
|
|
||||||
|
|
||||||
item_date = Item.get_item_date(item_id)
|
|
||||||
item_content = Item.get_item_content(item_id)
|
|
||||||
|
|
||||||
signal.alarm(max_execution_time)
|
|
||||||
try:
|
|
||||||
dict_words_freq = Term.get_text_word_frequency(item_content)
|
|
||||||
except TimeoutException:
|
|
||||||
print ("{0} processing timeout".format(item_id))
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
signal.alarm(0)
|
|
||||||
|
|
||||||
|
if dict_words_freq:
|
||||||
# create token statistics
|
# create token statistics
|
||||||
#for word in dict_words_freq:
|
#for word in dict_words_freq:
|
||||||
# Term.create_token_statistics(item_date, word, dict_words_freq[word])
|
# Term.create_token_statistics(item_date, word, dict_words_freq[word])
|
||||||
|
|
||||||
# check solo words
|
# check solo words
|
||||||
for word in list_tracked_words:
|
for word in self.list_tracked_words:
|
||||||
if word in dict_words_freq:
|
if word in dict_words_freq:
|
||||||
new_term_found(word, 'word', item_id, item_date)
|
self.new_term_found(word, 'word', item_id, item_date)
|
||||||
|
|
||||||
# check words set
|
# check words set
|
||||||
for elem in set_tracked_words_list:
|
for elem in self.set_tracked_words_list:
|
||||||
list_words = elem[0]
|
list_words = elem[0]
|
||||||
nb_words_threshold = elem[1]
|
nb_words_threshold = elem[1]
|
||||||
word_set = elem[2]
|
word_set = elem[2]
|
||||||
nb_uniq_word = 0
|
nb_uniq_word = 0
|
||||||
|
|
||||||
for word in list_words:
|
for word in list_words:
|
||||||
if word in dict_words_freq:
|
if word in dict_words_freq:
|
||||||
nb_uniq_word += 1
|
nb_uniq_word += 1
|
||||||
if nb_uniq_word >= nb_words_threshold:
|
if nb_uniq_word >= nb_words_threshold:
|
||||||
new_term_found(word_set, 'set', item_id, item_date)
|
self.new_term_found(word_set, 'set', item_id, item_date)
|
||||||
|
|
||||||
else:
|
# refresh Tracked term
|
||||||
time.sleep(5)
|
if self.last_refresh_word < Term.get_tracked_term_last_updated_by_type('word'):
|
||||||
|
self.list_tracked_words = Term.get_tracked_words_list()
|
||||||
|
self.last_refresh_word = time.time()
|
||||||
|
self.redis_logger.debug('Tracked word refreshed')
|
||||||
|
|
||||||
|
if self.last_refresh_set < Term.get_tracked_term_last_updated_by_type('set'):
|
||||||
|
self.set_tracked_words_list = Term.get_set_tracked_words_list()
|
||||||
|
self.last_refresh_set = time.time()
|
||||||
|
self.redis_logger.debug('Tracked set refreshed')
|
||||||
|
|
||||||
|
|
||||||
# refresh Tracked term
|
def new_term_found(self, term, term_type, item_id, item_date):
|
||||||
if last_refresh_word < Term.get_tracked_term_last_updated_by_type('word'):
|
uuid_list = Term.get_term_uuid_list(term, term_type)
|
||||||
list_tracked_words = Term.get_tracked_words_list()
|
self.redis_logger.info('new tracked term found: {} in {}'.format(term, item_id))
|
||||||
last_refresh_word = time.time()
|
|
||||||
print('Tracked word refreshed')
|
|
||||||
|
|
||||||
if last_refresh_set < Term.get_tracked_term_last_updated_by_type('set'):
|
for term_uuid in uuid_list:
|
||||||
set_tracked_words_list = Term.get_set_tracked_words_list()
|
Term.add_tracked_item(term_uuid, item_id, item_date)
|
||||||
last_refresh_set = time.time()
|
|
||||||
print('Tracked set refreshed')
|
tags_to_add = Term.get_term_tags(term_uuid)
|
||||||
|
for tag in tags_to_add:
|
||||||
|
msg = '{};{}'.format(tag, item_id)
|
||||||
|
self.process.populate_set_out(msg, 'Tags')
|
||||||
|
|
||||||
|
mail_to_notify = Term.get_term_mails(term_uuid)
|
||||||
|
if mail_to_notify:
|
||||||
|
mail_subject = Tracker.get_email_subject(term_uuid)
|
||||||
|
mail_body = TermTrackerMod.mail_body_template.format(term, item_id, self.full_item_url, item_id)
|
||||||
|
for mail in mail_to_notify:
|
||||||
|
self.redis_logger.debug('Send Mail {}'.format(mail_subject))
|
||||||
|
NotificationHelper.sendEmailNotification(mail, mail_subject, mail_body)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
|
||||||
|
module = TermTrackerMod()
|
||||||
|
module.run()
|
||||||
|
|
294
bin/Web.py
294
bin/Web.py
|
@ -9,152 +9,186 @@ This module tries to parse URLs and warns if some defined contry code are presen
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
##################################
|
||||||
|
# Import External packages
|
||||||
|
##################################
|
||||||
import redis
|
import redis
|
||||||
import pprint
|
import pprint
|
||||||
import time
|
import time
|
||||||
import os
|
import os
|
||||||
import dns.exception
|
import dns.exception
|
||||||
from packages import Paste
|
|
||||||
from packages import lib_refine
|
|
||||||
from pubsublogger import publisher
|
|
||||||
from pyfaup.faup import Faup
|
from pyfaup.faup import Faup
|
||||||
import re
|
import re
|
||||||
|
|
||||||
# Country and ASN lookup
|
# Country and ASN lookup
|
||||||
from cymru.ip2asn.dns import DNSClient as ip2asn
|
from cymru.ip2asn.dns import DNSClient as ip2asn
|
||||||
import socket
|
import socket
|
||||||
import pycountry
|
import pycountry
|
||||||
import ipaddress
|
import ipaddress
|
||||||
|
|
||||||
|
##################################
|
||||||
|
# Import Project packages
|
||||||
|
##################################
|
||||||
|
from module.abstract_module import AbstractModule
|
||||||
|
from packages import Paste
|
||||||
|
from packages import lib_refine
|
||||||
|
from pubsublogger import publisher
|
||||||
from Helper import Process
|
from Helper import Process
|
||||||
|
|
||||||
# Used to prevent concat with empty fields due to url parsing
|
|
||||||
def avoidNone(a_string):
|
|
||||||
if a_string is None:
|
|
||||||
return ""
|
|
||||||
else:
|
|
||||||
return a_string
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
class Web(AbstractModule):
|
||||||
publisher.port = 6380
|
"""
|
||||||
publisher.channel = "Script"
|
Web module for AIL framework
|
||||||
|
"""
|
||||||
config_section = 'Web'
|
|
||||||
|
|
||||||
p = Process(config_section)
|
|
||||||
|
|
||||||
# REDIS #
|
|
||||||
r_serv2 = redis.StrictRedis(
|
|
||||||
host=p.config.get("Redis_Cache", "host"),
|
|
||||||
port=p.config.getint("Redis_Cache", "port"),
|
|
||||||
db=p.config.getint("Redis_Cache", "db"),
|
|
||||||
decode_responses=True)
|
|
||||||
|
|
||||||
# Protocol file path
|
|
||||||
protocolsfile_path = os.path.join(os.environ['AIL_HOME'],
|
|
||||||
p.config.get("Directories", "protocolsfile"))
|
|
||||||
|
|
||||||
# Country to log as critical
|
|
||||||
cc_critical = p.config.get("Url", "cc_critical")
|
|
||||||
|
|
||||||
# FUNCTIONS #
|
|
||||||
publisher.info("Script URL Started")
|
|
||||||
|
|
||||||
message = p.get_from_set()
|
|
||||||
prec_filename = None
|
|
||||||
faup = Faup()
|
|
||||||
|
|
||||||
# Get all uri from protocolsfile (Used for Curve)
|
|
||||||
uri_scheme = ""
|
|
||||||
with open(protocolsfile_path, 'r') as scheme_file:
|
|
||||||
for scheme in scheme_file:
|
|
||||||
uri_scheme += scheme[:-1]+"|"
|
|
||||||
uri_scheme = uri_scheme[:-1]
|
|
||||||
|
|
||||||
url_regex = "("+uri_scheme+")\://([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2}))(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*"
|
|
||||||
|
|
||||||
while True:
|
|
||||||
if message is not None:
|
|
||||||
filename, score = message.split()
|
|
||||||
|
|
||||||
if prec_filename is None or filename != prec_filename:
|
|
||||||
domains_list = []
|
|
||||||
PST = Paste.Paste(filename)
|
|
||||||
client = ip2asn()
|
|
||||||
for x in PST.get_regex(url_regex):
|
|
||||||
matching_url = re.search(url_regex, PST.get_p_content())
|
|
||||||
url = matching_url.group(0)
|
|
||||||
|
|
||||||
to_send = "{} {} {}".format(url, PST._get_p_date(), filename)
|
|
||||||
p.populate_set_out(to_send, 'Url')
|
|
||||||
|
|
||||||
faup.decode(url)
|
|
||||||
domain = faup.get_domain()
|
|
||||||
subdomain = faup.get_subdomain()
|
|
||||||
|
|
||||||
publisher.debug('{} Published'.format(url))
|
|
||||||
|
|
||||||
if subdomain is not None:
|
|
||||||
## TODO: # FIXME: remove me
|
|
||||||
try:
|
|
||||||
subdomain = subdomain.decode()
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
if domain is not None:
|
|
||||||
## TODO: # FIXME: remove me
|
|
||||||
try:
|
|
||||||
domain = domain.decode()
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
domains_list.append(domain)
|
|
||||||
|
|
||||||
hostl = avoidNone(subdomain) + avoidNone(domain)
|
|
||||||
|
|
||||||
try:
|
|
||||||
socket.setdefaulttimeout(1)
|
|
||||||
ip = socket.gethostbyname(hostl)
|
|
||||||
except:
|
|
||||||
# If the resolver is not giving any IPv4 address,
|
|
||||||
# ASN/CC lookup is skip.
|
|
||||||
continue
|
|
||||||
|
|
||||||
try:
|
|
||||||
l = client.lookup(ip, qType='IP')
|
|
||||||
|
|
||||||
except ipaddress.AddressValueError:
|
|
||||||
continue
|
|
||||||
cc = getattr(l, 'cc')
|
|
||||||
asn = ''
|
|
||||||
if getattr(l, 'asn') is not None:
|
|
||||||
asn = getattr(l, 'asn')[2:] #remobe b'
|
|
||||||
|
|
||||||
# EU is not an official ISO 3166 code (but used by RIPE
|
|
||||||
# IP allocation)
|
|
||||||
if cc is not None and cc != "EU":
|
|
||||||
print(hostl, asn, cc, \
|
|
||||||
pycountry.countries.get(alpha_2=cc).name)
|
|
||||||
if cc == cc_critical:
|
|
||||||
to_print = 'Url;{};{};{};Detected {} {}'.format(
|
|
||||||
PST.p_source, PST.p_date, PST.p_name,
|
|
||||||
hostl, cc)
|
|
||||||
#publisher.warning(to_print)
|
|
||||||
print(to_print)
|
|
||||||
else:
|
|
||||||
print(hostl, asn, cc)
|
|
||||||
|
|
||||||
A_values = lib_refine.checking_A_record(r_serv2,
|
|
||||||
domains_list)
|
|
||||||
|
|
||||||
if A_values[0] >= 1:
|
|
||||||
pprint.pprint(A_values)
|
|
||||||
publisher.info('Url;{};{};{};Checked {} URL;{}'.format(
|
|
||||||
PST.p_source, PST.p_date, PST.p_name, A_values[0], PST.p_rel_path))
|
|
||||||
prec_filename = filename
|
|
||||||
|
|
||||||
|
# Used to prevent concat with empty fields due to url parsing
|
||||||
|
def avoidNone(self, a_string):
|
||||||
|
if a_string is None:
|
||||||
|
return ""
|
||||||
else:
|
else:
|
||||||
publisher.debug("Script url is Idling 10s")
|
return a_string
|
||||||
print('Sleeping')
|
|
||||||
time.sleep(10)
|
|
||||||
|
|
||||||
message = p.get_from_set()
|
def __init__(self):
|
||||||
|
"""
|
||||||
|
Init Web
|
||||||
|
"""
|
||||||
|
super(Web, self).__init__()
|
||||||
|
|
||||||
|
# REDIS Cache
|
||||||
|
self.r_serv2 = redis.StrictRedis(
|
||||||
|
host=self.process.config.get("Redis_Cache", "host"),
|
||||||
|
port=self.process.config.getint("Redis_Cache", "port"),
|
||||||
|
db=self.process.config.getint("Redis_Cache", "db"),
|
||||||
|
decode_responses=True)
|
||||||
|
|
||||||
|
# Country to log as critical
|
||||||
|
self.cc_critical = self.process.config.get("Url", "cc_critical")
|
||||||
|
|
||||||
|
# FUNCTIONS #
|
||||||
|
self.redis_logger.info("Script URL subscribed to channel web_categ")
|
||||||
|
|
||||||
|
# FIXME For retro compatibility
|
||||||
|
self.channel = 'web_categ'
|
||||||
|
|
||||||
|
self.faup = Faup()
|
||||||
|
|
||||||
|
# Protocol file path
|
||||||
|
protocolsfile_path = os.path.join(os.environ['AIL_HOME'],
|
||||||
|
self.process.config.get("Directories", "protocolsfile"))
|
||||||
|
# Get all uri from protocolsfile (Used for Curve)
|
||||||
|
uri_scheme = ""
|
||||||
|
with open(protocolsfile_path, 'r') as scheme_file:
|
||||||
|
for scheme in scheme_file:
|
||||||
|
uri_scheme += scheme[:-1]+"|"
|
||||||
|
uri_scheme = uri_scheme[:-1]
|
||||||
|
|
||||||
|
self.url_regex = "((?i:"+uri_scheme + \
|
||||||
|
")\://(?:[a-zA-Z0-9\.\-]+(?:\:[a-zA-Z0-9\.&%\$\-]+)*@)*(?:(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|(?:[a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(?:com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2}))(?:\:[0-9]+)*(?:/(?:$|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
|
||||||
|
|
||||||
|
self.prec_filename = None
|
||||||
|
|
||||||
|
# Send module state to logs
|
||||||
|
self.redis_logger.info("Module %s initialized" % (self.module_name))
|
||||||
|
|
||||||
|
def compute(self, message):
|
||||||
|
"""
|
||||||
|
Search for Web links from given message
|
||||||
|
"""
|
||||||
|
# Extract item
|
||||||
|
filename, score = message.split()
|
||||||
|
|
||||||
|
if self.prec_filename is None or filename != self.prec_filename:
|
||||||
|
domains_list = set()
|
||||||
|
PST = Paste.Paste(filename)
|
||||||
|
client = ip2asn()
|
||||||
|
|
||||||
|
detected_urls = PST.get_regex(self.url_regex)
|
||||||
|
if len(detected_urls) > 0:
|
||||||
|
to_print = 'Web;{};{};{};'.format(
|
||||||
|
PST.p_source, PST.p_date, PST.p_name)
|
||||||
|
self.redis_logger.info('{}Detected {} URL;{}'.format(
|
||||||
|
to_print, len(detected_urls), PST.p_rel_path))
|
||||||
|
|
||||||
|
for url in detected_urls:
|
||||||
|
self.redis_logger.debug("match regex: %s" % (url))
|
||||||
|
|
||||||
|
# self.redis_logger.debug("match regex search: %s"%(url))
|
||||||
|
|
||||||
|
to_send = "{} {} {}".format(url, PST._get_p_date(), filename)
|
||||||
|
self.process.populate_set_out(to_send, 'Url')
|
||||||
|
self.redis_logger.debug("url_parsed: %s" % (to_send))
|
||||||
|
|
||||||
|
self.faup.decode(url)
|
||||||
|
domain = self.faup.get_domain()
|
||||||
|
subdomain = self.faup.get_subdomain()
|
||||||
|
|
||||||
|
self.redis_logger.debug('{} Published'.format(url))
|
||||||
|
|
||||||
|
if subdomain is not None:
|
||||||
|
# TODO: # FIXME: remove me
|
||||||
|
try:
|
||||||
|
subdomain = subdomain.decode()
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if domain is not None:
|
||||||
|
# TODO: # FIXME: remove me
|
||||||
|
try:
|
||||||
|
domain = domain.decode()
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
domains_list.add(domain)
|
||||||
|
|
||||||
|
hostl = self.avoidNone(subdomain) + self.avoidNone(domain)
|
||||||
|
|
||||||
|
try:
|
||||||
|
socket.setdefaulttimeout(1)
|
||||||
|
ip = socket.gethostbyname(hostl)
|
||||||
|
# If the resolver is not giving any IPv4 address,
|
||||||
|
# ASN/CC lookup is skip.
|
||||||
|
l = client.lookup(ip, qType='IP')
|
||||||
|
except ipaddress.AddressValueError:
|
||||||
|
self.redis_logger.error(
|
||||||
|
'ASN/CC lookup failed for IP {}'.format(ip))
|
||||||
|
continue
|
||||||
|
except:
|
||||||
|
self.redis_logger.error(
|
||||||
|
'Resolver IPv4 address failed for host {}'.format(hostl))
|
||||||
|
continue
|
||||||
|
|
||||||
|
cc = getattr(l, 'cc')
|
||||||
|
asn = ''
|
||||||
|
if getattr(l, 'asn') is not None:
|
||||||
|
asn = getattr(l, 'asn')[2:] # remobe b'
|
||||||
|
|
||||||
|
# EU is not an official ISO 3166 code (but used by RIPE
|
||||||
|
# IP allocation)
|
||||||
|
if cc is not None and cc != "EU":
|
||||||
|
self.redis_logger.debug('{};{};{};{}'.format(hostl, asn, cc,
|
||||||
|
pycountry.countries.get(alpha_2=cc).name))
|
||||||
|
if cc == self.cc_critical:
|
||||||
|
to_print = 'Url;{};{};{};Detected {} {}'.format(
|
||||||
|
PST.p_source, PST.p_date, PST.p_name,
|
||||||
|
hostl, cc)
|
||||||
|
self.redis_logger.info(to_print)
|
||||||
|
else:
|
||||||
|
self.redis_logger.debug('{};{};{}'.format(hostl, asn, cc))
|
||||||
|
|
||||||
|
A_values = lib_refine.checking_A_record(self.r_serv2,
|
||||||
|
domains_list)
|
||||||
|
|
||||||
|
if A_values[0] >= 1:
|
||||||
|
PST.__setattr__(self.channel, A_values)
|
||||||
|
PST.save_attribute_redis(self.channel, (A_values[0],
|
||||||
|
list(A_values[1])))
|
||||||
|
|
||||||
|
pprint.pprint(A_values)
|
||||||
|
# self.redis_logger.info('Url;{};{};{};Checked {} URL;{}'.format(
|
||||||
|
# PST.p_source, PST.p_date, PST.p_name, A_values[0], PST.p_rel_path))
|
||||||
|
|
||||||
|
self.prec_filename = filename
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
|
||||||
|
module = Web()
|
||||||
|
module.run()
|
||||||
|
|
338
bin/WebStats.py
338
bin/WebStats.py
|
@ -10,182 +10,198 @@ It consider the TLD, Domain and protocol.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
##################################
|
||||||
|
# Import External packages
|
||||||
|
##################################
|
||||||
import time
|
import time
|
||||||
import datetime
|
import datetime
|
||||||
import redis
|
import redis
|
||||||
import os
|
import os
|
||||||
from packages import lib_words
|
|
||||||
from packages.Date import Date
|
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
from Helper import Process
|
|
||||||
from pyfaup.faup import Faup
|
from pyfaup.faup import Faup
|
||||||
|
|
||||||
# Config Var
|
|
||||||
threshold_total_sum = 200 # Above this value, a keyword is eligible for a progression
|
|
||||||
threshold_increase = 1.0 # The percentage representing the keyword occurence since num_day_to_look
|
|
||||||
max_set_cardinality = 10 # The cardinality of the progression set
|
|
||||||
num_day_to_look = 5 # the detection of the progression start num_day_to_look in the past
|
|
||||||
|
|
||||||
def analyse(server, field_name, date, url_parsed):
|
##################################
|
||||||
field = url_parsed[field_name]
|
# Import Project packages
|
||||||
if field is not None:
|
##################################
|
||||||
try: # faup version
|
from module.abstract_module import AbstractModule
|
||||||
field = field.decode()
|
from packages import lib_words
|
||||||
except:
|
from packages.Date import Date
|
||||||
pass
|
from Helper import Process
|
||||||
server.hincrby(field, date, 1)
|
|
||||||
if field_name == "domain": #save domain in a set for the monthly plot
|
|
||||||
domain_set_name = "domain_set_" + date[0:6]
|
|
||||||
server.sadd(domain_set_name, field)
|
|
||||||
print("added in " + domain_set_name +": "+ field)
|
|
||||||
|
|
||||||
def get_date_range(num_day):
|
|
||||||
curr_date = datetime.date.today()
|
|
||||||
date = Date(str(curr_date.year)+str(curr_date.month).zfill(2)+str(curr_date.day).zfill(2))
|
|
||||||
date_list = []
|
|
||||||
|
|
||||||
for i in range(0, num_day+1):
|
|
||||||
date_list.append(date.substract_day(i))
|
|
||||||
return date_list
|
|
||||||
|
|
||||||
# Compute the progression for one keyword
|
|
||||||
def compute_progression_word(server, num_day, keyword):
|
|
||||||
date_range = get_date_range(num_day)
|
|
||||||
# check if this keyword is eligible for progression
|
|
||||||
keyword_total_sum = 0
|
|
||||||
value_list = []
|
|
||||||
for date in date_range: # get value up to date_range
|
|
||||||
curr_value = server.hget(keyword, date)
|
|
||||||
value_list.append(int(curr_value if curr_value is not None else 0))
|
|
||||||
keyword_total_sum += int(curr_value) if curr_value is not None else 0
|
|
||||||
oldest_value = value_list[-1] if value_list[-1] != 0 else 1 #Avoid zero division
|
|
||||||
|
|
||||||
# The progression is based on the ratio: value[i] / value[i-1]
|
|
||||||
keyword_increase = 0
|
|
||||||
value_list_reversed = value_list[:]
|
|
||||||
value_list_reversed.reverse()
|
|
||||||
for i in range(1, len(value_list_reversed)):
|
|
||||||
divisor = value_list_reversed[i-1] if value_list_reversed[i-1] != 0 else 1
|
|
||||||
keyword_increase += value_list_reversed[i] / divisor
|
|
||||||
|
|
||||||
return (keyword_increase, keyword_total_sum)
|
|
||||||
|
|
||||||
|
|
||||||
'''
|
class WebStats(AbstractModule):
|
||||||
recompute the set top_progression zset
|
"""
|
||||||
- Compute the current field progression
|
WebStats module for AIL framework
|
||||||
- re-compute the current progression for each first 2*max_set_cardinality fields in the top_progression_zset
|
"""
|
||||||
'''
|
|
||||||
def compute_progression(server, field_name, num_day, url_parsed):
|
|
||||||
redis_progression_name_set = "z_top_progression_"+field_name
|
|
||||||
|
|
||||||
keyword = url_parsed[field_name]
|
# Config Var
|
||||||
if keyword is not None:
|
THRESHOLD_TOTAL_SUM = 200 # Above this value, a keyword is eligible for a progression
|
||||||
|
THRESHOLD_INCREASE = 1.0 # The percentage representing the keyword occurence since num_day_to_look
|
||||||
|
MAX_SET_CARDINALITY = 10 # The cardinality of the progression set
|
||||||
|
NUM_DAY_TO_LOOK = 5 # the detection of the progression start num_day_to_look in the past
|
||||||
|
|
||||||
#compute the progression of the current word
|
|
||||||
keyword_increase, keyword_total_sum = compute_progression_word(server, num_day, keyword)
|
|
||||||
|
|
||||||
#re-compute the progression of 2*max_set_cardinality
|
def __init__(self):
|
||||||
current_top = server.zrevrangebyscore(redis_progression_name_set, '+inf', '-inf', withscores=True, start=0, num=2*max_set_cardinality)
|
super(WebStats, self).__init__()
|
||||||
for word, value in current_top:
|
|
||||||
word_inc, word_tot_sum = compute_progression_word(server, num_day, word)
|
|
||||||
server.zrem(redis_progression_name_set, word)
|
|
||||||
if (word_tot_sum > threshold_total_sum) and (word_inc > threshold_increase):
|
|
||||||
server.zadd(redis_progression_name_set, float(word_inc), word)
|
|
||||||
|
|
||||||
# filter before adding
|
# Send module state to logs
|
||||||
if (keyword_total_sum > threshold_total_sum) and (keyword_increase > threshold_increase):
|
self.redis_logger.info("Module %s initialized"%(self.module_name))
|
||||||
server.zadd(redis_progression_name_set, float(keyword_increase), keyword)
|
# Sent to the logging a description of the module
|
||||||
|
self.redis_logger.info("Makes statistics about valid URL")
|
||||||
|
|
||||||
|
self.pending_seconds = 5*60
|
||||||
|
|
||||||
|
# REDIS #
|
||||||
|
self.r_serv_trend = redis.StrictRedis(
|
||||||
|
host=self.process.config.get("ARDB_Trending", "host"),
|
||||||
|
port=self.process.config.get("ARDB_Trending", "port"),
|
||||||
|
db=self.process.config.get("ARDB_Trending", "db"),
|
||||||
|
decode_responses=True)
|
||||||
|
|
||||||
|
# FILE CURVE SECTION #
|
||||||
|
self.csv_path_proto = os.path.join(os.environ['AIL_HOME'],
|
||||||
|
self.process.config.get("Directories", "protocolstrending_csv"))
|
||||||
|
self.protocolsfile_path = os.path.join(os.environ['AIL_HOME'],
|
||||||
|
self.process.config.get("Directories", "protocolsfile"))
|
||||||
|
|
||||||
|
self.csv_path_tld = os.path.join(os.environ['AIL_HOME'],
|
||||||
|
self.process.config.get("Directories", "tldstrending_csv"))
|
||||||
|
self.tldsfile_path = os.path.join(os.environ['AIL_HOME'],
|
||||||
|
self.process.config.get("Directories", "tldsfile"))
|
||||||
|
|
||||||
|
self.csv_path_domain = os.path.join(os.environ['AIL_HOME'],
|
||||||
|
self.process.config.get("Directories", "domainstrending_csv"))
|
||||||
|
|
||||||
|
self.faup = Faup()
|
||||||
|
self.generate_new_graph = False
|
||||||
|
|
||||||
|
|
||||||
|
def computeNone(self):
|
||||||
|
if self.generate_new_graph:
|
||||||
|
self.generate_new_graph = False
|
||||||
|
|
||||||
|
today = datetime.date.today()
|
||||||
|
year = today.year
|
||||||
|
month = today.month
|
||||||
|
|
||||||
|
self.redis_logger.debug('Building protocol graph')
|
||||||
|
lib_words.create_curve_with_word_file(self.r_serv_trend, csv_path_proto,
|
||||||
|
protocolsfile_path, year,
|
||||||
|
month)
|
||||||
|
|
||||||
|
self.redis_logger.debug('Building tld graph')
|
||||||
|
lib_words.create_curve_with_word_file(self.r_serv_trend, csv_path_tld,
|
||||||
|
tldsfile_path, year,
|
||||||
|
month)
|
||||||
|
|
||||||
|
self.redis_logger.debug('Building domain graph')
|
||||||
|
lib_words.create_curve_from_redis_set(self.r_serv_trend, csv_path_domain,
|
||||||
|
"domain", year,
|
||||||
|
month)
|
||||||
|
self.redis_logger.debug('end building')
|
||||||
|
|
||||||
|
|
||||||
|
def compute(self, message):
|
||||||
|
self.generate_new_graph = True
|
||||||
|
|
||||||
|
# Do something with the message from the queue
|
||||||
|
url, date, path = message.split()
|
||||||
|
self.faup.decode(url)
|
||||||
|
url_parsed = self.faup.get()
|
||||||
|
|
||||||
|
# Scheme analysis
|
||||||
|
self.analyse('scheme', date, url_parsed)
|
||||||
|
# Tld analysis
|
||||||
|
self.analyse('tld', date, url_parsed)
|
||||||
|
# Domain analysis
|
||||||
|
self.analyse('domain', date, url_parsed)
|
||||||
|
|
||||||
|
self.compute_progression('scheme', self.NUM_DAY_TO_LOOK, url_parsed)
|
||||||
|
self.compute_progression('tld', self.NUM_DAY_TO_LOOK, url_parsed)
|
||||||
|
self.compute_progression('domain', self.NUM_DAY_TO_LOOK, url_parsed)
|
||||||
|
|
||||||
|
|
||||||
|
def analyse(self, field_name, date, url_parsed):
|
||||||
|
field = url_parsed[field_name]
|
||||||
|
|
||||||
|
if field is not None:
|
||||||
|
try: # faup version
|
||||||
|
field = field.decode()
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
self.r_serv_trend.hincrby(field, date, 1)
|
||||||
|
|
||||||
|
if field_name == "domain": #save domain in a set for the monthly plot
|
||||||
|
domain_set_name = "domain_set_" + date[0:6]
|
||||||
|
self.r_serv_trend.sadd(domain_set_name, field)
|
||||||
|
self.redis_logger.debug("added in " + domain_set_name +": "+ field)
|
||||||
|
|
||||||
|
|
||||||
|
def get_date_range(self, num_day):
|
||||||
|
curr_date = datetime.date.today()
|
||||||
|
date = Date(str(curr_date.year)+str(curr_date.month).zfill(2)+str(curr_date.day).zfill(2))
|
||||||
|
date_list = []
|
||||||
|
|
||||||
|
for i in range(0, num_day+1):
|
||||||
|
date_list.append(date.substract_day(i))
|
||||||
|
return date_list
|
||||||
|
|
||||||
|
|
||||||
|
def compute_progression_word(self, num_day, keyword):
|
||||||
|
"""
|
||||||
|
Compute the progression for one keyword
|
||||||
|
"""
|
||||||
|
date_range = self.get_date_range(num_day)
|
||||||
|
# check if this keyword is eligible for progression
|
||||||
|
keyword_total_sum = 0
|
||||||
|
value_list = []
|
||||||
|
for date in date_range: # get value up to date_range
|
||||||
|
curr_value = self.r_serv_trend.hget(keyword, date)
|
||||||
|
value_list.append(int(curr_value if curr_value is not None else 0))
|
||||||
|
keyword_total_sum += int(curr_value) if curr_value is not None else 0
|
||||||
|
oldest_value = value_list[-1] if value_list[-1] != 0 else 1 #Avoid zero division
|
||||||
|
|
||||||
|
# The progression is based on the ratio: value[i] / value[i-1]
|
||||||
|
keyword_increase = 0
|
||||||
|
value_list_reversed = value_list[:]
|
||||||
|
value_list_reversed.reverse()
|
||||||
|
for i in range(1, len(value_list_reversed)):
|
||||||
|
divisor = value_list_reversed[i-1] if value_list_reversed[i-1] != 0 else 1
|
||||||
|
keyword_increase += value_list_reversed[i] / divisor
|
||||||
|
|
||||||
|
return (keyword_increase, keyword_total_sum)
|
||||||
|
|
||||||
|
|
||||||
|
def compute_progression(self, field_name, num_day, url_parsed):
|
||||||
|
"""
|
||||||
|
recompute the set top_progression zset
|
||||||
|
- Compute the current field progression
|
||||||
|
- re-compute the current progression for each first 2*self.MAX_SET_CARDINALITY fields in the top_progression_zset
|
||||||
|
"""
|
||||||
|
redis_progression_name_set = "z_top_progression_"+field_name
|
||||||
|
|
||||||
|
keyword = url_parsed[field_name]
|
||||||
|
if keyword is not None:
|
||||||
|
|
||||||
|
#compute the progression of the current word
|
||||||
|
keyword_increase, keyword_total_sum = self.compute_progression_word(num_day, keyword)
|
||||||
|
|
||||||
|
#re-compute the progression of 2*self.MAX_SET_CARDINALITY
|
||||||
|
current_top = self.r_serv_trend.zrevrangebyscore(redis_progression_name_set, '+inf', '-inf', withscores=True, start=0, num=2*self.MAX_SET_CARDINALITY)
|
||||||
|
for word, value in current_top:
|
||||||
|
word_inc, word_tot_sum = self.compute_progression_word(num_day, word)
|
||||||
|
self.r_serv_trend.zrem(redis_progression_name_set, word)
|
||||||
|
if (word_tot_sum > self.THRESHOLD_TOTAL_SUM) and (word_inc > self.THRESHOLD_INCREASE):
|
||||||
|
self.r_serv_trend.zadd(redis_progression_name_set, float(word_inc), word)
|
||||||
|
|
||||||
|
# filter before adding
|
||||||
|
if (keyword_total_sum > self.THRESHOLD_TOTAL_SUM) and (keyword_increase > self.THRESHOLD_INCREASE):
|
||||||
|
self.r_serv_trend.zadd(redis_progression_name_set, float(keyword_increase), keyword)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
# If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
|
|
||||||
# Port of the redis instance used by pubsublogger
|
module = WebStats()
|
||||||
publisher.port = 6380
|
module.run()
|
||||||
# Script is the default channel used for the modules.
|
|
||||||
publisher.channel = 'Script'
|
|
||||||
|
|
||||||
# Section name in bin/packages/modules.cfg
|
|
||||||
config_section = 'WebStats'
|
|
||||||
|
|
||||||
# Setup the I/O queues
|
|
||||||
p = Process(config_section)
|
|
||||||
|
|
||||||
# Sent to the logging a description of the module
|
|
||||||
publisher.info("Makes statistics about valid URL")
|
|
||||||
|
|
||||||
# REDIS #
|
|
||||||
r_serv_trend = redis.StrictRedis(
|
|
||||||
host=p.config.get("ARDB_Trending", "host"),
|
|
||||||
port=p.config.get("ARDB_Trending", "port"),
|
|
||||||
db=p.config.get("ARDB_Trending", "db"),
|
|
||||||
decode_responses=True)
|
|
||||||
|
|
||||||
# FILE CURVE SECTION #
|
|
||||||
csv_path_proto = os.path.join(os.environ['AIL_HOME'],
|
|
||||||
p.config.get("Directories", "protocolstrending_csv"))
|
|
||||||
protocolsfile_path = os.path.join(os.environ['AIL_HOME'],
|
|
||||||
p.config.get("Directories", "protocolsfile"))
|
|
||||||
|
|
||||||
csv_path_tld = os.path.join(os.environ['AIL_HOME'],
|
|
||||||
p.config.get("Directories", "tldstrending_csv"))
|
|
||||||
tldsfile_path = os.path.join(os.environ['AIL_HOME'],
|
|
||||||
p.config.get("Directories", "tldsfile"))
|
|
||||||
|
|
||||||
csv_path_domain = os.path.join(os.environ['AIL_HOME'],
|
|
||||||
p.config.get("Directories", "domainstrending_csv"))
|
|
||||||
|
|
||||||
faup = Faup()
|
|
||||||
generate_new_graph = False
|
|
||||||
# Endless loop getting messages from the input queue
|
|
||||||
while True:
|
|
||||||
# Get one message from the input queue
|
|
||||||
message = p.get_from_set()
|
|
||||||
|
|
||||||
if message is None:
|
|
||||||
if generate_new_graph:
|
|
||||||
generate_new_graph = False
|
|
||||||
today = datetime.date.today()
|
|
||||||
year = today.year
|
|
||||||
month = today.month
|
|
||||||
|
|
||||||
print('Building protocol graph')
|
|
||||||
lib_words.create_curve_with_word_file(r_serv_trend, csv_path_proto,
|
|
||||||
protocolsfile_path, year,
|
|
||||||
month)
|
|
||||||
|
|
||||||
print('Building tld graph')
|
|
||||||
lib_words.create_curve_with_word_file(r_serv_trend, csv_path_tld,
|
|
||||||
tldsfile_path, year,
|
|
||||||
month)
|
|
||||||
|
|
||||||
print('Building domain graph')
|
|
||||||
lib_words.create_curve_from_redis_set(r_serv_trend, csv_path_domain,
|
|
||||||
"domain", year,
|
|
||||||
month)
|
|
||||||
print('end building')
|
|
||||||
|
|
||||||
|
|
||||||
publisher.debug("{} queue is empty, waiting".format(config_section))
|
|
||||||
print('sleeping')
|
|
||||||
time.sleep(5*60)
|
|
||||||
continue
|
|
||||||
|
|
||||||
else:
|
|
||||||
generate_new_graph = True
|
|
||||||
# Do something with the message from the queue
|
|
||||||
url, date, path = message.split()
|
|
||||||
faup.decode(url)
|
|
||||||
url_parsed = faup.get()
|
|
||||||
|
|
||||||
# Scheme analysis
|
|
||||||
analyse(r_serv_trend, 'scheme', date, url_parsed)
|
|
||||||
# Tld analysis
|
|
||||||
analyse(r_serv_trend, 'tld', date, url_parsed)
|
|
||||||
# Domain analysis
|
|
||||||
analyse(r_serv_trend, 'domain', date, url_parsed)
|
|
||||||
|
|
||||||
compute_progression(r_serv_trend, 'scheme', num_day_to_look, url_parsed)
|
|
||||||
compute_progression(r_serv_trend, 'tld', num_day_to_look, url_parsed)
|
|
||||||
compute_progression(r_serv_trend, 'domain', num_day_to_look, url_parsed)
|
|
||||||
|
|
0
bin/module/__init__.py
Normal file
0
bin/module/__init__.py
Normal file
98
bin/module/abstract_module.py
Normal file
98
bin/module/abstract_module.py
Normal file
|
@ -0,0 +1,98 @@
|
||||||
|
# coding: utf-8
|
||||||
|
"""
|
||||||
|
Base Class for AIL Modules
|
||||||
|
"""
|
||||||
|
|
||||||
|
##################################
|
||||||
|
# Import External packages
|
||||||
|
##################################
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
import time
|
||||||
|
|
||||||
|
##################################
|
||||||
|
# Import Project packages
|
||||||
|
##################################
|
||||||
|
from pubsublogger import publisher
|
||||||
|
from Helper import Process
|
||||||
|
|
||||||
|
|
||||||
|
class AbstractModule(ABC):
|
||||||
|
"""
|
||||||
|
Abstract Module class
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, module_name=None, queue_name=None):
|
||||||
|
"""
|
||||||
|
Init Module
|
||||||
|
module_name: str; set the module name if different from the instance ClassName
|
||||||
|
"""
|
||||||
|
# Module name if provided else instance className
|
||||||
|
self.module_name = module_name if module_name else self._module_name()
|
||||||
|
|
||||||
|
# Module name if provided else instance className
|
||||||
|
self.queue_name = queue_name if queue_name else self._module_name()
|
||||||
|
|
||||||
|
# Init Redis Logger
|
||||||
|
self.redis_logger = publisher
|
||||||
|
# Port of the redis instance used by pubsublogger
|
||||||
|
self.redis_logger.port = 6380
|
||||||
|
# Channel name to publish logs
|
||||||
|
self.redis_logger.channel = 'Script'
|
||||||
|
# TODO modify generic channel Script to a namespaced channel like:
|
||||||
|
# publish module logs to script:<ModuleName> channel
|
||||||
|
# self.redis_logger.channel = 'script:%s'%(self.module_name)
|
||||||
|
|
||||||
|
# Run module endlessly
|
||||||
|
self.proceed = True
|
||||||
|
|
||||||
|
# Waiting time in secondes between two proccessed messages
|
||||||
|
self.pending_seconds = 10
|
||||||
|
|
||||||
|
# Setup the I/O queues
|
||||||
|
self.process = Process(self.queue_name)
|
||||||
|
|
||||||
|
|
||||||
|
def run(self):
|
||||||
|
"""
|
||||||
|
Run Module endless process
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Endless loop processing messages from the input queue
|
||||||
|
while self.proceed:
|
||||||
|
# Get one message (paste) from the QueueIn (copy of Redis_Global publish)
|
||||||
|
message = self.process.get_from_set()
|
||||||
|
|
||||||
|
if message is None:
|
||||||
|
self.computeNone()
|
||||||
|
# Wait before next process
|
||||||
|
self.redis_logger.debug('%s, waiting for new message, Idling %ds'%(self.module_name, self.pending_seconds))
|
||||||
|
time.sleep(self.pending_seconds)
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Module processing with the message from the queue
|
||||||
|
self.compute(message)
|
||||||
|
except Exception as err:
|
||||||
|
self.redis_logger.error("Error in module %s: %s"%(self.module_name, err))
|
||||||
|
|
||||||
|
|
||||||
|
def _module_name(self):
|
||||||
|
"""
|
||||||
|
Returns the instance class name (ie. the Module Name)
|
||||||
|
"""
|
||||||
|
return self.__class__.__name__
|
||||||
|
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def compute(self, message):
|
||||||
|
"""
|
||||||
|
Main method of the Module to implement
|
||||||
|
"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def computeNone(self):
|
||||||
|
"""
|
||||||
|
Method of the Module when there is no message
|
||||||
|
"""
|
||||||
|
pass
|
|
@ -1,45 +1,57 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# -*-coding:UTF-8 -*
|
# -*-coding:UTF-8 -*
|
||||||
"""
|
"""
|
||||||
Template for new modules
|
The Template Module
|
||||||
|
======================
|
||||||
|
|
||||||
|
This module is a template for Template for new modules
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
##################################
|
||||||
|
# Import External packages
|
||||||
|
##################################
|
||||||
import time
|
import time
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
|
|
||||||
|
##################################
|
||||||
|
# Import Project packages
|
||||||
|
##################################
|
||||||
|
from module.abstract_module import AbstractModule
|
||||||
from Helper import Process
|
from Helper import Process
|
||||||
|
|
||||||
|
|
||||||
def do_something(message):
|
class Template(AbstractModule):
|
||||||
return None
|
"""
|
||||||
|
Template module for AIL framework
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
super(Template, self).__init__()
|
||||||
|
|
||||||
|
# Send module state to logs
|
||||||
|
self.redis_logger.info("Module %s initialized"%(self.module_name))
|
||||||
|
|
||||||
|
# Pending time between two computation in seconds
|
||||||
|
self.pending_seconds = 10
|
||||||
|
|
||||||
|
|
||||||
|
def computeNone(self):
|
||||||
|
"""
|
||||||
|
Compute when no message in queue
|
||||||
|
"""
|
||||||
|
self.redis_logger.debug("No message in queue")
|
||||||
|
|
||||||
|
|
||||||
|
def compute(self, message):
|
||||||
|
"""
|
||||||
|
Compute a message in queue
|
||||||
|
"""
|
||||||
|
self.redis_logger.debug("Compute message in queue")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
# If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
|
|
||||||
# Port of the redis instance used by pubsublogger
|
module = Template()
|
||||||
publisher.port = 6380
|
module.run()
|
||||||
# Script is the default channel used for the modules.
|
|
||||||
publisher.channel = 'Script'
|
|
||||||
|
|
||||||
# Section name in bin/packages/modules.cfg
|
|
||||||
config_section = '<section name>'
|
|
||||||
|
|
||||||
# Setup the I/O queues
|
|
||||||
p = Process(config_section)
|
|
||||||
|
|
||||||
# Sent to the logging a description of the module
|
|
||||||
publisher.info("<description of the module>")
|
|
||||||
|
|
||||||
# Endless loop getting messages from the input queue
|
|
||||||
while True:
|
|
||||||
# Get one message from the input queue
|
|
||||||
message = p.get_from_set()
|
|
||||||
if message is None:
|
|
||||||
publisher.debug("{} queue is empty, waiting".format(config_section))
|
|
||||||
time.sleep(1)
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Do something with the message from the queue
|
|
||||||
something_has_been_done = do_something(message)
|
|
||||||
|
|
||||||
# (Optional) Send that thing to the next queue
|
|
||||||
p.populate_set_out(something_has_been_done)
|
|
||||||
|
|
Loading…
Reference in a new issue