fix: stuck queues and submit paste

This commit is contained in:
Olivier SAGIT 2021-04-28 15:24:33 +02:00
parent f133207fd6
commit f88e53925e
15 changed files with 1458 additions and 1106 deletions

View file

@ -23,6 +23,9 @@ Redis organization:
""" """
##################################
# Import External packages
##################################
import time import time
import os import os
import sys import sys
@ -30,34 +33,30 @@ import datetime
import re import re
import redis import redis
from pyfaup.faup import Faup from pyfaup.faup import Faup
from pubsublogger import publisher from pubsublogger import publisher
from Helper import Process import lib.regex_helper as regex_helper
import signal
##################################
# Import Project packages
##################################
from module.abstract_module import AbstractModule
from Helper import Process
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages')) sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
import Item import Item
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader import ConfigLoader
import regex_helper
## LOAD CONFIG ##
config_loader = ConfigLoader.ConfigLoader()
server_cred = config_loader.get_redis_conn("ARDB_TermCred")
server_statistics = config_loader.get_redis_conn("ARDB_Statistics")
minimumLengthThreshold = config_loader.get_config_int("Credential", "minimumLengthThreshold") class Credential(AbstractModule):
criticalNumberToAlert = config_loader.get_config_int("Credential", "criticalNumberToAlert") """
minTopPassList = config_loader.get_config_int("Credential", "minTopPassList") Credential module for AIL framework
"""
config_loader = None
## -- ##
import signal
max_execution_time = 30 max_execution_time = 30
#split username with spec. char or with upper case, distinguish start with upper # Split username with spec. char or with upper case, distinguish start with upper
REGEX_CRED = "[a-z]+|[A-Z]{3,}|[A-Z]{1,2}[a-z]+|[0-9]+" REGEX_CRED = "[a-z]+|[A-Z]{3,}|[A-Z]{1,2}[a-z]+|[0-9]+"
REDIS_KEY_NUM_USERNAME = 'uniqNumForUsername' REDIS_KEY_NUM_USERNAME = 'uniqNumForUsername'
REDIS_KEY_NUM_PATH = 'uniqNumForUsername' REDIS_KEY_NUM_PATH = 'uniqNumForUsername'
@ -67,62 +66,67 @@ REDIS_KEY_ALL_PATH_SET = 'AllPath'
REDIS_KEY_ALL_PATH_SET_REV = 'AllPathRev' REDIS_KEY_ALL_PATH_SET_REV = 'AllPathRev'
REDIS_KEY_MAP_CRED_TO_PATH = 'CredToPathMapping' REDIS_KEY_MAP_CRED_TO_PATH = 'CredToPathMapping'
if __name__ == "__main__":
publisher.port = 6380
publisher.channel = "Script"
config_section = "Credential"
module_name = "Credential"
p = Process(config_section)
publisher.info("Find credentials")
faup = Faup() def __init__(self):
super(Credential, self).__init__()
regex_web = "((?:https?:\/\/)[\.-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)" self.faup = Faup()
#regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+"
regex_cred = "[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+"
regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:"
redis_cache_key = regex_helper.generate_redis_cache_key(module_name) self.regex_web = "((?:https?:\/\/)[\.-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)"
self.regex_cred = "[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+"
self.regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:"
while True: self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)
message = p.get_from_set()
if message is None: # Database
publisher.debug("Script Credential is Idling 10s") self.server_cred = ConfigLoader.ConfigLoader().get_redis_conn("ARDB_TermCred")
time.sleep(10) self.server_statistics = ConfigLoader.ConfigLoader().get_redis_conn("ARDB_Statistics")
continue
# Config values
self.minimumLengthThreshold = ConfigLoader.ConfigLoader().get_config_int("Credential", "minimumLengthThreshold")
self.criticalNumberToAlert = ConfigLoader.ConfigLoader().get_config_int("Credential", "criticalNumberToAlert")
# Waiting time in secondes between to message proccessed
self.pending_seconds = 10
# Send module state to logs
self.redis_logger.info(f"Module {self.module_name} initialized")
def compute(self, message):
item_id, count = message.split() item_id, count = message.split()
item_content = Item.get_item_content(item_id) item_content = Item.get_item_content(item_id)
# Extract all credentials # Extract all credentials
all_credentials = regex_helper.regex_findall(module_name, redis_cache_key, regex_cred, item_id, item_content, max_time=max_execution_time) all_credentials = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_cred, item_id, item_content, max_time=Credential.max_execution_time)
if not all_credentials: if all_credentials:
continue nb_cred = len(all_credentials)
message = f'Checked {nb_cred} credentials found.'
all_sites = regex_helper.regex_findall(module_name, redis_cache_key, regex_web, item_id, item_content, max_time=max_execution_time) all_sites = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_web, item_id, item_content, max_time=Credential.max_execution_time)
message = 'Checked {} credentials found.'.format(len(all_credentials))
if all_sites: if all_sites:
message += ' Related websites: {}'.format( (', '.join(all_sites)) ) discovered_sites = ', '.join(all_sites)
print(message) message += f' Related websites: {discovered_sites}'
to_print = 'Credential;{};{};{};{};{}'.format(Item.get_source(item_id), Item.get_item_date(item_id), Item.get_item_basename(item_id), message, item_id) self.redis_logger.debug(message)
to_print = f'Credential;{Item.get_source(item_id)};{Item.get_item_date(item_id)};{Item.get_item_basename(item_id)};{message};{item_id}'
#num of creds above tresh, publish an alert #num of creds above tresh, publish an alert
if len(all_credentials) > criticalNumberToAlert: if nb_cred > self.criticalNumberToAlert:
print("========> Found more than 10 credentials in this file : {}".format( item_id )) self.redis_logger.debug(f"========> Found more than 10 credentials in this file : {item_id}")
publisher.warning(to_print) self.redis_logger.warning(to_print)
# Send to duplicate # Send to duplicate
p.populate_set_out(item_id, 'Duplicate') self.process.populate_set_out(item_id, 'Duplicate')
msg = 'infoleak:automatic-detection="credential";{}'.format(item_id) msg = f'infoleak:automatic-detection="credential";{item_id}'
p.populate_set_out(msg, 'Tags') self.process.populate_set_out(msg, 'Tags')
site_occurence = regex_helper.regex_findall(module_name, redis_cache_key, regex_site_for_stats, item_id, item_content, max_time=max_execution_time, r_set=False) site_occurence = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_site_for_stats, item_id, item_content, max_time=Credential.max_execution_time, r_set=False)
creds_sites = {} creds_sites = {}
@ -134,8 +138,8 @@ if __name__ == "__main__":
creds_sites[site_domain] = 1 creds_sites[site_domain] = 1
for url in all_sites: for url in all_sites:
faup.decode(url) self.faup.decode(url)
domain = faup.get()['domain'] domain = self.faup.get()['domain']
## TODO: # FIXME: remove me ## TODO: # FIXME: remove me
try: try:
domain = domain.decode() domain = domain.decode()
@ -148,51 +152,58 @@ if __name__ == "__main__":
for site, num in creds_sites.items(): # Send for each different site to moduleStats for site, num in creds_sites.items(): # Send for each different site to moduleStats
mssg = 'credential;{};{};{}'.format(num, site, Item.get_item_date(item_id)) mssg = f'credential;{num};{site};{Item.get_item_date(item_id)}'
print(mssg) self.redis_logger.debug(mssg)
p.populate_set_out(mssg, 'ModuleStats') self.process.populate_set_out(mssg, 'ModuleStats')
if all_sites: if all_sites:
print("=======> Probably on : {}".format(', '.join(all_sites))) discovered_sites = ', '.join(all_sites)
self.redis_logger.debug(f"=======> Probably on : {discovered_sites}")
date = datetime.datetime.now().strftime("%Y%m") date = datetime.datetime.now().strftime("%Y%m")
for cred in all_credentials: for cred in all_credentials:
maildomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", cred.lower())[0] maildomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", cred.lower())[0]
faup.decode(maildomains) self.faup.decode(maildomains)
tld = faup.get()['tld'] tld = self.faup.get()['tld']
## TODO: # FIXME: remove me ## TODO: # FIXME: remove me
try: try:
tld = tld.decode() tld = tld.decode()
except: except:
pass pass
server_statistics.hincrby('credential_by_tld:'+date, tld, 1) self.server_statistics.hincrby('credential_by_tld:'+date, tld, 1)
else: else:
publisher.info(to_print) self.redis_logger.info(to_print)
print('found {} credentials'.format(len(all_credentials))) self.redis_logger.debug(f'found {nb_cred} credentials')
# For searching credential in termFreq
#for searching credential in termFreq
for cred in all_credentials: for cred in all_credentials:
cred = cred.split('@')[0] #Split to ignore mail address cred = cred.split('@')[0] #Split to ignore mail address
# unique number attached to unique path # unique number attached to unique path
uniq_num_path = server_cred.incr(REDIS_KEY_NUM_PATH) uniq_num_path = self.server_cred.incr(Credential.REDIS_KEY_NUM_PATH)
server_cred.hmset(REDIS_KEY_ALL_PATH_SET, {item_id: uniq_num_path}) self.server_cred.hmset(Credential.REDIS_KEY_ALL_PATH_SET, {item_id: uniq_num_path})
server_cred.hmset(REDIS_KEY_ALL_PATH_SET_REV, {uniq_num_path: item_id}) self.server_cred.hmset(Credential.REDIS_KEY_ALL_PATH_SET_REV, {uniq_num_path: item_id})
# unique number attached to unique username # unique number attached to unique username
uniq_num_cred = server_cred.hget(REDIS_KEY_ALL_CRED_SET, cred) uniq_num_cred = self.server_cred.hget(Credential.REDIS_KEY_ALL_CRED_SET, cred)
if uniq_num_cred is None: #cred do not exist, create new entries if uniq_num_cred is None:
uniq_num_cred = server_cred.incr(REDIS_KEY_NUM_USERNAME) # cred do not exist, create new entries
server_cred.hmset(REDIS_KEY_ALL_CRED_SET, {cred: uniq_num_cred}) uniq_num_cred = self.server_cred.incr(Credential.REDIS_KEY_NUM_USERNAME)
server_cred.hmset(REDIS_KEY_ALL_CRED_SET_REV, {uniq_num_cred: cred}) self.server_cred.hmset(Credential.REDIS_KEY_ALL_CRED_SET, {cred: uniq_num_cred})
self.server_cred.hmset(Credential.REDIS_KEY_ALL_CRED_SET_REV, {uniq_num_cred: cred})
# Add the mapping between the credential and the path # Add the mapping between the credential and the path
server_cred.sadd(REDIS_KEY_MAP_CRED_TO_PATH+'_'+str(uniq_num_cred), uniq_num_path) self.server_cred.sadd(Credential.REDIS_KEY_MAP_CRED_TO_PATH+'_'+str(uniq_num_cred), uniq_num_path)
# Split credentials on capital letters, numbers, dots and so on # Split credentials on capital letters, numbers, dots and so on
# Add the split to redis, each split point towards its initial credential unique number # Add the split to redis, each split point towards its initial credential unique number
splitedCred = re.findall(REGEX_CRED, cred) splitedCred = re.findall(Credential.REGEX_CRED, cred)
for partCred in splitedCred: for partCred in splitedCred:
if len(partCred) > minimumLengthThreshold: if len(partCred) > self.minimumLengthThreshold:
server_cred.sadd(partCred, uniq_num_cred) self.server_cred.sadd(partCred, uniq_num_cred)
if __name__ == '__main__':
module = Credential()
module.run()

View file

@ -11,29 +11,31 @@ It apply credit card regexes on paste content and warn if above a threshold.
""" """
##################################
# Import External packages
##################################
import pprint import pprint
import time import time
from packages import Paste
from packages import lib_refine
from pubsublogger import publisher from pubsublogger import publisher
import re import re
import sys import sys
##################################
# Import Project packages
##################################
from module.abstract_module import AbstractModule
from packages import Paste
from packages import lib_refine
from Helper import Process from Helper import Process
if __name__ == "__main__":
publisher.port = 6380
publisher.channel = "Script"
config_section = 'CreditCards' class CreditCards(AbstractModule):
"""
CreditCards module for AIL framework
"""
p = Process(config_section) def __init__(self):
super(CreditCards, self).__init__()
# FUNCTIONS #
publisher.info("CreditCards script started")
creditcard_regex = "4[0-9]{12}(?:[0-9]{3})?"
# Source: http://www.richardsramblings.com/regex/credit-card-numbers/ # Source: http://www.richardsramblings.com/regex/credit-card-numbers/
cards = [ cards = [
@ -45,41 +47,48 @@ if __name__ == "__main__":
r'\b(?:5[0678]\d\d|6304|6390|67\d\d)\d{8,15}\b', # Maestro r'\b(?:5[0678]\d\d|6304|6390|67\d\d)\d{8,15}\b', # Maestro
] ]
regex = re.compile('|'.join(cards)) self.regex = re.compile('|'.join(cards))
while True: # Waiting time in secondes between to message proccessed
message = p.get_from_set() self.pending_seconds = 10
if message is not None:
# Send module state to logs
self.redis_logger.info(f"Module {self.module_name} initialized")
def compute(self, message):
filename, score = message.split() filename, score = message.split()
paste = Paste.Paste(filename) paste = Paste.Paste(filename)
content = paste.get_p_content() content = paste.get_p_content()
all_cards = re.findall(regex, content) all_cards = re.findall(self.regex, content)
if len(all_cards) > 0: if len(all_cards) > 0:
print('All matching', all_cards) self.redis_logger.debug('All matching', all_cards)
creditcard_set = set([]) creditcard_set = set([])
for card in all_cards: for card in all_cards:
clean_card = re.sub('[^0-9]', '', card) clean_card = re.sub('[^0-9]', '', card)
# TODO purpose of this assignation ?
clean_card = clean_card clean_card = clean_card
if lib_refine.is_luhn_valid(clean_card): if lib_refine.is_luhn_valid(clean_card):
print(clean_card, 'is valid') self.redis_logger.debug(clean_card, 'is valid')
creditcard_set.add(clean_card) creditcard_set.add(clean_card)
pprint.pprint(creditcard_set) pprint.pprint(creditcard_set)
to_print = 'CreditCard;{};{};{};'.format( to_print = f'CreditCard;{paste.p_source};{paste.p_date};{paste.p_name};'
paste.p_source, paste.p_date, paste.p_name)
if (len(creditcard_set) > 0):
publisher.warning('{}Checked {} valid number(s);{}'.format(
to_print, len(creditcard_set), paste.p_rel_path))
print('{}Checked {} valid number(s);{}'.format(
to_print, len(creditcard_set), paste.p_rel_path))
#Send to duplicate
p.populate_set_out(filename, 'Duplicate')
msg = 'infoleak:automatic-detection="credit-card";{}'.format(filename) if (len(creditcard_set) > 0):
p.populate_set_out(msg, 'Tags') self.redis_logger.warning(f'{to_print}Checked {len(creditcard_set)} valid number(s);{paste.p_rel_path}')
#Send to duplicate
self.process.populate_set_out(filename, 'Duplicate')
msg = f'infoleak:automatic-detection="credit-card";{filename}'
self.process.populate_set_out(msg, 'Tags')
else: else:
publisher.info('{}CreditCard related;{}'.format(to_print, paste.p_rel_path)) self.redis_logger.info(f'{to_print}CreditCard related;{paste.p_rel_path}')
else:
publisher.debug("Script creditcard is idling 1m") if __name__ == '__main__':
time.sleep(10)
module = CreditCards()
module.run()

View file

@ -9,12 +9,20 @@ The DomClassifier modules extract and classify Internet domains/hostnames/IP add
the out output of the Global module. the out output of the Global module.
""" """
##################################
# Import External packages
##################################
import os import os
import sys import sys
import time import time
from pubsublogger import publisher from pubsublogger import publisher
import DomainClassifier.domainclassifier import DomainClassifier.domainclassifier
##################################
# Import Project packages
##################################
from module.abstract_module import AbstractModule
from Helper import Process from Helper import Process
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib')) sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
@ -22,60 +30,63 @@ import d4
import item_basic import item_basic
def main(): class DomClassifier(AbstractModule):
publisher.port = 6380 """
publisher.channel = "Script" DomClassifier module for AIL framework
"""
config_section = 'DomClassifier' def __init__(self):
super(DomClassifier, self).__init__()
p = Process(config_section) # Waiting time in secondes between to message proccessed
addr_dns = p.config.get("DomClassifier", "dns") self.pending_seconds = 1
publisher.info("""ZMQ DomainClassifier is Running""") addr_dns = self.process.config.get("DomClassifier", "dns")
c = DomainClassifier.domainclassifier.Extract(rawtext="", nameservers=[addr_dns]) self.redis_logger.info("""ZMQ DomainClassifier is Running""")
cc = p.config.get("DomClassifier", "cc") self.c = DomainClassifier.domainclassifier.Extract(rawtext="", nameservers=[addr_dns])
cc_tld = p.config.get("DomClassifier", "cc_tld")
while True: self.cc = self.process.config.get("DomClassifier", "cc")
self.cc_tld = self.process.config.get("DomClassifier", "cc_tld")
# Send module state to logs
self.redis_logger.info("Module %s initialized" % (self.module_name))
def compute(self, message):
try: try:
item_id = p.get_from_set() item_content = item_basic.get_item_content(message)
mimetype = item_basic.get_item_mimetype(message)
if item_id is None: item_basename = item_basic.get_basename(message)
publisher.debug("Script DomClassifier is idling 1s") item_source = item_basic.get_source(message)
time.sleep(1) item_date = item_basic.get_item_date(message)
continue
item_content = item_basic.get_item_content(item_id)
mimetype = item_basic.get_item_mimetype(item_id)
item_basename = item_basic.get_basename(item_id)
item_source = item_basic.get_source(item_id)
item_date = item_basic.get_item_date(item_id)
if mimetype.split('/')[0] == "text": if mimetype.split('/')[0] == "text":
c.text(rawtext=item_content) self.c.text(rawtext=item_content)
c.potentialdomain() self.c.potentialdomain()
c.validdomain(passive_dns=True, extended=False) self.c.validdomain(passive_dns=True, extended=False)
print(c.vdomain) self.redis_logger.debug(self.c.vdomain)
if c.vdomain and d4.is_passive_dns_enabled(): if self.c.vdomain and d4.is_passive_dns_enabled():
for dns_record in c.vdomain: for dns_record in self.c.vdomain:
p.populate_set_out(dns_record) self.process.populate_set_out(dns_record)
localizeddomains = c.include(expression=cc_tld) localizeddomains = self.c.include(expression=self.cc_tld)
if localizeddomains: if localizeddomains:
print(localizeddomains) self.redis_logger.debug(localizeddomains)
publisher.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {cc_tld};{item_id}") self.redis_logger.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc_tld};{message}")
localizeddomains = c.localizedomain(cc=cc) localizeddomains = self.c.localizedomain(cc=self.cc)
if localizeddomains: if localizeddomains:
print(localizeddomains) self.redis_logger.debug(localizeddomains)
publisher.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {cc};{item_id}") self.redis_logger.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc};{message}")
except IOError as err:
self.redis_logger.error(f"Duplicate;{item_source};{item_date};{item_basename};CRC Checksum Failed")
raise Exception(f"CRC Checksum Failed on: {message}")
except IOError:
print("CRC Checksum Failed on :", item_id)
publisher.error(f"Duplicate;{item_source};{item_date};{item_basename};CRC Checksum Failed")
if __name__ == "__main__": if __name__ == "__main__":
main() module = DomClassifier()
module.run()

View file

@ -20,6 +20,10 @@ Requirements
*Need the ZMQ_Feed_Q Module running to be able to work properly. *Need the ZMQ_Feed_Q Module running to be able to work properly.
""" """
##################################
# Import External packages
##################################
import base64 import base64
import hashlib import hashlib
import io import io
@ -28,146 +32,90 @@ import os
import sys import sys
import time import time
import uuid import uuid
import datetime import datetime
import redis import redis
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
from pubsublogger import publisher from pubsublogger import publisher
##################################
# Import Project packages
##################################
from module.abstract_module import AbstractModule
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
from Helper import Process from Helper import Process
config_loader = ConfigLoader.ConfigLoader()
r_stats = config_loader.get_redis_conn("ARDB_Statistics")
config_loader = None
def gunzip_bytes_obj(bytes_obj): class Global(AbstractModule):
in_ = io.BytesIO() """
in_.write(bytes_obj) Global module for AIL framework
in_.seek(0) """
with gzip.GzipFile(fileobj=in_, mode='rb') as fo:
gunzipped_bytes_obj = fo.read()
return gunzipped_bytes_obj
def rreplace(s, old, new, occurrence): def __init__(self):
li = s.rsplit(old, occurrence) super(Global, self).__init__()
return new.join(li)
self.r_stats = ConfigLoader.ConfigLoader().get_redis_conn("ARDB_Statistics")
self.processed_paste = 0
# TODO rename time_1 explicitely
self.time_1 = time.time()
# Get and sanityze PASTE DIRECTORY
self.PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], self.process.config.get("Directories", "pastes"))
self.PASTES_FOLDERS = self.PASTES_FOLDER + '/'
self.PASTES_FOLDERS = os.path.join(os.path.realpath(self.PASTES_FOLDERS), '')
# Waiting time in secondes between to message proccessed
self.pending_seconds = 0.5
# Send module state to logs
self.redis_logger.info(f"Module {self.module_name} initialized")
if __name__ == '__main__': def computeNone(self):
publisher.port = 6380 difftime = time.time() - self.time_1
publisher.channel = 'Script' if int(difftime) > 30:
processed_paste = 0 to_print = f'Global; ; ; ;glob Processed {self.processed_paste} paste(s) in {difftime} s'
time_1 = time.time() self.redis_logger.debug(to_print)
config_section = 'Global' self.time_1 = time.time()
self.processed_paste = 0
p = Process(config_section)
# get and sanityze PASTE DIRECTORY def compute(self, message):
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes")) # Recovering the streamed message informations
PASTES_FOLDERS = PASTES_FOLDER + '/'
PASTES_FOLDERS = os.path.join(os.path.realpath(PASTES_FOLDERS), '')
# LOGGING #
publisher.info("Feed Script started to receive & publish.")
while True:
message = p.get_from_set()
# Recovering the streamed message informations.
if message is not None:
splitted = message.split() splitted = message.split()
if len(splitted) == 2: if len(splitted) == 2:
paste, gzip64encoded = splitted paste, gzip64encoded = splitted
else:
# TODO Store the name of the empty paste inside a Redis-list.
print("Empty Paste: not processed")
publisher.debug("Empty Paste: {0} not processed".format(message))
continue
else:
#print("Empty Queues: Waiting...")
if int(time.time() - time_1) > 30:
to_print = 'Global; ; ; ;glob Processed {0} paste(s) in {1} s'.format(processed_paste, time.time() - time_1)
print(to_print)
#publisher.info(to_print)
time_1 = time.time()
processed_paste = 0
time.sleep(0.5)
continue
# remove PASTES_FOLDER from item path (crawled item + submited) # Remove PASTES_FOLDER from item path (crawled item + submited)
if PASTES_FOLDERS in paste: if self.PASTES_FOLDERS in paste:
paste = paste.replace(PASTES_FOLDERS, '', 1) paste = paste.replace(self.PASTES_FOLDERS, '', 1)
file_name_paste = paste.split('/')[-1] file_name_paste = paste.split('/')[-1]
if len(file_name_paste) > 255: if len(file_name_paste) > 255:
new_file_name_paste = '{}{}.gz'.format(file_name_paste[:215], str(uuid.uuid4())) new_file_name_paste = '{}{}.gz'.format(file_name_paste[:215], str(uuid.uuid4()))
paste = rreplace(paste, file_name_paste, new_file_name_paste, 1) paste = self.rreplace(paste, file_name_paste, new_file_name_paste, 1)
# Creating the full filepath # Creating the full filepath
filename = os.path.join(PASTES_FOLDER, paste) filename = os.path.join(self.PASTES_FOLDER, paste)
filename = os.path.realpath(filename) filename = os.path.realpath(filename)
# incorrect filename # Incorrect filename
if not os.path.commonprefix([filename, PASTES_FOLDER]) == PASTES_FOLDER: if not os.path.commonprefix([filename, self.PASTES_FOLDER]) == self.PASTES_FOLDER:
print('Path traversal detected {}'.format(filename)) self.redis_logger.warning(f'Global; Path traversal detected {filename}')
publisher.warning('Global; Path traversal detected')
else:
# decode compressed base64 else:
# Decode compressed base64
decoded = base64.standard_b64decode(gzip64encoded) decoded = base64.standard_b64decode(gzip64encoded)
try: new_file_content = self.gunzip_bytes_obj(decoded)
new_file_content = gunzip_bytes_obj(decoded)
except Exception as e:
print('{}, {}'.format(filename, e))
publisher.warning('Global; Invalid Gzip file: {}, {}'.format(filename, e))
continue
# check if file exist if new_file_content:
if os.path.isfile(filename):
print('File already exist {}'.format(filename))
publisher.warning('Global; File already exist')
try: filename = self.check_filename(filename, new_file_content)
with gzip.open(filename, 'rb') as f:
curr_file_content = f.read()
except EOFError:
publisher.warning('Global; Incomplete file: {}'.format(filename))
# save daily stats
r_stats.zincrby('module:Global:incomplete_file', datetime.datetime.now().strftime('%Y%m%d'), 1)
# discard item
continue
except OSError:
publisher.warning('Global; Not a gzipped file: {}'.format(filename))
# save daily stats
r_stats.zincrby('module:Global:invalid_file', datetime.datetime.now().strftime('%Y%m%d'), 1)
# discard item
continue
curr_file_md5 = hashlib.md5(curr_file_content).hexdigest() if filename:
new_file_md5 = hashlib.md5(new_file_content).hexdigest()
if new_file_md5 != curr_file_md5:
if filename.endswith('.gz'):
filename = '{}_{}.gz'.format(filename[:-3], new_file_md5)
else:
filename = '{}_{}'.format(filename, new_file_md5)
# continue if new file already exist
if os.path.isfile(filename):
print('ignore duplicated file')
continue
print('new file: {}'.format(filename))
# ignore duplicate
else:
print('ignore duplicated file')
continue
# create subdir # create subdir
dirname = os.path.dirname(filename) dirname = os.path.dirname(filename)
@ -178,9 +126,109 @@ if __name__ == '__main__':
f.write(decoded) f.write(decoded)
paste = filename paste = filename
# remove PASTES_FOLDER from # remove self.PASTES_FOLDER from
if PASTES_FOLDERS in paste: if self.PASTES_FOLDERS in paste:
paste = paste.replace(PASTES_FOLDERS, '', 1) paste = paste.replace(self.PASTES_FOLDERS, '', 1)
p.populate_set_out(paste) self.process.populate_set_out(paste)
processed_paste+=1 self.processed_paste+=1
else:
# TODO Store the name of the empty paste inside a Redis-list
self.redis_logger.debug(f"Empty Paste: {message} not processed")
def check_filename(self, filename, new_file_content):
"""
Check if file is not a duplicated file
return the filename if new file, else None
"""
# check if file exist
if os.path.isfile(filename):
self.redis_logger.warning(f'File already exist {filename}')
# Check that file already exists but content differs
curr_file_content = self.gunzip_file(filename)
if curr_file_content:
# Compare file content with message content with MD5 checksums
curr_file_md5 = hashlib.md5(curr_file_content).hexdigest()
new_file_md5 = hashlib.md5(new_file_content).hexdigest()
if new_file_md5 != curr_file_md5:
# MD5 are not equals, verify filename
if filename.endswith('.gz'):
filename = f'{filename[:-3]}_{new_file_md5}.gz'
else:
filename = f'{filename}_{new_file_md5}'
self.redis_logger.debug(f'new file to check: {filename}')
if os.path.isfile(filename):
# Ignore duplicate
self.redis_logger.debug(f'ignore duplicated file {filename}')
filename = None
else:
# Ignore duplicate checksum equals
self.redis_logger.debug(f'ignore duplicated file {filename}')
filename = None
else:
# File not unzipped
filename = None
return filename
def gunzip_file(self, filename):
"""
Unzip a file
publish stats if failure
"""
curr_file_content = None
try:
with gzip.open(filename, 'rb') as f:
curr_file_content = f.read()
except EOFError:
self.redis_logger.warning(f'Global; Incomplete file: {filename}')
# save daily stats
self.r_stats.zincrby('module:Global:incomplete_file', datetime.datetime.now().strftime('%Y%m%d'), 1)
except OSError:
self.redis_logger.warning(f'Global; Not a gzipped file: {filename}')
# save daily stats
self.r_stats.zincrby('module:Global:invalid_file', datetime.datetime.now().strftime('%Y%m%d'), 1)
return curr_file_content
def gunzip_bytes_obj(self, bytes_obj):
gunzipped_bytes_obj = None
try:
in_ = io.BytesIO()
in_.write(bytes_obj)
in_.seek(0)
with gzip.GzipFile(fileobj=in_, mode='rb') as fo:
gunzipped_bytes_obj = fo.read()
except Exception as e:
self.redis_logger.warning(f'Global; Invalid Gzip file: {filename}, {e}')
return gunzipped_bytes_obj
def rreplace(self, s, old, new, occurrence):
li = s.rsplit(old, occurrence)
return new.join(li)
if __name__ == '__main__':
module = Global()
module.run()

View file

@ -54,9 +54,9 @@ class Phone(AbstractModule):
# If the list is greater than 4, we consider the Paste may contain a list of phone numbers # If the list is greater than 4, we consider the Paste may contain a list of phone numbers
if len(results) > 4: if len(results) > 4:
self.redis_logger.debug(results) self.redis_logger.debug(results)
self.redis_logger.warning('{} contains PID (phone numbers)'.format(paste.p_name)) self.redis_logger.warning(f'{paste.p_name} contains PID (phone numbers)')
msg = 'infoleak:automatic-detection="phone-number";{}'.format(message) msg = f'infoleak:automatic-detection="phone-number";{message}'
self.process.populate_set_out(msg, 'Tags') self.process.populate_set_out(msg, 'Tags')
# Send to duplicate # Send to duplicate
@ -75,7 +75,7 @@ class Phone(AbstractModule):
pass pass
for country_code in stats: for country_code in stats:
if stats[country_code] > 4: if stats[country_code] > 4:
self.redis_logger.warning('{} contains Phone numbers with country code {}'.format(paste.p_name, country_code)) self.redis_logger.warning(f'{paste.p_name} contains Phone numbers with country code {country_code}')
if __name__ == '__main__': if __name__ == '__main__':

View file

@ -14,47 +14,99 @@
Hutto, C.J. & Gilbert, E.E. (2014). VADER: A Parsimonious Rule-based Model for Sentiment Analysis of Social Media Text. Eighth International Conference on Weblogs and Social Media (ICWSM-14). Ann Arbor, MI, June 2014. Hutto, C.J. & Gilbert, E.E. (2014). VADER: A Parsimonious Rule-based Model for Sentiment Analysis of Social Media Text. Eighth International Conference on Weblogs and Social Media (ICWSM-14). Ann Arbor, MI, June 2014.
""" """
##################################
# Import External packages
##################################
import os import os
import sys import sys
import time import time
import datetime import datetime
import calendar import calendar
import redis import redis
import json import json
import signal
from pubsublogger import publisher from pubsublogger import publisher
from Helper import Process
from packages import Paste
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
from nltk.sentiment.vader import SentimentIntensityAnalyzer from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk import tokenize, download from nltk import tokenize, download
##################################
# Import Project packages
##################################
from module.abstract_module import AbstractModule
from Helper import Process
from packages import Paste
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
class TimeoutException(Exception):
pass
def timeout_handler(signum, frame):
raise TimeoutException
signal.signal(signal.SIGALRM, timeout_handler)
class SentimentAnalysis(AbstractModule):
"""
SentimentAnalysis module for AIL framework
"""
# Config Variables # Config Variables
accepted_Mime_type = ['text/plain'] accepted_Mime_type = ['text/plain']
size_threshold = 250
line_max_length_threshold = 1000 line_max_length_threshold = 1000
#time_clean_sentiment_db = 60*60
def Analyse(message, server): def __init__(self):
path = message super(SentimentAnalysis, self).__init__()
paste = Paste.Paste(path)
self.sentiment_lexicon_file = ConfigLoader.ConfigLoader().get_config_str("Directories", "sentiment_lexicon_file")
# REDIS_LEVEL_DB #
self.db = ConfigLoader.ConfigLoader().get_redis_conn("ARDB_Sentiment")
self.time1 = time.time()
# Waiting time in secondes between to message proccessed
self.pending_seconds = 1
# Send module state to logs
self.redis_logger.info(f"Module {self.module_name} initialized")
def compute(self, message):
# Max time to compute one entry
signal.alarm(60)
try:
self.analyse(message)
except TimeoutException:
self.process.incr_module_timeout_statistic()
self.redis_logger.debug(f"{message} processing timeout")
else:
signal.alarm(0)
def analyse(self, message):
paste = Paste.Paste(message)
# get content with removed line + number of them # get content with removed line + number of them
num_line_removed, p_content = paste.get_p_content_with_removed_lines(line_max_length_threshold) num_line_removed, p_content = paste.get_p_content_with_removed_lines(SentimentAnalysis.line_max_length_threshold)
provider = paste.p_source provider = paste.p_source
p_date = str(paste._get_p_date()) p_date = str(paste._get_p_date())
p_MimeType = paste._get_p_encoding() p_MimeType = paste._get_p_encoding()
# Perform further analysis # Perform further analysis
if p_MimeType == "text/plain": if p_MimeType == "text/plain":
if isJSON(p_content): if self.isJSON(p_content):
p_MimeType = "JSON" p_MimeType = "JSON"
if p_MimeType in accepted_Mime_type: if p_MimeType in SentimentAnalysis.accepted_Mime_type:
self.redis_logger.debug(f'Accepted :{p_MimeType}')
the_date = datetime.date(int(p_date[0:4]), int(p_date[4:6]), int(p_date[6:8])) the_date = datetime.date(int(p_date[0:4]), int(p_date[4:6]), int(p_date[6:8]))
the_time = datetime.datetime.now() the_time = datetime.datetime.now()
@ -102,19 +154,19 @@ def Analyse(message, server):
# {Provider_TimestampInHour_i -> UniqID_i}_j # {Provider_TimestampInHour_i -> UniqID_i}_j
# (UniqID_i -> PasteValue_i) # (UniqID_i -> PasteValue_i)
server.sadd('Provider_set', provider) self.db.sadd('Provider_set', provider)
provider_timestamp = provider + '_' + str(timestamp) provider_timestamp = provider + '_' + str(timestamp)
server.incr('UniqID') self.db.incr('UniqID')
UniqID = server.get('UniqID') UniqID = self.db.get('UniqID')
print(provider_timestamp, '->', UniqID, 'dropped', num_line_removed, 'lines') self.redis_logger.debug(f'{provider_timestamp}->{UniqID}dropped{num_line_removed}lines')
server.sadd(provider_timestamp, UniqID) self.db.sadd(provider_timestamp, UniqID)
server.set(UniqID, avg_score) self.db.set(UniqID, avg_score)
else: else:
print('Dropped:', p_MimeType) self.redis_logger.debug(f'Dropped:{p_MimeType}')
def isJSON(content): def isJSON(self, content):
try: try:
json.loads(content) json.loads(content)
return True return True
@ -122,58 +174,8 @@ def isJSON(content):
except Exception: except Exception:
return False return False
import signal
class TimeoutException(Exception):
pass
def timeout_handler(signum, frame):
raise TimeoutException
signal.signal(signal.SIGALRM, timeout_handler)
if __name__ == '__main__': if __name__ == '__main__':
# If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
# Port of the redis instance used by pubsublogger
publisher.port = 6380
# Script is the default channel used for the modules.
publisher.channel = 'Script'
# Section name in bin/packages/modules.cfg module = SentimentAnalysis()
config_section = 'SentimentAnalysis' module.run()
# Setup the I/O queues
p = Process(config_section)
# Sent to the logging a description of the module
publisher.info("<description of the module>")
config_loader = ConfigLoader.ConfigLoader()
sentiment_lexicon_file = config_loader.get_config_str("Directories", "sentiment_lexicon_file")
# REDIS_LEVEL_DB #
server = config_loader.get_redis_conn("ARDB_Sentiment")
config_loader = None
time1 = time.time()
while True:
message = p.get_from_set()
if message is None:
#if int(time.time() - time1) > time_clean_sentiment_db:
# clean_db()
# time1 = time.time()
# continue
#else:
publisher.debug("{} queue is empty, waiting".format(config_section))
time.sleep(1)
continue
signal.alarm(60)
try:
Analyse(message, server)
except TimeoutException:
p.incr_module_timeout_statistic()
print ("{0} processing timeout".format(message))
continue
else:
signal.alarm(0)

View file

@ -8,42 +8,47 @@ The Tags Module
This module create tags. This module create tags.
""" """
import time
##################################
# Import External packages
##################################
import time
from pubsublogger import publisher from pubsublogger import publisher
##################################
# Import Project packages
##################################
from module.abstract_module import AbstractModule
from Helper import Process from Helper import Process
from packages import Tag from packages import Tag
if __name__ == '__main__':
# Port of the redis instance used by pubsublogger class Tags(AbstractModule):
publisher.port = 6380 """
# Script is the default channel used for the modules. Tags module for AIL framework
publisher.channel = 'Script' """
# Section name in bin/packages/modules.cfg def __init__(self):
config_section = 'Tags' super(Tags, self).__init__()
# Setup the I/O queues # Waiting time in secondes between to message proccessed
p = Process(config_section) self.pending_seconds = 10
# Sent to the logging a description of the module # Send module state to logs
publisher.info("Tags module started") self.redis_logger.info(f"Module {self.module_name} initialized")
# Endless loop getting messages from the input queue
while True:
# Get one message from the input queue
message = p.get_from_set()
if message is None: def compute(self, message):
publisher.debug("{} queue is empty, waiting 10s".format(config_section)) self.redis_logger.debug(message)
time.sleep(10)
continue
else:
print(message)
tag, item_id = message.split(';') tag, item_id = message.split(';')
Tag.add_tag("item", tag, item_id) Tag.add_tag("item", tag, item_id)
p.populate_set_out(message, 'MISP_The_Hive_feeder') self.process.populate_set_out(message, 'MISP_The_Hive_feeder')
if __name__ == '__main__':
module = Tags()
module.run()

View file

@ -50,7 +50,7 @@ class Web(AbstractModule):
""" """
Init Web Init Web
""" """
super(Web, self).__init__() super(Web, self).__init__(logger_channel='script:web')
# REDIS Cache # REDIS Cache
self.r_serv2 = redis.StrictRedis( self.r_serv2 = redis.StrictRedis(
@ -82,7 +82,8 @@ class Web(AbstractModule):
self.prec_filename = None self.prec_filename = None
# Send module state to logs # Send module state to logs
self.redis_logger.info("Module %s initialized" % (self.module_name)) self.redis_logger.info(f"Module {self.module_name} initialized")
def compute(self, message): def compute(self, message):
""" """
@ -91,49 +92,48 @@ class Web(AbstractModule):
# Extract item # Extract item
filename, score = message.split() filename, score = message.split()
if self.prec_filename is None or filename != self.prec_filename:
domains_list = set() domains_list = set()
hosts_list = set()
if self.prec_filename is None or filename != self.prec_filename:
domains_list.clear()
hosts_list.clear()
PST = Paste.Paste(filename) PST = Paste.Paste(filename)
client = ip2asn() client = ip2asn()
detected_urls = PST.get_regex(self.url_regex) detected_urls = PST.get_regex(self.url_regex)
if len(detected_urls) > 0: if len(detected_urls) > 0:
to_print = 'Web;{};{};{};'.format( to_print = f'Web;{PST.p_source};{PST.p_date};{PST.p_name};'
PST.p_source, PST.p_date, PST.p_name) self.redis_logger.info(f'{to_print}Detected {len(detected_urls)} URL;{PST.p_rel_path}')
self.redis_logger.info('{}Detected {} URL;{}'.format(
to_print, len(detected_urls), PST.p_rel_path))
for url in detected_urls: for url in detected_urls:
self.redis_logger.debug("match regex: %s" % (url))
# self.redis_logger.debug("match regex search: %s"%(url)) if url.endswith(".on"):
# URL is an onion link skip
# TODO send to TOR crawler ?
# self.redis_logger.debug("Skip onion link")
continue
to_send = "{} {} {}".format(url, PST._get_p_date(), filename) self.redis_logger.debug(f"match regex: {url}")
to_send = f"{url} {PST._get_p_date()} {filename}"
self.process.populate_set_out(to_send, 'Url') self.process.populate_set_out(to_send, 'Url')
self.redis_logger.debug("url_parsed: %s" % (to_send)) self.redis_logger.debug(f"url_parsed: {to_send}")
self.faup.decode(url) self.faup.decode(url)
domain = self.faup.get_domain() domain = self.faup.get_domain()
subdomain = self.faup.get_subdomain() subdomain = self.faup.get_subdomain()
self.redis_logger.debug('{} Published'.format(url)) self.redis_logger.debug(f'{url} Published')
if subdomain is not None:
# TODO: # FIXME: remove me
try:
subdomain = subdomain.decode()
except:
pass
if domain is not None:
# TODO: # FIXME: remove me
try:
domain = domain.decode()
except:
pass
domains_list.add(domain) domains_list.add(domain)
hostl = self.avoidNone(subdomain) + self.avoidNone(domain) hostl = f'{subdomain}.{domain}' if subdomain else domain
if hostl not in hosts_list:
# test host only once a host in a paste
hosts_list.add(hostl)
try: try:
socket.setdefaulttimeout(1) socket.setdefaulttimeout(1)
@ -158,15 +158,13 @@ class Web(AbstractModule):
# EU is not an official ISO 3166 code (but used by RIPE # EU is not an official ISO 3166 code (but used by RIPE
# IP allocation) # IP allocation)
if cc is not None and cc != "EU": if cc is not None and cc != "EU":
self.redis_logger.debug('{};{};{};{}'.format(hostl, asn, cc, countryname = pycountry.countries.get(alpha_2=cc).name
pycountry.countries.get(alpha_2=cc).name)) self.redis_logger.debug(f'{hostl};{asn};{cc};{countryname}')
if cc == self.cc_critical: if cc == self.cc_critical:
to_print = 'Url;{};{};{};Detected {} {}'.format( to_print = f'Url;{PST.p_source};{PST.p_date};{PST.p_name};Detected {hostl} {cc}'
PST.p_source, PST.p_date, PST.p_name,
hostl, cc)
self.redis_logger.info(to_print) self.redis_logger.info(to_print)
else: else:
self.redis_logger.debug('{};{};{}'.format(hostl, asn, cc)) self.redis_logger.debug(f'{hostl};{asn};{cc}')
A_values = lib_refine.checking_A_record(self.r_serv2, A_values = lib_refine.checking_A_record(self.r_serv2,
domains_list) domains_list)

View file

@ -21,10 +21,12 @@ class AbstractModule(ABC):
Abstract Module class Abstract Module class
""" """
def __init__(self, module_name=None, queue_name=None): def __init__(self, module_name=None, queue_name=None, logger_channel='Script'):
""" """
Init Module Init Module
module_name: str; set the module name if different from the instance ClassName module_name: str; set the module name if different from the instance ClassName
queue_name: str; set the queue name if different from the instance ClassName
logger_channel: str; set the logger channel name, 'Script' by default
""" """
# Module name if provided else instance className # Module name if provided else instance className
self.module_name = module_name if module_name else self._module_name() self.module_name = module_name if module_name else self._module_name()
@ -34,12 +36,13 @@ class AbstractModule(ABC):
# Init Redis Logger # Init Redis Logger
self.redis_logger = publisher self.redis_logger = publisher
# Port of the redis instance used by pubsublogger # Port of the redis instance used by pubsublogger
self.redis_logger.port = 6380 self.redis_logger.port = 6380
# Channel name to publish logs # Channel name to publish logs
self.redis_logger.channel = 'Script' # If provided could be a namespaced channel like script:<ModuleName>
# TODO modify generic channel Script to a namespaced channel like: self.redis_logger.channel = logger_channel
# publish module logs to script:<ModuleName> channel
# self.redis_logger.channel = 'script:%s'%(self.module_name) # self.redis_logger.channel = 'script:%s'%(self.module_name)
# Run module endlessly # Run module endlessly
@ -62,18 +65,17 @@ class AbstractModule(ABC):
# Get one message (paste) from the QueueIn (copy of Redis_Global publish) # Get one message (paste) from the QueueIn (copy of Redis_Global publish)
message = self.process.get_from_set() message = self.process.get_from_set()
if message is None: if message:
self.computeNone()
# Wait before next process
self.redis_logger.debug(f"{self.module_name}, waiting for new message, Idling {self.pending_seconds}s")
time.sleep(self.pending_seconds)
continue
try: try:
# Module processing with the message from the queue # Module processing with the message from the queue
self.compute(message) self.compute(message)
except Exception as err: except Exception as err:
self.redis_logger.critical(f"Error in module {self.module_name}: {err}") self.redis_logger.critical(f"Error in module {self.module_name}: {err}")
else:
self.computeNone()
# Wait before next process
self.redis_logger.debug(f"{self.module_name}, waiting for new message, Idling {self.pending_seconds}s")
time.sleep(self.pending_seconds)
def _module_name(self): def _module_name(self):

View file

@ -1,19 +1,27 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
##################################
# Import External packages
##################################
import os import os
import sys import sys
import uuid import uuid
import redis import redis
##################################
# Import Project packages
##################################
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader import ConfigLoader
config_loader = ConfigLoader.ConfigLoader() config_loader = ConfigLoader.ConfigLoader()
r_serv_db = config_loader.get_redis_conn("ARDB_DB") r_serv_db = config_loader.get_redis_conn("ARDB_DB")
r_serv_log_submit = config_loader.get_redis_conn("Redis_Log_submit") r_serv_log_submit = config_loader.get_redis_conn("Redis_Log_submit")
config_loader = None config_loader = None
def is_valid_uuid_v4(UUID): def is_valid_uuid_v4(UUID):
UUID = UUID.replace('-', '') UUID = UUID.replace('-', '')
try: try:
@ -22,7 +30,8 @@ def is_valid_uuid_v4(UUID):
except: except:
return False return False
def create_import_queue(tags, galaxy, paste_content, UUID, password=None, isfile = False):
def create_import_queue(tags, galaxy, paste_content, UUID, password=None, isfile=False, source=None):
# save temp value on disk # save temp value on disk
for tag in tags: for tag in tags:
@ -35,6 +44,9 @@ def create_import_queue(tags, galaxy, paste_content, UUID, password=None, isfil
if password: if password:
r_serv_db.set(UUID + ':password', password) r_serv_db.set(UUID + ':password', password)
if source:
r_serv_db.set(UUID + ':source', source)
r_serv_db.set(UUID + ':isfile', isfile) r_serv_db.set(UUID + ':isfile', isfile)
r_serv_log_submit.set(UUID + ':end', 0) r_serv_log_submit.set(UUID + ':end', 0)
@ -45,8 +57,10 @@ def create_import_queue(tags, galaxy, paste_content, UUID, password=None, isfil
# save UUID on disk # save UUID on disk
r_serv_db.sadd('submitted:uuid', UUID) r_serv_db.sadd('submitted:uuid', UUID)
return UUID return UUID
def check_import_status(UUID): def check_import_status(UUID):
if not is_valid_uuid_v4(UUID): if not is_valid_uuid_v4(UUID):
return ({'status': 'error', 'reason': 'Invalid uuid'}, 400) return ({'status': 'error', 'reason': 'Invalid uuid'}, 400)

View file

@ -1,6 +1,17 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
"""
The Submit paste module
================
This module is taking paste in redis queue ARDB_DB and submit to global
"""
##################################
# Import External packages
##################################
import os import os
import sys import sys
import gzip import gzip
@ -9,47 +20,280 @@ import redis
import base64 import base64
import datetime import datetime
import time import time
# from sflock.main import unpack
# import sflock
from sflock.main import unpack ##################################
import sflock # Import Project packages
##################################
from module.abstract_module import AbstractModule
from Helper import Process from Helper import Process
from pubsublogger import publisher from pubsublogger import publisher
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
import Tag import Tag
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader import ConfigLoader
def create_paste(uuid, paste_content, ltags, ltagsgalaxies, name):
class SubmitPaste(AbstractModule):
"""
Company Credentials module for AIL framework
"""
expire_time = 120
# Text max size
TEXT_MAX_SIZE = ConfigLoader.ConfigLoader().get_config_int("SubmitPaste", "TEXT_MAX_SIZE")
# File max size
FILE_MAX_SIZE = ConfigLoader.ConfigLoader().get_config_int("SubmitPaste", "FILE_MAX_SIZE")
# Allowed file type
ALLOWED_EXTENSIONS = ConfigLoader.ConfigLoader().get_config_str("SubmitPaste", "FILE_ALLOWED_EXTENSIONS").split(',')
def __init__(self):
"""
init
"""
super(SubmitPaste, self).__init__(queue_name='submit_paste')
self.r_serv_db = ConfigLoader.ConfigLoader().get_redis_conn("ARDB_DB")
self.r_serv_log_submit = ConfigLoader.ConfigLoader().get_redis_conn("Redis_Log_submit")
self.r_serv_tags = ConfigLoader.ConfigLoader().get_redis_conn("ARDB_Tags")
self.r_serv_metadata = ConfigLoader.ConfigLoader().get_redis_conn("ARDB_Metadata")
self.serv_statistics = ConfigLoader.ConfigLoader().get_redis_conn("ARDB_Statistics")
self.pending_seconds = 3
self.PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], ConfigLoader.ConfigLoader().get_config_str("Directories", "pastes")) + '/'
def compute(self, uuid):
"""
Main method of the Module to implement
"""
self.redis_logger.debug(f'compute UUID {uuid}')
# get temp value save on disk
ltags = self.r_serv_db.smembers(f'{uuid}:ltags')
ltagsgalaxies = self.r_serv_db.smembers(f'{uuid}:ltagsgalaxies')
paste_content = self.r_serv_db.get(f'{uuid}:paste_content')
isfile = self.r_serv_db.get(f'{uuid}:isfile')
password = self.r_serv_db.get(f'{uuid}:password')
source = self.r_serv_db.get(f'{uuid}:source')
self.redis_logger.debug(f'isfile UUID {isfile}')
self.redis_logger.debug(f'source UUID {source}')
self.redis_logger.debug(f'paste_content UUID {paste_content}')
# needed if redis is restarted
self.r_serv_log_submit.set(f'{uuid}:end', 0)
self.r_serv_log_submit.set(f'{uuid}:processing', 0)
self.r_serv_log_submit.set(f'{uuid}:nb_total', -1)
self.r_serv_log_submit.set(f'{uuid}:nb_end', 0)
self.r_serv_log_submit.set(f'{uuid}:nb_sucess', 0)
self.r_serv_log_submit.set(f'{uuid}:processing', 1)
if isfile == 'True':
# file input
self._manage_file(uuid, paste_content, ltags, ltagsgalaxies, source)
else:
# textarea input paste
self._manage_text(uuid, paste_content, ltags, ltagsgalaxies, source)
# new paste created from file, remove uuid ref
self.remove_submit_uuid(uuid)
def run(self):
"""
Run Module endless process
"""
# Endless loop processing messages from the input queue
while self.proceed:
# Get one message (paste) from the QueueIn (copy of Redis_Global publish)
nb_submit = self.r_serv_db.scard('submitted:uuid')
if nb_submit > 0:
try:
uuid = self.r_serv_db.srandmember('submitted:uuid')
# Module processing with the message from the queue
self.redis_logger.debug(uuid)
self.compute(uuid)
except Exception as err:
self.redis_logger.error(f'Error in module {self.module_name}: {err}')
# Remove uuid ref
self.remove_submit_uuid(uuid)
else:
# Wait before next process
self.redis_logger.debug(f'{self.module_name}, waiting for new message, Idling {self.pending_seconds}s')
time.sleep(self.pending_seconds)
def _manage_text(self, uuid, paste_content, ltags, ltagsgalaxies, source):
"""
Create a paste for given text
"""
if sys.getsizeof(paste_content) < SubmitPaste.TEXT_MAX_SIZE:
self.r_serv_log_submit.set(f'{uuid}:nb_total', 1)
self.create_paste(uuid, paste_content.encode(), ltags, ltagsgalaxies, uuid, source)
time.sleep(0.5)
else:
self.abord_file_submission(uuid, f'Text size is over {SubmitPaste.TEXT_MAX_SIZE} bytes')
def _manage_file(self, uuid, file_full_path, ltags, ltagsgalaxies, source):
"""
Create a paste for given file
"""
self.redis_logger.debug('manage')
if os.path.exists(file_full_path):
self.redis_logger.debug(f'file exists {file_full_path}')
file_size = os.stat(file_full_path).st_size
self.redis_logger.debug(f'file size {file_size}')
# Verify file length
if file_size < SubmitPaste.FILE_MAX_SIZE:
# TODO sanitize filename
filename = file_full_path.split('/')[-1]
self.redis_logger.debug(f'sanitize filename {filename}')
self.redis_logger.debug('file size allowed')
if not '.' in filename:
self.redis_logger.debug('no extension for filename')
try:
# Read file
with open(file_full_path,'r') as f:
content = f.read()
self.r_serv_log_submit.set(uuid + ':nb_total', 1)
self.create_paste(uuid, content.encode(), ltags, ltagsgalaxies, uuid, source)
self.remove_submit_uuid(uuid)
except:
self.abord_file_submission(uuid, "file error")
else:
file_type = filename.rsplit('.', 1)[1]
file_type = file_type.lower()
self.redis_logger.debug(f'file ext {file_type}')
if file_type in SubmitPaste.ALLOWED_EXTENSIONS:
self.redis_logger.debug('Extension allowed')
# TODO enum of possible file extension ?
# TODO verify file hash with virus total ?
if not self._is_compressed_type(file_type):
self.redis_logger.debug('Plain text file')
# plain txt file
with open(file_full_path,'r') as f:
content = f.read()
self.r_serv_log_submit.set(uuid + ':nb_total', 1)
self.create_paste(uuid, content.encode(), ltags, ltagsgalaxies, uuid, source)
else:
# Compressed file
self.abord_file_submission(uuid, "file decompression should be implemented")
# TODO add compress file management
# #decompress file
# try:
# if password == None:
# files = unpack(file_full_path.encode())
# #print(files.children)
# else:
# try:
# files = unpack(file_full_path.encode(), password=password.encode())
# #print(files.children)
# except sflock.exception.IncorrectUsageException:
# self.abord_file_submission(uuid, "Wrong Password")
# raise
# except:
# self.abord_file_submission(uuid, "file decompression error")
# raise
# self.redis_logger.debug('unpacking {} file'.format(files.unpacker))
# if(not files.children):
# self.abord_file_submission(uuid, "Empty compressed file")
# raise
# # set number of files to submit
# self.r_serv_log_submit.set(uuid + ':nb_total', len(files.children))
# n = 1
# for child in files.children:
# if self.verify_extention_filename(child.filename.decode()):
# self.create_paste(uuid, child.contents, ltags, ltagsgalaxies, uuid+'_'+ str(n) , source)
# n = n + 1
# else:
# self.redis_logger.error("Error in module %s: bad extention"%(self.module_name))
# self.addError(uuid, 'Bad file extension: {}'.format(child.filename.decode()) )
# except FileNotFoundError:
# self.redis_logger.error("Error in module %s: file not found"%(self.module_name))
# self.addError(uuid, 'File not found: {}'.format(file_full_path), uuid )
else:
self.abord_file_submission(uuid, f'File :{file_full_path} too large, over {SubmitPaste.FILE_MAX_SIZE} bytes')
else:
self.abord_file_submission(uuid, "Server Error, the archive can't be found")
def _is_compressed_type(self, file_type):
"""
Check if file type is in the list of compressed file extensions format
"""
compressed_type = ['zip', 'gz', 'tar.gz']
return file_type in compressed_type
def remove_submit_uuid(self, uuid):
# save temp value on disk
self.r_serv_db.delete(f'{uuid}:ltags')
self.r_serv_db.delete(f'{uuid}:ltagsgalaxies')
self.r_serv_db.delete(f'{uuid}:paste_content')
self.r_serv_db.delete(f'{uuid}:isfile')
self.r_serv_db.delete(f'{uuid}:password')
self.r_serv_db.delete(f'{uuid}:source')
self.r_serv_log_submit.expire(f'{uuid}:end', SubmitPaste.expire_time)
self.r_serv_log_submit.expire(f'{uuid}:processing', SubmitPaste.expire_time)
self.r_serv_log_submit.expire(f'{uuid}:nb_total', SubmitPaste.expire_time)
self.r_serv_log_submit.expire(f'{uuid}:nb_sucess', SubmitPaste.expire_time)
self.r_serv_log_submit.expire(f'{uuid}:nb_end', SubmitPaste.expire_time)
self.r_serv_log_submit.expire(f'{uuid}:error', SubmitPaste.expire_time)
self.r_serv_log_submit.expire(f'{uuid}:paste_submit_link', SubmitPaste.expire_time)
# delete uuid
self.r_serv_db.srem('submitted:uuid', uuid)
self.redis_logger.debug(f'{uuid} all file submitted')
def create_paste(self, uuid, paste_content, ltags, ltagsgalaxies, name, source=None):
result = False
now = datetime.datetime.now() now = datetime.datetime.now()
save_path = 'submitted/' + now.strftime("%Y") + '/' + now.strftime("%m") + '/' + now.strftime("%d") + '/' + name + '.gz' source = source if source else 'submitted'
save_path = source + '/' + now.strftime("%Y") + '/' + now.strftime("%m") + '/' + now.strftime("%d") + '/' + name + '.gz'
full_path = filename = os.path.join(os.environ['AIL_HOME'], full_path = filename = os.path.join(os.environ['AIL_HOME'],
p.config.get("Directories", "pastes"), save_path) self.process.config.get("Directories", "pastes"), save_path)
if os.path.isfile(full_path): self.redis_logger.debug(f'file path of the paste {full_path}')
addError(uuid, 'File: ' + save_path + ' already exist in submitted pastes')
return 1
try: if not os.path.isfile(full_path):
gzipencoded = gzip.compress(paste_content) # file not exists in AIL paste directory
gzip64encoded = base64.standard_b64encode(gzipencoded).decode() self.redis_logger.debug(f"new paste {paste_content}")
except:
abord_file_submission(uuid, "file error") gzip64encoded = self._compress_encode_content(paste_content)
return 1
if gzip64encoded:
# use relative path # use relative path
rel_item_path = save_path.replace(PASTES_FOLDER, '', 1) rel_item_path = save_path.replace(self.PASTES_FOLDER, '', 1)
self.redis_logger.debug(f"relative path {rel_item_path}")
# send paste to Global module # send paste to Global module
relay_message = "{0} {1}".format(rel_item_path, gzip64encoded) relay_message = f"{rel_item_path} {gzip64encoded}"
p.populate_set_out(relay_message, 'Mixer') self.process.populate_set_out(relay_message, 'Mixer')
# increase nb of paste by feeder name # increase nb of paste by feeder name
r_serv_log_submit.hincrby("mixer_cache:list_feeder", "submitted", 1) self.r_serv_log_submit.hincrby("mixer_cache:list_feeder", source, 1)
# add tags # add tags
for tag in ltags: for tag in ltags:
@ -58,201 +302,75 @@ def create_paste(uuid, paste_content, ltags, ltagsgalaxies, name):
for tag in ltagsgalaxies: for tag in ltagsgalaxies:
Tag.add_tag('item', tag, rel_item_path) Tag.add_tag('item', tag, rel_item_path)
r_serv_log_submit.incr(uuid + ':nb_end') self.r_serv_log_submit.incr(f'{uuid}:nb_end')
r_serv_log_submit.incr(uuid + ':nb_sucess') self.r_serv_log_submit.incr(f'{uuid}:nb_sucess')
if r_serv_log_submit.get(uuid + ':nb_end') == r_serv_log_submit.get(uuid + ':nb_total'): if self.r_serv_log_submit.get(f'{uuid}:nb_end') == self.r_serv_log_submit.get(f'{uuid}:nb_total'):
r_serv_log_submit.set(uuid + ':end', 1) self.r_serv_log_submit.set(f'{uuid}:end', 1)
print(' {} send to Global'.format(rel_item_path)) self.redis_logger.debug(f' {rel_item_path} send to Global')
r_serv_log_submit.sadd(uuid + ':paste_submit_link', rel_item_path) self.r_serv_log_submit.sadd(f'{uuid}:paste_submit_link', rel_item_path)
curr_date = datetime.date.today() curr_date = datetime.date.today()
serv_statistics.hincrby(curr_date.strftime("%Y%m%d"),'submit_paste', 1) self.serv_statistics.hincrby(curr_date.strftime("%Y%m%d"),'submit_paste', 1)
self.redis_logger.debug("paste submitted")
else:
self.addError(uuid, f'File: {save_path} already exist in submitted pastes')
return 0 return result
def addError(uuid, errorMessage):
print(errorMessage) def _compress_encode_content(self, content):
error = r_serv_log_submit.get(uuid + ':error') gzip64encoded = None
try:
gzipencoded = gzip.compress(content)
gzip64encoded = base64.standard_b64encode(gzipencoded).decode()
except:
self.abord_file_submission(uuid, "file error")
return gzip64encoded
def addError(self, uuid, errorMessage):
self.redis_logger.debug(errorMessage)
error = self.r_serv_log_submit.get(f'{uuid}:error')
if error != None: if error != None:
r_serv_log_submit.set(uuid + ':error', error + '<br></br>' + errorMessage) self.r_serv_log_submit.set(f'{uuid}:error', error + '<br></br>' + errorMessage)
r_serv_log_submit.incr(uuid + ':nb_end')
def abord_file_submission(uuid, errorMessage): self.r_serv_log_submit.incr(f'{uuid}:nb_end')
addError(uuid, errorMessage)
r_serv_log_submit.set(uuid + ':end', 1)
def abord_file_submission(self, uuid, errorMessage):
self.redis_logger.debug(f'abord {uuid}, {errorMessage}')
self.addError(uuid, errorMessage)
self.r_serv_log_submit.set(f'{uuid}:end', 1)
curr_date = datetime.date.today() curr_date = datetime.date.today()
serv_statistics.hincrby(curr_date.strftime("%Y%m%d"),'submit_abord', 1) self.serv_statistics.hincrby(curr_date.strftime("%Y%m%d"),'submit_abord', 1)
remove_submit_uuid(uuid) self.remove_submit_uuid(uuid)
def remove_submit_uuid(uuid): def get_item_date(self, item_filename):
# save temp value on disk
r_serv_db.delete(uuid + ':ltags')
r_serv_db.delete(uuid + ':ltagsgalaxies')
r_serv_db.delete(uuid + ':paste_content')
r_serv_db.delete(uuid + ':isfile')
r_serv_db.delete(uuid + ':password')
r_serv_log_submit.expire(uuid + ':end', expire_time)
r_serv_log_submit.expire(uuid + ':processing', expire_time)
r_serv_log_submit.expire(uuid + ':nb_total', expire_time)
r_serv_log_submit.expire(uuid + ':nb_sucess', expire_time)
r_serv_log_submit.expire(uuid + ':nb_end', expire_time)
r_serv_log_submit.expire(uuid + ':error', expire_time)
r_serv_log_submit.expire(uuid + ':paste_submit_link', expire_time)
# delete uuid
r_serv_db.srem('submitted:uuid', uuid)
print('{} all file submitted'.format(uuid))
def get_item_date(item_filename):
l_directory = item_filename.split('/') l_directory = item_filename.split('/')
return '{}{}{}'.format(l_directory[-4], l_directory[-3], l_directory[-2]) return f'{l_directory[-4]}{l_directory[-3]}{l_directory[-2]}'
def verify_extention_filename(filename):
def verify_extention_filename(self, filename):
if not '.' in filename: if not '.' in filename:
return True return True
else: else:
file_type = filename.rsplit('.', 1)[1] file_type = filename.rsplit('.', 1)[1]
#txt file #txt file
if file_type in ALLOWED_EXTENSIONS: if file_type in SubmitPaste.ALLOWED_EXTENSIONS:
return True return True
else: else:
return False return False
if __name__ == "__main__":
publisher.port = 6380 if __name__ == '__main__':
publisher.channel = "Script"
config_loader = ConfigLoader.ConfigLoader() module = SubmitPaste()
module.run()
r_serv_db = config_loader.get_redis_conn("ARDB_DB")
r_serv_log_submit = config_loader.get_redis_conn("Redis_Log_submit")
r_serv_tags = config_loader.get_redis_conn("ARDB_Tags")
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
serv_statistics = config_loader.get_redis_conn("ARDB_Statistics")
expire_time = 120
MAX_FILE_SIZE = 1000000000
ALLOWED_EXTENSIONS = ['txt', 'sh', 'pdf']
config_section = 'submit_paste'
p = Process(config_section)
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
config_loader = None
while True:
# paste submitted
if r_serv_db.scard('submitted:uuid') > 0:
uuid = r_serv_db.srandmember('submitted:uuid')
# get temp value save on disk
ltags = r_serv_db.smembers(uuid + ':ltags')
ltagsgalaxies = r_serv_db.smembers(uuid + ':ltagsgalaxies')
paste_content = r_serv_db.get(uuid + ':paste_content')
isfile = r_serv_db.get(uuid + ':isfile')
password = r_serv_db.get(uuid + ':password')
# needed if redis is restarted
r_serv_log_submit.set(uuid + ':end', 0)
r_serv_log_submit.set(uuid + ':processing', 0)
r_serv_log_submit.set(uuid + ':nb_total', -1)
r_serv_log_submit.set(uuid + ':nb_end', 0)
r_serv_log_submit.set(uuid + ':nb_sucess', 0)
r_serv_log_submit.set(uuid + ':processing', 1)
if isfile == 'True':
file_full_path = paste_content
if not os.path.exists(file_full_path):
abord_file_submission(uuid, "Server Error, the archive can't be found")
continue
#verify file lengh
if os.stat(file_full_path).st_size > MAX_FILE_SIZE:
abord_file_submission(uuid, 'File :{} too large'.format(file_full_path))
else:
filename = file_full_path.split('/')[-1]
if not '.' in filename:
# read file
try:
with open(file_full_path,'r') as f:
content = f.read()
except:
abord_file_submission(uuid, "file error")
continue
r_serv_log_submit.set(uuid + ':nb_total', 1)
create_paste(uuid, content.encode(), ltags, ltagsgalaxies, uuid)
remove_submit_uuid(uuid)
else:
file_type = filename.rsplit('.', 1)[1]
#txt file
if file_type in ALLOWED_EXTENSIONS:
with open(file_full_path,'r') as f:
content = f.read()
r_serv_log_submit.set(uuid + ':nb_total', 1)
create_paste(uuid, content.encode(), ltags, ltagsgalaxies, uuid)
remove_submit_uuid(uuid)
#compressed file
else:
#decompress file
try:
if password == None:
files = unpack(file_full_path.encode())
#print(files.children)
else:
try:
files = unpack(file_full_path.encode(), password=password.encode())
#print(files.children)
except sflock.exception.IncorrectUsageException:
abord_file_submission(uuid, "Wrong Password")
continue
except:
abord_file_submission(uuid, "file decompression error")
continue
print('unpacking {} file'.format(files.unpacker))
if(not files.children):
abord_file_submission(uuid, "Empty compressed file")
continue
# set number of files to submit
r_serv_log_submit.set(uuid + ':nb_total', len(files.children))
n = 1
for child in files.children:
if verify_extention_filename(child.filename.decode()):
create_paste(uuid, child.contents, ltags, ltagsgalaxies, uuid+'_'+ str(n) )
n = n + 1
else:
print('bad extention')
addError(uuid, 'Bad file extension: {}'.format(child.filename.decode()) )
except FileNotFoundError:
print('file not found')
addError(uuid, 'File not found: {}'.format(file_full_path), uuid )
remove_submit_uuid(uuid)
# textarea input paste
else:
r_serv_log_submit.set(uuid + ':nb_total', 1)
create_paste(uuid, paste_content.encode(), ltags, ltagsgalaxies, uuid)
remove_submit_uuid(uuid)
time.sleep(0.5)
# wait for paste
else:
publisher.debug("Script submit_paste is Idling 10s")
time.sleep(3)

View file

@ -280,3 +280,12 @@ domain_proxy = onion.foundation
# list of comma-separated CIDR that you wish to be alerted for. e.g: # list of comma-separated CIDR that you wish to be alerted for. e.g:
#networks = 192.168.34.0/24,10.0.0.0/8,192.168.33.0/24 #networks = 192.168.34.0/24,10.0.0.0/8,192.168.33.0/24
networks = networks =
[SubmitPaste]
# 1 Mb Max text paste size for text submission
TEXT_MAX_SIZE = 1000000
# 1 Gb Max file size for file submission
FILE_MAX_SIZE = 1000000000
# Managed file extenions for file submission, comma separated
# TODO add 'zip', 'gz' and 'tar.gz'
FILE_ALLOWED_EXTENSIONS = 'txt','sh','pdf'

View file

@ -4,12 +4,19 @@
''' '''
Flask global variables shared accross modules Flask global variables shared accross modules
''' '''
##################################
# Import External packages
##################################
import os import os
import re import re
import sys import sys
##################################
# Import Project packages
##################################
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader import ConfigLoader
from pubsublogger import publisher
# FLASK # # FLASK #
app = None app = None
@ -32,6 +39,15 @@ r_serv_db = config_loader.get_redis_conn("ARDB_DB")
r_serv_statistics = config_loader.get_redis_conn("ARDB_Statistics") r_serv_statistics = config_loader.get_redis_conn("ARDB_Statistics")
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
# Logger (Redis)
redis_logger = publisher
# Port of the redis instance used by pubsublogger
redis_logger.port = 6380
# Channel name to publish logs
redis_logger.channel = 'front'
sys.path.append('../../configs/keys') sys.path.append('../../configs/keys')
# MISP # # MISP #
try: try:
@ -110,7 +126,11 @@ crawler_enabled = config_loader.get_config_boolean("Crawler", "activate_crawler"
email_regex = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}' email_regex = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}'
email_regex = re.compile(email_regex) email_regex = re.compile(email_regex)
IMPORT_MAX_TEXT_SIZE = 900000 # size in bytes # SubmitPaste vars
SUBMIT_PASTE_TEXT_MAX_SIZE = int(config_loader.get_config_str("SubmitPaste", "TEXT_MAX_SIZE"))
SUBMIT_PASTE_FILE_MAX_SIZE = int(config_loader.get_config_str("SubmitPaste", "FILE_MAX_SIZE"))
SUBMIT_PASTE_FILE_ALLOWED_EXTENSIONS = [item.strip() for item in config_loader.get_config_str("SubmitPaste", "FILE_ALLOWED_EXTENSIONS").split(',')]
# VT # VT
try: try:

View file

@ -4,28 +4,33 @@
''' '''
Flask functions and routes for the trending modules page Flask functions and routes for the trending modules page
''' '''
import redis ##################################
from flask import Flask, render_template, jsonify, request, Blueprint, url_for, redirect # Import External packages
##################################
from Role_Manager import login_admin, login_analyst
from flask_login import login_required
import unicodedata
import string
import subprocess
import os import os
import sys import sys
import json
import string
import subprocess
import datetime import datetime
import redis
import unicodedata
import uuid import uuid
from io import BytesIO from io import BytesIO
from Date import Date from Date import Date
import json
from flask import Flask, render_template, jsonify, request, Blueprint, url_for, redirect, abort
from functools import wraps
from Role_Manager import login_admin, login_analyst
from flask_login import login_required
##################################
# Import Project packages
##################################
import Paste import Paste
import Import_helper import Import_helper
import Tag import Tag
from pytaxonomies import Taxonomies from pytaxonomies import Taxonomies
from pymispgalaxies import Galaxies, Clusters from pymispgalaxies import Galaxies, Clusters
@ -49,6 +54,8 @@ r_serv_metadata = Flask_config.r_serv_metadata
r_serv_db = Flask_config.r_serv_db r_serv_db = Flask_config.r_serv_db
r_serv_log_submit = Flask_config.r_serv_log_submit r_serv_log_submit = Flask_config.r_serv_log_submit
logger = Flask_config.redis_logger
pymisp = Flask_config.pymisp pymisp = Flask_config.pymisp
if pymisp is False: if pymisp is False:
flag_misp = False flag_misp = False
@ -61,12 +68,32 @@ PasteSubmit = Blueprint('PasteSubmit', __name__, template_folder='templates')
valid_filename_chars = "-_ %s%s" % (string.ascii_letters, string.digits) valid_filename_chars = "-_ %s%s" % (string.ascii_letters, string.digits)
ALLOWED_EXTENSIONS = set(['txt', 'sh', 'pdf', 'zip', 'gz', 'tar.gz'])
UPLOAD_FOLDER = Flask_config.UPLOAD_FOLDER UPLOAD_FOLDER = Flask_config.UPLOAD_FOLDER
misp_event_url = Flask_config.misp_event_url misp_event_url = Flask_config.misp_event_url
hive_case_url = Flask_config.hive_case_url hive_case_url = Flask_config.hive_case_url
text_max_size = int(Flask_config.SUBMIT_PASTE_TEXT_MAX_SIZE) / (1000*1000)
file_max_size = int(Flask_config.SUBMIT_PASTE_FILE_MAX_SIZE) / (1000*1000*1000)
allowed_extensions = ", ". join(Flask_config.SUBMIT_PASTE_FILE_ALLOWED_EXTENSIONS)
# ============ Validators ============
def limit_content_length():
def decorator(f):
@wraps(f)
def wrapper(*args, **kwargs):
logger.debug('decorator')
cl = request.content_length
if cl is not None:
if cl > Flask_config.SUBMIT_PASTE_FILE_MAX_SIZE or ('file' not in request.files and cl > Flask_config.SUBMIT_PASTE_TEXT_MAX_SIZE):
logger.debug('abort')
abort(413)
return f(*args, **kwargs)
return wrapper
return decorator
# ============ FUNCTIONS ============ # ============ FUNCTIONS ============
def one(): def one():
return 1 return 1
@ -75,7 +102,10 @@ def allowed_file(filename):
if not '.' in filename: if not '.' in filename:
return True return True
else: else:
return filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS file_ext = filename.rsplit('.', 1)[1].lower()
logger.debug(file_ext)
logger.debug(Flask_config.SUBMIT_PASTE_FILE_ALLOWED_EXTENSIONS)
return file_ext in Flask_config.SUBMIT_PASTE_FILE_ALLOWED_EXTENSIONS
def clean_filename(filename, whitelist=valid_filename_chars, replace=' '): def clean_filename(filename, whitelist=valid_filename_chars, replace=' '):
# replace characters # replace characters
@ -197,22 +227,22 @@ def hive_create_case(hive_tlp, threat_level, hive_description, hive_case_title,
res = HiveApi.create_case_observable(id,observ_sensor) res = HiveApi.create_case_observable(id,observ_sensor)
if res.status_code != 201: if res.status_code != 201:
print('ko: {}/{}'.format(res.status_code, res.text)) logger.info(f'ko sensor: {res.status_code}/{res.text}')
res = HiveApi.create_case_observable(id, observ_source) res = HiveApi.create_case_observable(id, observ_source)
if res.status_code != 201: if res.status_code != 201:
print('ko: {}/{}'.format(res.status_code, res.text)) logger.info(f'ko source: {res.status_code}/{res.text}')
res = HiveApi.create_case_observable(id, observ_file) res = HiveApi.create_case_observable(id, observ_file)
if res.status_code != 201: if res.status_code != 201:
print('ko: {}/{}'.format(res.status_code, res.text)) logger.info(f'ko file: {res.status_code}/{res.text}')
res = HiveApi.create_case_observable(id, observ_last_seen) res = HiveApi.create_case_observable(id, observ_last_seen)
if res.status_code != 201: if res.status_code != 201:
print('ko: {}/{}'.format(res.status_code, res.text)) logger.info(f'ko last_seen: {res.status_code}/{res.text}')
r_serv_metadata.set('hive_cases:'+path, id) r_serv_metadata.set('hive_cases:'+path, id)
return hive_case_url.replace('id_here', id) return hive_case_url.replace('id_here', id)
else: else:
print('ko: {}/{}'.format(response.status_code, response.text)) logger.info(f'ko: {response.status_code}/{response.text}')
return False return False
# ============= ROUTES ============== # ============= ROUTES ==============
@ -227,27 +257,35 @@ def PasteSubmit_page():
return render_template("submit_items.html", return render_template("submit_items.html",
active_taxonomies = active_taxonomies, active_taxonomies = active_taxonomies,
active_galaxies = active_galaxies) active_galaxies = active_galaxies,
text_max_size = text_max_size,
file_max_size = file_max_size,
allowed_extensions = allowed_extensions)
@PasteSubmit.route("/PasteSubmit/submit", methods=['POST']) @PasteSubmit.route("/PasteSubmit/submit", methods=['POST'])
@login_required @login_required
@login_analyst @login_analyst
@limit_content_length()
def submit(): def submit():
#paste_name = request.form['paste_name'] #paste_name = request.form['paste_name']
logger.debug('submit')
password = request.form['password'] password = request.form['password']
ltags = request.form['tags_taxonomies'] ltags = request.form['tags_taxonomies']
ltagsgalaxies = request.form['tags_galaxies'] ltagsgalaxies = request.form['tags_galaxies']
paste_content = request.form['paste_content'] paste_content = request.form['paste_content']
paste_source = request.form['paste_source']
is_file = False is_file = False
if 'file' in request.files: if 'file' in request.files:
file = request.files['file'] file_import = request.files['file']
if file: if file_import:
if file.filename: if file_import.filename:
is_file = True is_file = True
logger.debug(f'is file ? {is_file}')
submitted_tag = 'infoleak:submission="manual"' submitted_tag = 'infoleak:submission="manual"'
#active taxonomies #active taxonomies
@ -256,13 +294,13 @@ def submit():
active_galaxies = Tag.get_active_galaxies() active_galaxies = Tag.get_active_galaxies()
if ltags or ltagsgalaxies: if ltags or ltagsgalaxies:
logger.debug(f'ltags ? {ltags} {ltagsgalaxies}')
ltags = Tag.unpack_str_tags_list(ltags) ltags = Tag.unpack_str_tags_list(ltags)
ltagsgalaxies = Tag.unpack_str_tags_list(ltagsgalaxies) ltagsgalaxies = Tag.unpack_str_tags_list(ltagsgalaxies)
if not Tag.is_valid_tags_taxonomies_galaxy(ltags, ltagsgalaxies): if not Tag.is_valid_tags_taxonomies_galaxy(ltags, ltagsgalaxies):
content = 'INVALID TAGS' content = 'INVALID TAGS'
print(content) logger.info(content)
return content, 400 return content, 400
# add submitted tags # add submitted tags
@ -271,9 +309,10 @@ def submit():
ltags.append(submitted_tag) ltags.append(submitted_tag)
if is_file: if is_file:
if file: logger.debug('file management')
if file and allowed_file(file.filename): if allowed_file(file_import.filename):
logger.debug('file extension allowed')
# get UUID # get UUID
UUID = str(uuid.uuid4()) UUID = str(uuid.uuid4())
@ -284,24 +323,28 @@ def submit():
# create submitted dir # create submitted dir
if not os.path.exists(UPLOAD_FOLDER): if not os.path.exists(UPLOAD_FOLDER):
logger.debug('create folder')
os.makedirs(UPLOAD_FOLDER) os.makedirs(UPLOAD_FOLDER)
if not '.' in file.filename: if not '.' in file_import.filename:
logger.debug('add UUID to path')
full_path = os.path.join(UPLOAD_FOLDER, UUID) full_path = os.path.join(UPLOAD_FOLDER, UUID)
else: else:
if file.filename[-6:] == 'tar.gz': if file_import.filename[-6:] == 'tar.gz':
logger.debug('file extension is tar.gz')
file_type = 'tar.gz' file_type = 'tar.gz'
else: else:
file_type = file.filename.rsplit('.', 1)[1] file_type = file_import.filename.rsplit('.', 1)[1]
logger.debug(f'file type {file_type}')
name = UUID + '.' + file_type name = UUID + '.' + file_type
full_path = os.path.join(UPLOAD_FOLDER, name) full_path = os.path.join(UPLOAD_FOLDER, name)
logger.debug(f'full path {full_path}')
#Flask verify the file size #Flask verify the file size
file.save(full_path) file_import.save(full_path)
logger.debug('file saved')
paste_content = full_path Import_helper.create_import_queue(ltags, ltagsgalaxies, full_path, UUID, password, True)
Import_helper.create_import_queue(ltags, ltagsgalaxies, paste_content, UUID, password ,True)
return render_template("submit_items.html", return render_template("submit_items.html",
active_taxonomies = active_taxonomies, active_taxonomies = active_taxonomies,
@ -309,30 +352,32 @@ def submit():
UUID = UUID) UUID = UUID)
else: else:
content = 'wrong file type, allowed_extensions: sh, pdf, zip, gz, tar.gz or remove the extension' content = f'wrong file type, allowed_extensions: {allowed_extensions} or remove the extension'
print(content) logger.info(content)
return content, 400 return content, 400
elif paste_content != '': elif paste_content != '':
if sys.getsizeof(paste_content) < 900000: logger.debug(f'entering text paste management')
if sys.getsizeof(paste_content) < Flask_config.SUBMIT_PASTE_TEXT_MAX_SIZE:
logger.debug(f'size {sys.getsizeof(paste_content)}')
# get id # get id
UUID = str(uuid.uuid4()) UUID = str(uuid.uuid4())
Import_helper.create_import_queue(ltags, ltagsgalaxies, paste_content, UUID, password) logger.debug('create import')
Import_helper.create_import_queue(ltags, ltagsgalaxies, paste_content, UUID, password, source=paste_source)
logger.debug('import OK')
return render_template("submit_items.html", return render_template("submit_items.html",
active_taxonomies = active_taxonomies, active_taxonomies = active_taxonomies,
active_galaxies = active_galaxies, active_galaxies = active_galaxies,
UUID = UUID) UUID = UUID)
else: else:
content = 'size error' content = f'text paste size is over {Flask_config.SUBMIT_PASTE_TEXT_MAX_SIZE} bytes limit'
print(content) logger.info(content)
return content, 400 return content, 400
content = 'submit aborded' content = 'submit aborded'
print(content) logger.error(content)
return content, 400 return content, 400

View file

@ -17,6 +17,7 @@
<script src="{{ url_for('static', filename='js/tags.js') }}"></script> <script src="{{ url_for('static', filename='js/tags.js') }}"></script>
</head> </head>
<body> <body>
{% if UUID %} {% if UUID %}
@ -24,8 +25,11 @@
<div class="modal-dialog modal-lg"> <div class="modal-dialog modal-lg">
<div id="mymodalcontent" class="modal-content"> <div id="mymodalcontent" class="modal-content">
<div class="modal-header" style="border-bottom: 4px solid #cccccc; background-color: #cccccc; color: #ffffff;"> <div class="modal-header"
<p class="heading"><h1>Submitting Items ...</h1></p> style="border-bottom: 4px solid #cccccc; background-color: #cccccc; color: #ffffff;">
<p class="heading">
<h1>Submitting Items ...</h1>
</p>
</div> </div>
@ -62,15 +66,19 @@
</div> </div>
<div class="modal-footer"> <div class="modal-footer">
<a class="btn btn-light text-secondary" href="{{ url_for('tags_ui.get_obj_by_tags') }}?object_type=item&ltags=infoleak:submission=&quot;manual&quot;" target="_blank" id="submit_result" hidden> <a class="btn btn-light text-secondary"
href="{{ url_for('tags_ui.get_obj_by_tags') }}?object_type=item&ltags=infoleak:submission=&quot;manual&quot;"
target="_blank" id="submit_result" hidden>
<i class="fas fa-paper-plane fa-2x"></i> <i class="fas fa-paper-plane fa-2x"></i>
<span class="label-icon">Submitted Items </span> <span class="label-icon">Submitted Items </span>
</a> </a>
<button class="btn btn-success btn-tags ml-auto" data-dismiss="modal" id="success_submit_button" hidden> <button class="btn btn-success btn-tags ml-auto" data-dismiss="modal" id="success_submit_button"
hidden>
<span class="label-icon">Success </span> <span class="label-icon">Success </span>
<i class="fas fa-check"></i> <i class="fas fa-check"></i>
</button> </button>
<button class="btn btn-danger btn-tags ml-auto" data-dismiss="modal" id="error_submit_button" hidden> <button class="btn btn-danger btn-tags ml-auto" data-dismiss="modal" id="error_submit_button"
hidden>
<span class="label-icon">ERROR </span> <span class="label-icon">ERROR </span>
<i class="fas fa-times"></i> <i class="fas fa-times"></i>
</button> </button>
@ -91,89 +99,129 @@
<div class="col-12 col-lg-10" id="core_content"> <div class="col-12 col-lg-10" id="core_content">
<form action="{{ url_for('PasteSubmit.submit') }}" id="pasteSubmitForm" method="post" enctype=multipart/form-data onsubmit="submitPaste()"> {% if message %}
<p>{{ message }}</p>
{% endif %}
<form action="{{ url_for('PasteSubmit.submit') }}" id="pasteSubmitForm" method="post"
enctype=multipart/form-data onsubmit="submitPaste()">
<input type="hidden" id="tags_taxonomies" name="tags_taxonomies" value="test"> <input type="hidden" id="tags_taxonomies" name="tags_taxonomies" value="test">
<input type="hidden" id="tags_galaxies" name="tags_galaxies" value="test"> <input type="hidden" id="tags_galaxies" name="tags_galaxies" value="test">
<div class="row">
<div class="col-xl-5">
<div class="card mt-2"> <div class="card mt-2 mb-4">
<div class="card-header text-white" style="background-color: #337ab7;"> <div class="card-header py-3 d-flex flex-row align-items-center justify-content-between">
Files submission <h6 class="m-0 font-weight-bold text-primary">Submit Paste</h6>
</div> </div>
<div class="card-body"> <div class="card-body">
<div class="form-group"> <div class="row mb-3">
<label for="file">Submit a file</label> <div class="form-check form-check-inline">
<input type="file" class="form-control-file" id="file" name="file"> <input class="form-check-input" type="radio" id="radioimportfile" name="radioimport"
</div> value="file" checked />
<label class="form-check-label" for="radioimportfile"> Submit a file </label>
<div class="form-group">
<label for="paste_name">Archive Password</label>
<input type="password" class="form-control" id="password" name="password" placeholder="Optional">
</div> </div>
<div class="form-check form-check-inline">
<input class="form-check-input" type="radio" type="radio" id="radioimporttext"
name="radioimport" value="text" />
<label class="form-check-label" for="radioimporttext"> Submit a text </label>
</div> </div>
</div> </div>
</div> <label for="ltagsgalaxies">Optional Tags:</label>
<div class="col-xl-7"> <div class="row mb-3">
<div class="col">
<div class="card mt-2">
<div class="card-header text-white" style="background-color: #337ab7;">
Tags :
</div>
<div class="card-body">
<div class="input-group"> <div class="input-group">
<input id="ltags" type="text" class="form-control" autocomplete="off"> <input id="ltags" type="text" class="form-control" autocomplete="off">
</div> </div>
<div class="dropdown"> <div class="dropdown">
<button type="button" class="btn btn-info dropdown-toggle mt-1 mb-3" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false" id="dropdown-taxonomie"> <button type="button" class="btn btn-info dropdown-toggle mt-1 mb-3"
data-toggle="dropdown" aria-haspopup="true" aria-expanded="false"
id="dropdown-taxonomie">
Taxonomie Selected Taxonomie Selected
</button> </button>
<div class="dropdown-menu dropdown-menu-right" aria-labelledby="dropdown-taxonomie"> <!-- TODO: make dropdown-scrollable --> <div class="dropdown-menu dropdown-menu-right"
aria-labelledby="dropdown-taxonomie">
<!-- TODO: make dropdown-scrollable -->
<h6 class="dropdown-header">Taxonomie Tags</h6> <h6 class="dropdown-header">Taxonomie Tags</h6>
<button class="dropdown-item" type="button" id="all-tags-taxonomies">All Tags <i class="fas fa-tags"></i></button> <button class="dropdown-item" type="button" id="all-tags-taxonomies">All
Tags <i class="fas fa-tags"></i></button>
<div class="dropdown-divider"></div> <div class="dropdown-divider"></div>
{% for taxo in active_taxonomies %} {% for taxo in active_taxonomies %}
<button class="dropdown-item" type="button" id="{{ taxo }}-id{{ loop.index0 }}">{{ taxo }}</button> <button class="dropdown-item" type="button"
id="{{ taxo }}-id{{ loop.index0 }}">{{ taxo }}</button>
{% endfor %} {% endfor %}
</div> </div>
</div> </div>
</div>
<div class="col">
<div class="input-group"> <div class="input-group">
<input id="ltagsgalaxies" type="text" class="form-control" autocomplete="off"> <input id="ltagsgalaxies" type="text" class="form-control" autocomplete="off">
</div> </div>
<div class="dropdown"> <div class="dropdown">
<button type="button" class="btn btn-info dropdown-toggle mt-1" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false" id="dropdown-galaxy"> <button type="button" class="btn btn-info dropdown-toggle mt-1 mb-3"
data-toggle="dropdown" aria-haspopup="true" aria-expanded="false"
id="dropdown-galaxy">
Galaxy Selected Galaxy Selected
</button> </button>
<div class="dropdown-menu dropdown-menu-right" aria-labelledby="dropdown-galaxy"> <!-- TODO: make dropdown-scrollable --> <div class="dropdown-menu dropdown-menu-right"
aria-labelledby="dropdown-galaxy">
<!-- TODO: make dropdown-scrollable -->
<h6 class="dropdown-header">Galaxy Tags</h6> <h6 class="dropdown-header">Galaxy Tags</h6>
<button class="dropdown-item" type="button" id="all-tags-galaxies">All Tags <i class="fas fa-tags"></i></button> <button class="dropdown-item" type="button" id="all-tags-galaxies">All Tags
<i class="fas fa-tags"></i></button>
<div class="dropdown-divider"></div> <div class="dropdown-divider"></div>
{% for galaxy in active_galaxies %} {% for galaxy in active_galaxies %}
<button class="dropdown-item" type="button" id="{{ galaxy }}-idgalax{{ loop.index0 }}">{{ galaxy }}</button> <button class="dropdown-item" type="button"
id="{{ galaxy }}-idgalax{{ loop.index0 }}">{{ galaxy }}</button>
{% endfor %} {% endfor %}
</div> </div>
</div> </div>
</div>
</div> </div>
</div> </div>
<!-- Card Import File -->
<div class="input-group mt-2 mb-2" id="cardimportfile">
<div class="card-body">
<div class="form-group">
<label for="file">Submit a file, format allowed: {{allowed_extensions}}, max size
{{file_max_size}} Gb</label>
<input type="file" class="form-control-file" id="file" name="file">
</div>
<!-- TODO reactivate while zipped format are enabled -->
<div class="form-group d-none">
<label for="paste_name">Archive Password</label>
<input type="password" class="form-control" id="password" name="password"
placeholder="Optional">
</div>
</div> </div>
</div> </div>
<!-- Card Import Text -->
<div class="input-group mb-4 mb-2" id="cardimporttext">
<div class="card-body">
<div class="form-group">
<label for="paste_source">Submit a text, max size {{text_max_size}} Mb</label>
<input id="paste_source" type="text" name="paste_source"
class="form-control bg-light border-0 small" placeholder="Source"
aria-label="Source" aria-describedby="basic-addon2">
</div>
<div class="form-group mt-3"> <div class="form-group mt-3">
<textarea class="form-control" id="paste_content" name="paste_content" rows="25"></textarea> <textarea class="form-control" id="paste_content" name="paste_content"
rows="20"></textarea>
</div>
</div>
</div> </div>
<div class="form-group"> <div class="form-group">
<button class="btn btn-primary " name="submit" type="submit">Submit Item</button> <button class="btn btn-primary btn-lg btn-block" name="submit" type="submit">Submit
Item</button>
</div>
</div> </div>
</form> </form>
@ -186,6 +234,21 @@
</body> </body>
<!-- Toggle import div related to radio button selected -->
<script>
$(document).ready(function () {
$("#cardimporttext").hide();
$('input[type="radio"]').click(function () {
var inputValue = $(this).attr("value");
$("#cardimportfile").hide();
$("#cardimporttext").hide();
var targetBox = $("#cardimport" + inputValue);
$(targetBox).show();
});
});
</script>
<script> <script>
var ltags var ltags
var ltagsgalaxies var ltagsgalaxies
@ -354,7 +417,4 @@ function submitPaste(){
</script> </script>
</html> </html>