chg: [logs] add new logger

Terrtia 2023-05-12 15:29:53 +02:00
parent 98cbaad1ad
commit 9b2e8718d7
50 changed files with 259 additions and 282 deletions

@@ -44,7 +44,7 @@ class ApiKey(AbstractModule):
         re.compile(self.re_google_api_key)

         # Send module state to logs
-        self.redis_logger.info(f"Module {self.module_name} initialized")
+        self.logger.info(f"Module {self.module_name} initialized")

     def compute(self, message, r_result=False):
         item_id, score = message.split()

@@ -148,18 +148,18 @@ class Credential(AbstractModule):
             discovered_sites = ', '.join(all_sites)
             print(f"=======> Probably on : {discovered_sites}")

-            date = datetime.now().strftime("%Y%m")
-            nb_tlds = {}
-            for cred in all_credentials:
-                maildomains = re.findall(r"@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", cred.lower())[0]
-                self.faup.decode(maildomains)
-                tld = self.faup.get()['tld']
-                # # TODO: # FIXME: remove me
-                try:
-                    tld = tld.decode()
-                except:
-                    pass
-                nb_tlds[tld] = nb_tlds.get(tld, 0) + 1
+            # date = datetime.now().strftime("%Y%m")
+            # nb_tlds = {}
+            # for cred in all_credentials:
+            #     maildomains = re.findall(r"@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", cred.lower())[0]
+            #     self.faup.decode(maildomains)
+            #     tld = self.faup.get()['tld']
+            #     # # TODO: # FIXME: remove me
+            #     try:
+            #         tld = tld.decode()
+            #     except:
+            #         pass
+            #     nb_tlds[tld] = nb_tlds.get(tld, 0) + 1
             # for tld in nb_tlds:
             #     Statistics.add_module_tld_stats_by_date('credential', date, tld, nb_tlds[tld])
         else:

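The block commented out above tallied the TLD of each leaked e-mail address with faup before feeding the per-TLD statistics. For reference, a rough stdlib-only sketch of that tally (count_tlds is a hypothetical name, and taking the last DNS label is only an approximation of the public-suffix TLD faup returns):

import re

def count_tlds(all_credentials):
    nb_tlds = {}
    for cred in all_credentials:
        # same e-mail-domain pattern as the disabled code above
        match = re.search(r"@[a-zA-Z0-9.-]+\.([a-zA-Z]{2,20})\b", cred.lower())
        if match:
            tld = match.group(1)
            nb_tlds[tld] = nb_tlds.get(tld, 0) + 1
    return nb_tlds

print(count_tlds(['user@example.com:hunter2']))  # {'com': 1}
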
@@ -51,7 +51,7 @@ class CreditCards(AbstractModule):
         self.pending_seconds = 10

         # Send module state to logs
-        self.redis_logger.info(f"Module {self.module_name} initialized")
+        self.logger.info(f"Module {self.module_name} initialized")

     def get_valid_card(self, card):
         clean_card = re.sub(self.re_clean_card, '', card)
@@ -74,7 +74,7 @@ class CreditCards(AbstractModule):
         all_cards = self.regex_findall(self.regex, item.id, content)
         if len(all_cards) > 0:
-            # self.redis_logger.debug(f'All matching {all_cards}')
+            # self.logger.debug(f'All matching {all_cards}')
             creditcard_set = set()
             for card in all_cards:
                 print(card)

@@ -111,7 +111,7 @@ class Cryptocurrencies(AbstractModule, ABC):
         self.pending_seconds = 1

         # Send module state to logs
-        self.redis_logger.info(f'Module {self.module_name} initialized')
+        self.logger.info(f'Module {self.module_name} initialized')

     def compute(self, message):
         item = Item(message)

@@ -41,7 +41,7 @@ class CveModule(AbstractModule):
         self.pending_seconds = 1

         # Send module state to logs
-        self.redis_logger.info(f'Module {self.module_name} initialized')
+        self.logger.info(f'Module {self.module_name} initialized')

     def compute(self, message):

@@ -84,7 +84,7 @@ class Decoder(AbstractModule):
         self.tracker_yara = Tracker_Yara(queue=False)

         # Send module state to logs
-        self.redis_logger.info(f'Module {self.module_name} initialized')
+        self.logger.info(f'Module {self.module_name} initialized')

     def compute(self, message):
@@ -122,13 +122,11 @@ class Decoder(AbstractModule):
                     mimetype = decoded.get_mimetype()
                     decoded.add(dname, date, item.id, mimetype=mimetype)
                     # DEBUG
-                    self.redis_logger.debug(f'{item.id} : {dname} - {decoded.id} - {mimetype}')
-                    print(f'{item.id} : {dname} - {decoded.id} - {mimetype}')
                     # new_decodeds.append(decoded.id)
+                    self.logger.info(f'{item.id} : {dname} - {decoded.id} - {mimetype}')

             if find:
-                self.redis_logger.info(f'{item.id} - {dname}')
-                print(f'{item.id} - {dname}')
+                self.logger.info(f'{item.id} - {dname}')

                 # Send to Tags
                 msg = f'infoleak:automatic-detection="{dname}";{item.id}'
@@ -137,8 +135,11 @@ class Decoder(AbstractModule):
         ####################
         # TRACKERS DECODED
         for decoded_id in new_decodeds:
-            self.tracker_term.compute(decoded_id, obj_type='decoded')
-            self.tracker_regex.compute(decoded_id, obj_type='decoded')
+            try:
+                self.tracker_term.compute(decoded_id, obj_type='decoded')
+                self.tracker_regex.compute(decoded_id, obj_type='decoded')
+            except UnicodeDecodeError:
+                pass
             self.tracker_yara.compute(decoded_id, obj_type='decoded')

@@ -48,7 +48,7 @@ class DomClassifier(AbstractModule):
         self.cc_tld = config_loader.get_config_str("DomClassifier", "cc_tld")

         # Send module state to logs
-        self.redis_logger.info(f"Module: {self.module_name} Launched")
+        self.logger.info(f"Module: {self.module_name} Launched")

     def compute(self, message, r_result=False):
         host, item_id = message.split()
@@ -62,7 +62,7 @@ class DomClassifier(AbstractModule):
             self.c.text(rawtext=host)
             print(self.c.domain)
             self.c.validdomain(passive_dns=True, extended=False)
-            # self.redis_logger.debug(self.c.vdomain)
+            # self.logger.debug(self.c.vdomain)
             print(self.c.vdomain)
             print()

@@ -47,7 +47,7 @@ class Duplicates(AbstractModule):
             "tlsh": {"threshold": THRESHOLD_TLSH}
         }

-        self.redis_logger.info(f"Module: {self.module_name} Launched")
+        self.logger.info(f"Module: {self.module_name} Launched")

     def compute(self, message):
         # IOError: "CRC Checksum Failed on : {id}"
@@ -65,7 +65,7 @@ class Duplicates(AbstractModule):
         x = time.time()

         # Get Hashs
-        content = item.get_content(binary=True)
+        content = item.get_content(r_type='bytes')
         self.algos['ssdeep']['hash'] = Duplicate.get_ssdeep_hash(content)
         self.algos['tlsh']['hash'] = Duplicate.get_tlsh_hash(content)

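The r_type='bytes' change above matters because both fuzzy-hashing libraries operate on raw bytes. A minimal sketch of what the two Duplicate.get_*_hash helpers presumably wrap, assuming the ssdeep and py-tlsh Python bindings:

import ssdeep  # pip install ssdeep
import tlsh    # pip install py-tlsh

# raw bytes, as returned by item.get_content(r_type='bytes')
content = b'some item content, long and varied enough to be hashable' * 4

ssdeep_hash = ssdeep.hash(content)  # fuzzy hash string
tlsh_hash = tlsh.hash(content)      # 'TNULL'/empty if input is too short or too uniform

# similarity is later scored with ssdeep.compare(hash_a, hash_b) -> 0..100
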
@@ -67,8 +67,8 @@ class Global(AbstractModule):
         self.pending_seconds = 0.5

-        # Send module state to logs
-        self.redis_logger.info(f"Module {self.module_name} initialized")
+        # Send module state to logs
+        self.logger.info(f"Module {self.module_name} initialized")
+        # Send module state to logs # TODO MOVE ME IN INIT SCRIPT
         self.redis_logger.critical(f"AIL {get_ail_uuid()} started")

     def computeNone(self):
@@ -103,7 +103,7 @@ class Global(AbstractModule):
         # Incorrect filename
         if not os.path.commonprefix([filename, self.ITEMS_FOLDER]) == self.ITEMS_FOLDER:
-            self.redis_logger.warning(f'Global; Path traversal detected {filename}')
+            self.logger.warning(f'Global; Path traversal detected {filename}')
             print(f'Global; Path traversal detected {filename}')

         else:
@@ -146,7 +146,7 @@ class Global(AbstractModule):
             return item_id
         else:
-            self.redis_logger.debug(f"Empty Item: {message} not processed")
+            self.logger.debug(f"Empty Item: {message} not processed")
             print(f"Empty Item: {message} not processed")

     def check_filename(self, filename, new_file_content):
@@ -157,7 +157,7 @@ class Global(AbstractModule):
         # check if file exist
         if os.path.isfile(filename):
-            self.redis_logger.warning(f'File already exist {filename}')
+            self.logger.warning(f'File already exist {filename}')
             print(f'File already exist {filename}')

             # Check that file already exists but content differs
@@ -174,17 +174,17 @@ class Global(AbstractModule):
                     filename = f'{filename[:-3]}_{new_file_md5}.gz'
                 else:
                     filename = f'{filename}_{new_file_md5}'
-                self.redis_logger.debug(f'new file to check: {filename}')
+                self.logger.debug(f'new file to check: {filename}')

                 if os.path.isfile(filename):
                     # Ignore duplicate
-                    self.redis_logger.debug(f'ignore duplicated file {filename}')
+                    self.logger.debug(f'ignore duplicated file {filename}')
                     print(f'ignore duplicated file {filename}')
                     filename = None
             else:
                 # Ignore duplicate checksum equals
-                self.redis_logger.debug(f'ignore duplicated file {filename}')
+                self.logger.debug(f'ignore duplicated file {filename}')
                 print(f'ignore duplicated file {filename}')
                 filename = None
@@ -205,13 +205,13 @@ class Global(AbstractModule):
             with gzip.open(filename, 'rb') as f:
                 curr_file_content = f.read()
         except EOFError:
-            self.redis_logger.warning(f'Global; Incomplete file: {filename}')
+            self.logger.warning(f'Global; Incomplete file: {filename}')
             print(f'Global; Incomplete file: {filename}')
             # save daily stats
             # self.r_stats.zincrby('module:Global:incomplete_file', 1, datetime.datetime.now().strftime('%Y%m%d'))
             # Statistics.
         except OSError:
-            self.redis_logger.warning(f'Global; Not a gzipped file: {filename}')
+            self.logger.warning(f'Global; Not a gzipped file: {filename}')
             print(f'Global; Not a gzipped file: {filename}')
             # save daily stats
             # self.r_stats.zincrby('module:Global:invalid_file', 1, datetime.datetime.now().strftime('%Y%m%d'))
@@ -229,7 +229,7 @@ class Global(AbstractModule):
             with gzip.GzipFile(fileobj=in_, mode='rb') as fo:
                 gunzipped_bytes_obj = fo.read()
         except Exception as e:
-            self.redis_logger.warning(f'Global; Invalid Gzip file: {filename}, {e}')
+            self.logger.warning(f'Global; Invalid Gzip file: {filename}, {e}')
             print(f'Global; Invalid Gzip file: {filename}, {e}')

         return gunzipped_bytes_obj

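For context, the operation these Global hunks wrap with logging is a plain in-memory gunzip. A self-contained sketch of the pattern (the logger name is assumed):

import gzip
import io
import logging

logger = logging.getLogger('Global')

def gunzip_bytes_obj(filename, bytes_obj):
    gunzipped_bytes_obj = None
    try:
        # 'in_' in the hunk above is the BytesIO wrapping the raw gzip bytes
        with gzip.GzipFile(fileobj=io.BytesIO(bytes_obj), mode='rb') as fo:
            gunzipped_bytes_obj = fo.read()
    except Exception as e:
        logger.warning(f'Global; Invalid Gzip file: {filename}, {e}')
    return gunzipped_bytes_obj
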
@@ -46,7 +46,7 @@ class Hosts(AbstractModule):
         self.host_regex = r'\b([a-zA-Z\d-]{,63}(?:\.[a-zA-Z\d-]{,63})+)\b'
         re.compile(self.host_regex)

-        self.redis_logger.info(f"Module: {self.module_name} Launched")
+        self.logger.info(f"Module: {self.module_name} Launched")

     def compute(self, message):
         item = Item(message)

@@ -63,12 +63,11 @@ class Indexer(AbstractModule):
         # create the index register if not present
         time_now = int(time.time())
         if not os.path.isfile(self.indexRegister_path):  # index are not organised
-            self.redis_logger.debug("Indexes are not organized")
-            self.redis_logger.debug(
-                "moving all files in folder 'old_index' ")
+            self.logger.debug("Indexes are not organized")
+            self.logger.debug("moving all files in folder 'old_index' ")
             # move all files to old_index folder
             self.move_index_into_old_index_folder()
-            self.redis_logger.debug("Creating new index")
+            self.logger.debug("Creating new index")
             # create all_index.txt
             with open(self.indexRegister_path, 'w') as f:
                 f.write(str(time_now))
@@ -100,7 +99,7 @@ class Indexer(AbstractModule):
         item_id = item.get_id()
         item_content = item.get_content()

-        self.redis_logger.debug(f"Indexing - {self.indexname}: {docpath}")
+        self.logger.debug(f"Indexing - {self.indexname}: {docpath}")
         print(f"Indexing - {self.indexname}: {docpath}")

         try:
@@ -109,7 +108,7 @@ class Indexer(AbstractModule):
                 self.last_refresh = time.time()
                 if self.check_index_size() >= self.INDEX_SIZE_THRESHOLD*(1000*1000):
                     timestamp = int(time.time())
-                    self.redis_logger.debug(f"Creating new index {timestamp}")
+                    self.logger.debug(f"Creating new index {timestamp}")
                     print(f"Creating new index {timestamp}")
                     self.indexpath = join(self.baseindexpath, str(timestamp))
                     self.indexname = str(timestamp)
@@ -129,9 +128,9 @@ class Indexer(AbstractModule):
                 indexwriter.commit()
         except IOError:
-            self.redis_logger.debug(f"CRC Checksum Failed on: {item_id}")
+            self.logger.debug(f"CRC Checksum Failed on: {item_id}")
             print(f"CRC Checksum Failed on: {item_id}")
-            self.redis_logger.error(f'Duplicate;{item.get_source()};{item.get_date()};{item.get_basename()};CRC Checksum Failed')
+            self.logger.error(f'{item_id} CRC Checksum Failed')

     def check_index_size(self):
         """

@@ -170,7 +170,7 @@ class Keys(AbstractModule):
         # if find :
         #     # Send to duplicate
         #     self.add_message_to_queue(item.get_id(), 'Duplicate')
-        #     self.redis_logger.debug(f'{item.get_id()} has key(s)')
+        #     self.logger.debug(f'{item.get_id()} has key(s)')
         #     print(f'{item.get_id()} has key(s)')

@@ -22,7 +22,7 @@ class Languages(AbstractModule):
         super(Languages, self).__init__()

         # Send module state to logs
-        self.redis_logger.info(f'Module {self.module_name} initialized')
+        self.logger.info(f'Module {self.module_name} initialized')

     def compute(self, message):
         item = Item(message)

@@ -71,7 +71,7 @@ class Mixer(AbstractModule):
         self.feeders_processed = {}
         self.feeders_duplicate = {}

-        self.redis_logger.info(f"Module: {self.module_name} Launched")
+        self.logger.info(f"Module: {self.module_name} Launched")

     # TODO Save stats in cache
     # def get_feeders(self):
@@ -154,7 +154,7 @@ class Mixer(AbstractModule):
                 feeder_name, item_id, gzip64encoded = splitted
             else:
                 print('Invalid message: not processed')
-                self.redis_logger.debug('Invalid Item: {message} not processed')
+                self.logger.debug(f'Invalid Item: {item_id} not processed')
                 return None

         # remove absolute path

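The unpacking above follows the feeder message shape 'feeder_name item_id gzip64encoded'. A sketch of the full decode, assuming (as the field name suggests) the payload is base64-wrapped gzip:

import base64
import gzip

def parse_feeder_message(message):
    splitted = message.split()
    if len(splitted) == 3:
        feeder_name, item_id, gzip64encoded = splitted
        content = gzip.decompress(base64.standard_b64decode(gzip64encoded))
        return feeder_name, item_id, content
    # invalid message: not processed
    return None
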
@@ -49,7 +49,7 @@ class Onion(AbstractModule):
         re.compile(self.onion_regex)
         # re.compile(self.i2p_regex)

-        self.redis_logger.info(f"Module: {self.module_name} Launched")
+        self.logger.info(f"Module: {self.module_name} Launched")

         # TEMP var: SAVE I2P Domain (future I2P crawler)
         # self.save_i2p = config_loader.get_config_boolean("Onion", "save_i2p")

@@ -69,7 +69,7 @@ class PgpDump(AbstractModule):
         self.symmetrically_encrypted = False

         # Send module state to logs
-        self.redis_logger.info(f'Module {self.module_name} initialized')
+        self.logger.info(f'Module {self.module_name} initialized')

     def remove_html(self, pgp_block):
         try:
@@ -130,7 +130,7 @@ class PgpDump(AbstractModule):
             try:
                 output = output.decode()
             except UnicodeDecodeError:
-                self.redis_logger.error(f'Error PgpDump UnicodeDecodeError: {self.item_id}')
+                self.logger.error(f'Error PgpDump UnicodeDecodeError: {self.item_id}')
                 output = ''
         return output

@@ -50,7 +50,7 @@ class Phone(AbstractModule):
         # If the list is greater than 4, we consider the Item may contain a list of phone numbers
         if len(results) > 4:
-            self.redis_logger.debug(results)
+            self.logger.debug(results)
             self.redis_logger.warning(f'{item.get_id()} contains PID (phone numbers)')

             msg = f'infoleak:automatic-detection="phone-number";{item.get_id()}'

@@ -41,7 +41,7 @@ class SQLInjectionDetection(AbstractModule):
         self.faup = Faup()

-        self.redis_logger.info(f"Module: {self.module_name} Launched")
+        self.logger.info(f"Module: {self.module_name} Launched")

     def compute(self, message):
         url, item_id = message.split()

@@ -70,7 +70,7 @@ class SentimentAnalysis(AbstractModule):
         self.pending_seconds = 1

         # Send module state to logs
-        self.redis_logger.info(f"Module {self.module_name} initialized")
+        self.logger.info(f"Module {self.module_name} initialized")

     def compute(self, message):
         # Max time to compute one entry
@@ -78,7 +78,7 @@ class SentimentAnalysis(AbstractModule):
         try:
             self.analyse(message)
         except TimeoutException:
-            self.redis_logger.debug(f"{message} processing timeout")
+            self.logger.debug(f"{message} processing timeout")
         else:
             signal.alarm(0)
@@ -114,7 +114,7 @@ class SentimentAnalysis(AbstractModule):
             p_MimeType = "JSON"

         if p_MimeType in SentimentAnalysis.accepted_Mime_type:
-            self.redis_logger.debug(f'Accepted :{p_MimeType}')
+            self.logger.debug(f'Accepted :{p_MimeType}')
             the_date = datetime.date(int(p_date[0:4]), int(p_date[4:6]), int(p_date[6:8]))
             the_time = datetime.datetime.now()

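The try/except/else around analyse() above is the tail of a SIGALRM-based time limit. A minimal self-contained sketch of that pattern (Unix-only; analyse is a stand-in for the real sentiment analysis):

import signal

class TimeoutException(Exception):
    pass

def timeout_handler(signum, frame):
    raise TimeoutException

signal.signal(signal.SIGALRM, timeout_handler)

def analyse(message):
    pass  # long-running work goes here

def compute(message, max_execution_time=60):
    signal.alarm(max_execution_time)  # raises TimeoutException after the limit
    try:
        analyse(message)
    except TimeoutException:
        print(f"{message} processing timeout")
    else:
        signal.alarm(0)  # finished in time: disarm the alarm
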
@@ -60,7 +60,7 @@ class SubmitPaste(AbstractModule):
         """
         Main method of the Module to implement
         """
-        self.redis_logger.debug(f'compute UUID {uuid}')
+        self.logger.debug(f'compute UUID {uuid}')

         # get temp value save on disk
         ltags = self.r_serv_db.smembers(f'{uuid}:ltags')
@@ -73,9 +73,9 @@ class SubmitPaste(AbstractModule):
         if source in ['crawled', 'tests']:
             source = 'submitted'

-        self.redis_logger.debug(f'isfile UUID {isfile}')
-        self.redis_logger.debug(f'source UUID {source}')
-        self.redis_logger.debug(f'paste_content UUID {paste_content}')
+        self.logger.debug(f'isfile UUID {isfile}')
+        self.logger.debug(f'source UUID {source}')
+        self.logger.debug(f'paste_content UUID {paste_content}')

         # needed if redis is restarted
         self.r_serv_log_submit.set(f'{uuid}:end', 0)
@@ -114,15 +114,15 @@ class SubmitPaste(AbstractModule):
                 if isinstance(uuid, list):
                     uuid = uuid[0]
                 # Module processing with the message from the queue
-                self.redis_logger.debug(uuid)
+                self.logger.debug(uuid)
                 self.compute(uuid)
             except Exception as err:
-                self.redis_logger.error(f'Error in module {self.module_name}: {err}')
+                self.logger.critical(err)
                 # Remove uuid ref
                 self.remove_submit_uuid(uuid)
         else:
             # Wait before next process
-            self.redis_logger.debug(f'{self.module_name}, waiting for new message, Idling {self.pending_seconds}s')
+            self.logger.debug(f'{self.module_name}, waiting for new message, Idling {self.pending_seconds}s')
             time.sleep(self.pending_seconds)

     def _manage_text(self, uuid, paste_content, ltags, ltagsgalaxies, source):

@@ -36,7 +36,7 @@ class Tags(AbstractModule):
         self.pending_seconds = 10

         # Send module state to logs
-        self.redis_logger.info(f'Module {self.module_name} initialized')
+        self.logger.info(f'Module {self.module_name} initialized')

     def compute(self, message):
         # Extract item ID and tag from message

@@ -38,7 +38,7 @@ class Telegram(AbstractModule):
         self.max_execution_time = 60

         # Send module state to logs
-        self.redis_logger.info(f"Module {self.module_name} initialized")
+        self.logger.info(f"Module {self.module_name} initialized")

     def compute(self, message, r_result=False):
         item = Item(message)

@@ -402,7 +402,7 @@ class Tools(AbstractModule):
         # Waiting time in seconds between to message processed
         self.pending_seconds = 10

         # Send module state to logs
-        self.redis_logger.info(f"Module {self.module_name} initialized")
+        self.logger.info(f"Module {self.module_name} initialized")

     def get_tools(self):
         return TOOLS.keys()

@@ -56,7 +56,7 @@ class Urls(AbstractModule):
                           ")\://(?:[a-zA-Z0-9\.\-]+(?:\:[a-zA-Z0-9\.&%\$\-]+)*@)*(?:(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|(?:[a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(?:[a-zA-Z]{2,15}))(?:\:[0-9]+)*(?:/?(?:[a-zA-Z0-9\.\,\?'\\+&%\$#\=~_\-]+))*)"

         # Send module state to logs
-        self.redis_logger.info(f"Module {self.module_name} initialized")
+        self.logger.info(f"Module {self.module_name} initialized")

     def compute(self, message):
         """
@@ -82,7 +82,7 @@ class Urls(AbstractModule):
                 to_send = f"{url} {item.get_id()}"
                 print(to_send)
                 self.add_message_to_queue(to_send, 'Url')
-                self.redis_logger.debug(f"url_parsed: {to_send}")
+                self.logger.debug(f"url_parsed: {to_send}")

         if len(l_urls) > 0:
             to_print = f'Urls;{item.get_source()};{item.get_date()};{item.get_basename()};'

@@ -39,13 +39,13 @@ class Zerobins(AbstractModule):
         self.pending_seconds = 10

         # Send module state to logs
-        self.redis_logger.info(f'Module {self.module_name} initialized')
+        self.logger.info(f'Module {self.module_name} initialized')

     def computeNone(self):
         """
         Compute when no message in queue
         """
-        self.redis_logger.debug("No message in queue")
+        self.logger.debug("No message in queue")
@@ -63,7 +63,7 @@ class Zerobins(AbstractModule):
             crawlers.create_task(bin_url, depth=0, har=False, screenshot=False, proxy='force_tor',
                                  parent='manual', priority=60)

-        self.redis_logger.debug("Compute message in queue")
+        self.logger.debug("Compute message in queue")


 if __name__ == '__main__':

@@ -8,6 +8,8 @@ Base Class for AIL Modules
 ##################################
 from abc import ABC, abstractmethod
 import os
+import logging
+import logging.config
 import sys
 import time
 import traceback
@@ -17,22 +19,27 @@ sys.path.append(os.environ['AIL_BIN'])
 # Import Project packages
 ##################################
 from pubsublogger import publisher
+from lib import ail_logger
 from lib.ail_queues import AILQueue
 from lib import regex_helper
 from lib.exceptions import ModuleQueueError

+logging.config.dictConfig(ail_logger.get_config(name='modules'))

 class AbstractModule(ABC):
     """
     Abstract Module class
     """

-    def __init__(self, module_name=None, logger_channel='Script', queue=True):
+    def __init__(self, module_name=None, queue=True):
         """
         Init Module
         module_name: str; set the module name if different from the instance ClassName
         queue_name: str; set the queue name if different from the instance ClassName
-        logger_channel: str; set the logger channel name, 'Script' by default
         """
+        self.logger = logging.getLogger(f'{self.__class__.__name__}')

         # Module name if provided else instance className
         self.module_name = module_name if module_name else self._module_name()
@@ -44,14 +51,12 @@ class AbstractModule(ABC):
         # Init Redis Logger
         self.redis_logger = publisher

         # Port of the redis instance used by pubsublogger
         self.redis_logger.port = 6380

         # Channel name to publish logs
-        # # TODO: refactor logging
-        # If provided could be a namespaced channel like script:<ModuleName>
-        self.redis_logger.channel = logger_channel
+        self.redis_logger.channel = 'Script'

         # Cache key
         self.r_cache_key = regex_helper.generate_redis_cache_key(self.module_name)
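
The contents of ail_logger.get_config(name='modules') are not part of this diff; any logging dictConfig with named loggers works here. A hypothetical example of the kind of dict it could return (handler class, path and levels are guesses, not AIL's actual config):

# Hypothetical sketch only: the real ail_logger.get_config is not shown in this diff
LOGGING_CONFIG = {
    'version': 1,
    'disable_existing_loggers': False,
    'formatters': {
        'default': {'format': '%(asctime)s %(name)s %(levelname)s: %(message)s'},
    },
    'handlers': {
        'file': {
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': 'logs/modules.log',  # guessed path
            'maxBytes': 1_000_000,
            'backupCount': 5,
            'formatter': 'default',
        },
    },
    'root': {'level': 'INFO', 'handlers': ['file']},
}
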
@@ -127,14 +132,9 @@ class AbstractModule(ABC):
                 # LOG ERROR
                 trace = traceback.format_tb(err.__traceback__)
                 trace = ''.join(trace)
-                self.redis_logger.critical(f"Error in module {self.module_name}: {err}")
-                self.redis_logger.critical(f"Module {self.module_name} input message: {message}")
-                self.redis_logger.critical(trace)
-                print()
-                print(f"ERROR: {err}")
-                print(f'MESSAGE: {message}')
-                print('TRACEBACK:')
-                print(trace)
+                self.logger.critical(f"Error in module {self.module_name}: {__name__} : {err}")
+                self.logger.critical(f"Module {self.module_name} input message: {message}")
+                self.logger.critical(trace)

                 if isinstance(err, ModuleQueueError):
                     self.queue.error()
@@ -145,7 +145,7 @@ class AbstractModule(ABC):
             else:
                 self.computeNone()
                 # Wait before next process
-                self.redis_logger.debug(f"{self.module_name}, waiting for new message, Idling {self.pending_seconds}s")
+                self.logger.debug(f"{self.module_name}, waiting for new message, Idling {self.pending_seconds}s")
                 time.sleep(self.pending_seconds)

     def _module_name(self):
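
Taken together, a module written against the new base class looks like the sketch below: a hypothetical Dummy module mirroring the real ones in this commit. self.logger is ready as soon as super().__init__() has run, and no logger_channel argument is passed any more.

import os
import sys

sys.path.append(os.environ['AIL_BIN'])
from modules.abstract_module import AbstractModule


class Dummy(AbstractModule):

    def __init__(self):
        super(Dummy, self).__init__()
        self.pending_seconds = 10
        # Send module state to logs
        self.logger.info(f'Module {self.module_name} initialized')

    def compute(self, message):
        # per-message processing; errors are caught and logged by run()
        self.logger.debug(f'processing {message}')


if __name__ == '__main__':
    module = Dummy()
    module.run()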