chg: [modules + tests] fix modules + test modules on samples

This commit is contained in:
Terrtia 2021-06-08 16:46:36 +02:00
parent 90b6f43468
commit 42a23da182
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
16 changed files with 69 additions and 41 deletions

View file

@ -12,15 +12,14 @@ import time
import datetime import datetime
import redis import redis
import os import os
import sys
sys.path.append(os.environ['AIL_BIN'])
################################## ##################################
# Import Project packages # # Import Project packages #
################################## ##################################
from module.abstract_module import AbstractModule from modules.abstract_module import AbstractModule
from packages.Date import Date from packages.Date import Date
from pubsublogger import publisher
from Helper import Process
from packages import Paste from packages import Paste
import ConfigLoader import ConfigLoader

View file

@ -10,14 +10,16 @@ import sys
import time import time
import datetime import datetime
from pubsublogger import publisher sys.path.append(os.environ['AIL_BIN'])
##################################
import NotificationHelper # Import Project packages
##################################
from packages import Date from packages import Date
from packages import Item from packages import Item
from packages import Term from packages import Term
from pubsublogger import publisher
def clean_term_db_stat_token(): def clean_term_db_stat_token():
all_stat_date = Term.get_all_token_stat_history() all_stat_date = Term.get_all_token_stat_history()

View file

@ -51,7 +51,7 @@ class Categ(AbstractModule):
Categ module for AIL framework Categ module for AIL framework
""" """
def __init__(self, categ_files_dir='../files/'): def __init__(self, categ_files_dir=os.path.join(os.environ['AIL_HOME'], 'files')):
""" """
Init Categ Init Categ
""" """
@ -107,7 +107,7 @@ if __name__ == '__main__':
# SCRIPT PARSER # # SCRIPT PARSER #
parser = argparse.ArgumentParser(description='Start Categ module on files.') parser = argparse.ArgumentParser(description='Start Categ module on files.')
parser.add_argument( parser.add_argument(
'-d', type=str, default="../files/", '-d', type=str, default=os.path.join(os.environ['AIL_HOME'], 'files'),
help='Path to the directory containing the category files.', help='Path to the directory containing the category files.',
action='store') action='store')
args = parser.parse_args() args = parser.parse_args()

View file

@ -104,6 +104,7 @@ class Global(AbstractModule):
# Incorrect filename # Incorrect filename
if not os.path.commonprefix([filename, self.PASTES_FOLDER]) == self.PASTES_FOLDER: if not os.path.commonprefix([filename, self.PASTES_FOLDER]) == self.PASTES_FOLDER:
self.redis_logger.warning(f'Global; Path traversal detected {filename}') self.redis_logger.warning(f'Global; Path traversal detected {filename}')
print(f'Global; Path traversal detected {filename}')
else: else:
# Decode compressed base64 # Decode compressed base64
@ -134,6 +135,7 @@ class Global(AbstractModule):
else: else:
self.redis_logger.debug(f"Empty Item: {message} not processed") self.redis_logger.debug(f"Empty Item: {message} not processed")
print(f"Empty Item: {message} not processed")
def check_filename(self, filename, new_file_content): def check_filename(self, filename, new_file_content):
@ -145,6 +147,7 @@ class Global(AbstractModule):
# check if file exist # check if file exist
if os.path.isfile(filename): if os.path.isfile(filename):
self.redis_logger.warning(f'File already exist {filename}') self.redis_logger.warning(f'File already exist {filename}')
print(f'File already exist {filename}')
# Check that file already exists but content differs # Check that file already exists but content differs
curr_file_content = self.gunzip_file(filename) curr_file_content = self.gunzip_file(filename)
@ -165,11 +168,13 @@ class Global(AbstractModule):
if os.path.isfile(filename): if os.path.isfile(filename):
# Ignore duplicate # Ignore duplicate
self.redis_logger.debug(f'ignore duplicated file {filename}') self.redis_logger.debug(f'ignore duplicated file {filename}')
print(f'ignore duplicated file {filename}')
filename = None filename = None
else: else:
# Ignore duplicate checksum equals # Ignore duplicate checksum equals
self.redis_logger.debug(f'ignore duplicated file {filename}') self.redis_logger.debug(f'ignore duplicated file {filename}')
print(f'ignore duplicated file {filename}')
filename = None filename = None
else: else:
@ -192,10 +197,12 @@ class Global(AbstractModule):
curr_file_content = f.read() curr_file_content = f.read()
except EOFError: except EOFError:
self.redis_logger.warning(f'Global; Incomplete file: {filename}') self.redis_logger.warning(f'Global; Incomplete file: {filename}')
print(f'Global; Incomplete file: {filename}')
# save daily stats # save daily stats
self.r_stats.zincrby('module:Global:incomplete_file', datetime.datetime.now().strftime('%Y%m%d'), 1) self.r_stats.zincrby('module:Global:incomplete_file', datetime.datetime.now().strftime('%Y%m%d'), 1)
except OSError: except OSError:
self.redis_logger.warning(f'Global; Not a gzipped file: {filename}') self.redis_logger.warning(f'Global; Not a gzipped file: {filename}')
print(f'Global; Not a gzipped file: {filename}')
# save daily stats # save daily stats
self.r_stats.zincrby('module:Global:invalid_file', datetime.datetime.now().strftime('%Y%m%d'), 1) self.r_stats.zincrby('module:Global:invalid_file', datetime.datetime.now().strftime('%Y%m%d'), 1)
@ -213,6 +220,7 @@ class Global(AbstractModule):
gunzipped_bytes_obj = fo.read() gunzipped_bytes_obj = fo.read()
except Exception as e: except Exception as e:
self.redis_logger.warning(f'Global; Invalid Gzip file: {filename}, {e}') self.redis_logger.warning(f'Global; Invalid Gzip file: {filename}, {e}')
print(f'Global; Invalid Gzip file: {filename}, {e}')
return gunzipped_bytes_obj return gunzipped_bytes_obj

View file

@ -26,7 +26,7 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages # Import Project packages
################################## ##################################
from modules.abstract_module import AbstractModule from modules.abstract_module import AbstractModule
from packages import Paste from packages.Item import Item
class Indexer(AbstractModule): class Indexer(AbstractModule):
@ -98,19 +98,23 @@ class Indexer(AbstractModule):
def compute(self, message): def compute(self, message):
try: docpath = message.split(" ", -1)[-1]
PST = Paste.Paste(message)
docpath = message.split(" ", -1)[-1]
paste = PST.get_p_content()
self.redis_logger.debug(f"Indexing - {self.indexname}: {docpath}")
print(f"Indexing - {self.indexname}: {docpath}")
item = Item(message)
item_id = item.get_id()
item_content = item.get_content()
self.redis_logger.debug(f"Indexing - {self.indexname}: {docpath}")
print(f"Indexing - {self.indexname}: {docpath}")
try:
# Avoid calculating the index's size at each message # Avoid calculating the index's size at each message
if(time.time() - self.last_refresh > self.TIME_WAIT): if(time.time() - self.last_refresh > self.TIME_WAIT):
self.last_refresh = time.time() self.last_refresh = time.time()
if self.check_index_size() >= self.INDEX_SIZE_THRESHOLD*(1000*1000): if self.check_index_size() >= self.INDEX_SIZE_THRESHOLD*(1000*1000):
timestamp = int(time.time()) timestamp = int(time.time())
self.redis_logger.debug(f"Creating new index {timestamp}") self.redis_logger.debug(f"Creating new index {timestamp}")
print(f"Creating new index {timestamp}")
self.indexpath = join(self.baseindexpath, str(timestamp)) self.indexpath = join(self.baseindexpath, str(timestamp))
self.indexname = str(timestamp) self.indexname = str(timestamp)
# update all_index # update all_index
@ -125,13 +129,13 @@ class Indexer(AbstractModule):
indexwriter.update_document( indexwriter.update_document(
title=docpath, title=docpath,
path=docpath, path=docpath,
content=paste) content=item_content)
indexwriter.commit() indexwriter.commit()
except IOError: except IOError:
self.redis_logger.debug(f"CRC Checksum Failed on: {PST.p_path}") self.redis_logger.debug(f"CRC Checksum Failed on: {item_id}")
self.redis_logger.error('Duplicate;{};{};{};CRC Checksum Failed'.format( print(f"CRC Checksum Failed on: {item_id}")
PST.p_source, PST.p_date, PST.p_name)) self.redis_logger.error(f'Duplicate;{item.get_source()};{item.get_date()};{item.get_basename()};CRC Checksum Failed')
def check_index_size(self): def check_index_size(self):
""" """

View file

@ -123,7 +123,7 @@ class SentimentAnalysis(AbstractModule):
avg_score = {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0} avg_score = {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0}
neg_line = 0 neg_line = 0
pos_line = 0 pos_line = 0
sid = SentimentIntensityAnalyzer(sentiment_lexicon_file) sid = SentimentIntensityAnalyzer(self.sentiment_lexicon_file)
for sentence in sentences: for sentence in sentences:
ss = sid.polarity_scores(sentence) ss = sid.polarity_scores(sentence)
for k in sorted(ss): for k in sorted(ss):

View file

@ -45,10 +45,11 @@ class Tags(AbstractModule):
if len(mess_split) == 2: if len(mess_split) == 2:
tag = mess_split[0] tag = mess_split[0]
item = Item(mess_split[1]) item = Item(mess_split[1])
item_id = item.get_id()
# Create a new tag # Create a new tag
Tag.add_tag('item', tag, item.get_id()) Tag.add_tag('item', tag, item.get_id())
print(f'{item.get_id(): Tagged {tag}}') print(f'{item_id}: Tagged {tag}')
# Forward message to channel # Forward message to channel
self.send_message_to_queue(message, 'MISP_The_Hive_feeder') self.send_message_to_queue(message, 'MISP_The_Hive_feeder')

View file

@ -5,21 +5,21 @@ The Template Module
====================== ======================
This module is a template for Template for new modules This module is a template for Template for new modules
""" """
################################## ##################################
# Import External packages # Import External packages
################################## ##################################
import os
import sys
import time import time
from pubsublogger import publisher
sys.path.append(os.environ['AIL_BIN'])
################################## ##################################
# Import Project packages # Import Project packages
################################## ##################################
from module.abstract_module import AbstractModule from modules.abstract_module import AbstractModule
from Helper import Process
class Template(AbstractModule): class Template(AbstractModule):
@ -30,12 +30,12 @@ class Template(AbstractModule):
def __init__(self): def __init__(self):
super(Template, self).__init__() super(Template, self).__init__()
# Send module state to logs # Pending time between two computation (computeNone) in seconds
self.redis_logger.info("Module %s initialized"%(self.module_name))
# Pending time between two computation in seconds
self.pending_seconds = 10 self.pending_seconds = 10
# Send module state to logs
self.redis_logger.info(f'Module {self.module_name} initialized')
def computeNone(self): def computeNone(self):
""" """
@ -52,6 +52,6 @@ class Template(AbstractModule):
if __name__ == '__main__': if __name__ == '__main__':
module = Template() module = Template()
module.run() module.run()

View file

@ -20,7 +20,7 @@ sys.path.append(os.environ['AIL_BIN'])
################################## ##################################
from modules.abstract_module import AbstractModule from modules.abstract_module import AbstractModule
import NotificationHelper import NotificationHelper
from packages import Item from packages.Item import Item
from packages import Term from packages import Term
from lib import Tracker from lib import Tracker

Binary file not shown.

BIN
samples/2021/01/01/categ.gz Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
samples/2021/01/01/keys.gz Normal file

Binary file not shown.

View file

@ -7,6 +7,7 @@ import unittest
import gzip import gzip
from base64 import b64encode from base64 import b64encode
from distutils.dir_util import copy_tree
sys.path.append(os.environ['AIL_BIN']) sys.path.append(os.environ['AIL_BIN'])
@ -20,9 +21,20 @@ from modules.Keys import Keys
from modules.Onion import Onion from modules.Onion import Onion
# project packages # project packages
from lib.ConfigLoader import ConfigLoader
import lib.crawlers as crawlers import lib.crawlers as crawlers
import packages.Item as Item import packages.Item as Item
#### COPY SAMPLES ####
config_loader = ConfigLoader()
# # TODO:move me in new Item package
ITEMS_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
ITEMS_FOLDER = os.path.join(os.path.realpath(ITEMS_FOLDER), '')
TESTS_ITEMS_FOLDER = os.path.join(ITEMS_FOLDER, 'tests')
sample_dir = os.path.join(os.environ['AIL_HOME'], 'samples')
copy_tree(sample_dir, TESTS_ITEMS_FOLDER)
#### ---- ####
class Test_Module_ApiKey(unittest.TestCase): class Test_Module_ApiKey(unittest.TestCase):
def setUp(self): def setUp(self):
@ -91,29 +103,31 @@ class Test_Module_Global(unittest.TestCase):
item_content = b'Lorem ipsum dolor sit amet, consectetur adipiscing elit' item_content = b'Lorem ipsum dolor sit amet, consectetur adipiscing elit'
item_content_1 = b64encode(gzip.compress(item_content)).decode() item_content_1 = b64encode(gzip.compress(item_content)).decode()
item_content_2 = b64encode(gzip.compress(item_content + b' more text')).decode() item_content_2 = b64encode(gzip.compress(item_content + b' more text ...')).decode()
message = f'{item_id} {item_content_1}' message = f'{item_id} {item_content_1}'
# Test new item # Test new item
result = self.module_obj.compute(message, r_result=True) result = self.module_obj.compute(message, r_result=True)
print(result) print(f'test new item: {result}')
self.assertEqual(result, item_id) self.assertEqual(result, item_id)
# Test duplicate # Test duplicate
result = self.module_obj.compute(message, r_result=True) result = self.module_obj.compute(message, r_result=True)
print(result) print(f'test duplicate {result}')
self.assertIsNone(result) self.assertIsNone(result)
# Test same id with != content # Test same id with != content
item = Item.Item('tests/2021/01/01/global_831875da824fc86ab5cc0e835755b520.gz')
item.delete()
message = f'{item_id} {item_content_2}' message = f'{item_id} {item_content_2}'
result = self.module_obj.compute(message, r_result=True) result = self.module_obj.compute(message, r_result=True)
print(result) print(f'test same id with != content: {result}')
self.assertIn(item_id[:-3], result) self.assertIn(item_id[:-3], result)
self.assertNotEqual(result, item_id) self.assertNotEqual(result, item_id)
# cleanup # cleanup
item = Item.Item(result) # item = Item.Item(result)
item.delete() # item.delete()
# # TODO: remove from queue # # TODO: remove from queue
class Test_Module_Keys(unittest.TestCase): class Test_Module_Keys(unittest.TestCase):

View file

@ -31,7 +31,7 @@ if __name__ == '__main__':
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
config_loader = None config_loader = None
r_serv.set('ail:current_background_script', 'domain languages update') r_serv_db.set('ail:current_background_script', 'domain languages update')
nb_elem_to_update = r_serv_db.get('update:nb_elem_to_convert') nb_elem_to_update = r_serv_db.get('update:nb_elem_to_convert')
if not nb_elem_to_update: if not nb_elem_to_update: