diff --git a/bin/ModuleStats.py b/bin/ModuleStats.py index 49992166..240c315b 100755 --- a/bin/ModuleStats.py +++ b/bin/ModuleStats.py @@ -12,15 +12,14 @@ import time import datetime import redis import os +import sys - +sys.path.append(os.environ['AIL_BIN']) ################################## # Import Project packages # ################################## -from module.abstract_module import AbstractModule +from modules.abstract_module import AbstractModule from packages.Date import Date -from pubsublogger import publisher -from Helper import Process from packages import Paste import ConfigLoader diff --git a/bin/core/DbCleaner.py b/bin/core/DbCleaner.py index e0cf6512..0fdb4228 100755 --- a/bin/core/DbCleaner.py +++ b/bin/core/DbCleaner.py @@ -10,14 +10,16 @@ import sys import time import datetime -from pubsublogger import publisher - -import NotificationHelper - +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## from packages import Date from packages import Item from packages import Term +from pubsublogger import publisher + def clean_term_db_stat_token(): all_stat_date = Term.get_all_token_stat_history() diff --git a/bin/modules/Categ.py b/bin/modules/Categ.py index 4e8b6205..ddd35ace 100755 --- a/bin/modules/Categ.py +++ b/bin/modules/Categ.py @@ -51,7 +51,7 @@ class Categ(AbstractModule): Categ module for AIL framework """ - def __init__(self, categ_files_dir='../files/'): + def __init__(self, categ_files_dir=os.path.join(os.environ['AIL_HOME'], 'files')): """ Init Categ """ @@ -107,7 +107,7 @@ if __name__ == '__main__': # SCRIPT PARSER # parser = argparse.ArgumentParser(description='Start Categ module on files.') parser.add_argument( - '-d', type=str, default="../files/", + '-d', type=str, default=os.path.join(os.environ['AIL_HOME'], 'files'), help='Path to the directory containing the category files.', action='store') args = parser.parse_args() diff --git a/bin/modules/Global.py b/bin/modules/Global.py index c203d6c8..ff14051c 100755 --- a/bin/modules/Global.py +++ b/bin/modules/Global.py @@ -104,6 +104,7 @@ class Global(AbstractModule): # Incorrect filename if not os.path.commonprefix([filename, self.PASTES_FOLDER]) == self.PASTES_FOLDER: self.redis_logger.warning(f'Global; Path traversal detected {filename}') + print(f'Global; Path traversal detected {filename}') else: # Decode compressed base64 @@ -134,6 +135,7 @@ class Global(AbstractModule): else: self.redis_logger.debug(f"Empty Item: {message} not processed") + print(f"Empty Item: {message} not processed") def check_filename(self, filename, new_file_content): @@ -145,6 +147,7 @@ class Global(AbstractModule): # check if file exist if os.path.isfile(filename): self.redis_logger.warning(f'File already exist {filename}') + print(f'File already exist {filename}') # Check that file already exists but content differs curr_file_content = self.gunzip_file(filename) @@ -165,11 +168,13 @@ class Global(AbstractModule): if os.path.isfile(filename): # Ignore duplicate self.redis_logger.debug(f'ignore duplicated file {filename}') + print(f'ignore duplicated file {filename}') filename = None else: # Ignore duplicate checksum equals self.redis_logger.debug(f'ignore duplicated file {filename}') + print(f'ignore duplicated file {filename}') filename = None else: @@ -192,10 +197,12 @@ class Global(AbstractModule): curr_file_content = f.read() except EOFError: self.redis_logger.warning(f'Global; Incomplete file: {filename}') + print(f'Global; Incomplete file: {filename}') # save daily stats self.r_stats.zincrby('module:Global:incomplete_file', datetime.datetime.now().strftime('%Y%m%d'), 1) except OSError: self.redis_logger.warning(f'Global; Not a gzipped file: {filename}') + print(f'Global; Not a gzipped file: {filename}') # save daily stats self.r_stats.zincrby('module:Global:invalid_file', datetime.datetime.now().strftime('%Y%m%d'), 1) @@ -213,6 +220,7 @@ class Global(AbstractModule): gunzipped_bytes_obj = fo.read() except Exception as e: self.redis_logger.warning(f'Global; Invalid Gzip file: {filename}, {e}') + print(f'Global; Invalid Gzip file: {filename}, {e}') return gunzipped_bytes_obj diff --git a/bin/modules/Indexer.py b/bin/modules/Indexer.py index f7218ae1..cdb65f16 100755 --- a/bin/modules/Indexer.py +++ b/bin/modules/Indexer.py @@ -26,7 +26,7 @@ sys.path.append(os.environ['AIL_BIN']) # Import Project packages ################################## from modules.abstract_module import AbstractModule -from packages import Paste +from packages.Item import Item class Indexer(AbstractModule): @@ -98,19 +98,23 @@ class Indexer(AbstractModule): def compute(self, message): - try: - PST = Paste.Paste(message) - docpath = message.split(" ", -1)[-1] - paste = PST.get_p_content() - self.redis_logger.debug(f"Indexing - {self.indexname}: {docpath}") - print(f"Indexing - {self.indexname}: {docpath}") + docpath = message.split(" ", -1)[-1] + item = Item(message) + item_id = item.get_id() + item_content = item.get_content() + + self.redis_logger.debug(f"Indexing - {self.indexname}: {docpath}") + print(f"Indexing - {self.indexname}: {docpath}") + + try: # Avoid calculating the index's size at each message if(time.time() - self.last_refresh > self.TIME_WAIT): self.last_refresh = time.time() if self.check_index_size() >= self.INDEX_SIZE_THRESHOLD*(1000*1000): timestamp = int(time.time()) self.redis_logger.debug(f"Creating new index {timestamp}") + print(f"Creating new index {timestamp}") self.indexpath = join(self.baseindexpath, str(timestamp)) self.indexname = str(timestamp) # update all_index @@ -125,13 +129,13 @@ class Indexer(AbstractModule): indexwriter.update_document( title=docpath, path=docpath, - content=paste) + content=item_content) indexwriter.commit() except IOError: - self.redis_logger.debug(f"CRC Checksum Failed on: {PST.p_path}") - self.redis_logger.error('Duplicate;{};{};{};CRC Checksum Failed'.format( - PST.p_source, PST.p_date, PST.p_name)) + self.redis_logger.debug(f"CRC Checksum Failed on: {item_id}") + print(f"CRC Checksum Failed on: {item_id}") + self.redis_logger.error(f'Duplicate;{item.get_source()};{item.get_date()};{item.get_basename()};CRC Checksum Failed') def check_index_size(self): """ diff --git a/bin/modules/SentimentAnalysis.py b/bin/modules/SentimentAnalysis.py index ba8032a7..ff0b8142 100755 --- a/bin/modules/SentimentAnalysis.py +++ b/bin/modules/SentimentAnalysis.py @@ -123,7 +123,7 @@ class SentimentAnalysis(AbstractModule): avg_score = {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0} neg_line = 0 pos_line = 0 - sid = SentimentIntensityAnalyzer(sentiment_lexicon_file) + sid = SentimentIntensityAnalyzer(self.sentiment_lexicon_file) for sentence in sentences: ss = sid.polarity_scores(sentence) for k in sorted(ss): diff --git a/bin/modules/Tags.py b/bin/modules/Tags.py index f21fa858..2a42bfca 100755 --- a/bin/modules/Tags.py +++ b/bin/modules/Tags.py @@ -45,10 +45,11 @@ class Tags(AbstractModule): if len(mess_split) == 2: tag = mess_split[0] item = Item(mess_split[1]) + item_id = item.get_id() # Create a new tag Tag.add_tag('item', tag, item.get_id()) - print(f'{item.get_id(): Tagged {tag}}') + print(f'{item_id}: Tagged {tag}') # Forward message to channel self.send_message_to_queue(message, 'MISP_The_Hive_feeder') diff --git a/bin/template.py b/bin/template.py index 0e1a0a8f..88f17cbd 100755 --- a/bin/template.py +++ b/bin/template.py @@ -5,21 +5,21 @@ The Template Module ====================== This module is a template for Template for new modules - + """ ################################## # Import External packages ################################## +import os +import sys import time -from pubsublogger import publisher - +sys.path.append(os.environ['AIL_BIN']) ################################## # Import Project packages ################################## -from module.abstract_module import AbstractModule -from Helper import Process +from modules.abstract_module import AbstractModule class Template(AbstractModule): @@ -30,12 +30,12 @@ class Template(AbstractModule): def __init__(self): super(Template, self).__init__() - # Send module state to logs - self.redis_logger.info("Module %s initialized"%(self.module_name)) - - # Pending time between two computation in seconds + # Pending time between two computation (computeNone) in seconds self.pending_seconds = 10 + # Send module state to logs + self.redis_logger.info(f'Module {self.module_name} initialized') + def computeNone(self): """ @@ -52,6 +52,6 @@ class Template(AbstractModule): if __name__ == '__main__': - + module = Template() module.run() diff --git a/bin/trackers/Tracker_Term.py b/bin/trackers/Tracker_Term.py index 89791e22..9878835e 100755 --- a/bin/trackers/Tracker_Term.py +++ b/bin/trackers/Tracker_Term.py @@ -20,7 +20,7 @@ sys.path.append(os.environ['AIL_BIN']) ################################## from modules.abstract_module import AbstractModule import NotificationHelper -from packages import Item +from packages.Item import Item from packages import Term from lib import Tracker diff --git a/samples/2021/01/01/api_keys.gz b/samples/2021/01/01/api_keys.gz new file mode 100644 index 00000000..6ed5164b Binary files /dev/null and b/samples/2021/01/01/api_keys.gz differ diff --git a/samples/2021/01/01/categ.gz b/samples/2021/01/01/categ.gz new file mode 100644 index 00000000..35466908 Binary files /dev/null and b/samples/2021/01/01/categ.gz differ diff --git a/samples/2021/01/01/credit_cards.gz b/samples/2021/01/01/credit_cards.gz new file mode 100644 index 00000000..53d094a1 Binary files /dev/null and b/samples/2021/01/01/credit_cards.gz differ diff --git a/samples/2021/01/01/domain_classifier.gz b/samples/2021/01/01/domain_classifier.gz new file mode 100644 index 00000000..8ade7927 Binary files /dev/null and b/samples/2021/01/01/domain_classifier.gz differ diff --git a/samples/2021/01/01/keys.gz b/samples/2021/01/01/keys.gz new file mode 100644 index 00000000..559b4006 Binary files /dev/null and b/samples/2021/01/01/keys.gz differ diff --git a/tests/test_modules.py b/tests/test_modules.py index ce685b9c..bb69397f 100644 --- a/tests/test_modules.py +++ b/tests/test_modules.py @@ -7,6 +7,7 @@ import unittest import gzip from base64 import b64encode +from distutils.dir_util import copy_tree sys.path.append(os.environ['AIL_BIN']) @@ -20,9 +21,20 @@ from modules.Keys import Keys from modules.Onion import Onion # project packages +from lib.ConfigLoader import ConfigLoader import lib.crawlers as crawlers import packages.Item as Item +#### COPY SAMPLES #### +config_loader = ConfigLoader() +# # TODO:move me in new Item package +ITEMS_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/' +ITEMS_FOLDER = os.path.join(os.path.realpath(ITEMS_FOLDER), '') +TESTS_ITEMS_FOLDER = os.path.join(ITEMS_FOLDER, 'tests') +sample_dir = os.path.join(os.environ['AIL_HOME'], 'samples') +copy_tree(sample_dir, TESTS_ITEMS_FOLDER) +#### ---- #### + class Test_Module_ApiKey(unittest.TestCase): def setUp(self): @@ -91,29 +103,31 @@ class Test_Module_Global(unittest.TestCase): item_content = b'Lorem ipsum dolor sit amet, consectetur adipiscing elit' item_content_1 = b64encode(gzip.compress(item_content)).decode() - item_content_2 = b64encode(gzip.compress(item_content + b' more text')).decode() + item_content_2 = b64encode(gzip.compress(item_content + b' more text ...')).decode() message = f'{item_id} {item_content_1}' # Test new item result = self.module_obj.compute(message, r_result=True) - print(result) + print(f'test new item: {result}') self.assertEqual(result, item_id) # Test duplicate result = self.module_obj.compute(message, r_result=True) - print(result) + print(f'test duplicate {result}') self.assertIsNone(result) # Test same id with != content + item = Item.Item('tests/2021/01/01/global_831875da824fc86ab5cc0e835755b520.gz') + item.delete() message = f'{item_id} {item_content_2}' result = self.module_obj.compute(message, r_result=True) - print(result) + print(f'test same id with != content: {result}') self.assertIn(item_id[:-3], result) self.assertNotEqual(result, item_id) # cleanup - item = Item.Item(result) - item.delete() + # item = Item.Item(result) + # item.delete() # # TODO: remove from queue class Test_Module_Keys(unittest.TestCase): diff --git a/update/v3.4/Update_domain.py b/update/v3.4/Update_domain.py index 72ba166d..9e096c48 100755 --- a/update/v3.4/Update_domain.py +++ b/update/v3.4/Update_domain.py @@ -31,7 +31,7 @@ if __name__ == '__main__': r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") config_loader = None - r_serv.set('ail:current_background_script', 'domain languages update') + r_serv_db.set('ail:current_background_script', 'domain languages update') nb_elem_to_update = r_serv_db.get('update:nb_elem_to_convert') if not nb_elem_to_update: