chg: [Categ] tests + docs

This commit is contained in:
Terrtia 2021-05-19 16:57:20 +02:00
parent 4a9bda2ee8
commit 20727fff77
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
3 changed files with 55 additions and 59 deletions

View file

@ -43,7 +43,7 @@ class ApiKey(AbstractModule):
# Send module state to logs # Send module state to logs
self.redis_logger.info(f"Module {self.module_name} initialized") self.redis_logger.info(f"Module {self.module_name} initialized")
def compute(self, message, r_match=False): def compute(self, message, r_result=False):
id, score = message.split() id, score = message.split()
item = Item(id) item = Item(id)
item_content = item.get_content() item_content = item.get_content()
@ -82,7 +82,7 @@ class ApiKey(AbstractModule):
# Send to duplicate # Send to duplicate
self.send_message_to_queue('Duplicate', item.get_id()) self.send_message_to_queue('Duplicate', item.get_id())
if r_match: if r_result:
return (google_api_key, aws_access_key, aws_secret_key) return (google_api_key, aws_access_key, aws_secret_key)
if __name__ == "__main__": if __name__ == "__main__":

View file

@ -4,19 +4,16 @@
The ZMQ_PubSub_Categ Module The ZMQ_PubSub_Categ Module
============================ ============================
This module is consuming the Redis-list created by the ZMQ_PubSub_Tokenize_Q
Module.
Each word file created under /files/ represents a category. Each word file created under /files/ represents a category.
This module takes these files and compares them to This module takes these files and compares them to
the stream of data given by the ZMQ_PubSub_Tokenize_Q Module. the content of an item.
When a word from a paste match one or more of these words file, the filename of When a word from an item match one or more of these words file, the filename of
the paste is published/forwarded to the next modules. the item / the item id is published/forwarded to the next modules.
Each category (each file) represents a dynamic channel. Each category (each file) represents a dynamic channel.
This means that if you create 1000 files under /files/ you'll have 1000 channels This means that if you create 1000 files under /files/ you'll have 1000 channels
where every time there is a matching word to a category, the paste containing where every time there is a matching word to a category, the item containing
this word will be pushed to this specific channel. this word will be pushed to this specific channel.
..note:: The channel will have the name of the file created. ..note:: The channel will have the name of the file created.
@ -25,15 +22,11 @@ Implementing modules can start here, create your own category file,
and then create your own module to treat the specific paste matching this and then create your own module to treat the specific paste matching this
category. category.
..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put
the same Subscriber name in both of them.
Requirements Requirements
------------ ------------
*Need running Redis instances. (Redis) *Need running Redis instances. (Redis)
*Categories files of words in /files/ need to be created *Categories files of words in /files/ need to be created
*Need the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly.
""" """
@ -42,16 +35,13 @@ Requirements
################################## ##################################
import os import os
import argparse import argparse
import time
import re import re
################################## ##################################
# Import Project packages # Import Project packages
################################## ##################################
from module.abstract_module import AbstractModule from module.abstract_module import AbstractModule
from pubsublogger import publisher from packages.Item import Item
from packages import Paste
from Helper import Process
class Categ(AbstractModule): class Categ(AbstractModule):
@ -59,73 +49,66 @@ class Categ(AbstractModule):
Categ module for AIL framework Categ module for AIL framework
""" """
def __init__(self): def __init__(self, categ_files_dir='../files/'):
""" """
Init Categ Init Categ
""" """
super(Categ, self).__init__() super(Categ, self).__init__()
self.categ_files_dir = categ_files_dir
# default = 1 string
self.matchingThreshold = self.process.config.getint("Categ", "matchingThreshold") self.matchingThreshold = self.process.config.getint("Categ", "matchingThreshold")
# SCRIPT PARSER # self.reload_categ_words()
parser = argparse.ArgumentParser(description='Start Categ module on files.')
parser.add_argument(
'-d', type=str, default="../files/",
help='Path to the directory containing the category files.',
action='store')
args = parser.parse_args()
self.redis_logger.info("Script Categ started") self.redis_logger.info("Script Categ started")
# # TODO: trigger reload on change ( save last reload time, ...)
def reload_categ_words(self):
categories = ['CreditCards', 'Mail', 'Onion', 'Web', 'Credential', 'Cve', 'ApiKey'] categories = ['CreditCards', 'Mail', 'Onion', 'Web', 'Credential', 'Cve', 'ApiKey']
tmp_dict = {} tmp_dict = {}
for filename in categories: for filename in categories:
bname = os.path.basename(filename) bname = os.path.basename(filename)
tmp_dict[bname] = [] tmp_dict[bname] = []
with open(os.path.join(args.d, filename), 'r') as f: with open(os.path.join(self.categ_files_dir, filename), 'r') as f:
patterns = [r'%s' % ( re.escape(s.strip()) ) for s in f] patterns = [r'%s' % ( re.escape(s.strip()) ) for s in f]
tmp_dict[bname] = re.compile('|'.join(patterns), re.IGNORECASE) tmp_dict[bname] = re.compile('|'.join(patterns), re.IGNORECASE)
self.categ_words = tmp_dict.items()
self.categ_items = tmp_dict.items() def compute(self, message, r_result=False):
# Create Item Object
item = Item(message)
# Get item content
content = item.get_content()
categ_found = []
prec_filename = None # Search for pattern categories in item content
for categ, pattern in self.categ_words:
def compute(self, message):
# Cast message as paste
paste = Paste.Paste(message)
# Get paste content
content = paste.get_p_content()
# init categories found
is_categ_found = False
# Search for pattern categories in paste content
for categ, pattern in self.categ_items:
found = set(re.findall(pattern, content)) found = set(re.findall(pattern, content))
lenfound = len(found) lenfound = len(found)
if lenfound >= self.matchingThreshold: if lenfound >= self.matchingThreshold:
is_categ_found = True categ_found.append(categ)
msg = '{} {}'.format(paste.p_rel_path, lenfound) msg = f'{item.get_id()} {lenfound}'
self.redis_logger.debug('%s;%s %s'%(self.module_name, msg, categ))
# Export message to categ queue # Export message to categ queue
self.process.populate_set_out(msg, categ) print(msg, categ)
self.send_message_to_queue(categ, msg)
self.redis_logger.info( self.redis_logger.info(
'Categ;{};{};{};Detected {} as {};{}'.format( f'Categ;{item.get_source()};{item.get_date()};{item.get_basename()};Detected {lenfound} as {categ};{item.get_id()}')
paste.p_source, paste.p_date, paste.p_name, if r_result:
lenfound, categ, paste.p_rel_path)) return categ_found
if not is_categ_found:
self.redis_logger.debug('No %s found in this paste: %s'%(self.module_name, paste.p_name))
if __name__ == '__main__': if __name__ == '__main__':
module = Categ() # SCRIPT PARSER #
parser = argparse.ArgumentParser(description='Start Categ module on files.')
parser.add_argument(
'-d', type=str, default="../files/",
help='Path to the directory containing the category files.',
action='store')
args = parser.parse_args()
module = Categ(categ_files_dir=args.d)
module.run() module.run()

View file

@ -9,6 +9,7 @@ sys.path.append(os.environ['AIL_BIN'])
# Modules Classes # Modules Classes
from ApiKey import ApiKey from ApiKey import ApiKey
from Categ import Categ
from Onion import Onion from Onion import Onion
# project packages # project packages
@ -25,11 +26,23 @@ class Test_Module_ApiKey(unittest.TestCase):
aws_access_key = 'AKIAIOSFODNN7EXAMPLE' aws_access_key = 'AKIAIOSFODNN7EXAMPLE'
aws_secret_key = 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY' aws_secret_key = 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY'
matches = self.module_obj.compute(f'{item_id} 3', r_match=True) matches = self.module_obj.compute(f'{item_id} 3', r_result=True)
self.assertCountEqual(matches[0], [google_api_key]) self.assertCountEqual(matches[0], [google_api_key])
self.assertCountEqual(matches[1], [aws_access_key]) self.assertCountEqual(matches[1], [aws_access_key])
self.assertCountEqual(matches[2], [aws_secret_key]) self.assertCountEqual(matches[2], [aws_secret_key])
class Test_Module_Categ(unittest.TestCase):
def setUp(self):
self.module_obj = Categ()
def test_module(self):
item_id = 'tests/2021/01/01/categ.gz'
test_categ = ['CreditCards', 'Mail', 'Onion', 'Web', 'Credential', 'Cve']
result = self.module_obj.compute(item_id, r_result=True)
self.assertCountEqual(result, test_categ)
class Test_Module_Onion(unittest.TestCase): class Test_Module_Onion(unittest.TestCase):
def setUp(self): def setUp(self):