chg: [categ] messages, bypass categ module + fix correlation

This commit is contained in:
terrtia 2023-12-08 15:40:05 +01:00
parent 5b808ed416
commit 73185f19fd
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
2 changed files with 25 additions and 21 deletions

View file

@ -59,7 +59,7 @@ CORRELATION_TYPES_BY_OBJ = {
"pgp": ["domain", "item", "message"], "pgp": ["domain", "item", "message"],
"screenshot": ["domain", "item"], "screenshot": ["domain", "item"],
"title": ["domain", "item"], "title": ["domain", "item"],
"user-account": ["chat", "chat-subchannel", "chat-thread", "message"], "user-account": ["chat", "chat-subchannel", "chat-thread", "image", "message"],
"username": ["domain", "item", "message"], # TODO chat-user/account "username": ["domain", "item", "message"], # TODO chat-user/account
} }

View file

@ -6,14 +6,14 @@ The ZMQ_PubSub_Categ Module
Each word file created under /files/ represents a category. Each word file created under /files/ represents a category.
This module takes these files and compares them to This module takes these files and compares them to
the content of an item. the content of an obj.
When a word from a item match one or more of these words file, the filename of When a word from a obj match one or more of these words file, the filename of
the item / zhe item id is published/forwarded to the next modules. the obj / the obj id is published/forwarded to the next modules.
Each category (each file) represents a dynamic channel. Each category (each file) represents a dynamic channel.
This means that if you create 1000 files under /files/ you'll have 1000 channels This means that if you create 1000 files under /files/ you'll have 1000 channels
where every time there is a matching word to a category, the item containing where every time there is a matching word to a category, the obj containing
this word will be pushed to this specific channel. this word will be pushed to this specific channel.
.. note:: The channel will have the name of the file created. .. note:: The channel will have the name of the file created.
@ -44,7 +44,6 @@ sys.path.append(os.environ['AIL_BIN'])
################################## ##################################
from modules.abstract_module import AbstractModule from modules.abstract_module import AbstractModule
from lib.ConfigLoader import ConfigLoader from lib.ConfigLoader import ConfigLoader
from lib.objects.Items import Item
class Categ(AbstractModule): class Categ(AbstractModule):
@ -81,15 +80,19 @@ class Categ(AbstractModule):
self.categ_words = tmp_dict.items() self.categ_words = tmp_dict.items()
def compute(self, message, r_result=False): def compute(self, message, r_result=False):
# Create Item Object # Get obj Object
item = self.get_obj() obj = self.get_obj()
# Get item content # Get obj content
content = item.get_content() content = obj.get_content()
categ_found = [] categ_found = []
# Search for pattern categories in item content # Search for pattern categories in obj content
for categ, pattern in self.categ_words: for categ, pattern in self.categ_words:
if obj.type == 'message':
self.add_message_to_queue(message='0', queue=categ)
else:
found = set(re.findall(pattern, content)) found = set(re.findall(pattern, content))
lenfound = len(found) lenfound = len(found)
if lenfound >= self.matchingThreshold: if lenfound >= self.matchingThreshold:
@ -101,7 +104,8 @@ class Categ(AbstractModule):
self.add_message_to_queue(message=msg, queue=categ) self.add_message_to_queue(message=msg, queue=categ)
self.redis_logger.debug( self.redis_logger.debug(
f'Categ;{item.get_source()};{item.get_date()};{item.get_basename()};Detected {lenfound} as {categ};{item.get_id()}') f'Categ;{obj.get_source()};{obj.get_date()};{obj.get_basename()};Detected {lenfound} as {categ};{obj.get_id()}')
if r_result: if r_result:
return categ_found return categ_found