2019-08-06 15:03:49 +00:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
# -*-coding:UTF-8 -*
|
|
|
|
"""
|
2021-06-02 14:04:52 +00:00
|
|
|
The Tracker_Term Module
|
2019-08-06 15:03:49 +00:00
|
|
|
=======================
|
|
|
|
|
|
|
|
"""
|
2021-04-02 07:52:05 +00:00
|
|
|
|
|
|
|
##################################
|
|
|
|
# Import External packages
|
|
|
|
##################################
|
2019-08-06 15:03:49 +00:00
|
|
|
import os
|
|
|
|
import sys
|
|
|
|
import time
|
2019-08-09 12:20:13 +00:00
|
|
|
import signal
|
2021-09-28 19:11:20 +00:00
|
|
|
|
2019-08-06 15:03:49 +00:00
|
|
|
|
2021-06-02 14:04:52 +00:00
|
|
|
sys.path.append(os.environ['AIL_BIN'])
|
2021-04-02 07:52:05 +00:00
|
|
|
##################################
|
|
|
|
# Import Project packages
|
|
|
|
##################################
|
2021-06-02 14:04:52 +00:00
|
|
|
from modules.abstract_module import AbstractModule
|
2023-04-13 12:25:02 +00:00
|
|
|
from lib.ConfigLoader import ConfigLoader
|
2022-10-25 14:25:19 +00:00
|
|
|
from lib.objects.Items import Item
|
2019-08-06 15:03:49 +00:00
|
|
|
from packages import Term
|
2020-07-10 13:54:14 +00:00
|
|
|
from lib import Tracker
|
|
|
|
|
2023-03-30 12:58:55 +00:00
|
|
|
from exporter.MailExporter import MailExporterTracker
|
|
|
|
from exporter.WebHookExporter import WebHookExporterTracker
|
|
|
|
|
2019-08-09 12:20:13 +00:00
|
|
|
class TimeoutException(Exception):
    """Raised by ``timeout_handler`` when it is invoked as a signal handler."""
|
2021-09-28 19:11:20 +00:00
|
|
|
|
|
|
|
|
2019-08-09 12:20:13 +00:00
|
|
|
def timeout_handler(signum, frame):
    """
    Signal handler that aborts the interrupted code with a TimeoutException.

    :param signum: signal number handed over by the ``signal`` machinery (unused)
    :param frame: interrupted stack frame (unused)
    :raises TimeoutException: always
    """
    raise TimeoutException()
|
2021-09-28 19:11:20 +00:00
|
|
|
|
|
|
|
|
2019-08-09 12:20:13 +00:00
|
|
|
# Route SIGALRM to timeout_handler, so that an alarm armed with signal.alarm()
# surfaces as a TimeoutException in the code that is running when it fires.
signal.signal(signal.SIGALRM, timeout_handler)
|
|
|
|
|
2019-08-07 10:08:24 +00:00
|
|
|
|
2021-06-02 14:04:52 +00:00
|
|
|
class Tracker_Term(AbstractModule):
    """
    Tracker_Term module for AIL framework

    For every item handed to ``compute``, checks the item content against the
    tracked words and tracked word sets. Each tracker that matches gets the
    item recorded, its tags queued and its enabled exporters (mail, webhook)
    run.
    """

    def __init__(self, queue=True):
        """
        Load the configuration, the tracked terms and the exporters.

        :param queue: forwarded unchanged to ``AbstractModule.__init__``
        """
        super().__init__(queue=queue)

        config_loader = ConfigLoader()

        # NOTE(review): presumably read by the AbstractModule run loop - confirm
        self.pending_seconds = 5

        # Value given to signal.alarm() around the word frequency extraction
        self.max_execution_time = config_loader.get_config_int('Tracker_Term', "max_execution_time")

        # loads tracked words
        self.list_tracked_words = Term.get_tracked_words_list()
        self.last_refresh_word = time.time()
        self.set_tracked_words_list = Term.get_set_tracked_words_list()
        self.last_refresh_set = time.time()

        # Exporter
        self.exporters = {'mail': MailExporterTracker(),
                          'webhook': WebHookExporterTracker()}

        self.redis_logger.info(f"Module: {self.module_name} Launched")

    def _refresh_tracked_terms(self):
        """Reload the cached tracked words / word sets when Term reports a more recent update."""
        if self.last_refresh_word < Term.get_tracked_term_last_updated_by_type('word'):
            self.list_tracked_words = Term.get_tracked_words_list()
            self.last_refresh_word = time.time()
            self.redis_logger.debug('Tracked word refreshed')
            print('Tracked word refreshed')

        if self.last_refresh_set < Term.get_tracked_term_last_updated_by_type('set'):
            self.set_tracked_words_list = Term.get_set_tracked_words_list()
            self.last_refresh_set = time.time()
            self.redis_logger.debug('Tracked set refreshed')
            print('Tracked set refreshed')

    def compute(self, item_id, item_content=None):
        """
        Search one item for tracked words and tracked word sets.

        :param item_id: identifier used to build the ``Item``
        :param item_content: content to analyse; when falsy, the content is
                             read with ``Item.get_content()``
        """
        # refresh Tracked term
        self._refresh_tracked_terms()

        # Cast message as Item
        item = Item(item_id)
        if not item_content:
            item_content = item.get_content()

        dict_words_freq = None
        signal.alarm(self.max_execution_time)
        try:
            dict_words_freq = Term.get_text_word_frequency(item_content)
        except TimeoutException:
            self.redis_logger.warning(f"{item.get_id()} processing timeout")
        finally:
            # Disarm on every exit path: if the extraction fails with another
            # exception, a still-pending alarm would otherwise raise
            # TimeoutException later, in unrelated code.
            signal.alarm(0)

        # Nothing extracted (timeout or empty result): nothing to match
        if not dict_words_freq:
            return

        # check solo words
        # ###### # TODO: check if source needed #######
        for word in self.list_tracked_words:
            if word in dict_words_freq:
                self.new_term_found(word, 'word', item)

        # check words set
        # each element is indexed as (words, threshold, tracker name)
        for elem in self.set_tracked_words_list:
            list_words = elem[0]
            nb_words_threshold = elem[1]
            word_set = elem[2]

            # number of words of the set that are present in the item
            nb_uniq_word = sum(1 for word in list_words if word in dict_words_freq)
            if nb_uniq_word >= nb_words_threshold:
                self.new_term_found(word_set, 'set', item)

    def new_term_found(self, tracker_name, tracker_type, item):
        """
        Register a match on every tracker attached to a tracked term.

        :param tracker_name: tracked word, or tracked word set, that matched
        :param tracker_type: ``'word'`` or ``'set'`` in the calls made by ``compute``
        :param item: ``Item`` in which the term was found
        """
        uuid_list = Tracker.get_tracker_uuid_list(tracker_name, tracker_type)

        item_id = item.get_id()
        item_source = item.get_source()

        for tracker_uuid in uuid_list:
            tracker = Tracker.Tracker(tracker_uuid)

            # Source Filtering
            # a tracker without sources accepts every item
            tracker_sources = tracker.get_sources()
            if tracker_sources and item_source not in tracker_sources:
                continue

            print(f'new tracked term found: {tracker_name} in {item_id}')
            self.redis_logger.warning(f'new tracked term found: {tracker_name} in {item_id}')
            # TODO
            Tracker.add_tracked_item(tracker_uuid, item_id)

            # Tags
            for tag in tracker.get_tags():
                msg = f'{tag};{item_id}'
                self.add_message_to_queue(msg, 'Tags')

            # Mail
            if tracker.mail_export():
                # TODO add matches + custom subjects
                self.exporters['mail'].export(tracker, item)

            # Webhook
            if tracker.webhook_export():
                self.exporters['webhook'].export(tracker, item)
|
2021-10-04 10:55:40 +00:00
|
|
|
|
2021-04-02 07:52:05 +00:00
|
|
|
|
2021-09-28 19:11:20 +00:00
|
|
|
if __name__ == '__main__':
    # Script entry point: build the module and call its run() method
    Tracker_Term().run()
|