ail-framework/bin/trackers/Tracker_Term.py

169 lines
6.1 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
# -*-coding:UTF-8 -*
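"""
The Tracker_Term Module
=======================

Matches each item's words against the tracked words and tracked word sets,
then tags the item and sends email/webhook notifications for every match.
"""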
2021-04-02 07:52:05 +00:00
##################################
# Import External packages
##################################
import os
import sys
import time
import signal
import requests
sys.path.append(os.environ['AIL_BIN'])
2021-04-02 07:52:05 +00:00
##################################
# Import Project packages
##################################
from modules.abstract_module import AbstractModule
import NotificationHelper
from packages.Item import Item
from packages import Term
from lib import Tracker
class TimeoutException(Exception):
    """Raised by the SIGALRM handler when item processing exceeds its time budget."""
def timeout_handler(signum, frame):
    """Signal handler that aborts the interrupted code by raising TimeoutException.

    :param signum: number of the received signal (unused)
    :param frame: stack frame that was interrupted (unused)
    :raises TimeoutException: always
    """
    raise TimeoutException
# Route SIGALRM to timeout_handler so that an expired signal.alarm() raises TimeoutException.
signal.signal(signal.SIGALRM, timeout_handler)
class Tracker_Term(AbstractModule):
    """
    Tracker_Term module for AIL framework

    For every item received, checks its words against the tracked words and
    the tracked word sets. Each match is recorded on the tracker, tagged, and
    reported by email and/or webhook when the tracker defines them.
    """

    # Email body; format() arguments are: term, item id, base item url, item id.
    mail_body_template = "AIL Framework,\nNew occurrence for tracked term: {}\nitem id: {}\nurl: {}{}"

    # Seconds to wait for a webhook endpoint before giving up, so that an
    # unresponsive endpoint cannot block the module forever.
    webhook_timeout = 30

    def __init__(self):
        """Read the module configuration and load the tracked words and word sets."""
        super(Tracker_Term, self).__init__()

        self.pending_seconds = 5

        # Time budget (seconds) passed to signal.alarm() in compute()
        self.max_execution_time = self.process.config.getint('Tracker_Term', "max_execution_time")

        self.full_item_url = self.process.config.get("Notifications", "ail_domain") + "/object/item?id="

        # loads tracked words
        self.list_tracked_words = Term.get_tracked_words_list()
        self.last_refresh_word = time.time()
        self.set_tracked_words_list = Term.get_set_tracked_words_list()
        self.last_refresh_set = time.time()

        self.redis_logger.info(f"Module: {self.module_name} Launched")

    def compute(self, item_id):
        """Check one item against every tracked word and tracked word set.

        :param item_id: id of the item to process
        """
        # Reload the tracked terms when they changed since our last refresh
        if self.last_refresh_word < Term.get_tracked_term_last_updated_by_type('word'):
            self.list_tracked_words = Term.get_tracked_words_list()
            self.last_refresh_word = time.time()
            self.redis_logger.debug('Tracked word refreshed')
            print('Tracked word refreshed')

        if self.last_refresh_set < Term.get_tracked_term_last_updated_by_type('set'):
            self.set_tracked_words_list = Term.get_set_tracked_words_list()
            self.last_refresh_set = time.time()
            self.redis_logger.debug('Tracked set refreshed')
            print('Tracked set refreshed')

        # Cast message as Item
        item = Item(item_id)
        item_content = item.get_content()

        # Bound the tokenisation time: SIGALRM raises TimeoutException
        signal.alarm(self.max_execution_time)
        dict_words_freq = None
        try:
            dict_words_freq = Term.get_text_word_frequency(item_content)
        except TimeoutException:
            self.redis_logger.warning(f"{item.get_id()} processing timeout")
        finally:
            # Always disarm the alarm: if another exception escapes, a pending
            # SIGALRM must not fire later inside unrelated code.
            signal.alarm(0)

        if not dict_words_freq:
            return

        # check solo words
        for word in self.list_tracked_words:
            if word in dict_words_freq:
                self.new_term_found(word, 'word', item)

        # check words set
        # each element is indexed as: [0] words, [1] match threshold, [2] set term
        for elem in self.set_tracked_words_list:
            list_words = elem[0]
            nb_words_threshold = elem[1]
            word_set = elem[2]

            nb_uniq_word = sum(1 for word in list_words if word in dict_words_freq)
            if nb_uniq_word >= nb_words_threshold:
                self.new_term_found(word_set, 'set', item)

    def new_term_found(self, term, term_type, item):
        """Record a match and trigger the tags, emails and webhook of each tracker.

        :param term: tracked word or word set that matched
        :param term_type: 'word' or 'set'
        :param item: Item in which the term was found
        """
        uuid_list = Term.get_term_uuid_list(term, term_type)

        item_id = item.get_id()
        item_date = item.get_date()
        item_source = item.get_source()
        self.redis_logger.warning(f'new tracked term found: {term} in {item_id}')
        print(f'new tracked term found: {term} in {item_id}')

        for term_uuid in uuid_list:
            # A tracker without sources accepts every item; otherwise the
            # item source must be one of the tracker sources.
            tracker_sources = Tracker.get_tracker_uuid_sources(term_uuid)
            if tracker_sources and item_source not in tracker_sources:
                continue

            Tracker.add_tracked_item(term_uuid, item_id)

            tags_to_add = Term.get_term_tags(term_uuid)
            for tag in tags_to_add:
                msg = '{};{}'.format(tag, item_id)
                self.send_message_to_queue(msg, 'Tags')

            mail_to_notify = Term.get_term_mails(term_uuid)
            if mail_to_notify:
                mail_subject = Tracker.get_email_subject(term_uuid)
                mail_body = Tracker_Term.mail_body_template.format(term, item_id, self.full_item_url, item_id)
                for mail in mail_to_notify:
                    self.redis_logger.debug(f'Send Mail {mail_subject}')
                    print(f'Send Mail {mail_subject}')
                    NotificationHelper.sendEmailNotification(mail, mail_subject, mail_body)

            # Webhook
            webhook_to_post = Term.get_term_webhook(term_uuid)
            if webhook_to_post:
                json_request = {"trackerId": term_uuid,
                                "itemId": item_id,
                                "itemURL": self.full_item_url + item_id,
                                "term": term,
                                "itemSource": item_source,
                                "itemDate": item_date,
                                "tags": tags_to_add,
                                "emailNotification": f'{mail_to_notify}',
                                "trackerType": term_type
                                }
                self._post_webhook(webhook_to_post, json_request)

    def _post_webhook(self, webhook_to_post, json_request):
        """POST the notification payload to a webhook; failures are logged, never raised."""
        try:
            response = requests.post(webhook_to_post, json=json_request, timeout=self.webhook_timeout)
            if response.status_code >= 400:
                self.redis_logger.error(f"Webhook request failed for {webhook_to_post}\nReason: {response.reason}")
        except Exception as e:
            # Best effort: a broken webhook must not stop the tracker, but
            # KeyboardInterrupt/SystemExit are no longer swallowed here.
            self.redis_logger.error(f"Webhook request failed for {webhook_to_post}\nReason: {e}")
2021-04-02 07:52:05 +00:00
if __name__ == '__main__':
    # Script entry point: build the module and call its run() method
    module = Tracker_Term()
    module.run()