mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-10 08:38:28 +00:00
Stop regexp processing after timeput (60sec)
Minor pep8 fixes
This commit is contained in:
parent
a414a84044
commit
6d199f0150
1 changed files with 36 additions and 16 deletions
|
@ -9,35 +9,45 @@ supplied in the term webpage.
|
||||||
import redis
|
import redis
|
||||||
import time
|
import time
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
from packages import lib_words
|
|
||||||
from packages import Paste
|
from packages import Paste
|
||||||
import os
|
|
||||||
from os import environ
|
|
||||||
import datetime
|
|
||||||
import calendar
|
import calendar
|
||||||
import re
|
import re
|
||||||
|
import signal
|
||||||
|
import time
|
||||||
from Helper import Process
|
from Helper import Process
|
||||||
|
|
||||||
# Email notifications
|
# Email notifications
|
||||||
from NotificationHelper import *
|
from NotificationHelper import *
|
||||||
|
|
||||||
|
|
||||||
|
class TimeoutException(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def timeout_handler(signum, frame):
|
||||||
|
raise TimeoutException
|
||||||
|
|
||||||
|
signal.signal(signal.SIGALRM, timeout_handler)
|
||||||
|
|
||||||
|
|
||||||
# Config Variables
|
# Config Variables
|
||||||
DICO_REFRESH_TIME = 60 #s
|
DICO_REFRESH_TIME = 60 # s
|
||||||
|
PROCESS_TIMEOUT = 60
|
||||||
|
|
||||||
BlackListTermsSet_Name = "BlackListSetTermSet"
|
BlackListTermsSet_Name = "BlackListSetTermSet"
|
||||||
TrackedTermsSet_Name = "TrackedSetTermSet"
|
TrackedTermsSet_Name = "TrackedSetTermSet"
|
||||||
TrackedRegexSet_Name = "TrackedRegexSet"
|
TrackedRegexSet_Name = "TrackedRegexSet"
|
||||||
|
|
||||||
top_term_freq_max_set_cardinality = 20 # Max cardinality of the terms frequences set
|
top_term_freq_max_set_cardinality = 20 # Max cardinality of the terms frequences set
|
||||||
oneDay = 60*60*24
|
oneDay = 60*60*24
|
||||||
top_termFreq_setName_day = ["TopTermFreq_set_day_", 1]
|
top_termFreq_setName_day = ["TopTermFreq_set_day_", 1]
|
||||||
top_termFreq_setName_week = ["TopTermFreq_set_week", 7]
|
top_termFreq_setName_week = ["TopTermFreq_set_week", 7]
|
||||||
top_termFreq_setName_month = ["TopTermFreq_set_month", 31]
|
top_termFreq_setName_month = ["TopTermFreq_set_month", 31]
|
||||||
top_termFreq_set_array = [top_termFreq_setName_day,top_termFreq_setName_week, top_termFreq_setName_month]
|
top_termFreq_set_array = [top_termFreq_setName_day, top_termFreq_setName_week, top_termFreq_setName_month]
|
||||||
|
|
||||||
# create direct link in mail
|
# create direct link in mail
|
||||||
full_paste_url = "/showsavedpaste/?paste="
|
full_paste_url = "/showsavedpaste/?paste="
|
||||||
|
|
||||||
|
|
||||||
def refresh_dicos():
|
def refresh_dicos():
|
||||||
dico_regex = {}
|
dico_regex = {}
|
||||||
dico_regexname_to_redis = {}
|
dico_regexname_to_redis = {}
|
||||||
|
@ -67,7 +77,7 @@ if __name__ == "__main__":
|
||||||
# create direct link in mail
|
# create direct link in mail
|
||||||
full_paste_url = p.config.get("Notifications", "ail_domain") + full_paste_url
|
full_paste_url = p.config.get("Notifications", "ail_domain") + full_paste_url
|
||||||
|
|
||||||
#compile the regex
|
# compile the regex
|
||||||
dico_refresh_cooldown = time.time()
|
dico_refresh_cooldown = time.time()
|
||||||
dico_regex, dico_regexname_to_redis = refresh_dicos()
|
dico_regex, dico_regexname_to_redis = refresh_dicos()
|
||||||
|
|
||||||
|
@ -89,11 +99,21 @@ if __name__ == "__main__":
|
||||||
curr_set = top_termFreq_setName_day[0] + str(timestamp)
|
curr_set = top_termFreq_setName_day[0] + str(timestamp)
|
||||||
content = Paste.Paste(filename).get_p_content()
|
content = Paste.Paste(filename).get_p_content()
|
||||||
|
|
||||||
#iterate the word with the regex
|
# iterate the word with the regex
|
||||||
for regex_str, compiled_regex in dico_regex.items():
|
for regex_str, compiled_regex in dico_regex.items():
|
||||||
matched = compiled_regex.search(content)
|
|
||||||
|
|
||||||
if matched is not None: #there is a match
|
signal.alarm(PROCESS_TIMEOUT)
|
||||||
|
try:
|
||||||
|
matched = compiled_regex.search(content)
|
||||||
|
except TimeoutException:
|
||||||
|
log_msg = "{0} processing timeout".format(filename)
|
||||||
|
print (log_msg)
|
||||||
|
publisher.critical(log_msg)
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
signal.alarm(0)
|
||||||
|
|
||||||
|
if matched is not None: # there is a match
|
||||||
print('regex matched {}'.format(regex_str))
|
print('regex matched {}'.format(regex_str))
|
||||||
matched = matched.group(0)
|
matched = matched.group(0)
|
||||||
regex_str_complete = "/" + regex_str + "/"
|
regex_str_complete = "/" + regex_str + "/"
|
||||||
|
@ -104,8 +124,8 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
# create mail body
|
# create mail body
|
||||||
mail_body = ("AIL Framework,\n"
|
mail_body = ("AIL Framework,\n"
|
||||||
"New occurrence for regex: " + regex_str + "\n"
|
"New occurrence for regex: " + regex_str + "\n"
|
||||||
''+full_paste_url + filename)
|
''+full_paste_url + filename)
|
||||||
|
|
||||||
# Send to every associated email adress
|
# Send to every associated email adress
|
||||||
for email in server_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + regex_str_complete):
|
for email in server_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + regex_str_complete):
|
||||||
|
@ -115,9 +135,9 @@ if __name__ == "__main__":
|
||||||
new_to_the_set = server_term.sadd(set_name, filename)
|
new_to_the_set = server_term.sadd(set_name, filename)
|
||||||
new_to_the_set = True if new_to_the_set == 1 else False
|
new_to_the_set = True if new_to_the_set == 1 else False
|
||||||
|
|
||||||
#consider the num of occurence of this term
|
# consider the num of occurence of this term
|
||||||
regex_value = int(server_term.hincrby(timestamp, dico_regexname_to_redis[regex_str], int(1)))
|
regex_value = int(server_term.hincrby(timestamp, dico_regexname_to_redis[regex_str], int(1)))
|
||||||
#1 term per paste
|
# 1 term per paste
|
||||||
if new_to_the_set:
|
if new_to_the_set:
|
||||||
regex_value_perPaste = int(server_term.hincrby("per_paste_" + str(timestamp), dico_regexname_to_redis[regex_str], int(1)))
|
regex_value_perPaste = int(server_term.hincrby("per_paste_" + str(timestamp), dico_regexname_to_redis[regex_str], int(1)))
|
||||||
server_term.zincrby("per_paste_" + curr_set, dico_regexname_to_redis[regex_str], float(1))
|
server_term.zincrby("per_paste_" + curr_set, dico_regexname_to_redis[regex_str], float(1))
|
||||||
|
|
Loading…
Reference in a new issue