Merge pull request #266 from kovacsbalu/fix-regexp-timeout

Stop regexp processing after timeout (60sec)
Sami Mokaddem 2018-10-09 14:48:08 +02:00 committed by GitHub
commit d9bc2d6f44
2 changed files with 38 additions and 17 deletions
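
The fix arms a one-shot SIGALRM timer before each regular-expression search, converts the signal into a TimeoutException, and cancels the timer once the search returns. Below is a minimal, self-contained sketch of that pattern, assuming CPython on a Unix-like system; the slow_scan stand-in and the 2-second limit are illustrative only, not part of the PR.

    import signal


    class TimeoutException(Exception):
        pass


    def timeout_handler(signum, frame):
        # SIGALRM sets a flag at the C level; CPython then runs this handler in
        # the main thread, and raising here aborts the work in progress.
        raise TimeoutException


    signal.signal(signal.SIGALRM, timeout_handler)

    MAX_EXECUTION_TIME = 2  # seconds; illustrative value


    def slow_scan(text):
        # Stand-in for a scan that takes far longer than the limit.
        total = 0
        while True:
            total += len(text)


    signal.alarm(MAX_EXECUTION_TIME)   # arm the one-shot timer
    try:
        result = slow_scan('some paste content')
    except TimeoutException:
        print('processing timeout, skipping this item')
    else:
        signal.alarm(0)                # finished in time: cancel the pending alarm
        print('result:', result)

One caveat, noted in the signal module documentation: Python-level handlers only run between bytecode instructions, so a single long-running call implemented in C (regular-expression matching on a large text is the documented example) may keep running until it returns to the interpreter.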


@@ -9,18 +9,25 @@ supplied in the term webpage.
import redis
import time
from pubsublogger import publisher
from packages import lib_words
from packages import Paste
import os
from os import environ
import datetime
import calendar
import re
import signal
import time
from Helper import Process

# Email notifications
from NotificationHelper import *

class TimeoutException(Exception):
    pass

def timeout_handler(signum, frame):
    raise TimeoutException

signal.signal(signal.SIGALRM, timeout_handler)

# Config Variables
DICO_REFRESH_TIME = 60  # s
@@ -38,6 +45,7 @@ top_termFreq_set_array = [top_termFreq_setName_day,top_termFreq_setName_week, to
# create direct link in mail
full_paste_url = "/showsavedpaste/?paste="

def refresh_dicos():
    dico_regex = {}
    dico_regexname_to_redis = {}
@@ -53,6 +61,7 @@ if __name__ == "__main__":
    config_section = 'RegexForTermsFrequency'
    p = Process(config_section)
    max_execution_time = p.config.getint(config_section, "max_execution_time")

    # REDIS #
    server_term = redis.StrictRedis(
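
`p.config` here is the module's ConfigParser-backed configuration (loaded through AIL's Process helper), so the new max_execution_time option has to exist in the deployed config file. A small sketch of the equivalent read with the standard library, using a fallback so that a config file without the new section still yields the 60-second default; the file path is illustrative.

    import configparser

    config = configparser.ConfigParser()
    config.read('config.cfg')  # illustrative path; AIL resolves its own config location

    # Returns 60 if the [RegexForTermsFrequency] section or the option is missing,
    # so configs that predate this change keep working.
    max_execution_time = config.getint('RegexForTermsFrequency',
                                       'max_execution_time',
                                       fallback=60)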
@@ -87,11 +96,20 @@ if __name__ == "__main__":
        timestamp = calendar.timegm((int(temp[-4]), int(temp[-3]), int(temp[-2]), 0, 0, 0))
        curr_set = top_termFreq_setName_day[0] + str(timestamp)
        content = Paste.Paste(filename).get_p_content()
        paste = Paste.Paste(filename)
        content = paste.get_p_content()

        # iterate the word with the regex
        for regex_str, compiled_regex in dico_regex.items():
            signal.alarm(max_execution_time)
            try:
                matched = compiled_regex.search(content)
            except TimeoutException:
                print ("{0} processing timeout".format(paste.p_path))
                continue
            else:
                signal.alarm(0)

            if matched is not None:  # there is a match
                print('regex matched {}'.format(regex_str))
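
signal.alarm() keeps a single pending timer per process, so the arm/cancel pair has to bracket every guarded call exactly once. One possible refinement, not part of this PR, is to package the pattern as a context manager; time_limit below is a hypothetical helper. The usual limits of this approach apply: SIGALRM exists only on Unix, the Python handler runs only in the main thread, and signal.alarm() works in whole seconds.

    import contextlib
    import re
    import signal


    class TimeoutException(Exception):
        pass


    def _timeout_handler(signum, frame):
        raise TimeoutException


    signal.signal(signal.SIGALRM, _timeout_handler)


    @contextlib.contextmanager
    def time_limit(seconds):
        # Hypothetical helper: raise TimeoutException if the block runs longer
        # than `seconds`; always cancel the pending alarm on the way out.
        signal.alarm(seconds)
        try:
            yield
        finally:
            signal.alarm(0)


    # Usage mirroring the module's loop: skip an item whose scan runs too long.
    content = 'example paste content'
    for compiled_regex in [re.compile(r'example'), re.compile(r'paste')]:
        try:
            with time_limit(60):
                matched = compiled_regex.search(content)
        except TimeoutException:
            print('processing timeout, moving on')
            continue
        if matched is not None:
            print('regex matched {}'.format(compiled_regex.pattern))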


@@ -99,6 +99,9 @@ operation_mode = 3
#Define the time that a paste will be considerate duplicate. in seconds (1day = 86400)
ttl_duplicate = 86400

[RegexForTermsFrequency]
max_execution_time = 60

##### Redis #####
[Redis_Cache]
host = localhost