Merge pull request #266 from kovacsbalu/fix-regexp-timeout

Stop regexp processing after timeout (60sec)
This commit is contained in:
Sami Mokaddem 2018-10-09 14:48:08 +02:00 committed by GitHub
commit d9bc2d6f44
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 38 additions and 17 deletions

View file

@ -9,18 +9,25 @@ supplied in the term webpage.
import redis
import time
from pubsublogger import publisher
from packages import lib_words
from packages import Paste
import os
from os import environ
import datetime
import calendar
import re
import signal
import time
from Helper import Process
# Email notifications
from NotificationHelper import *
class TimeoutException(Exception):
    """Raised by ``timeout_handler`` when the SIGALRM timer expires."""


def timeout_handler(signum, frame):
    """Abort the currently running code by raising ``TimeoutException``.

    Intended to be installed as a signal handler.

    Args:
        signum: Number of the signal that was delivered.
        frame: Stack frame that was executing when the signal arrived
            (unused).

    Raises:
        TimeoutException: always.
    """
    # Carry a message so that a timeout escaping the guarded call can still
    # be identified from its traceback.
    raise TimeoutException("execution timed out (signal %d)" % signum)


# Route SIGALRM to timeout_handler so a timer armed with signal.alarm()
# interrupts the running code with TimeoutException.
signal.signal(signal.SIGALRM, timeout_handler)
# Config Variables
DICO_REFRESH_TIME = 60 # s
@ -38,6 +45,7 @@ top_termFreq_set_array = [top_termFreq_setName_day,top_termFreq_setName_week, to
# create direct link in mail
full_paste_url = "/showsavedpaste/?paste="
def refresh_dicos():
dico_regex = {}
dico_regexname_to_redis = {}
@ -53,6 +61,7 @@ if __name__ == "__main__":
config_section = 'RegexForTermsFrequency'
p = Process(config_section)
max_execution_time = p.config.getint(config_section, "max_execution_time")
# REDIS #
server_term = redis.StrictRedis(
@ -87,11 +96,20 @@ if __name__ == "__main__":
timestamp = calendar.timegm((int(temp[-4]), int(temp[-3]), int(temp[-2]), 0, 0, 0))
curr_set = top_termFreq_setName_day[0] + str(timestamp)
content = Paste.Paste(filename).get_p_content()
paste = Paste.Paste(filename)
content = paste.get_p_content()
# iterate the word with the regex
for regex_str, compiled_regex in dico_regex.items():
signal.alarm(max_execution_time)
try:
matched = compiled_regex.search(content)
except TimeoutException:
print ("{0} processing timeout".format(paste.p_path))
continue
else:
signal.alarm(0)
if matched is not None: # there is a match
print('regex matched {}'.format(regex_str))

View file

@ -99,6 +99,9 @@ operation_mode = 3
#Define the time during which a paste will be considered a duplicate, in seconds (1day = 86400)
ttl_duplicate = 86400
[RegexForTermsFrequency]
#Maximum time, in seconds, that one regex search on a paste may run before it is aborted and skipped
max_execution_time = 60
##### Redis #####
[Redis_Cache]
host = localhost