fix: [Mails] regex timeout

This commit is contained in:
Terrtia 2020-05-11 11:33:07 +02:00
parent 0dfddd318b
commit 27554d8863
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0

View file

@ -54,6 +54,9 @@ dns_server = config_loader.get_config_str('Mail', 'dns')
config_loader = None config_loader = None
## -- ## ## -- ##
def extract_all_email(email_regex, item_content):
    """Return every match of ``email_regex`` found in ``item_content``.

    Thin wrapper around ``re.findall``; duplicates are kept and no
    filtering is applied here.

    Args:
        email_regex: Regular expression (pattern string or compiled
            pattern) passed straight to ``re.findall``.
        item_content: Text to scan.

    Returns:
        The list produced by ``re.findall``, in order of appearance.
        It is empty when nothing matches.
    """
    return re.findall(email_regex, item_content)
def is_mxdomain_in_cache(mxdomain): def is_mxdomain_in_cache(mxdomain):
return r_serv_cache.exists('mxdomain:{}'.format(mxdomain)) return r_serv_cache.exists('mxdomain:{}'.format(mxdomain))
@ -148,15 +151,14 @@ if __name__ == "__main__":
# Get all emails address # Get all emails address
signal.alarm(max_execution_time) signal.alarm(max_execution_time)
try: try:
all_emails = re.findall(email_regex, item_content) all_emails = extract_all_email(email_regex, item_content)
except TimeoutException: except TimeoutException:
p.incr_module_timeout_statistic() p.incr_module_timeout_statistic()
err_mess = "Mails: processing timeout: {}".format(item_id) err_mess = "Mails: processing timeout: {}".format(item_id)
print(err_mess) print(err_mess)
publisher.info(err_mess) publisher.info(err_mess)
continue continue
else: signal.alarm(0)
signal.alarm(0)
# filtering duplicate # filtering duplicate
all_emails = set(all_emails) all_emails = set(all_emails)