fix: [crawler] debug signal timeout
Some checks are pending
CI / ail_test (3.10) (push) Waiting to run
CI / ail_test (3.7) (push) Waiting to run
CI / ail_test (3.8) (push) Waiting to run
CI / ail_test (3.9) (push) Waiting to run

This commit is contained in:
terrtia 2025-01-08 15:25:41 +01:00
parent 0287a1380b
commit 9425e01c85
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0

View file

@ -326,20 +326,20 @@ def extract_favicon_from_html(html, url):
# # # # # # # # # # # # # # # #
def extract_title_from_html(html, item_id):
    """Extract the text of the ``<title>`` tag from an HTML document.

    :param html: raw HTML markup to parse with BeautifulSoup's
        ``html.parser`` backend.
    :param item_id: identifier of the crawled item. Currently unused —
        kept for interface compatibility with callers; it was previously
        used in the timeout warning log message.
    :return: the title as a plain ``str``, or ``''`` when the document
        has no ``<title>`` tag or its content is empty.
    """
    # NOTE(review): this commit removed the signal.alarm(60) watchdog that
    # guarded against html.parser hanging on pathological input (dead code
    # deleted here rather than left commented out). If parser hangs are
    # still possible, re-add a timeout at the caller — TODO confirm.
    soup = BeautifulSoup(html, 'html.parser')
    title = soup.title
    # soup.title may be None (no tag), and title.string may be None or
    # empty (e.g. <title></title>); both cases fall through to ''.
    if title and title.string:
        return str(title.string)
    return ''
def extract_description_from_html(html): def extract_description_from_html(html):