fix: [crawler] debug signal timeout
Some checks are pending
CI / ail_test (3.10) (push) Waiting to run
CI / ail_test (3.7) (push) Waiting to run
CI / ail_test (3.8) (push) Waiting to run
CI / ail_test (3.9) (push) Waiting to run

This commit is contained in:
terrtia 2025-01-08 15:25:41 +01:00
parent 0287a1380b
commit 9425e01c85
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0

View file

@ -326,20 +326,20 @@ def extract_favicon_from_html(html, url):
# # # # # # # #
def extract_title_from_html(html, item_id):
    """Extract the text of the <title> tag from an HTML document.

    :param html: raw HTML (str or bytes) to parse with BeautifulSoup
    :param item_id: identifier of the crawled item; kept for interface
        compatibility (previously used in the timeout warning log)
    :return: the title as a plain string, or '' when the document has no
        <title> tag or the tag is empty
    """
    # NOTE(review): the signal.alarm(60)/TimeoutException guard around the
    # parser was deliberately disabled in this commit ("debug signal
    # timeout"); if the parser can hang on hostile input, the timeout
    # should be reintroduced — confirm with the crawler maintainers.
    soup = BeautifulSoup(html, 'html.parser')
    title = soup.title
    if title:
        # .string is None when the <title> tag exists but has no single
        # text child, so re-check before returning.
        title = title.string
        if title:
            return str(title)
    return ''
def extract_description_from_html(html):