From 9425e01c85a856fc9c7077d800c6f44009f748d0 Mon Sep 17 00:00:00 2001 From: terrtia Date: Wed, 8 Jan 2025 15:25:41 +0100 Subject: [PATCH] fix: [crawler] debug signal timeout --- bin/lib/crawlers.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/bin/lib/crawlers.py b/bin/lib/crawlers.py index a0d907ec..c8a4db2f 100755 --- a/bin/lib/crawlers.py +++ b/bin/lib/crawlers.py @@ -326,20 +326,20 @@ def extract_favicon_from_html(html, url): # # # # # # # # def extract_title_from_html(html, item_id): - signal.alarm(60) - try: - soup = BeautifulSoup(html, 'html.parser') - title = soup.title + # signal.alarm(60) + # try: + soup = BeautifulSoup(html, 'html.parser') + title = soup.title + if title: + title = title.string if title: - title = title.string - if title: - return str(title) - except TimeoutException: - signal.alarm(0) - logger_crawler.warning(f'BeautifulSoup HTML parser timeout: {item_id}') - else: - signal.alarm(0) - signal.alarm(0) + return str(title) + # except TimeoutException: + # signal.alarm(0) + # logger_crawler.warning(f'BeautifulSoup HTML parser timeout: {item_id}') + # else: + # signal.alarm(0) + # signal.alarm(0) return '' def extract_description_from_html(html):