mirror of
https://github.com/ail-project/ail-framework.git
synced 2025-01-18 16:36:13 +00:00
fix: [crawler] debug signal timeout
This commit is contained in:
parent
0287a1380b
commit
9425e01c85
1 changed file with 13 additions and 13 deletions
|
@@ -326,20 +326,20 @@ def extract_favicon_from_html(html, url):
|
||||||
# # # # # # # #
|
# # # # # # # #
|
||||||
|
|
||||||
def extract_title_from_html(html, item_id):
|
def extract_title_from_html(html, item_id):
|
||||||
signal.alarm(60)
|
# signal.alarm(60)
|
||||||
try:
|
# try:
|
||||||
soup = BeautifulSoup(html, 'html.parser')
|
soup = BeautifulSoup(html, 'html.parser')
|
||||||
title = soup.title
|
title = soup.title
|
||||||
|
if title:
|
||||||
|
title = title.string
|
||||||
if title:
|
if title:
|
||||||
title = title.string
|
return str(title)
|
||||||
if title:
|
# except TimeoutException:
|
||||||
return str(title)
|
# signal.alarm(0)
|
||||||
except TimeoutException:
|
# logger_crawler.warning(f'BeautifulSoup HTML parser timeout: {item_id}')
|
||||||
signal.alarm(0)
|
# else:
|
||||||
logger_crawler.warning(f'BeautifulSoup HTML parser timeout: {item_id}')
|
# signal.alarm(0)
|
||||||
else:
|
# signal.alarm(0)
|
||||||
signal.alarm(0)
|
|
||||||
signal.alarm(0)
|
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
def extract_description_from_html(html):
|
def extract_description_from_html(html):
|
||||||
|
|
Loading…
Add table
Reference in a new issue