fix: [Crawler] retry when splash is not available

This commit is contained in:
Terrtia 2018-12-17 16:04:12 +01:00
parent 4e08aaa80f
commit f842194c57
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0

View file

@ -27,11 +27,17 @@ def crawl_onion(url, domain, date, date_month, message):
if super_father is None: if super_father is None:
super_father=paste super_father=paste
retry = True
nb_retry = 0
while retry:
try: try:
r = requests.get(splash_url , timeout=30.0) r = requests.get(splash_url , timeout=30.0)
retry = False
except Exception: except Exception:
# TODO: relaunch docker or send error message # TODO: relaunch docker or send error message
nb_retry += 1
if nb_retry == 30:
on_error_send_message_back_in_queue(type_hidden_service, domain, message) on_error_send_message_back_in_queue(type_hidden_service, domain, message)
publisher.error('{} SPASH DOWN'.format(splash_url)) publisher.error('{} SPASH DOWN'.format(splash_url))
print('--------------------------------------') print('--------------------------------------')
@ -39,6 +45,10 @@ def crawl_onion(url, domain, date, date_month, message):
print(' {} DOWN'.format(splash_url)) print(' {} DOWN'.format(splash_url))
exit(1) exit(1)
print(' \033[91m DOCKER SPLASH NOT AVAILABLE\033[0m')
print(' Retry({}) in 10 seconds'.format(nb_retry))
time.sleep(10)
if r.status_code == 200: if r.status_code == 200:
process = subprocess.Popen(["python", './torcrawler/tor_crawler.py', splash_url, type_hidden_service, url, domain, paste, super_father], process = subprocess.Popen(["python", './torcrawler/tor_crawler.py', splash_url, type_hidden_service, url, domain, paste, super_father],
stdout=subprocess.PIPE) stdout=subprocess.PIPE)