From f842194c57defe3f6ba8dfdf8011af9024e05325 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Mon, 17 Dec 2018 16:04:12 +0100 Subject: [PATCH] fix: [Crawler] retry when splash is not available --- bin/Crawler.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/bin/Crawler.py b/bin/Crawler.py index 99917c49..0f69cfe6 100755 --- a/bin/Crawler.py +++ b/bin/Crawler.py @@ -27,17 +27,27 @@ def crawl_onion(url, domain, date, date_month, message): if super_father is None: super_father=paste - try: - r = requests.get(splash_url , timeout=30.0) - except Exception: - # TODO: relaunch docker or send error message + retry = True + nb_retry = 0 + while retry: + try: + r = requests.get(splash_url , timeout=30.0) + retry = False + except Exception: + # TODO: relaunch docker or send error message + nb_retry += 1 - on_error_send_message_back_in_queue(type_hidden_service, domain, message) - publisher.error('{} SPASH DOWN'.format(splash_url)) - print('--------------------------------------') - print(' \033[91m DOCKER SPLASH DOWN\033[0m') - print(' {} DOWN'.format(splash_url)) - exit(1) + if nb_retry == 30: + on_error_send_message_back_in_queue(type_hidden_service, domain, message) + publisher.error('{} SPASH DOWN'.format(splash_url)) + print('--------------------------------------') + print(' \033[91m DOCKER SPLASH DOWN\033[0m') + print(' {} DOWN'.format(splash_url)) + exit(1) + + print(' \033[91m DOCKER SPLASH NOT AVAILABLE\033[0m') + print(' Retry({}) in 10 seconds'.format(nb_retry)) + time.sleep(10) if r.status_code == 200: process = subprocess.Popen(["python", './torcrawler/tor_crawler.py', splash_url, type_hidden_service, url, domain, paste, super_father],