From 672bb02bbf0d51b564d2bc0d956e3cc4e23d8c22 Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Mon, 6 Apr 2020 10:52:44 +0200
Subject: [PATCH] fix: [Crawler splash ResponseNeverReceived] add retry

---
 bin/torcrawler/TorSplashCrawler.py | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/bin/torcrawler/TorSplashCrawler.py b/bin/torcrawler/TorSplashCrawler.py
index f1ed527d..18bb8be1 100644
--- a/bin/torcrawler/TorSplashCrawler.py
+++ b/bin/torcrawler/TorSplashCrawler.py
@@ -172,7 +172,26 @@ class TorSplashCrawler():
             # LUA ERROR # # TODO: print/display errors
             elif 'error' in response.data:
                 if(response.data['error'] == 'network99'):
-                    print('Connection to proxy refused')
+                    ## splash restart ##
+                    error_retry = request.meta.get('error_retry', 0)  # NOTE(review): `request` is not defined in this hunk -- confirm it is in scope (Scrapy exposes the same dict as response.meta)
+                    if error_retry < 3:  # counter starts at 0, so at most 3 retries per request chain
+                        error_retry += 1
+                        url= request.meta['current_url']
+                        father = request.meta['father']
+
+                        self.logger.error('Splash, ResponseNeverReceived for %s, retry in 10s ...', url)
+                        time.sleep(10)  # NOTE(review): blocking sleep inside the callback; presumably stalls the crawler while waiting -- confirm acceptable
+                        yield SplashRequest(
+                            url,
+                            self.parse,
+                            errback=self.errback_catcher,
+                            endpoint='execute',
+                            cache_args=['lua_source'],
+                            meta={'father': father, 'current_url': url, 'error_retry': error_retry},  # carry the retry counter forward to the re-issued request
+                            args=self.build_request_arg(response.cookiejar)
+                        )
+                    else:
+                        print('Connection to proxy refused')
                 else:
                     print(response.data['error'])
