mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-27 00:07:16 +00:00
fix: [crawler] error catcher
This commit is contained in:
parent
eea2e1714d
commit
179fba2ecc
1 changed file with 4 additions and 4 deletions
|
@ -156,7 +156,7 @@ class TorSplashCrawler():
|
||||||
self.parse,
|
self.parse,
|
||||||
errback=self.errback_catcher,
|
errback=self.errback_catcher,
|
||||||
endpoint='execute',
|
endpoint='execute',
|
||||||
meta={'father': self.original_item},
|
meta={'father': self.original_item, 'current_url': self.start_urls},
|
||||||
args=l_cookies
|
args=l_cookies
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -217,7 +217,7 @@ class TorSplashCrawler():
|
||||||
self.parse,
|
self.parse,
|
||||||
errback=self.errback_catcher,
|
errback=self.errback_catcher,
|
||||||
endpoint='execute',
|
endpoint='execute',
|
||||||
meta={'father': item_id},
|
meta={'father': item_id, 'current_url': link.url},
|
||||||
args=l_cookies
|
args=l_cookies
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -227,7 +227,7 @@ class TorSplashCrawler():
|
||||||
|
|
||||||
if failure.check(ResponseNeverReceived):
|
if failure.check(ResponseNeverReceived):
|
||||||
request = failure.request
|
request = failure.request
|
||||||
url= response.data['last_url']
|
url= request.meta['current_url']
|
||||||
father = request.meta['father']
|
father = request.meta['father']
|
||||||
|
|
||||||
self.logger.error('Splash, ResponseNeverReceived for %s, retry in 10s ...', url)
|
self.logger.error('Splash, ResponseNeverReceived for %s, retry in 10s ...', url)
|
||||||
|
@ -242,7 +242,7 @@ class TorSplashCrawler():
|
||||||
errback=self.errback_catcher,
|
errback=self.errback_catcher,
|
||||||
endpoint='execute',
|
endpoint='execute',
|
||||||
cache_args=['lua_source'],
|
cache_args=['lua_source'],
|
||||||
meta={'father': father},
|
meta={'father': father, 'current_url': url},
|
||||||
args=self.build_request_arg(response.cookiejar)
|
args=self.build_request_arg(response.cookiejar)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue