fix: [crawler] fix incomplete response

This commit is contained in:
Terrtia 2023-06-18 15:09:09 +02:00
parent f8fd037bd2
commit e9539e640b
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0

View file

@ -186,7 +186,7 @@ class Crawler(AbstractModule):
parent_id = task.get_parent() parent_id = task.get_parent()
entries = self.lacus.get_capture(capture.uuid) entries = self.lacus.get_capture(capture.uuid)
print(entries['status']) print(entries.get('status'))
self.har = task.get_har() self.har = task.get_har()
self.screenshot = task.get_screenshot() self.screenshot = task.get_screenshot()
# DEBUG # DEBUG
@ -218,12 +218,12 @@ class Crawler(AbstractModule):
if 'error' in entries: if 'error' in entries:
# TODO IMPROVE ERROR MESSAGE # TODO IMPROVE ERROR MESSAGE
self.logger.warning(str(entries['error'])) self.logger.warning(str(entries['error']))
print(entries['error']) print(entries.get('error'))
if entries.get('html'): if entries.get('html'):
print('retrieved content') print('retrieved content')
# print(entries.get('html')) # print(entries.get('html'))
if 'last_redirected_url' in entries and entries['last_redirected_url']: if 'last_redirected_url' in entries and entries.get('last_redirected_url'):
last_url = entries['last_redirected_url'] last_url = entries['last_redirected_url']
unpacked_last_url = crawlers.unpack_url(last_url) unpacked_last_url = crawlers.unpack_url(last_url)
current_domain = unpacked_last_url['domain'] current_domain = unpacked_last_url['domain']
@ -238,7 +238,7 @@ class Crawler(AbstractModule):
else: else:
last_url = f'http://{self.domain.id}' last_url = f'http://{self.domain.id}'
if 'html' in entries and entries['html']: if 'html' in entries and entries.get('html'):
item_id = crawlers.create_item_id(self.items_dir, self.domain.id) item_id = crawlers.create_item_id(self.items_dir, self.domain.id)
print(item_id) print(item_id)
gzip64encoded = crawlers.get_gzipped_b64_item(item_id, entries['html']) gzip64encoded = crawlers.get_gzipped_b64_item(item_id, entries['html'])
@ -264,7 +264,7 @@ class Crawler(AbstractModule):
# SCREENSHOT # SCREENSHOT
if self.screenshot: if self.screenshot:
if 'png' in entries and entries['png']: if 'png' in entries and entries.get('png'):
screenshot = Screenshots.create_screenshot(entries['png'], b64=False) screenshot = Screenshots.create_screenshot(entries['png'], b64=False)
if screenshot: if screenshot:
if not screenshot.is_tags_safe(): if not screenshot.is_tags_safe():
@ -278,7 +278,7 @@ class Crawler(AbstractModule):
screenshot.add_correlation('domain', '', self.domain.id) screenshot.add_correlation('domain', '', self.domain.id)
# HAR # HAR
if self.har: if self.har:
if 'har' in entries and entries['har']: if 'har' in entries and entries.get('har'):
har_id = crawlers.create_har_id(self.date, item_id) har_id = crawlers.create_har_id(self.date, item_id)
crawlers.save_har(har_id, entries['har']) crawlers.save_har(har_id, entries['har'])
for cookie_name in crawlers.extract_cookies_names_from_har(entries['har']): for cookie_name in crawlers.extract_cookies_names_from_har(entries['har']):