mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-30 09:47:17 +00:00
fix: [crawler] cleanup
This commit is contained in:
parent
6cfd3fe36d
commit
db634e8866
2 changed files with 1 addition and 14 deletions
|
@@ -138,7 +138,3 @@ def save_har(har_dir, item_id, har_content):
|
||||||
filename = os.path.join(har_dir, item_id + '.json')
|
filename = os.path.join(har_dir, item_id + '.json')
|
||||||
with open(filename, 'w') as f:
|
with open(filename, 'w') as f:
|
||||||
f.write(json.dumps(har_content))
|
f.write(json.dumps(har_content))
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
all_cookies = load_cookies(get_cookies(), '3thxemke2x7hcibu.onion', crawler_type='onion')
|
|
||||||
print(json.dumps(all_cookies))
|
|
||||||
|
|
|
@@ -177,9 +177,6 @@ class TorSplashCrawler():
|
||||||
error_log = (json.loads(response.body.decode()))
|
error_log = (json.loads(response.body.decode()))
|
||||||
print(error_log)
|
print(error_log)
|
||||||
else:
|
else:
|
||||||
# DEBUG:
|
|
||||||
# print('----')
|
|
||||||
# print(response.data.keys())
|
|
||||||
|
|
||||||
item_id = crawler_splash.create_item_id(self.item_dir, self.domains[0])
|
item_id = crawler_splash.create_item_id(self.item_dir, self.domains[0])
|
||||||
self.save_crawled_item(item_id, response.data['html'])
|
self.save_crawled_item(item_id, response.data['html'])
|
||||||
|
@@ -190,14 +187,8 @@ class TorSplashCrawler():
|
||||||
crawler_splash.add_domain_root_item(item_id, self.domain_type, self.domains[0], self.date_epoch, self.port)
|
crawler_splash.add_domain_root_item(item_id, self.domain_type, self.domains[0], self.date_epoch, self.port)
|
||||||
crawler_splash.create_domain_metadata(self.domain_type, self.domains[0], self.port, self.full_date, self.date_month)
|
crawler_splash.create_domain_metadata(self.domain_type, self.domains[0], self.port, self.full_date, self.date_month)
|
||||||
|
|
||||||
#print(response.data['cookies'])
|
|
||||||
if 'cookies' in response.data:
|
if 'cookies' in response.data:
|
||||||
all_cookies = response.data['cookies']
|
all_cookies = response.data['cookies']
|
||||||
# for cookie in all_cookies:
|
|
||||||
# print('------------------------')
|
|
||||||
# print(cookie['name'])
|
|
||||||
# print(cookie['value'])
|
|
||||||
# print(cookie)
|
|
||||||
else:
|
else:
|
||||||
all_cookies = []
|
all_cookies = []
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue