fix: [helper dump_crawler] fix files not found

This commit is contained in:
Terrtia 2019-06-26 11:51:26 +02:00
parent 06ab66ff57
commit bd0da210e7
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0

View file

@@ -42,6 +42,8 @@ date_range = substract_date(date_from, date_to)
 dir_path = os.path.join(os.environ['AIL_HOME'], 'temp')
+domain_skipped = []
+
 for date in date_range:
     domains_up = list(r_serv_onion.smembers('{}_up:{}'.format(service_type, date)))
     if domains_up:
@@ -56,8 +58,19 @@ for date in date_range:
             item_core = h.get_domain_crawled_core_item()
             if 'root_item' in item_core:
                 l_pastes = h.get_last_crawled_pastes(item_root=item_core['root_item'])
-                res = h.create_domain_basic_archive(l_pastes)
-                filename = os.path.join(save_path, '{}'.format(domain))
-                with open(filename, 'wb') as f:
-                    shutil.copyfileobj(res, f)
-                print('done')
+                try:
+                    res = h.create_domain_basic_archive(l_pastes)
+                    filename = os.path.join(save_path, '{}'.format(domain))
+                    with open(filename, 'wb') as f:
+                        shutil.copyfileobj(res, f)
+                    print('done')
+                except Exception as e:
+                    print('skipped')
+                    domain_skipped.append(domain)
+                    pass
+
+print()
+print()
+print('DOMAINS SKIPPED: ')
+for domain in domain_skipped:
+    print(domain)