mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-10 08:38:28 +00:00
fix: [helper dump_crawler] fix files not found
This commit is contained in:
parent
06ab66ff57
commit
bd0da210e7
1 changed file with 18 additions and 5 deletions
|
@@ -42,6 +42,8 @@ date_range = substract_date(date_from, date_to)
|
||||||
|
|
||||||
dir_path = os.path.join(os.environ['AIL_HOME'], 'temp')
|
dir_path = os.path.join(os.environ['AIL_HOME'], 'temp')
|
||||||
|
|
||||||
|
domain_skipped = []
|
||||||
|
|
||||||
for date in date_range:
|
for date in date_range:
|
||||||
domains_up = list(r_serv_onion.smembers('{}_up:{}'.format(service_type, date)))
|
domains_up = list(r_serv_onion.smembers('{}_up:{}'.format(service_type, date)))
|
||||||
if domains_up:
|
if domains_up:
|
||||||
|
@@ -56,8 +58,19 @@ for date in date_range:
|
||||||
item_core = h.get_domain_crawled_core_item()
|
item_core = h.get_domain_crawled_core_item()
|
||||||
if 'root_item' in item_core:
|
if 'root_item' in item_core:
|
||||||
l_pastes = h.get_last_crawled_pastes(item_root=item_core['root_item'])
|
l_pastes = h.get_last_crawled_pastes(item_root=item_core['root_item'])
|
||||||
res = h.create_domain_basic_archive(l_pastes)
|
try:
|
||||||
filename = os.path.join(save_path, '{}'.format(domain))
|
res = h.create_domain_basic_archive(l_pastes)
|
||||||
with open(filename, 'wb') as f:
|
filename = os.path.join(save_path, '{}'.format(domain))
|
||||||
shutil.copyfileobj(res, f)
|
with open(filename, 'wb') as f:
|
||||||
print('done')
|
shutil.copyfileobj(res, f)
|
||||||
|
print('done')
|
||||||
|
except Exception as e:
|
||||||
|
print('skipped')
|
||||||
|
domain_skipped.append(domain)
|
||||||
|
pass
|
||||||
|
|
||||||
|
print()
|
||||||
|
print()
|
||||||
|
print('DOMAINS SKIPPED: ')
|
||||||
|
for domain in domain_skipped:
|
||||||
|
print(domain)
|
||||||
|
|
Loading…
Reference in a new issue