mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-30 09:47:17 +00:00
chg: [Crawler] add priority queue, fix #263
This commit is contained in:
parent
c1b34bd99c
commit
88eaaeae93
2 changed files with 12 additions and 4 deletions
|
@ -20,7 +20,7 @@ def on_error_send_message_back_in_queue(type_hidden_service, domain, message):
|
||||||
# send this msg back in the queue
|
# send this msg back in the queue
|
||||||
if not r_onion.sismember('{}_domain_crawler_queue'.format(type_hidden_service), domain):
|
if not r_onion.sismember('{}_domain_crawler_queue'.format(type_hidden_service), domain):
|
||||||
r_onion.sadd('{}_domain_crawler_queue'.format(type_hidden_service), domain)
|
r_onion.sadd('{}_domain_crawler_queue'.format(type_hidden_service), domain)
|
||||||
r_onion.sadd('{}_crawler_queue'.format(type_hidden_service), message)
|
r_onion.sadd('{}_crawler_priority_queue'.format(type_hidden_service), message)
|
||||||
|
|
||||||
def crawl_onion(url, domain, date, date_month, message):
|
def crawl_onion(url, domain, date, date_month, message):
|
||||||
|
|
||||||
|
@ -166,8 +166,12 @@ if __name__ == '__main__':
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
|
|
||||||
# Recovering the streamed message informations.
|
# Priority Queue - Recovering the streamed message informations.
|
||||||
message = r_onion.spop('{}_crawler_queue'.format(type_hidden_service))
|
message = r_onion.spop('{}_crawler_priority_queue'.format(type_hidden_service))
|
||||||
|
|
||||||
|
if message is None:
|
||||||
|
# Recovering the streamed message informations.
|
||||||
|
message = r_onion.spop('{}_crawler_queue'.format(type_hidden_service))
|
||||||
|
|
||||||
if message is not None:
|
if message is not None:
|
||||||
|
|
||||||
|
|
|
@ -223,7 +223,11 @@ if __name__ == "__main__":
|
||||||
print('send to onion crawler')
|
print('send to onion crawler')
|
||||||
r_onion.sadd('onion_domain_crawler_queue', domain)
|
r_onion.sadd('onion_domain_crawler_queue', domain)
|
||||||
msg = '{};{}'.format(url,PST.p_path)
|
msg = '{};{}'.format(url,PST.p_path)
|
||||||
r_onion.sadd('onion_crawler_queue', msg)
|
if not r_onion.hexists('onion_metadata:{}'.format(domain), 'first_seen'):
|
||||||
|
r_onion.sadd('onion_crawler_priority_queue', msg)
|
||||||
|
print('send to priority queue')
|
||||||
|
else:
|
||||||
|
r_onion.sadd('onion_crawler_queue', msg)
|
||||||
#p.populate_set_out(msg, 'Crawler')
|
#p.populate_set_out(msg, 'Crawler')
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
Loading…
Reference in a new issue