diff --git a/bin/Crawler.py b/bin/Crawler.py index c69c1de5..2f4316d9 100755 --- a/bin/Crawler.py +++ b/bin/Crawler.py @@ -20,7 +20,7 @@ def on_error_send_message_back_in_queue(type_hidden_service, domain, message): # send this msg back in the queue if not r_onion.sismember('{}_domain_crawler_queue'.format(type_hidden_service), domain): r_onion.sadd('{}_domain_crawler_queue'.format(type_hidden_service), domain) - r_onion.sadd('{}_crawler_queue'.format(type_hidden_service), message) + r_onion.sadd('{}_crawler_priority_queue'.format(type_hidden_service), message) def crawl_onion(url, domain, date, date_month, message): @@ -166,8 +166,12 @@ if __name__ == '__main__': while True: - # Recovering the streamed message informations. - message = r_onion.spop('{}_crawler_queue'.format(type_hidden_service)) + # Priority Queue - Recovering the streamed message informations. + message = r_onion.spop('{}_crawler_priority_queue'.format(type_hidden_service)) + + if message is None: + # Recovering the streamed message informations. + message = r_onion.spop('{}_crawler_queue'.format(type_hidden_service)) if message is not None: diff --git a/bin/Onion.py b/bin/Onion.py index d0555868..707c39fe 100755 --- a/bin/Onion.py +++ b/bin/Onion.py @@ -223,7 +223,11 @@ if __name__ == "__main__": print('send to onion crawler') r_onion.sadd('onion_domain_crawler_queue', domain) msg = '{};{}'.format(url,PST.p_path) - r_onion.sadd('onion_crawler_queue', msg) + if not r_onion.hexists('onion_metadata:{}'.format(domain), 'first_seen'): + r_onion.sadd('onion_crawler_priority_queue', msg) + print('send to priority queue') + else: + r_onion.sadd('onion_crawler_queue', msg) #p.populate_set_out(msg, 'Crawler') else: