diff --git a/bin/Crawler.py b/bin/Crawler.py index d8e6a430..e5864059 100755 --- a/bin/Crawler.py +++ b/bin/Crawler.py @@ -12,6 +12,7 @@ import time import subprocess import requests +from collections import deque from pyfaup.faup import Faup sys.path.append(os.environ['AIL_BIN']) @@ -303,7 +304,7 @@ if __name__ == '__main__': #mode = sys.argv[1] splash_port = sys.argv[1] - rotation_mode = ['onion', 'regular'] + rotation_mode = deque(['onion', 'regular']) default_proto_map = {'http': 80, 'https': 443} ######################################################## add ftp ??? @@ -361,6 +362,7 @@ if __name__ == '__main__': update_auto_crawler() + rotation_mode.rotate() to_crawl = get_elem_to_crawl(rotation_mode) if to_crawl: url_data = unpack_url(to_crawl['url']) diff --git a/bin/packages/Paste.py b/bin/packages/Paste.py index f8be2f9b..1087880b 100755 --- a/bin/packages/Paste.py +++ b/bin/packages/Paste.py @@ -125,7 +125,15 @@ class Paste(object): """ - paste = self.cache.get(self.p_path) + try: + paste = self.cache.get(self.p_path) + except UnicodeDecodeError: + paste = None + except Exception as e: + print("ERROR in: " + self.p_path) + print(e) + paste = None + if paste is None: try: with gzip.open(self.p_path, 'r') as f: