From 9d26a47c1775c18ebdb6d5158579061d6796dd52 Mon Sep 17 00:00:00 2001 From: terrtia Date: Tue, 8 Oct 2024 16:26:46 +0200 Subject: [PATCH] chg: [onion module] filter onion v2 --- bin/lib/crawlers.py | 21 +++++++++++---------- bin/modules/Onion.py | 7 +++---- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/bin/lib/crawlers.py b/bin/lib/crawlers.py index 9f4c2839..fbc3470b 100755 --- a/bin/lib/crawlers.py +++ b/bin/lib/crawlers.py @@ -136,16 +136,17 @@ def is_valid_onion_v3_domain(domain): def is_valid_onion_domain(domain): if not domain.endswith('.onion'): return False - domain = domain.replace('.onion', '', 1) - if len(domain) == 16: # v2 address - r_onion = r'[a-z0-9]{16}' - if re.match(r_onion, domain): - return True - elif len(domain) == 56: # v3 address - r_onion = r'[a-z0-9]{56}' - if re.fullmatch(r_onion, domain): - return True - return False + return is_valid_onion_v3_domain(domain) + # domain = domain.replace('.onion', '', 1) + # if len(domain) == 16: # v2 address + # r_onion = r'[a-z0-9]{16}' + # if re.match(r_onion, domain): + # return True + # elif len(domain) == 56: # v3 address + # r_onion = r'[a-z0-9]{56}' + # if re.fullmatch(r_onion, domain): + # return True + # return False def is_valid_domain(domain): faup.decode(domain) diff --git a/bin/modules/Onion.py b/bin/modules/Onion.py index 2e11431e..02093a9f 100755 --- a/bin/modules/Onion.py +++ b/bin/modules/Onion.py @@ -23,7 +23,6 @@ sys.path.append(os.environ['AIL_BIN']) ################################## from modules.abstract_module import AbstractModule from lib.ConfigLoader import ConfigLoader -from lib.objects.Items import Item from lib import crawlers class Onion(AbstractModule): @@ -35,9 +34,9 @@ class Onion(AbstractModule): config_loader = ConfigLoader() self.r_cache = config_loader.get_redis_conn("Redis_Cache") - self.pending_seconds = config_loader.get_config_int("Onion", "max_execution_time") + self.pending_seconds = 10 # regex timeout - self.regex_timeout = 30 + self.regex_timeout = config_loader.get_config_int("Onion", "max_execution_time") self.faup = crawlers.get_faup() @@ -80,6 +79,7 @@ class Onion(AbstractModule): # String to tuple x = x[2:-2].replace(" '", "").split("',") url = x[0] + url = url.lower() print(url) # TODO Crawl subdomain @@ -108,5 +108,4 @@ class Onion(AbstractModule): if __name__ == "__main__": module = Onion() - # module.compute('submitted/2022/10/10/submitted_705d1d92-7e9a-4a44-8c21-ccd167bfb7db.gz 9') module.run()