chg: [onion module] filter onion v2
Some checks are pending
CI / ail_test (3.10) (push) Waiting to run
CI / ail_test (3.7) (push) Waiting to run
CI / ail_test (3.8) (push) Waiting to run
CI / ail_test (3.9) (push) Waiting to run

This commit is contained in:
terrtia 2024-10-08 16:26:46 +02:00
parent faea17572c
commit 9d26a47c17
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
2 changed files with 14 additions and 14 deletions

View file

@@ -136,16 +136,17 @@ def is_valid_onion_v3_domain(domain):
def is_valid_onion_domain(domain): def is_valid_onion_domain(domain):
if not domain.endswith('.onion'): if not domain.endswith('.onion'):
return False return False
domain = domain.replace('.onion', '', 1) return is_valid_onion_v3_domain(domain)
if len(domain) == 16: # v2 address # domain = domain.replace('.onion', '', 1)
r_onion = r'[a-z0-9]{16}' # if len(domain) == 16: # v2 address
if re.match(r_onion, domain): # r_onion = r'[a-z0-9]{16}'
return True # if re.match(r_onion, domain):
elif len(domain) == 56: # v3 address # return True
r_onion = r'[a-z0-9]{56}' # elif len(domain) == 56: # v3 address
if re.fullmatch(r_onion, domain): # r_onion = r'[a-z0-9]{56}'
return True # if re.fullmatch(r_onion, domain):
return False # return True
# return False
def is_valid_domain(domain): def is_valid_domain(domain):
faup.decode(domain) faup.decode(domain)

View file

@@ -23,7 +23,6 @@ sys.path.append(os.environ['AIL_BIN'])
################################## ##################################
from modules.abstract_module import AbstractModule from modules.abstract_module import AbstractModule
from lib.ConfigLoader import ConfigLoader from lib.ConfigLoader import ConfigLoader
from lib.objects.Items import Item
from lib import crawlers from lib import crawlers
class Onion(AbstractModule): class Onion(AbstractModule):
@@ -35,9 +34,9 @@ class Onion(AbstractModule):
config_loader = ConfigLoader() config_loader = ConfigLoader()
self.r_cache = config_loader.get_redis_conn("Redis_Cache") self.r_cache = config_loader.get_redis_conn("Redis_Cache")
self.pending_seconds = config_loader.get_config_int("Onion", "max_execution_time") self.pending_seconds = 10
# regex timeout # regex timeout
self.regex_timeout = 30 self.regex_timeout = config_loader.get_config_int("Onion", "max_execution_time")
self.faup = crawlers.get_faup() self.faup = crawlers.get_faup()
@@ -80,6 +79,7 @@ class Onion(AbstractModule):
# String to tuple # String to tuple
x = x[2:-2].replace(" '", "").split("',") x = x[2:-2].replace(" '", "").split("',")
url = x[0] url = x[0]
url = url.lower()
print(url) print(url)
# TODO Crawl subdomain # TODO Crawl subdomain
@@ -108,5 +108,4 @@ class Onion(AbstractModule):
if __name__ == "__main__": if __name__ == "__main__":
module = Onion() module = Onion()
# module.compute('submitted/2022/10/10/submitted_705d1d92-7e9a-4a44-8c21-ccd167bfb7db.gz 9')
module.run() module.run()