diff --git a/bin/lib/Domain.py b/bin/lib/Domain.py
index 5eca1943..60d78967 100755
--- a/bin/lib/Domain.py
+++ b/bin/lib/Domain.py
@@ -10,9 +10,10 @@ The ``Domain``
 import os
 import sys
 import itertools
-import time
+import re
 import redis
 import random
+import time
 
 sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
 import Cryptocurrency
@@ -241,6 +242,52 @@ def create_domains_metadata_list(list_domains, domain_type):
                                                 ports=True, tags=True, languages=True, screenshot=True, tags_safe=True))
     return l_domains
 
+def sanithyse_domain_name_to_search(name_to_search, domain_type):
+    if domain_type == 'onion':
+        r_name = r'[a-z0-9\.]+'
+    else:
+        r_name = r'[a-zA-Z0-9._-]+'
+    # invalid domain name
+    if not re.fullmatch(r_name, name_to_search):
+        return None
+    return name_to_search.replace('.', r'\.')
+
+
+def search_domains_by_name(name_to_search, domain_types, r_pos=False):
+    domains_dict = {}
+    for domain_type in domain_types:
+        r_name = sanithyse_domain_name_to_search(name_to_search, domain_type)
+        if not r_name:
+            continue
+        r_name = re.compile(r_name)
+        for domain in get_all_domains_up(domain_type):
+            res = re.search(r_name, domain)
+            if res:
+                domains_dict[domain] = {}
+                if r_pos:
+                    domains_dict[domain]['hl-start'] = res.start()
+                    domains_dict[domain]['hl-end'] = res.end()
+    return domains_dict
+
+def api_search_domains_by_name(name_to_search, domains_types, domains_metadata=False, page=1):
+    domains_types = sanitize_domain_types(domains_types)
+    domains_dict = search_domains_by_name(name_to_search, domains_types, r_pos=True)
+    l_domains = sorted(domains_dict.keys())
+    l_domains = paginate_iterator(l_domains, nb_obj=28, page=page)
+    if not domains_metadata:
+        return l_domains
+    else:
+        l_dict_domains = []
+        for domain in l_domains['list_elem']:
+            dict_domain = get_domain_metadata(domain, get_domain_type(domain), first_seen=True, last_ckeck=True,
+                                                status=True, ports=True, tags=True, tags_safe=True,
+                                                languages=True, screenshot=True)
+            dict_domain = {**domains_dict[domain], **dict_domain}
+            l_dict_domains.append(dict_domain)
+        l_domains['list_elem'] = l_dict_domains
+        l_domains['search'] = name_to_search
+        return l_domains
+
 ######## LANGUAGES ########
 
 def get_all_domains_languages():
@@ -940,3 +987,6 @@ class Domain(object):
         '''
         port = sanathyse_port(port, self.domain, self.type, strict=True, current_port=self.current_port)
         return get_domain_items_crawled(self.domain, self.type, port, epoch=epoch, items_link=items_link, item_screenshot=item_screenshot, item_tag=item_tag)
+
+if __name__ == '__main__':
+    search_domains_by_name('c', ['onion'])
diff --git a/bin/lib/crawlers.py b/bin/lib/crawlers.py
index 8a6817f5..ed60fb62 100755
--- a/bin/lib/crawlers.py
+++ b/bin/lib/crawlers.py
@@ -47,6 +47,20 @@ faup = Faup()
 def generate_uuid():
     return str(uuid.uuid4()).replace('-', '')
 
+def is_valid_onion_domain(domain):
+    if not domain.endswith('.onion'):
+        return False
+    domain = domain.replace('.onion', '', 1)
+    if len(domain) == 16: # v2 address
+        r_onion = r'[a-z2-7]{16}'
+        if re.fullmatch(r_onion, domain):
+            return True
+    elif len(domain) == 56: # v3 address
+        r_onion = r'[a-z2-7]{56}'
+        if re.fullmatch(r_onion, domain):
+            return True
+    return False
+
 ################################################################################
 
 # # TODO: handle prefix cookies
diff --git a/var/www/blueprints/crawler_splash.py b/var/www/blueprints/crawler_splash.py
index 5da9b633..f80b3967 100644
--- a/var/www/blueprints/crawler_splash.py
+++ b/var/www/blueprints/crawler_splash.py
@@ -232,6 +232,25 @@ def domains_search_languages_get():
                             l_dict_domains=l_dict_domains, bootstrap_label=bootstrap_label,
                             current_languages=languages, domains_types=domains_types)
 
+@crawler_splash.route('/domains/name/search', methods=['GET'])
+@login_required
+@login_analyst
+def domains_search_name():
+    name = request.args.get('name')
+    page = request.args.get('page')
+    try:
+        page = int(page)
+    except (TypeError, ValueError):
+        page = 1
+    domains_types = request.args.getlist('domain_types')
+    if domains_types:
+        domains_types = domains_types[0].split(',')
+
+    l_dict_domains = Domain.api_search_domains_by_name(name, domains_types, domains_metadata=True, page=page)
+    return render_template("domains/domains_result_list.html", template_folder='../../',
+                           l_dict_domains=l_dict_domains, bootstrap_label=bootstrap_label,
+                           domains_types=domains_types)
+
 ##-- --##
 
 
diff --git a/var/www/modules/hiddenServices/templates/Crawler_dashboard.html b/var/www/modules/hiddenServices/templates/Crawler_dashboard.html
index 5cc7f987..86c82476 100644
--- a/var/www/modules/hiddenServices/templates/Crawler_dashboard.html
+++ b/var/www/modules/hiddenServices/templates/Crawler_dashboard.html
@@ -105,23 +105,7 @@
-