From d941d8abb40ff4bb804b40ad564b973ed0bc2d01 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Fri, 5 Feb 2021 17:42:33 +0100 Subject: [PATCH] chg: [domains search] search domains by name --- bin/lib/Domain.py | 52 ++++- bin/lib/crawlers.py | 14 ++ var/www/blueprints/crawler_splash.py | 19 ++ .../templates/Crawler_dashboard.html | 18 +- var/www/static/css/ail-project.css | 7 + .../domains/block_domains_name_search.html | 50 +++++ .../templates/domains/card_img_domain.html | 2 +- .../domains/domains_result_list.html | 195 ++++++++++++++++++ 8 files changed, 338 insertions(+), 19 deletions(-) create mode 100644 var/www/static/css/ail-project.css create mode 100644 var/www/templates/domains/block_domains_name_search.html create mode 100644 var/www/templates/domains/domains_result_list.html diff --git a/bin/lib/Domain.py b/bin/lib/Domain.py index 5eca1943..60d78967 100755 --- a/bin/lib/Domain.py +++ b/bin/lib/Domain.py @@ -10,9 +10,10 @@ The ``Domain`` import os import sys import itertools -import time +import re import redis import random +import time sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) import Cryptocurrency @@ -241,6 +242,52 @@ def create_domains_metadata_list(list_domains, domain_type): ports=True, tags=True, languages=True, screenshot=True, tags_safe=True)) return l_domains +def sanithyse_domain_name_to_search(name_to_search, domain_type): + if domain_type == 'onion': + r_name = r'[a-z0-9\.]+' + else: + r_name = r'[a-zA-Z0-9\.-_]+' + # invalid domain name + if not re.fullmatch(r_name, name_to_search): + return None + return name_to_search.replace('.', '\.') + + +def search_domains_by_name(name_to_search, domain_types, r_pos=False): + domains_dict = {} + for domain_type in domain_types: + r_name = sanithyse_domain_name_to_search(name_to_search, domain_type) + if not name_to_search: + break + r_name = re.compile(r_name) + for domain in get_all_domains_up(domain_type): + res = re.search(r_name, domain) + if res: + domains_dict[domain] = {} + if r_pos: + domains_dict[domain]['hl-start'] = res.start() + domains_dict[domain]['hl-end'] = res.end() + return domains_dict + +def api_search_domains_by_name(name_to_search, domains_types, domains_metadata=False, page=1): + domains_types = sanitize_domain_types(domains_types) + domains_dict = search_domains_by_name(name_to_search, domains_types, r_pos=True) + l_domains = sorted(domains_dict.keys()) + l_domains = paginate_iterator(l_domains, nb_obj=28, page=page) + if not domains_metadata: + return l_domains + else: + l_dict_domains = [] + for domain in l_domains['list_elem']: + dict_domain = get_domain_metadata(domain, get_domain_type(domain), first_seen=True, last_ckeck=True, + status=True, ports=True, tags=True, tags_safe=True, + languages=True, screenshot=True) + dict_domain = {**domains_dict[domain], **dict_domain} + l_dict_domains.append(dict_domain) + l_domains['list_elem'] = l_dict_domains + l_domains['search'] = name_to_search + return l_domains + ######## LANGUAGES ######## def get_all_domains_languages(): @@ -940,3 +987,6 @@ class Domain(object): ''' port = sanathyse_port(port, self.domain, self.type, strict=True, current_port=self.current_port) return get_domain_items_crawled(self.domain, self.type, port, epoch=epoch, items_link=items_link, item_screenshot=item_screenshot, item_tag=item_tag) + +if __name__ == '__main__': + search_domains_by_name('c', 'onion') diff --git a/bin/lib/crawlers.py b/bin/lib/crawlers.py index 8a6817f5..ed60fb62 100755 --- a/bin/lib/crawlers.py +++ b/bin/lib/crawlers.py @@ -47,6 +47,20 @@ faup = Faup() def generate_uuid(): return str(uuid.uuid4()).replace('-', '') +def is_valid_onion_domain(domain): + if not domain.endswith('.onion'): + return False + domain = domain.replace('.onion', '', 1) + if len(domain) == 16: # v2 address + r_onion = r'[a-z0-9]{16}' + if re.match(r_onion, domain): + return True + elif len(domain) == 56: # v3 address + r_onion = r'[a-z0-9]{56}' + if re.fullmatch(r_onion, domain): + return True + return False + ################################################################################ # # TODO: handle prefix cookies diff --git a/var/www/blueprints/crawler_splash.py b/var/www/blueprints/crawler_splash.py index 5da9b633..f80b3967 100644 --- a/var/www/blueprints/crawler_splash.py +++ b/var/www/blueprints/crawler_splash.py @@ -232,6 +232,25 @@ def domains_search_languages_get(): l_dict_domains=l_dict_domains, bootstrap_label=bootstrap_label, current_languages=languages, domains_types=domains_types) +@crawler_splash.route('/domains/name/search', methods=['GET']) +@login_required +@login_analyst +def domains_search_name(): + name = request.args.get('name') + page = request.args.get('page') + try: + page = int(page) + except: + page = 1 + domains_types = request.args.getlist('domain_types') + if domains_types: + domains_types = domains_types[0].split(',') + + l_dict_domains = Domain.api_search_domains_by_name(name, domains_types, domains_metadata=True, page=page) + return render_template("domains/domains_result_list.html", template_folder='../../', + l_dict_domains=l_dict_domains, bootstrap_label=bootstrap_label, + domains_types=domains_types) + ##-- --## diff --git a/var/www/modules/hiddenServices/templates/Crawler_dashboard.html b/var/www/modules/hiddenServices/templates/Crawler_dashboard.html index 5cc7f987..86c82476 100644 --- a/var/www/modules/hiddenServices/templates/Crawler_dashboard.html +++ b/var/www/modules/hiddenServices/templates/Crawler_dashboard.html @@ -105,23 +105,7 @@ -
-
-
-
Show Domain:
-
-
- -
- -
-
-
-
-
-
+ {% include 'domains/block_domains_name_search.html' %}
diff --git a/var/www/static/css/ail-project.css b/var/www/static/css/ail-project.css new file mode 100644 index 00000000..ba4d461b --- /dev/null +++ b/var/www/static/css/ail-project.css @@ -0,0 +1,7 @@ +.hg-text{ + padding-top: 0.2em; + padding-bottom: 0.2em; + padding-right: 0.15em; + padding-left: 0.15em; + background-color: #2e5; +} diff --git a/var/www/templates/domains/block_domains_name_search.html b/var/www/templates/domains/block_domains_name_search.html new file mode 100644 index 00000000..ca5e7cbe --- /dev/null +++ b/var/www/templates/domains/block_domains_name_search.html @@ -0,0 +1,50 @@ +
+
+
+
Search Domain by name:
+
+ +
+ +
+
+ +
+
+ + +
+
+ + +
+
+ +
+
+
+ + diff --git a/var/www/templates/domains/card_img_domain.html b/var/www/templates/domains/card_img_domain.html index fbb67bc8..fb5480f1 100644 --- a/var/www/templates/domains/card_img_domain.html +++ b/var/www/templates/domains/card_img_domain.html @@ -12,7 +12,7 @@
{% if 'hl-start' in dict_domain %} - {{dict_domain["id"][:dict_domain['hl-start']]}}{{dict_domain["id"][dict_domain['hl-start']:dict_domain['hl-end']]}}{{dict_domain["id"][dict_domain['hl-end']:]}} + {{dict_domain["id"][:dict_domain['hl-start']]}}{{dict_domain["id"][dict_domain['hl-start']:dict_domain['hl-end']]}}{{dict_domain["id"][dict_domain['hl-end']:]}} {% else %} {{dict_domain["id"]}} {% endif %} diff --git a/var/www/templates/domains/domains_result_list.html b/var/www/templates/domains/domains_result_list.html new file mode 100644 index 00000000..168cf530 --- /dev/null +++ b/var/www/templates/domains/domains_result_list.html @@ -0,0 +1,195 @@ + + + + Domain Search - AIL + + + + + + + + + + + + + + + + {% include 'nav_bar.html' %} + +
+
+ + {% include 'crawler/menu_sidebar.html' %} + +
+ +
+
+ + {% with page=l_dict_domains['page'], search=l_dict_domains['search'] %} + {% include 'domains/block_domains_name_search.html' %} + {% endwith %} + + +
+
+ +
+
+
+
+ +
+
+ +
+
+ +
+
+
+
+ +
+
+ + + {% with l_dict_domains=l_dict_domains['list_elem'], bootstrap_label=bootstrap_label %} + {% include 'domains/card_img_domain.html' %} + {% endwith %} + +
+
+ + {%if l_dict_domains['list_elem']%} + {% with page=l_dict_domains['page'], nb_page_max=l_dict_domains['nb_pages'], nb_first_elem=l_dict_domains['nb_first_elem'], nb_last_elem=l_dict_domains['nb_last_elem'], nb_all_elem=l_dict_domains['nb_all_elem'] %} + {% set target_url=url_for('crawler_splash.domains_search_name') + "?name=" + l_dict_domains['search']%} + {%if domains_types %} + {% set target_url = target_url + '&domain_types=' + ','.join(domains_types)%} + {%endif%} + {% include 'pagination.html' %} + {% endwith %} + {%endif%} + + +
+
+
+ + + + + + + + +