From addb8856740730bbf012d0d522d674be6796323c Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Thu, 23 Jan 2020 15:43:54 +0100
Subject: [PATCH] chg: [domain core + UI] add domain explorer v1

---
 bin/lib/Domain.py                             | 81 +++++-
 bin/lib/Screenshot.py                         | 11 +
 var/www/blueprints/crawler_splash.py          | 26 ++
 .../templates/Crawler_dashboard.html          | 20 ++
 .../crawler_splash/domain_explorer.html       | 235 ++++++++++++++++++
 5 files changed, 370 insertions(+), 3 deletions(-)
 create mode 100644 var/www/templates/crawler/crawler_splash/domain_explorer.html

diff --git a/bin/lib/Domain.py b/bin/lib/Domain.py
index e6cd5472..8c1c8b40 100755
--- a/bin/lib/Domain.py
+++ b/bin/lib/Domain.py
@@ -62,7 +62,7 @@ def sanitize_domain_type(domain_type):
         return 'regular'
 
 ######## DOMAINS ########
-def get_all_domains_up(domain_type):
+def get_all_domains_up(domain_type, r_list=True):
     '''
     Get all domain up (at least one time)
 
@@ -72,7 +72,13 @@ def get_all_domains_up(domain_type):
     :return: list of domain
     :rtype: list
     '''
-    return list(r_serv_onion.smembers("full_{}_up".format(domain_type)))
+    domains = r_serv_onion.smembers("full_{}_up".format(domain_type))
+    if r_list:
+        if domains:
+            domains = list(domains)
+        else:
+            domains = []
+    return domains
 
 def get_domains_up_by_month(date_year_month, domain_type, rlist=False):
     '''
@@ -128,6 +134,64 @@ def get_domains_up_by_daterange(date_from, date_to, domain_type):
         domains_up = []
     return domains_up
 
+def paginate_iterator(iter_elems, nb_obj=50, page=1):
+    dict_page = {}
+    dict_page['nb_all_elem'] = len(iter_elems)
+    nb_pages = dict_page['nb_all_elem'] / nb_obj
+    if not nb_pages.is_integer():
+        nb_pages = int(nb_pages)+1
+    else:
+        nb_pages = int(nb_pages)
+    # clamp page into [1, nb_pages] (0 when there is nothing to paginate)
+    page = min(max(page, 1), nb_pages)
+
+    # multiple pages
+    if nb_pages > 1:
+        dict_page['list_elem'] = []
+        start = nb_obj*(page -1)
+        stop = (nb_obj*page) -1
+        current_index = 0
+        for elem in iter_elems:
+            if current_index > stop:
+                break
+            if start <= current_index and stop >= current_index:
+                dict_page['list_elem'].append(elem)
+            current_index += 1
+        stop += 1
+        if stop > dict_page['nb_all_elem']:
+            stop = dict_page['nb_all_elem']
+
+    else:
+        start = 0
+        stop = dict_page['nb_all_elem']
+        dict_page['list_elem'] = list(iter_elems)
+    dict_page['page'] = page
+    dict_page['nb_pages'] = nb_pages
+    # UI
+    dict_page['nb_first_elem'] = start+1
+    dict_page['nb_last_elem'] = stop
+    return dict_page
+
+def domains_up_by_page(domain_type, nb_obj=28, page=1):
+    '''
+    Get a list of domains up (alpha sorted)
+
+    :param domain_type: domain type
+    :type domain_type: str
+
+    :return: list of domain
+    :rtype: list
+    '''
+    domains = sorted(get_all_domains_up(domain_type, r_list=False))
+    domains = paginate_iterator(domains, nb_obj=nb_obj, page=page)
+
+    # # TODO: get tags + root_screenshot + metadata
+    l_domains = []
+    for domain in domains['list_elem']:
+        l_domains.append(get_domain_metadata(domain, domain_type, first_seen=True, last_ckeck=True, status=True, ports=True, tags=True, screenshot=True))
+    domains['list_elem'] = l_domains
+    return domains
+
 ######## DOMAIN ########
 
 def get_domain_type(domain):
@@ -367,7 +431,15 @@ def get_domain_tags(domain):
     '''
     return Tag.get_obj_tag(domain)
 
-def get_domain_metadata(domain, domain_type, first_seen=True, last_ckeck=True, status=True, ports=True, tags=False):
+def get_domain_random_screenshot(domain):
+    '''
+    Return a random screenshot of the domain.
+
+    :param domain: crawled domain
+    '''
+    return Screenshot.get_randon_domain_screenshot(domain)
+
+def get_domain_metadata(domain, domain_type, first_seen=True, last_ckeck=True, status=True, ports=True, tags=False, screenshot=False):
     '''
     Get Domain basic metadata
 
@@ -384,6 +456,7 @@ def get_domain_metadata(domain, domain_type, first_seen=True, last_ckeck=True, s
     :rtype: dict
     '''
     dict_metadata = {}
+    dict_metadata['id'] = domain
     if first_seen:
         res = get_domain_first_seen(domain, domain_type=domain_type)
         if res is not None:
@@ -398,6 +471,8 @@ def get_domain_metadata(domain, domain_type, first_seen=True, last_ckeck=True, s
         dict_metadata['ports'] = get_domain_all_ports(domain, domain_type)
     if tags:
         dict_metadata['tags'] = get_domain_tags(domain)
+    if screenshot:
+        dict_metadata['screenshot'] = get_domain_random_screenshot(domain)
     return dict_metadata
 
 def get_domain_metadata_basic(domain, domain_type=None):
diff --git a/bin/lib/Screenshot.py b/bin/lib/Screenshot.py
index 8a4c8312..cbf951a8 100755
--- a/bin/lib/Screenshot.py
+++ b/bin/lib/Screenshot.py
@@ -76,6 +76,17 @@ def get_domain_screenshot(domain):
     else:
         return []
 
+def get_randon_domain_screenshot(domain, r_path=True):
+    '''
+    Return a random screenshot of a given domain.
+
+    :param domain: crawled domain
+    '''
+    res = r_serv_onion.srandmember('domain_screenshot:{}'.format(domain))
+    if res and r_path:
+        return get_screenshot_rel_path(res)
+    return res
+
 def get_screenshot_domain(sha256_string):
     '''
     Retun all domain of a given screenshot.
diff --git a/var/www/blueprints/crawler_splash.py b/var/www/blueprints/crawler_splash.py
index 155b3090..1cedd8e6 100644
--- a/var/www/blueprints/crawler_splash.py
+++ b/var/www/blueprints/crawler_splash.py
@@ -79,3 +79,29 @@ def showDomain():
 
     return render_template("showDomain.html", dict_domain=dict_domain, bootstrap_label=bootstrap_label,
                                 modal_add_tags=Tag.get_modal_add_tags(dict_domain['domain'], object_type="domain"))
+
+@crawler_splash.route('/domains/explorer/onion', methods=['GET', 'POST'])
+@login_required
+@login_read_only
+def domains_explorer_onion():
+    page = request.args.get('page')
+    try:
+        page = int(page)
+    except (TypeError, ValueError):
+        page = 1
+
+    dict_data = Domain.domains_up_by_page('onion', page=page)
+    return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label, domain_type='onion')
+
+@crawler_splash.route('/domains/explorer/web', methods=['GET', 'POST'])
+@login_required
+@login_read_only
+def domains_explorer_web():
+    page = request.args.get('page')
+    try:
+        page = int(page)
+    except (TypeError, ValueError):
+        page = 1
+
+    dict_data = Domain.domains_up_by_page('regular', page=page)
+    return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label, domain_type='regular')
diff --git a/var/www/modules/hiddenServices/templates/Crawler_dashboard.html b/var/www/modules/hiddenServices/templates/Crawler_dashboard.html
index d72daea6..261d055e 100644
--- a/var/www/modules/hiddenServices/templates/Crawler_dashboard.html
+++ b/var/www/modules/hiddenServices/templates/Crawler_dashboard.html
@@ -124,6 +124,26 @@
+
+
+ + +
+ + + {% with object_type='domain' %} {% include 'tags/block_obj_tags_search.html' %} {% endwith %} diff --git a/var/www/templates/crawler/crawler_splash/domain_explorer.html b/var/www/templates/crawler/crawler_splash/domain_explorer.html new file mode 100644 index 00000000..e80bdb7f --- /dev/null +++ b/var/www/templates/crawler/crawler_splash/domain_explorer.html @@ -0,0 +1,235 @@ + + + + Show Domain - AIL + + + + + + + + + + + + + + + {% include 'nav_bar.html' %} + +
+
+ + {% include 'crawler/menu_sidebar.html' %} + +
+ +
+
+
+
+ +
+
+ +
+
+ +
+
+
+
+ + {% for dict_domain in dict_data['list_elem'] %} + + {% if loop.index0 % 4 == 0 %} +
+ {% endif %} + +
+
+ + +
+
+
+ + {{dict_domain["id"]}} + +
+

+ + First seen: {{dict_domain["first_seen"]}}
+ Last_seen: {{dict_domain["first_seen"]}}
+ Ports: {{dict_domain["ports"]}} +
+

+ Status: + {% if dict_domain["status"] %} + + + UP + + {% else %} + + + DOWN + + {% endif %} +
+ {% for tag in dict_domain['tags'] %} + + {{ tag }} + + {% endfor %} +
+
+
+ + + {% if loop.index0 % 4 == 3 %} +
+ {% endif %} + + {% endfor %} + + {% if dict_data['list_elem']|length % 4 != 0 %} +
+ {% endif %} + +
+
+ + {%if 'list_elem' in dict_data%} + {% with page=dict_data['page'], nb_page_max=dict_data['nb_pages'], nb_first_elem=dict_data['nb_first_elem'], nb_last_elem=dict_data['nb_last_elem'], nb_all_elem=dict_data['nb_all_elem'] %} + {% set object_name="domain" %} + {%if domain_type=='onion'%} + {% set target_url=url_for('crawler_splash.domains_explorer_onion') + "?domain_type=onion" %} + {%else%} + {% set target_url=url_for('crawler_splash.domains_explorer_web') + "?domain_type=regular" %} + {%endif%} + {% include 'pagination.html' %} + {% endwith %} + {%endif%} + + + + + +
+
+ + + + + + + + + + +