From a183695ddb9547e8156d80ca009c585497d5c494 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Fri, 26 Apr 2019 15:14:29 +0200 Subject: [PATCH] chg: [crawler UI] bootstrap 4 migration: show domains by daterange + bug fix hidden services tags --- bin/packages/HiddenServices.py | 28 +- .../hiddenServices/Flask_hiddenServices.py | 60 ++- .../Crawler_Splash_last_by_type.html | 57 +-- .../hiddenServices/templates/domains.html | 389 ++++++------------ .../crawler/show_domains_by_daterange.html | 56 +++ 5 files changed, 252 insertions(+), 338 deletions(-) create mode 100644 var/www/templates/crawler/show_domains_by_daterange.html diff --git a/bin/packages/HiddenServices.py b/bin/packages/HiddenServices.py index 79876c4c..f1ed0767 100755 --- a/bin/packages/HiddenServices.py +++ b/bin/packages/HiddenServices.py @@ -105,18 +105,20 @@ class HiddenServices(object): return self.tags def update_domain_tags(self, item): - if self.r_serv_metadata.exists('tag:{}'.format(item)): - p_tags = self.r_serv_metadata.smembers('tag:{}'.format(item)) - # update path here - else: - # need to remove it - if self.paste_directory in item: - p_tags = self.r_serv_metadata.smembers('tag:{}'.format(item.replace(self.paste_directory+'/', ''))) - # need to remove it + if item: + + if self.r_serv_metadata.exists('tag:{}'.format(item)): + p_tags = self.r_serv_metadata.smembers('tag:{}'.format(item)) + # update path here else: - p_tags = self.r_serv_metadata.smembers('tag:{}'.format(os.path.join(self.paste_directory, item))) - for tag in p_tags: - self.tags[tag] = self.tags.get(tag, 0) + 1 + # need to remove it + if self.paste_directory in item: + p_tags = self.r_serv_metadata.smembers('tag:{}'.format(item.replace(self.paste_directory+'/', ''))) + # need to remove it + else: + p_tags = self.r_serv_metadata.smembers('tag:{}'.format(os.path.join(self.paste_directory, item))) + for tag in p_tags: + self.tags[tag] = self.tags.get(tag, 0) + 1 def get_first_crawled(self): res = self.r_serv_onion.zrange('crawler_history_{}:{}:{}'.format(self.type, self.domain, self.port), 0, 0, withscores=True) @@ -150,7 +152,9 @@ class HiddenServices(object): #todo use the right paste def get_last_crawled_pastes(self, item_root=None): if item_root is None: - item_root = self.get_domain_crawled_core_item(self) + item_root = self.get_domain_crawled_core_item() + if item_root: + item_root = item_root['root_item'] return self.get_all_pastes_domain(item_root) def get_all_pastes_domain(self, root_item): diff --git a/var/www/modules/hiddenServices/Flask_hiddenServices.py b/var/www/modules/hiddenServices/Flask_hiddenServices.py index adbbe87c..fd68dc93 100644 --- a/var/www/modules/hiddenServices/Flask_hiddenServices.py +++ b/var/www/modules/hiddenServices/Flask_hiddenServices.py @@ -82,6 +82,13 @@ def is_valid_domain(domain): else: return False +def is_valid_service_type(service_type): + accepted_service = ['onion', 'regular'] + if service_type in accepted_service: + return True + else: + return False + def get_onion_status(domain, date): if r_serv_onion.sismember('onion_up:'+date , domain): return True @@ -114,13 +121,17 @@ def get_domain_from_url(url): def get_last_domains_crawled(type): return r_serv_onion.lrange('last_{}'.format(type), 0 ,-1) +def get_nb_domains_inqueue(type): + nb = r_serv_onion.scard('{}_crawler_queue'.format(type)) + nb += r_serv_onion.scard('{}_crawler_priority_queue'.format(type)) + return nb + def get_stats_last_crawled_domains(type, date): statDomains = {} statDomains['domains_up'] = r_serv_onion.scard('{}_up:{}'.format(type, date)) statDomains['domains_down'] = r_serv_onion.scard('{}_down:{}'.format(type, date)) statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down'] - statDomains['domains_queue'] = r_serv_onion.scard('{}_crawler_queue'.format(type)) - statDomains['domains_queue'] += r_serv_onion.scard('{}_crawler_priority_queue'.format(type)) + statDomains['domains_queue'] = get_nb_domains_inqueue(type) return statDomains def get_last_crawled_domains_metadata(list_domains_crawled, date, type=None, auto_mode=False): @@ -561,20 +572,28 @@ def last_crawled_domains_with_stats_json(): def get_onions_by_daterange(): date_from = request.form.get('date_from') date_to = request.form.get('date_to') + service_type = request.form.get('service_type') domains_up = request.form.get('domains_up') domains_down = request.form.get('domains_down') domains_tags = request.form.get('domains_tags') - return redirect(url_for('hiddenServices.show_domains_by_daterange', date_from=date_from, date_to=date_to, domains_up=domains_up, domains_down=domains_down, domains_tags=domains_tags)) + return redirect(url_for('hiddenServices.show_domains_by_daterange', date_from=date_from, date_to=date_to, service_type=service_type, domains_up=domains_up, domains_down=domains_down, domains_tags=domains_tags)) @hiddenServices.route("/hiddenServices/show_domains_by_daterange", methods=['GET']) def show_domains_by_daterange(): date_from = request.args.get('date_from') date_to = request.args.get('date_to') + service_type = request.args.get('service_type') domains_up = request.args.get('domains_up') domains_down = request.args.get('domains_down') domains_tags = request.args.get('domains_tags') + # incorrect service type + if not is_valid_service_type(service_type): + service_type = 'onion' + + type_name = dic_type_name[service_type] + date_range = [] if date_from is not None and date_to is not None: #change format @@ -595,12 +614,22 @@ def show_domains_by_daterange(): date_from = date_from[0:4] + '-' + date_from[4:6] + '-' + date_from[6:8] date_to = date_to[0:4] + '-' + date_to[4:6] + '-' + date_to[6:8] + statDomains = {} + statDomains['domains_up'] = 0 + statDomains['domains_down'] = 0 + statDomains['total'] = 0 + statDomains['domains_queue'] = get_nb_domains_inqueue(service_type) + domains_by_day = {} domain_metadata = {} + stats_by_date = {} for date in date_range: + stats_by_date[date] = {} + stats_by_date[date]['domain_up'] = 0 + stats_by_date[date]['domain_down'] = 0 if domains_up: domains_up = True - domains_by_day[date] = list(r_serv_onion.smembers('onion_up:{}'.format(date))) + domains_by_day[date] = list(r_serv_onion.smembers('{}_up:{}'.format(service_type, date))) for domain in domains_by_day[date]: h = HiddenServices(domain, 'onion') domain_metadata[domain] = {} @@ -608,19 +637,21 @@ def show_domains_by_daterange(): domains_tags = True domain_metadata[domain]['tags'] = h.get_domain_tags(update=True) - domain_metadata[domain]['last_check'] = r_serv_onion.hget('onion_metadata:{}'.format(domain), 'last_check') + domain_metadata[domain]['last_check'] = r_serv_onion.hget('{}_metadata:{}'.format(service_type, domain), 'last_check') if domain_metadata[domain]['last_check'] is None: domain_metadata[domain]['last_check'] = '********' - domain_metadata[domain]['first_seen'] = r_serv_onion.hget('onion_metadata:{}'.format(domain), 'first_seen') + domain_metadata[domain]['first_seen'] = r_serv_onion.hget('{}_metadata:{}'.format(service_type, domain), 'first_seen') if domain_metadata[domain]['first_seen'] is None: domain_metadata[domain]['first_seen'] = '********' domain_metadata[domain]['status_text'] = 'UP' domain_metadata[domain]['status_color'] = 'Green' domain_metadata[domain]['status_icon'] = 'fa-check-circle' + statDomains['domains_up'] += 1 + stats_by_date[date]['domain_up'] += 1 if domains_down: domains_down = True - domains_by_day_down = list(r_serv_onion.smembers('onion_down:{}'.format(date))) + domains_by_day_down = list(r_serv_onion.smembers('{}_down:{}'.format(service_type, date))) if domains_up: domains_by_day[date].extend(domains_by_day_down) else: @@ -630,20 +661,27 @@ def show_domains_by_daterange(): domain_metadata[domain] = {} #domain_metadata[domain]['tags'] = h.get_domain_tags() - domain_metadata[domain]['last_check'] = r_serv_onion.hget('onion_metadata:{}'.format(domain), 'last_check') + domain_metadata[domain]['last_check'] = r_serv_onion.hget('{}_metadata:{}'.format(service_type, domain), 'last_check') if domain_metadata[domain]['last_check'] is None: domain_metadata[domain]['last_check'] = '********' - domain_metadata[domain]['first_seen'] = r_serv_onion.hget('onion_metadata:{}'.format(domain), 'first_seen') + domain_metadata[domain]['first_seen'] = r_serv_onion.hget('{}_metadata:{}'.format(service_type, domain), 'first_seen') if domain_metadata[domain]['first_seen'] is None: domain_metadata[domain]['first_seen'] = '********' domain_metadata[domain]['status_text'] = 'DOWN' domain_metadata[domain]['status_color'] = 'Red' domain_metadata[domain]['status_icon'] = 'fa-times-circle' + statDomains['domains_down'] += 1 + stats_by_date[date]['domain_down'] += 1 - return render_template("domains.html", date_range=date_range, domains_by_day=domains_by_day, domain_metadata=domain_metadata, + statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down'] + + return render_template("domains.html", date_range=date_range, domains_by_day=domains_by_day, + statDomains=statDomains, type_name=type_name, + domain_metadata=domain_metadata, + stats_by_date=stats_by_date, date_from=date_from, date_to=date_to, domains_up=domains_up, domains_down=domains_down, - domains_tags=domains_tags, bootstrap_label=bootstrap_label) + domains_tags=domains_tags, type=service_type, bootstrap_label=bootstrap_label) @hiddenServices.route("/crawlers/show_domain", methods=['GET']) def show_domain(): diff --git a/var/www/modules/hiddenServices/templates/Crawler_Splash_last_by_type.html b/var/www/modules/hiddenServices/templates/Crawler_Splash_last_by_type.html index 6c3bab49..a42e3880 100644 --- a/var/www/modules/hiddenServices/templates/Crawler_Splash_last_by_type.html +++ b/var/www/modules/hiddenServices/templates/Crawler_Splash_last_by_type.html @@ -85,62 +85,7 @@
- -
-
-
-
- {{ statDomains['domains_up'] }} UP - {{ statDomains['domains_down'] }} DOWN -
-
- {{ statDomains['total'] }} Crawled - {{ statDomains['domains_queue'] }} Queue -
-
-
-
-
Select domains by date range :
-

Some quick example text to build on the card title and make up the bulk of the card's content.

-
-
-
-
-
- -
-
-
- -
-
-
-
- - -
-
- - -
-
- - -
-
-
- - -
-
+ {% include 'crawler/show_domains_by_daterange.html' %}
diff --git a/var/www/modules/hiddenServices/templates/domains.html b/var/www/modules/hiddenServices/templates/domains.html index 85f2006a..c816ae17 100644 --- a/var/www/modules/hiddenServices/templates/domains.html +++ b/var/www/modules/hiddenServices/templates/domains.html @@ -1,291 +1,162 @@ - - - + + + AIL-Framework + + + + + + - Hidden Service - AIL - + + + + + + + + - - - - - - - - - - - - - + + - + {% include 'nav_bar.html' %} - - +
+
- {% include 'navbar.html' %} + {% include 'crawler/menu_sidebar.html' %} -
+
-
+
{% for date in date_range %} {% if domains_by_day[date]%} -
-
-

{{'{}/{}/{}'.format(date[0:4], date[4:6], date[6:8])}}

-
-
- - - - - - - - - - - - {% for domain in domains_by_day[date] %} - - - - - - - {% endfor %} - -
DomainFirst SeenLast CheckStatus
- {{ domain }} -
- {% for tag in domain_metadata[domain]['tags'] %} - - {{ tag }} {{ domain_metadata[domain]['tags'][tag] }} - - {% endfor %} -
-
{{'{}/{}/{}'.format(domain_metadata[domain]['first_seen'][0:4], domain_metadata[domain]['first_seen'][4:6], domain_metadata[domain]['first_seen'][6:8])}}{{'{}/{}/{}'.format(domain_metadata[domain]['last_check'][0:4], domain_metadata[domain]['last_check'][4:6], domain_metadata[domain]['last_check'][6:8])}}
- - {{domain_metadata[domain]['status_text']}} -
-
-
-
-
- {% endif %} - {% endfor %} - - -
- -
-
-
- Select domains by date range : -
-
- +
+
-
-
- - -
-
- - -
-
- -
+
+

{{'{}/{}/{}'.format(date[0:4], date[4:6], date[6:8])}}

-
-
- -
-
- -
+
- - +
+
+ + + + + + + + + + + + {% for domain in domains_by_day[date] %} + + + + + + + {% endfor %} + +
DomainFirst SeenLast CheckStatus
+ {{ domain }} +
+ {% for tag in domain_metadata[domain]['tags'] %} + + {{ tag }} {{ domain_metadata[domain]['tags'][tag] }} + + {% endfor %} +
+
{{'{}/{}/{}'.format(domain_metadata[domain]['first_seen'][0:4], domain_metadata[domain]['first_seen'][4:6], domain_metadata[domain]['first_seen'][6:8])}}{{'{}/{}/{}'.format(domain_metadata[domain]['last_check'][0:4], domain_metadata[domain]['last_check'][4:6], domain_metadata[domain]['last_check'][6:8])}}
+ + {{domain_metadata[domain]['status_text']}} +
+
+ +
-
+ {% endif %} + {% endfor %}
- +
+ {% include 'crawler/show_domains_by_daterange.html' %} +
-
- +
+
+
- + - diff --git a/var/www/templates/crawler/show_domains_by_daterange.html b/var/www/templates/crawler/show_domains_by_daterange.html new file mode 100644 index 00000000..d56644c2 --- /dev/null +++ b/var/www/templates/crawler/show_domains_by_daterange.html @@ -0,0 +1,56 @@ +
+
+
+ +
+ {{ statDomains['total'] }} Crawled + {{ statDomains['domains_queue'] }} Queue +
+
+
+
+
Select domains by date range :
+

Some quick example text to build on the card title and make up the bulk of the card's content.

+ +
+
+ +
+
+ +
+
+
+ +
+
+
+
+ + +
+
+ + +
+
+ + +
+
+
+ + +
+