From da78d0552d646fd83d4d81b16593e0a754c17fd1 Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Tue, 19 Feb 2019 11:41:45 +0100
Subject: [PATCH] chg: [Crawler UI Tags] add tag by day + add crawler status +
 UI onion blacklist

---
 bin/Crawler.py                                     |  17 +-
 bin/packages/Paste.py                              |   4 +
 .../hiddenServices/Flask_hiddenServices.py         | 182 ++++++++++++--
 .../templates/Crawler_Splash_onion.html            |  75 ++++--
 .../templates/blacklisted_onion.html               | 225 ++++++++++++++++++
 5 files changed, 450 insertions(+), 53 deletions(-)
 create mode 100644 var/www/modules/hiddenServices/templates/blacklisted_onion.html

diff --git a/bin/Crawler.py b/bin/Crawler.py
index 278ecc05..721a4415 100755
--- a/bin/Crawler.py
+++ b/bin/Crawler.py
@@ -66,8 +66,6 @@ def load_type_blacklist(type_service):
     # load domains blacklist
     try:
         with open(os.path.join(os.environ['AIL_BIN'],'/torcrawler/blacklist_{}.txt'.format(type_service)), 'r') as f:
-            # # TODO: # FIXME: remove this
-            r_onion.delete('blacklist_{}'.format(type_service))
             lines = f.read().splitlines()
             for line in lines:
                 r_onion.sadd('blacklist_{}'.format(type_service), line)
@@ -176,7 +174,9 @@ if __name__ == '__main__':
     crawler_depth_limit = p.config.getint("Crawler", "crawler_depth_limit")

     # Crawler status
-    r_cache.sadd('all_crawler:{}'.format(type_hidden_service), splash_port)
+    r_cache.sadd('all_crawler', splash_port)
+    r_cache.sadd('all_crawler:{}:{}'.format(mode, type_hidden_service), splash_port)
+    r_cache.hset('metadata_crawler:{}'.format(splash_port), 'mode', mode)
     r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Waiting')
     r_cache.hset('metadata_crawler:{}'.format(splash_port), 'started_time', datetime.datetime.now().strftime("%Y/%m/%d - %H:%M.%S"))
@@ -293,9 +293,14 @@ if __name__ == '__main__':
                     r_onion.delete('domain_{}_external_links:{}'.format(type_hidden_service, domain))
                     print(r_onion.smembers('domain_{}_external_links:{}'.format(type_hidden_service, domain)))

-                # update list, last crawled sites
-                r_onion.lpush('last_{}'.format(type_hidden_service), domain)
-                r_onion.ltrim('last_{}'.format(type_hidden_service), 0, 15)
+                    # update list, last crawled sites
+                    r_onion.lpush('last_{}'.format(type_hidden_service), domain)
+                    r_onion.ltrim('last_{}'.format(type_hidden_service), 0, 15)
+                # manual
+                else:
+                    # update list, last crawled sites
+                    r_onion.lpush('last_crawled_manual', domain)
+                    r_onion.ltrim('last_crawled_manual', 0, 15)

                 #update crawler status
                 r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Waiting')
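For orientation, the status bookkeeping above leaves the following keys in the cache Redis instance. A standalone sketch of the same calls: the key names are the ones used in the hunk (the first, unscoped 'all_crawler' set reflects the parenthesis fix applied above), while the connection settings and the example values for splash_port, mode and type_hidden_service are illustrative only.

    import datetime
    import redis

    # illustrative connection; AIL reads its Redis settings from its own config
    r_cache = redis.StrictRedis(host='localhost', port=6379, decode_responses=True)

    splash_port, mode, type_hidden_service = 8050, 'automatic', 'onion'  # example values

    # one global set of launched Splash crawlers, plus one set per (mode, service type)
    r_cache.sadd('all_crawler', splash_port)
    r_cache.sadd('all_crawler:{}:{}'.format(mode, type_hidden_service), splash_port)

    # per-crawler metadata hash, keyed by the Splash port
    r_cache.hset('metadata_crawler:{}'.format(splash_port), 'mode', mode)
    r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Waiting')
    r_cache.hset('metadata_crawler:{}'.format(splash_port), 'started_time',
                 datetime.datetime.now().strftime("%Y/%m/%d - %H:%M.%S"))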
diff --git a/bin/packages/Paste.py b/bin/packages/Paste.py
index d02a92f5..48bd710b 100755
--- a/bin/packages/Paste.py
+++ b/bin/packages/Paste.py
@@ -241,6 +241,10 @@ class Paste(object):
     def _get_p_date(self):
         return self.p_date

+    # public accessor, used by the web modules
+    def get_p_date(self):
+        return self.p_date
+
     def _get_p_size(self):
         return self.p_size

diff --git a/var/www/modules/hiddenServices/Flask_hiddenServices.py b/var/www/modules/hiddenServices/Flask_hiddenServices.py
index 3153f2a9..320802a5 100644
--- a/var/www/modules/hiddenServices/Flask_hiddenServices.py
+++ b/var/www/modules/hiddenServices/Flask_hiddenServices.py
@@ -8,6 +8,7 @@ import redis
 import datetime
 import sys
 import os
+from pyfaup.faup import Faup

 from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for
 from Date import Date
@@ -27,6 +28,8 @@ PASTES_FOLDER = Flask_config.PASTES_FOLDER

 hiddenServices = Blueprint('hiddenServices', __name__, template_folder='templates')

+faup = Faup()
+
 # ============ FUNCTIONS ============

 def one():
     return 1
@@ -68,11 +71,73 @@ def unpack_paste_tags(p_tags):
         l_tags.append( (tag, complete_tag) )
     return l_tags

+def is_valid_onion_domain(onion_domain):
+    faup.decode(onion_domain)
+    domain_unpack = faup.get()
+    if domain_unpack['tld']==b'onion' and domain_unpack['scheme'] is None and domain_unpack['port'] is None and domain_unpack['query_string'] is None:
+        return True
+    else:
+        return False
+
 def get_onion_status(domain, date):
     if r_serv_onion.sismember('onion_up:'+date , domain):
         return True
     else:
         return False
+
+def get_domain_type(domain):
+    type_id = domain.split('.')[-1]
+    if type_id == 'onion':
+        return 'onion'
+    else:
+        return 'regular'
+
+def get_last_crawled_domains_metadata(list_domains_crawled, date, type=None):
+    list_crawled_metadata = []
+    for domain in list_domains_crawled:
+        metadata_domain = {}
+        # resolve the domain type per domain, a crawled list can mix types
+        if type is None:
+            domain_type = get_domain_type(domain)
+        else:
+            domain_type = type
+
+        metadata_domain['domain'] = domain
+        metadata_domain['last_check'] = r_serv_onion.hget('{}_metadata:{}'.format(domain_type, domain), 'last_check')
+        if metadata_domain['last_check'] is None:
+            metadata_domain['last_check'] = '********'
+        metadata_domain['first_seen'] = r_serv_onion.hget('{}_metadata:{}'.format(domain_type, domain), 'first_seen')
+        if metadata_domain['first_seen'] is None:
+            metadata_domain['first_seen'] = '********'
+        if r_serv_onion.sismember('{}_up:{}'.format(domain_type, metadata_domain['last_check']), domain):
+            metadata_domain['status_text'] = 'UP'
+            metadata_domain['status_color'] = 'Green'
+            metadata_domain['status_icon'] = 'fa-check-circle'
+        else:
+            metadata_domain['status_text'] = 'DOWN'
+            metadata_domain['status_color'] = 'Red'
+            metadata_domain['status_icon'] = 'fa-times-circle'
+        list_crawled_metadata.append(metadata_domain)
+    return list_crawled_metadata
+
+def get_crawler_splash_status(mode, type):
+    crawler_metadata = []
+    all_crawlers = r_cache.smembers('all_crawler:{}:{}'.format(mode, type))
+    for crawler in all_crawlers:
+        crawling_domain = r_cache.hget('metadata_crawler:{}'.format(crawler), 'crawling_domain')
+        started_time = r_cache.hget('metadata_crawler:{}'.format(crawler), 'started_time')
+        status_info = r_cache.hget('metadata_crawler:{}'.format(crawler), 'status')
+        crawler_info = '{} - {}'.format(crawler, started_time)
+        if status_info=='Waiting' or status_info=='Crawling':
+            status=True
+        else:
+            status=False
+        crawler_metadata.append({'crawler_info': crawler_info, 'crawling_domain': crawling_domain, 'status_info': status_info, 'status': status})
+
+    return crawler_metadata
+

 # ============= ROUTES ==============

 @hiddenServices.route("/hiddenServices/2", methods=['GET'])
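As a sanity check on is_valid_onion_domain(), this is roughly how pyfaup decomposes a bare onion address (a sketch, assuming pyfaup is installed; whether fields come back as bytes or str varies between pyfaup builds, hence the b'onion' comparison in the helper):

    from pyfaup.faup import Faup

    faup = Faup()
    faup.decode('3g2upl4pq6kufc4m.onion')
    domain_unpack = faup.get()
    print(domain_unpack['tld'])     # b'onion' on a bare .onion address
    print(domain_unpack['scheme'])  # None: no scheme supplied
    print(domain_unpack['port'])    # None: no explicit port
    # a full URL such as 'http://example.onion:8080/q?a=1' fails the check:
    # scheme, port and query_string would all be non-None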
@@ -80,36 +145,44 @@
 def hiddenServices_page_test():
     return render_template("Crawler_index.html")

 @hiddenServices.route("/crawlers/crawler_splash_onion", methods=['GET'])
-def hiddenServices_page_l():
+def crawler_splash_onion():
     last_onions = r_serv_onion.lrange('last_onion', 0 ,-1)
     list_onion = []

     now = datetime.datetime.now()
-    date = '{}{}{}'.format(now.strftime("%Y"), now.strftime("%m"), now.strftime("%d"))
+    date = now.strftime("%Y%m%d")

     statDomains = {}
     statDomains['domains_up'] = r_serv_onion.scard('onion_up:{}'.format(date))
     statDomains['domains_down'] = r_serv_onion.scard('onion_down:{}'.format(date))
     statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down']
     statDomains['domains_queue'] = r_serv_onion.scard('onion_domain_crawler_queue')

-    for onion in last_onions:
-        metadata_onion = {}
-        metadata_onion['domain'] = onion
-        metadata_onion['last_check'] = r_serv_onion.hget('onion_metadata:{}'.format(onion), 'last_check')
-        if metadata_onion['last_check'] is None:
-            metadata_onion['last_check'] = '********'
-        metadata_onion['first_seen'] = r_serv_onion.hget('onion_metadata:{}'.format(onion), 'first_seen')
-        if metadata_onion['first_seen'] is None:
-            metadata_onion['first_seen'] = '********'
-        if get_onion_status(onion, metadata_onion['last_check']):
-            metadata_onion['status_text'] = 'UP'
-            metadata_onion['status_color'] = 'Green'
-            metadata_onion['status_icon'] = 'fa-check-circle'
-        else:
-            metadata_onion['status_text'] = 'DOWN'
-            metadata_onion['status_color'] = 'Red'
-            metadata_onion['status_icon'] = 'fa-times-circle'
-        list_onion.append(metadata_onion)
+    list_onion = get_last_crawled_domains_metadata(last_onions, date, type='onion')
+    crawler_metadata = get_crawler_splash_status('automatic', 'onion')
+
+    date_string = '{}-{}-{}'.format(date[0:4], date[4:6], date[6:8])
+    return render_template("Crawler_Splash_onion.html", last_onions=list_onion, statDomains=statDomains,
+                            crawler_metadata=crawler_metadata, date_from=date_string, date_to=date_string)
+
+@hiddenServices.route("/crawlers/manual_splash_crawler", methods=['GET'])
+def manual_splash_crawler():
+
+    now = datetime.datetime.now()
+    date = now.strftime("%Y%m%d")
+
+    # Stats
+    # user request == CHECK
+    # preconf crawlers == ?????
+    #################################################################################
+    statDomains = {}
+    #statDomains['domains_up'] = r_serv_onion.scard('onion_up:{}'.format(date))
+    #statDomains['domains_down'] = r_serv_onion.scard('onion_down:{}'.format(date))
+    #statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down']
+    #statDomains['domains_queue'] = r_serv_onion.scard('onion_domain_crawler_queue')
+    ####################################################################################
+
+    last_crawled = r_serv_onion.lrange('last_crawled_manual', 0 ,-1)
+    list_crawled = get_last_crawled_domains_metadata(last_crawled, date)

     crawler_metadata=[]
     all_onion_crawler = r_cache.smembers('all_crawler:onion')
@@ -125,16 +198,81 @@
         crawler_metadata.append({'crawler_info': crawler_info, 'crawling_domain': crawling_domain, 'status_info': status_info, 'status': status})

     date_string = '{}-{}-{}'.format(date[0:4], date[4:6], date[6:8])
-    return render_template("Crawler_Splash_onion.html", last_onions=list_onion, statDomains=statDomains,
+    return render_template("Crawler_Splash_onion.html", last_crawled=list_crawled, statDomains=statDomains,
                             crawler_metadata=crawler_metadata, date_from=date_string, date_to=date_string)

+@hiddenServices.route("/crawlers/blacklisted_onion", methods=['GET'])
+def blacklisted_onion():
+    blacklist_onion = request.args.get('blacklist_onion')
+    unblacklist_onion = request.args.get('unblacklist_onion')
+    if blacklist_onion is not None:
+        blacklist_onion = int(blacklist_onion)
+    if unblacklist_onion is not None:
+        unblacklist_onion = int(unblacklist_onion)
+    try:
+        page = int(request.args.get('page'))
+    except (TypeError, ValueError):
+        page = 1
+    if page <= 0:
+        page = 1
+    # ceiling division: 1000 entries per page, never fewer than one page
+    nb_page_max = max(1, (r_serv_onion.scard('blacklist_onion') + 999) // 1000)
+    if page > nb_page_max:
+        page = nb_page_max
+    start = 1000*(page - 1)
+    stop = 1000*page
+
+    list_blacklisted = list(r_serv_onion.smembers('blacklist_onion'))
+    list_blacklisted_1 = list_blacklisted[start:stop]
+    list_blacklisted_2 = list_blacklisted[stop:stop+1000]
+    return render_template("blacklisted_onion.html", list_blacklisted_1=list_blacklisted_1, list_blacklisted_2=list_blacklisted_2,
+                            page=page, nb_page_max=nb_page_max,
+                            blacklist_onion=blacklist_onion, unblacklist_onion=unblacklist_onion)
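The page-count arithmetic used by blacklisted_onion, pulled out on its own with a couple of sanity checks (the helper name is illustrative, not part of the patch):

    def get_nb_page_max(nb_blacklisted, page_size=1000):
        # ceiling division without a float round-trip; an empty set still has one page
        return max(1, (nb_blacklisted + page_size - 1) // page_size)

    assert get_nb_page_max(0) == 1
    assert get_nb_page_max(999) == 1
    assert get_nb_page_max(1000) == 1   # an exact multiple does not gain a phantom page
    assert get_nb_page_max(1001) == 2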

+@hiddenServices.route("/crawler/blacklist_onion", methods=['GET'])
+def blacklist_onion():
+    onion = request.args.get('onion')
+    try:
+        page = int(request.args.get('page'))
+    except (TypeError, ValueError):
+        page = 1
+    if is_valid_onion_domain(onion):
+        res = r_serv_onion.sadd('blacklist_onion', onion)
+        if res == 0:
+            return redirect(url_for('hiddenServices.blacklisted_onion', page=page, blacklist_onion=2))
+        else:
+            return redirect(url_for('hiddenServices.blacklisted_onion', page=page, blacklist_onion=1))
+    else:
+        return redirect(url_for('hiddenServices.blacklisted_onion', page=page, blacklist_onion=0))
+
+@hiddenServices.route("/crawler/unblacklist_onion", methods=['GET'])
+def unblacklist_onion():
+    onion = request.args.get('onion')
+    try:
+        page = int(request.args.get('page'))
+    except (TypeError, ValueError):
+        page = 1
+    if is_valid_onion_domain(onion):
+        res = r_serv_onion.srem('blacklist_onion', onion)
+        if res == 0:
+            return redirect(url_for('hiddenServices.blacklisted_onion', page=page, unblacklist_onion=2))
+        else:
+            return redirect(url_for('hiddenServices.blacklisted_onion', page=page, unblacklist_onion=1))
+    else:
+        return redirect(url_for('hiddenServices.blacklisted_onion', page=page, unblacklist_onion=0))
+
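The two routes above report their outcome back to the blacklisted_onion view through an integer query parameter rather than a flash message. Summarised for reference (the dict is purely illustrative; the codes are the ones used in the redirects):

    # meaning of ?blacklist_onion= / ?unblacklist_onion= on redirect
    BLACKLIST_RESULT = {
        0: 'Incorrect Onion address',   # failed is_valid_onion_domain()
        1: 'operation succeeded',       # SADD/SREM changed the set
        2: 'no-op',                     # already blacklisted / not blacklisted
    }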
 @hiddenServices.route("/hiddenServices/", methods=['GET'])
 def hiddenServices_page():
     last_onions = r_serv_onion.lrange('last_onion', 0 ,-1)
     list_onion = []

     now = datetime.datetime.now()
-    date = '{}{}{}'.format(now.strftime("%Y"), now.strftime("%m"), now.strftime("%d"))
+    date = now.strftime("%Y%m%d")
+
     statDomains = {}
     statDomains['domains_up'] = r_serv_onion.scard('onion_up:{}'.format(date))
     statDomains['domains_down'] = r_serv_onion.scard('onion_down:{}'.format(date))

diff --git a/var/www/modules/hiddenServices/templates/Crawler_Splash_onion.html b/var/www/modules/hiddenServices/templates/Crawler_Splash_onion.html
index 3518e455..1deed7a1 100644
--- a/var/www/modules/hiddenServices/templates/Crawler_Splash_onion.html
+++ b/var/www/modules/hiddenServices/templates/Crawler_Splash_onion.html
@@ -60,28 +60,27 @@
-
-
+
+
@@ -116,6 +115,10 @@
+ + + +
@@ -178,8 +181,30 @@
- - +
+
+ Crawlers Status +
+
+ + + {% for crawler in crawler_metadata %} + + + + + + {% endfor %} + +
+ {{crawler['crawler_info']}} + + {{crawler['crawling_domain']}} + + {{crawler['status_info']}} +
+
+
@@ -283,7 +308,7 @@ function refresh_list_crawled(){ } var newCell = newRow.insertCell(0); - newCell.innerHTML = ""+crawler['crawler_info']+""; + newCell.innerHTML = ""+crawler['crawler_info']+""; newCell = newRow.insertCell(1); newCell.innerHTML = ""+crawler['crawling_domain']+""; diff --git a/var/www/modules/hiddenServices/templates/blacklisted_onion.html b/var/www/modules/hiddenServices/templates/blacklisted_onion.html new file mode 100644 index 00000000..501a6c62 --- /dev/null +++ b/var/www/modules/hiddenServices/templates/blacklisted_onion.html @@ -0,0 +1,225 @@ + + + + + AIL-Framework + + + + + + + + + + + + + + + + + {% include 'nav_bar.html' %} + +
+
+ + + +
+ +
+
+
+ Blacklisted Onions +
+
+ +
+
+
+
+
Blacklist Onion
+ +
+          {% if blacklist_onion == 2 %}
+            This Onion is already blacklisted
+          {% else %}
+            Incorrect Onion address
+          {% endif %}
+
+
+ Onion Blacklisted +
+ +
+
+
+
+
+
+
Unblacklist Onion
+ +
+          {% if unblacklist_onion == 2 %}
+            This Onion is not blacklisted
+          {% else %}
+            Incorrect Onion address
+          {% endif %}
+
+
+ Onion Unblacklisted +
+ +
+
+
+
+ +
+
+ + + + + + + + + {% for onion in list_blacklisted_1 %} + + + + + {% endfor %} + +
Onion | Unblacklist Onion
{{onion}} + + + +
+
+
+ + + + + + + + + {% for onion in list_blacklisted_2 %} + + + + + {% endfor %} + +
Onion | Unblacklist Onion
{{onion}} + + + +
+
+
+
+
+ +
+ +
+ +
+ +
+
+
+ + + +
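To exercise the new endpoints end to end, something along these lines works against a local AIL web instance (a sketch: the base URL and the sample onion address are assumptions, the route paths are the ones added in this patch):

    import requests

    base = 'http://127.0.0.1:7000'        # default AIL Flask port, adjust as needed
    onion = '3g2upl4pq6kufc4m.onion'      # example v2 onion address

    # blacklist the onion, view the first page of the blacklist, then remove it again
    requests.get(base + '/crawler/blacklist_onion', params={'onion': onion, 'page': 1})
    r = requests.get(base + '/crawlers/blacklisted_onion', params={'page': 1})
    print(r.status_code)
    requests.get(base + '/crawler/unblacklist_onion', params={'onion': onion, 'page': 1})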