diff --git a/var/www/modules/hiddenServices/Flask_hiddenServices.py b/var/www/modules/hiddenServices/Flask_hiddenServices.py
index dee511dc..64bbdafb 100644
--- a/var/www/modules/hiddenServices/Flask_hiddenServices.py
+++ b/var/www/modules/hiddenServices/Flask_hiddenServices.py
@@ -30,6 +30,8 @@ PASTES_FOLDER = Flask_config.PASTES_FOLDER
 hiddenServices = Blueprint('hiddenServices', __name__, template_folder='templates')

 faup = Faup()
+list_types=['onion', 'regular']
+dic_type_name={'onion':'Onion', 'regular':'Website'}

 # ============ FUNCTIONS ============
 def one():
@@ -72,10 +74,10 @@ def unpack_paste_tags(p_tags):
         l_tags.append( (tag, complete_tag) )
     return l_tags

-def is_valid_onion_domain(onion_domain):
-    faup.decode(onion_domain)
+def is_valid_domain(domain):
+    faup.decode(domain)
     domain_unpack = faup.get()
-    if domain_unpack['tld']==b'onion' and domain_unpack['scheme'] is None and domain_unpack['port'] is None and domain_unpack['query_string'] is None:
+    if domain_unpack['tld'] is not None and domain_unpack['scheme'] is None and domain_unpack['port'] is None and domain_unpack['query_string'] is None:
         return True
     else:
         return False
@@ -93,6 +95,18 @@ def get_domain_type(domain):
     else:
         return 'regular'

+def get_last_domains_crawled(type):
+    return r_serv_onion.lrange('last_{}'.format(type), 0 ,-1)
+
+def get_stats_last_crawled_domains(type, date):
+    statDomains = {}
+    statDomains['domains_up'] = r_serv_onion.scard('{}_up:{}'.format(type, date))
+    statDomains['domains_down'] = r_serv_onion.scard('{}_down:{}'.format(type, date))
+    statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down']
+    statDomains['domains_queue'] = r_serv_onion.scard('{}_crawler_queue'.format(type))
+    statDomains['domains_queue'] += r_serv_onion.scard('{}_crawler_priority_queue'.format(type))
+    return statDomains
+
 def get_last_crawled_domains_metadata(list_domains_crawled, date, type=None):
     list_crawled_metadata = []
     for domain_epoch in list_domains_crawled:
@@ -153,6 +167,10 @@ def send_url_to_crawl_in_queue(mode, service_type, url):

 # ============= ROUTES ==============

+@hiddenServices.route("/crawlers/", methods=['GET'])
+def dashboard():
+    return render_template("Crawler_dashboard.html")
+
 @hiddenServices.route("/hiddenServices/2", methods=['GET'])
 def hiddenServices_page_test():
     return render_template("Crawler_index.html")
@@ -163,124 +181,115 @@ def manual():

 @hiddenServices.route("/crawlers/crawler_splash_onion", methods=['GET'])
 def crawler_splash_onion():
-    last_onions = r_serv_onion.lrange('last_onion', 0 ,-1)
+    type = 'onion'
+    last_onions = get_last_domains_crawled(type)
     list_onion = []

     now = datetime.datetime.now()
     date = now.strftime("%Y%m%d")
-    statDomains = {}
-    statDomains['domains_up'] = r_serv_onion.scard('onion_up:{}'.format(date))
-    statDomains['domains_down'] = r_serv_onion.scard('onion_down:{}'.format(date))
-    statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down']
-    statDomains['domains_queue'] = r_serv_onion.scard('onion_domain_crawler_queue')
+    statDomains = get_stats_last_crawled_domains(type, date)

-    list_onion = get_last_crawled_domains_metadata(last_onions, date, type='onion')
-    crawler_metadata = get_crawler_splash_status('onion')
+    list_onion = get_last_crawled_domains_metadata(last_onions, date, type=type)
+    crawler_metadata = get_crawler_splash_status(type)

     date_string = '{}-{}-{}'.format(date[0:4], date[4:6], date[6:8])
     return render_template("Crawler_Splash_onion.html", last_onions=list_onion, statDomains=statDomains,
                             crawler_metadata=crawler_metadata, date_from=date_string, date_to=date_string)

-@hiddenServices.route("/crawlers/manual_splash_crawler", methods=['GET'])
-def manual_splash_crawler():
+@hiddenServices.route("/crawlers/crawler_splash_regular", methods=['GET'])
+def crawler_splash_regular():
+    type = 'regular'
+    type_name = dic_type_name[type]
+    list_domains = []
     now = datetime.datetime.now()
-    date = '{}{}{}'.format(now.strftime("%Y"), now.strftime("%m"), now.strftime("%d"))
-
-    # Stats
-    # user request == CHECK
-    # preconf crawlers == ?????
-    #################################################################################
-    statDomains = {}
-    #statDomains['domains_up'] = r_serv_onion.scard('onion_up:{}'.format(date))
-    #statDomains['domains_down'] = r_serv_onion.scard('onion_down:{}'.format(date))
-    #statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down']
-    #statDomains['domains_queue'] = r_serv_onion.scard('onion_domain_crawler_queue')
-    ####################################################################################
-
-    last_crawled = r_serv_onion.lrange('last_crawled_manual', 0 ,-1)
-    list_crawled = get_last_crawled_domains_metadata(last_crawled)
-
-    crawler_metadata=[]
-    all_onion_crawler = r_cache.smembers('all_crawler:onion')
-    for crawler in all_onion_crawler:
-        crawling_domain = r_cache.hget('metadata_crawler:{}'.format(crawler), 'crawling_domain')
-        started_time = r_cache.hget('metadata_crawler:{}'.format(crawler), 'started_time')
-        status_info = r_cache.hget('metadata_crawler:{}'.format(crawler), 'status')
-        crawler_info = '{} - {}'.format(crawler, started_time)
-        if status_info=='Waiting' or status_info=='Crawling':
-            status=True
-        else:
-            status=False
-        crawler_metadata.append({'crawler_info': crawler_info, 'crawling_domain': crawling_domain, 'status_info': status_info, 'status': status})
-
+    date = now.strftime("%Y%m%d")
     date_string = '{}-{}-{}'.format(date[0:4], date[4:6], date[6:8])
-    return render_template("Crawler_Splash_onion.html", last_crawled=list_crawled, statDomains=statDomains,
+
+    statDomains = get_stats_last_crawled_domains(type, date)
+
+    list_domains = get_last_crawled_domains_metadata(get_last_domains_crawled(type), date, type=type)
+    crawler_metadata = get_crawler_splash_status(type)
+
+    return render_template("Crawler_Splash_last_by_type.html", type=type, type_name=type_name,
+                            last_domains=list_domains, statDomains=statDomains,
                             crawler_metadata=crawler_metadata, date_from=date_string, date_to=date_string)

-@hiddenServices.route("/crawlers/blacklisted_onion", methods=['GET'])
-def blacklisted_onion():
-    blacklist_onion = request.args.get('blacklist_onion')
-    unblacklist_onion = request.args.get('unblacklist_onion')
-    if blacklist_onion is not None:
-        blacklist_onion = int(blacklist_onion)
-    if unblacklist_onion is not None:
-        unblacklist_onion = int(unblacklist_onion)
-    try:
-        page = int(request.args.get('page'))
-    except:
-        page = 1
-    if page <= 0:
-        page = 1
-    nb_page_max = r_serv_onion.scard('blacklist_onion')/(1000)
-    if isinstance(nb_page_max, float):
-        nb_page_max = int(nb_page_max)+1
-    if page > nb_page_max:
-        page = nb_page_max
-    start = 1000*(page -1)
-    stop = 1000*page
+@hiddenServices.route("/crawlers/blacklisted_domains", methods=['GET'])
+def blacklisted_domains():
+    blacklist_domain = request.args.get('blacklist_domain')
+    unblacklist_domain = request.args.get('unblacklist_domain')
+    type = request.args.get('type')
+    if type in list_types:
+        type_name = dic_type_name[type]
+        if blacklist_domain is not None:
+            blacklist_domain = int(blacklist_domain)
+        if unblacklist_domain is not None:
+            unblacklist_domain = int(unblacklist_domain)
+        try:
+            page = int(request.args.get('page'))
+        except:
+            page = 1
+        if page <= 0:
+            page = 1
+        nb_blacklisted = r_serv_onion.scard('blacklist_{}'.format(type))
+        # nb_page_max = ceil(nb_blacklisted / 1000), with at least one page
+        nb_page_max = max(1, -(-nb_blacklisted // 1000))
+        if page > nb_page_max:
+            page = nb_page_max
+        start = 1000*(page -1)
+        stop = 1000*page

-    list_blacklisted = list(r_serv_onion.smembers('blacklist_onion'))
-    list_blacklisted_1 = list_blacklisted[start:stop]
-    list_blacklisted_2 = list_blacklisted[stop:stop+1000]
-    return render_template("blacklisted_onion.html", list_blacklisted_1=list_blacklisted_1, list_blacklisted_2=list_blacklisted_2,
-                            page=page, nb_page_max=nb_page_max,
-                            blacklist_onion=blacklist_onion, unblacklist_onion=unblacklist_onion)
-
-@hiddenServices.route("/crawler/blacklist_onion", methods=['GET'])
-def blacklist_onion():
-    onion = request.args.get('onion')
-    try:
-        page = int(request.args.get('page'))
-    except:
-        page = 1
-    if is_valid_onion_domain(onion):
-        res = r_serv_onion.sadd('blacklist_onion', onion)
-        print(res)
-        if page:
-            if res == 0:
-                return redirect(url_for('hiddenServices.blacklisted_onion', page=page, blacklist_onion=2))
-            else:
-                return redirect(url_for('hiddenServices.blacklisted_onion', page=page, blacklist_onion=1))
+        list_blacklisted = list(r_serv_onion.smembers('blacklist_{}'.format(type)))
+        list_blacklisted_1 = list_blacklisted[start:stop]
+        list_blacklisted_2 = list_blacklisted[stop:stop+1000]
+        return render_template("blacklisted_domains.html", list_blacklisted_1=list_blacklisted_1, list_blacklisted_2=list_blacklisted_2,
+                                type=type, type_name=type_name, page=page, nb_page_max=nb_page_max,
+                                blacklist_domain=blacklist_domain, unblacklist_domain=unblacklist_domain)
     else:
-        return redirect(url_for('hiddenServices.blacklisted_onion', page=page, blacklist_onion=0))
+        return 'Incorrect type'

-@hiddenServices.route("/crawler/unblacklist_onion", methods=['GET'])
-def unblacklist_onion():
-    onion = request.args.get('onion')
+@hiddenServices.route("/crawler/blacklist_domain", methods=['GET'])
+def blacklist_domain():
+    domain = request.args.get('domain')
+    type = request.args.get('type')
     try:
         page = int(request.args.get('page'))
     except:
         page = 1
-    if is_valid_onion_domain(onion):
-        res = r_serv_onion.srem('blacklist_onion', onion)
-        if page:
-            if res == 0:
-                return redirect(url_for('hiddenServices.blacklisted_onion', page=page, unblacklist_onion=2))
-            else:
-                return redirect(url_for('hiddenServices.blacklisted_onion', page=page, unblacklist_onion=1))
+    if type in list_types:
+        if is_valid_domain(domain):
+            res = r_serv_onion.sadd('blacklist_{}'.format(type), domain)
+            if page:
+                if res == 0:
+                    return redirect(url_for('hiddenServices.blacklisted_domains', page=page, type=type, blacklist_domain=2))
+                else:
+                    return redirect(url_for('hiddenServices.blacklisted_domains', page=page, type=type, blacklist_domain=1))
+        else:
+            return redirect(url_for('hiddenServices.blacklisted_domains', page=page, type=type, blacklist_domain=0))
     else:
-        return redirect(url_for('hiddenServices.blacklisted_onion', page=page, unblacklist_onion=0))
+        return 'Incorrect type'
+
+@hiddenServices.route("/crawler/unblacklist_domain", methods=['GET'])
+def unblacklist_domain():
+    domain = request.args.get('domain')
+    type = request.args.get('type')
+    try:
+        page = int(request.args.get('page'))
+    except:
+        page = 1
+    if type in list_types:
+        if is_valid_domain(domain):
+            res = r_serv_onion.srem('blacklist_{}'.format(type), domain)
+            if page:
+                if res == 0:
+                    return redirect(url_for('hiddenServices.blacklisted_domains', page=page, type=type, unblacklist_domain=2))
+                else:
+                    return redirect(url_for('hiddenServices.blacklisted_domains', page=page, type=type, unblacklist_domain=1))
+        else:
+            return redirect(url_for('hiddenServices.blacklisted_domains', page=page, type=type, unblacklist_domain=0))
+    else:
+        return 'Incorrect type'

 @hiddenServices.route("/crawlers/create_spider_splash", methods=['POST'])
 def create_spider_splash():
@@ -619,23 +628,26 @@ def domain_crawled_7days_json():
     return jsonify(json_domain_stats)

-@hiddenServices.route('/hiddenServices/automatic_onion_crawler_json')
-def automatic_onion_crawler_json():
+@hiddenServices.route('/hiddenServices/domain_crawled_by_type_json')
+def domain_crawled_by_type_json():
     current_date = request.args.get('date')
-    type = 'onion'
+    type = request.args.get('type')
+    if type in list_types:
-    num_day_type = 7
-    date_range = get_date_range(num_day_type)
-    range_decoder = []
-    for date in date_range:
-        day_crawled = {}
-        day_crawled['date']= date[0:4] + '-' + date[4:6] + '-' + date[6:8]
-        day_crawled['UP']= nb_domain_up = r_serv_onion.scard('{}_up:{}'.format(type, date))
-        day_crawled['DOWN']= nb_domain_up = r_serv_onion.scard('{}_up:{}'.format(type, date))
-        range_decoder.append(day_crawled)
+        num_day_type = 7
+        date_range = get_date_range(num_day_type)
+        range_decoder = []
+        for date in date_range:
+            day_crawled = {}
+            day_crawled['date'] = date[0:4] + '-' + date[4:6] + '-' + date[6:8]
+            day_crawled['UP'] = r_serv_onion.scard('{}_up:{}'.format(type, date))
+            day_crawled['DOWN'] = r_serv_onion.scard('{}_down:{}'.format(type, date))
+            range_decoder.append(day_crawled)

-    return jsonify(range_decoder)
+        return jsonify(range_decoder)
+    else:
+        return jsonify('Incorrect type')

 # ========= REGISTRATION =========
 app.register_blueprint(hiddenServices, url_prefix=baseUrl)
diff --git a/var/www/modules/hiddenServices/templates/Crawler_Splash_last_by_type.html b/var/www/modules/hiddenServices/templates/Crawler_Splash_last_by_type.html
new file mode 100644
index 00000000..c59bc9ab
--- /dev/null
+++ b/var/www/modules/hiddenServices/templates/Crawler_Splash_last_by_type.html
@@ -0,0 +1,476 @@
+
+
+
+    AIL-Framework
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+    {% include 'nav_bar.html' %}
+
+
+ + {% include 'crawler/menu_sidebar.html' %} + +
+ +
+
+ +
+ + + + + + + + + + + {% for metadata_domain in last_domains %} + + + + + + + {% endfor %} + +
DomainFirst SeenLast CheckStatus
{{ metadata_domain['domain'] }}{{'{}/{}/{}'.format(metadata_domain['first_seen'][0:4], metadata_domain['first_seen'][4:6], metadata_domain['first_seen'][6:8])}}{{'{}/{}/{}'.format(metadata_domain['last_check'][0:4], metadata_domain['last_check'][4:6], metadata_domain['last_check'][6:8])}}
+ + {{metadata_domain['status_text']}} +
+
+
+ + + + + +
+
+ +
+
+
+
+ {{ statDomains['domains_up'] }} UP + {{ statDomains['domains_down'] }} DOWN +
+
+ {{ statDomains['total'] }} Crawled + {{ statDomains['domains_queue'] }} Queue +
+
+
+
+
Select domains by date range:
+

Show the domains that were crawled between the two selected dates.

+
+
+
+
+
+ +
+
+
+ +
+
+
+
+ + +
+
+ + +
+
+ + +
+
+
+ + +
+
+ +
+
+ +
+
Crawler Status
+
+ + + {% for crawler in crawler_metadata %} + + + + + + {% endfor %} + +
+ {{crawler['crawler_info']}} + + {{crawler['crawling_domain']}} + + {{crawler['status_info']}} +
+
+
+
+
+ +
+ +
+
+ + + + + + + + + + + diff --git a/var/www/modules/hiddenServices/templates/Crawler_Splash_manual.html b/var/www/modules/hiddenServices/templates/Crawler_Splash_manual.html index 9647d733..bc2e6024 100644 --- a/var/www/modules/hiddenServices/templates/Crawler_Splash_manual.html +++ b/var/www/modules/hiddenServices/templates/Crawler_Splash_manual.html @@ -26,40 +26,9 @@
-
+ {% include 'crawler/menu_sidebar.html' %} - - -
- -
+
@@ -162,6 +131,7 @@ var chart = {}; $(document).ready(function(){ $("#page-Crawler").addClass("active"); + $("#nav_manual_crawler").addClass("active"); manual_crawler_input_controler(); $('#crawler_type').change(function () { @@ -169,6 +139,20 @@ $(document).ready(function(){ }); }); +function toggle_sidebar(){ + if($('#nav_menu').is(':visible')){ + $('#nav_menu').hide(); + $('#side_menu').removeClass('border-right') + $('#side_menu').removeClass('col-lg-2') + $('#core_content').removeClass('col-lg-10') + }else{ + $('#nav_menu').show(); + $('#side_menu').addClass('border-right') + $('#side_menu').addClass('col-lg-2') + $('#core_content').addClass('col-lg-10') + } +} + function manual_crawler_input_controler() { if($('#crawler_type').is(':checked')){ $("#crawler_epoch_input").show(); diff --git a/var/www/modules/hiddenServices/templates/Crawler_Splash_onion.html b/var/www/modules/hiddenServices/templates/Crawler_Splash_onion.html index e6c4f697..1e1a1b7a 100644 --- a/var/www/modules/hiddenServices/templates/Crawler_Splash_onion.html +++ b/var/www/modules/hiddenServices/templates/Crawler_Splash_onion.html @@ -50,42 +50,7 @@
-
- - - - -
+ {% include 'crawler/menu_sidebar.html' %}
@@ -224,6 +189,7 @@ var chart = {}; $(document).ready(function(){ $("#page-Crawler").addClass("active"); + $("#nav_onion_crawler").addClass("active"); $('#date-range-from').dateRangePicker({ separator : ' to ', diff --git a/var/www/modules/hiddenServices/templates/Crawler_dashboard.html b/var/www/modules/hiddenServices/templates/Crawler_dashboard.html new file mode 100644 index 00000000..faccf26a --- /dev/null +++ b/var/www/modules/hiddenServices/templates/Crawler_dashboard.html @@ -0,0 +1,82 @@ + + + + + AIL-Framework + + + + + + + + + + + + + + + {% include 'nav_bar.html' %} + +
+
+ + + + + +
+
+					--------------
+
+
+
+					--------------
+
+				
+
+ +
+
+ + + + + diff --git a/var/www/modules/hiddenServices/templates/blacklisted_onion.html b/var/www/modules/hiddenServices/templates/blacklisted_domains.html similarity index 56% rename from var/www/modules/hiddenServices/templates/blacklisted_onion.html rename to var/www/modules/hiddenServices/templates/blacklisted_domains.html index 501a6c62..c1a28406 100644 --- a/var/www/modules/hiddenServices/templates/blacklisted_onion.html +++ b/var/www/modules/hiddenServices/templates/blacklisted_domains.html @@ -24,45 +24,14 @@
-
+ {% include 'crawler/menu_sidebar.html' %} - - -
- -
+
- Blacklisted Onions + Blacklisted {{type_name}}s
@@ -70,38 +39,38 @@
-
Blacklist Onion
- +
Blacklist {{type_name}}
+
- {%if blacklist_onion==2 %} - This Onion is already blacklisted + {%if blacklist_domain==2 %} + This {{type_name}} is already blacklisted {% else %} - Incorrect Onion address + Incorrect {{type_name}} address {% endif %}
- Onion Blacklisted + {{type_name}} Blacklisted
- +
-
Unblacklist Onion
- +
Unblacklist {{type_name}}
+
- {%if unblacklist_onion==2 %} - This Onion is not blacklisted + {%if unblacklist_domain==2 %} + This {{type_name}} is not blacklisted {% else %} - Incorrect Onion address + Incorrect {{type_name}} address {% endif %}
- Onion Unblacklisted + {{type_name}} Unblacklisted
- +
@@ -112,17 +81,17 @@ - - + + - {% for onion in list_blacklisted_1 %} + {% for domain in list_blacklisted_1 %} - + @@ -134,17 +103,17 @@
OnionUnblacklist Onion{{type_name}}Unblacklist {{type_name}}
{{onion}}{{domain}} - - + +
- - + + - {% for onion in list_blacklisted_2 %} + {% for domain in list_blacklisted_2 %} - + @@ -162,34 +131,34 @@ @@ -220,6 +189,20 @@ $(document).ready(function(){ "order": [[ 0, "asc" ]] } ); - + $("#page-Crawler").addClass("active"); }); + +function toggle_sidebar(){ + if($('#nav_menu').is(':visible')){ + $('#nav_menu').hide(); + $('#side_menu').removeClass('border-right') + $('#side_menu').removeClass('col-lg-2') + $('#core_content').removeClass('col-lg-10') + }else{ + $('#nav_menu').show(); + $('#side_menu').addClass('border-right') + $('#side_menu').addClass('col-lg-2') + $('#core_content').addClass('col-lg-10') + } +} diff --git a/var/www/templates/crawler/menu_sidebar.html b/var/www/templates/crawler/menu_sidebar.html new file mode 100644 index 00000000..fd2f7c45 --- /dev/null +++ b/var/www/templates/crawler/menu_sidebar.html @@ -0,0 +1,48 @@ +
+ + + + +
OnionUnblacklist Onion{{type_name}}Unblacklist {{type_name}}
{{onion}}{{domain}} - - + +