From 28a469cf5845243e282c398c7e554d1e1cd6e035 Mon Sep 17 00:00:00 2001
From: David Cruciani
Date: Tue, 12 Apr 2022 17:40:17 +0200
Subject: [PATCH] add: [ail] i2p

---
 bin/lib/Domain.py                                   |  4 +-
 bin/lib/crawlers.py                                 | 10 +++-
 bin/lib/objects/Domains.py                          |  2 +
 var/www/blueprints/crawler_splash.py                | 23 ++++++++-
 .../hiddenServices/Flask_hiddenServices.py          | 32 ++++++++++---
 .../templates/Crawler_auto.html                     | 34 ++++++++++++++
 .../dashboard_splash_crawler.html                   | 47 +++++++++++++++++--
 .../crawler_splash/domain_explorer.html             |  6 ++-
 .../settings_splash_crawler.html                    | 12 ++++-
 var/www/templates/crawler/menu_sidebar.html         | 12 +++++
 .../domains/block_domains_name_search.html          |  8 +++-
 .../domains/block_languages_search.html             |  8 +++-
 var/www/templates/domains/filter_domains.html       |  6 +++
 13 files changed, 185 insertions(+), 19 deletions(-)

diff --git a/bin/lib/Domain.py b/bin/lib/Domain.py
index d178cf29..28108577 100755
--- a/bin/lib/Domain.py
+++ b/bin/lib/Domain.py
@@ -80,7 +80,7 @@ def sanitize_domain_types(l_domain_type):
 
 ######## DOMAINS ########
 def get_all_domains_type():
-    return ['onion', 'regular']
+    return ['onion', 'i2p', 'regular']
 
 def get_all_domains_up(domain_type, r_list=True):
     '''
@@ -391,6 +391,8 @@ def api_get_domains_by_languages(domains_types, languages, domains_metadata=Fals
 def get_domain_type(domain):
     if str(domain).endswith('.onion'):
         return 'onion'
+    elif str(domain).endswith('.i2p'):
+        return 'i2p'
     else:
         return 'regular'
 
diff --git a/bin/lib/crawlers.py b/bin/lib/crawlers.py
index a2201c74..1ff3c76b 100755
--- a/bin/lib/crawlers.py
+++ b/bin/lib/crawlers.py
@@ -477,7 +477,7 @@ def is_crawler_activated():
     return activate_crawler == 'True'
 
 def get_crawler_all_types():
-    return ['onion', 'regular']
+    return ['onion', 'i2p', 'regular']
 
 def sanitize_crawler_types(l_crawler_types):
     all_crawler_types = get_crawler_all_types()
@@ -538,7 +538,7 @@ def get_stats_last_crawled_domains(crawler_types, date):
 def get_splash_crawler_latest_stats():
     now = datetime.now()
     date = now.strftime("%Y%m%d")
-    return get_stats_last_crawled_domains(['onion', 'regular'], date)
+    return get_stats_last_crawled_domains(['onion', 'i2p', 'regular'], date)
 
 def get_nb_crawlers_to_launch_by_splash_name(splash_name):
     res = r_serv_onion.hget('all_crawlers_to_launch', splash_name)
@@ -694,6 +694,8 @@ def create_crawler_task(url, screenshot=True, har=True, depth_limit=1, max_pages
     else:
         if tld == 'onion':
             crawler_type = 'onion'
+        elif tld == 'i2p':
+            crawler_type = 'i2p'
         else:
             crawler_type = 'regular'
 
@@ -968,6 +970,8 @@ def get_crawler_queue_types_by_splash_name(splash_name):
         if crawler_type == 'tor':
             all_domain_type.append('onion')
             all_domain_type.append('regular')
+        elif crawler_type == 'i2p':
+            all_domain_type.append('i2p')
         else:
             all_domain_type.append('regular')
     return all_domain_type
@@ -983,6 +987,8 @@ def get_crawler_type_by_url(url):
 
     if tld == 'onion':
         crawler_type = 'onion'
+    elif tld == 'i2p':
+        crawler_type = 'i2p'
     else:
         crawler_type = 'regular'
     return crawler_type
diff --git a/bin/lib/objects/Domains.py b/bin/lib/objects/Domains.py
index 9f82c6c8..eedc246e 100755
--- a/bin/lib/objects/Domains.py
+++ b/bin/lib/objects/Domains.py
@@ -38,6 +38,8 @@ class Domain(AbstractObject):
     def get_domain_type(self):
         if str(self.id).endswith('.onion'):
             return 'onion'
+        elif str(self.id).endswith('.i2p'):
+            return 'i2p'
         else:
             return 'regular'
 
diff --git a/var/www/blueprints/crawler_splash.py b/var/www/blueprints/crawler_splash.py
index 7c79023e..4129fdbf 100644
--- a/var/www/blueprints/crawler_splash.py
+++ b/var/www/blueprints/crawler_splash.py
@@ -170,6 +170,7 @@ def showDomain():
 @login_read_only
 def domains_explorer_post_filter():
     domain_onion = request.form.get('domain_onion_switch')
+    domain_i2p = request.form.get('domain_i2p_switch')
     domain_regular = request.form.get('domain_regular_switch')
     date_from = request.form.get('date_from')
     date_to = request.form.get('date_to')
@@ -181,11 +182,16 @@ def domains_explorer_post_filter():
         date_from = None
         date_to = None
 
-    if domain_onion and domain_regular:
+    if domain_onion and domain_regular and domain_i2p:
         if date_from and date_to:
             return redirect(url_for('crawler_splash.domains_explorer_all', date_from=date_from, date_to=date_to))
         else:
             return redirect(url_for('crawler_splash.domains_explorer_all'))
+    elif domain_i2p:
+        if date_from and date_to:
+            return redirect(url_for('crawler_splash.domains_explorer_i2p', date_from=date_from, date_to=date_to))
+        else:
+            return redirect(url_for('crawler_splash.domains_explorer_i2p'))
     elif domain_regular:
         if date_from and date_to:
             return redirect(url_for('crawler_splash.domains_explorer_web', date_from=date_from, date_to=date_to))
@@ -227,6 +233,21 @@ def domains_explorer_onion():
     dict_data = Domain.get_domains_up_by_filers('onion', page=page, date_from=date_from, date_to=date_to)
     return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label, domain_type='onion')
 
+@crawler_splash.route('/domains/explorer/i2p', methods=['GET'])
+@login_required
+@login_read_only
+def domains_explorer_i2p():
+    page = request.args.get('page')
+    date_from = request.args.get('date_from')
+    date_to = request.args.get('date_to')
+    try:
+        page = int(page)
+    except:
+        page = 1
+
+    dict_data = Domain.get_domains_up_by_filers('i2p', page=page, date_from=date_from, date_to=date_to)
+    return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label, domain_type='i2p')
+
 @crawler_splash.route('/domains/explorer/web', methods=['GET'])
 @login_required
 @login_read_only
diff --git a/var/www/modules/hiddenServices/Flask_hiddenServices.py b/var/www/modules/hiddenServices/Flask_hiddenServices.py
index 55a7abe4..8e323368 100644
--- a/var/www/modules/hiddenServices/Flask_hiddenServices.py
+++ b/var/www/modules/hiddenServices/Flask_hiddenServices.py
@@ -36,8 +36,8 @@ import crawlers
 hiddenServices = Blueprint('hiddenServices', __name__, template_folder='templates')
 
 faup = Faup()
-list_types=['onion', 'regular']
-dic_type_name={'onion':'Onion', 'regular':'Website'}
+list_types=['onion', 'i2p', 'regular']
+dic_type_name={'onion':'Onion', 'i2p':'I2P', 'regular':'Website'}
 
 # ============ FUNCTIONS ============
 
@@ -90,7 +90,7 @@ def is_valid_domain(domain):
         return False
 
 def is_valid_service_type(service_type):
-    accepted_service = ['onion', 'regular']
+    accepted_service = ['onion', 'i2p', 'regular']
     if service_type in accepted_service:
         return True
     else:
         return False
@@ -106,6 +106,8 @@ def get_domain_type(domain):
     type_id = domain.split(':')[-1]
     if type_id == 'onion':
         return 'onion'
+    elif type_id == 'i2p':
+        return 'i2p'
     else:
         return 'regular'
 
@@ -113,8 +115,11 @@ def get_type_domain(domain):
     if domain is None:
         type = 'regular'
     else:
-        if domain.rsplit('.', 1)[1] == 'onion':
+        loc = domain.rsplit('.', 1)[1]
+        if loc == 'onion':
             type = 'onion'
+        elif loc == 'i2p':
+            type = 'i2p'
         else:
             type = 'regular'
     return type
@@ -387,12 +392,18 @@ def auto_crawler():
     page = 1
 
     nb_auto_onion = r_serv_onion.scard('auto_crawler_url:onion')
+    nb_auto_i2p = r_serv_onion.scard('auto_crawler_url:i2p')
     nb_auto_regular = r_serv_onion.scard('auto_crawler_url:regular')
 
     if nb_auto_onion > nb_auto_regular:
-        nb_max = nb_auto_onion
-    else:
+        if nb_auto_onion > nb_auto_i2p:
+            nb_max = nb_auto_onion
+        else:
+            nb_max = nb_auto_i2p
+    elif nb_auto_regular > nb_auto_i2p:
         nb_max = nb_auto_regular
+    else:
+        nb_max = nb_auto_i2p
 
     nb_page_max = nb_max/(nb_element_to_display)
     if isinstance(nb_page_max, float):
@@ -412,6 +423,13 @@
     else:
         auto_crawler_domain_onions = list(r_serv_onion.smembers('auto_crawler_url:onion'))[start:stop]
 
+    if start > nb_auto_i2p:
+        auto_crawler_domain_i2p = []
+    elif stop > nb_auto_i2p:
+        auto_crawler_domain_i2p = list(r_serv_onion.smembers('auto_crawler_url:i2p'))[start:nb_auto_i2p]
+    else:
+        auto_crawler_domain_i2p = list(r_serv_onion.smembers('auto_crawler_url:i2p'))[start:stop]
+
     if start > nb_auto_regular:
         auto_crawler_domain_regular = []
     elif stop > nb_auto_regular:
         auto_crawler_domain_regular = list(r_serv_onion.smembers('auto_crawler_url:regular'))[start:nb_auto_regular]
     else:
         auto_crawler_domain_regular = list(r_serv_onion.smembers('auto_crawler_url:regular'))[start:stop]
 
     auto_crawler_domain_onions_metadata = get_last_crawled_domains_metadata(auto_crawler_domain_onions, '', type='onion', auto_mode=True)
+    auto_crawler_domain_i2p_metadata = get_last_crawled_domains_metadata(auto_crawler_domain_i2p, '', type='i2p', auto_mode=True)
     auto_crawler_domain_regular_metadata = get_last_crawled_domains_metadata(auto_crawler_domain_regular, '', type='regular', auto_mode=True)
 
     return render_template("Crawler_auto.html", page=page, nb_page_max=nb_page_max,
                                 last_domains=last_domains,
                                 is_manager_connected=crawlers.get_splash_manager_connection_metadata(),
                                 auto_crawler_domain_onions_metadata=auto_crawler_domain_onions_metadata,
+                                auto_crawler_domain_i2p_metadata=auto_crawler_domain_i2p_metadata,
                                 auto_crawler_domain_regular_metadata=auto_crawler_domain_regular_metadata)
 
 @hiddenServices.route("/crawlers/remove_auto_crawler", methods=['GET'])
diff --git a/var/www/modules/hiddenServices/templates/Crawler_auto.html b/var/www/modules/hiddenServices/templates/Crawler_auto.html
index 87b74c36..befed709 100644
--- a/var/www/modules/hiddenServices/templates/Crawler_auto.html
+++ b/var/www/modules/hiddenServices/templates/Crawler_auto.html
@@ -95,6 +95,40 @@
+
+            I2P Url
+            Next Check
+
+        {% for metadata_domain in auto_crawler_domain_i2p_metadata %}
+            {{ metadata_domain['url'] }}
+            {{metadata_domain['epoch']}}
+            {{metadata_domain['status_text']}}
+        {% endfor %}
+
diff --git a/var/www/templates/crawler/crawler_splash/dashboard_splash_crawler.html b/var/www/templates/crawler/crawler_splash/dashboard_splash_crawler.html
index 0a80d08c..28d8d50d 100644
--- a/var/www/templates/crawler/crawler_splash/dashboard_splash_crawler.html
+++ b/var/www/templates/crawler/crawler_splash/dashboard_splash_crawler.html
@@ -47,6 +47,25 @@
+
+
+
+
+
@@ -78,7 +97,11 @@
             {%if splash_crawler['type']=='onion'%}
             {%else%}
-
+            {%if splash_crawler['type']=='i2p'%}
+
+            {%else%}
+
+            {%endif%}
             {%endif%}
@@ -191,7 +195,11 @@
             {%if all_proxies[proxy_name]['crawler_type']=='tor'%}
             {%else%}
-
+            {%if all_proxies[proxy_name]['crawler_type']=='i2p'%}
+
+            {%else%}
+
+            {%endif%}
             {%endif%}
             {{all_proxies[proxy_name]['crawler_type']}}
diff --git a/var/www/templates/crawler/menu_sidebar.html b/var/www/templates/crawler/menu_sidebar.html
index d3ed9170..90a05661 100644
--- a/var/www/templates/crawler/menu_sidebar.html
+++ b/var/www/templates/crawler/menu_sidebar.html
@@ -25,6 +25,12 @@
             Onion Crawler
+
+
@@ -96,14 +119,21 @@
-
+
-
+
+
             Web Domain Explorer
@@ -170,6 +200,11 @@ function refresh_crawler_status(){
     $('#stat_onion_total').text(data.splash_crawlers_latest_stats['onion']['total']);
     $('#stat_onion_queue').text(data.splash_crawlers_latest_stats['onion']['domains_queue']);
 
+    $('#stat_i2p_domain_up').text(data.splash_crawlers_latest_stats['i2p']['domains_up']);
+    $('#stat_i2p_domain_down').text(data.splash_crawlers_latest_stats['i2p']['domains_down']);
+    $('#stat_i2p_total').text(data.splash_crawlers_latest_stats['i2p']['total']);
+    $('#stat_i2p_queue').text(data.splash_crawlers_latest_stats['i2p']['domains_queue']);
+
     $('#stat_regular_domain_up').text(data.splash_crawlers_latest_stats['regular']['domains_up']);
     $('#stat_regular_domain_down').text(data.splash_crawlers_latest_stats['regular']['domains_down']);
     $('#stat_regular_total').text(data.splash_crawlers_latest_stats['regular']['total']);
@@ -194,7 +229,11 @@ function refresh_crawler_status(){
       if(crawler['type'] === 'onion'){
         icon_t = 'fas fa-user-secret';
       } else {
-        icon_t = 'fab fa-html5';
+        if(crawler['type'] === 'i2p'){
+          icon_t = 'fas fa-ghost';
+        } else {
+          icon_t = 'fab fa-html5';
+        }
       }
 
       var newCell = newRow.insertCell(0);
diff --git a/var/www/templates/crawler/crawler_splash/domain_explorer.html b/var/www/templates/crawler/crawler_splash/domain_explorer.html
index 629cd090..39c8d406 100644
--- a/var/www/templates/crawler/crawler_splash/domain_explorer.html
+++ b/var/www/templates/crawler/crawler_splash/domain_explorer.html
@@ -81,7 +81,11 @@
 {%if domain_type=='onion'%}
   {% set target_url=url_for('crawler_splash.domains_explorer_onion') + "?domain_type=onion" %}
 {%else%}
-  {% set target_url=url_for('crawler_splash.domains_explorer_web') + "?domain_type=regular" %}
+  {%if domain_type=='i2p'%}
+    {% set target_url=url_for('crawler_splash.domains_explorer_i2p') + "?domain_type=i2p" %}
+  {%else%}
+    {% set target_url=url_for('crawler_splash.domains_explorer_web') + "?domain_type=regular" %}
+  {%endif%}
 {%endif%}
 {%if 'date_from' in dict_data %}
   {% set target_url = target_url + '&date_from=' + dict_data['date_from'] + '&date_to=' + dict_data['date_to'] %}
diff --git a/var/www/templates/crawler/crawler_splash/settings_splash_crawler.html b/var/www/templates/crawler/crawler_splash/settings_splash_crawler.html
index 739350e4..2c9e9d58 100644
--- a/var/www/templates/crawler/crawler_splash/settings_splash_crawler.html
+++ b/var/www/templates/crawler/crawler_splash/settings_splash_crawler.html
@@ -126,7 +126,11 @@
       {%if all_splash[splash_name]['type']=='tor'%}
       {%else%}
-
+      {%if all_splash[splash_name]['type']=='i2p'%}
+
+      {%else%}
+
+      {%endif%}
       {%endif%}
       {{all_splash[splash_name]['type']}}
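
Editorial note, not part of the commit: the patch threads the same three-way domain-type dispatch through bin/lib/Domain.py, bin/lib/objects/Domains.py and bin/lib/crawlers.py ('.onion' maps to 'onion', '.i2p' to 'i2p', anything else to 'regular'). A minimal standalone sketch of that dispatch follows; the helper name is hypothetical and used only for illustration.

# Hypothetical helper mirroring the logic added by this patch.
def guess_domain_type(domain):
    domain = str(domain)
    if domain.endswith('.onion'):
        return 'onion'
    elif domain.endswith('.i2p'):
        return 'i2p'
    else:
        return 'regular'

assert guess_domain_type('example.onion') == 'onion'
assert guess_domain_type('example.i2p') == 'i2p'
assert guess_domain_type('example.com') == 'regular'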