From 65f6ee4911126108cdc37e8ef948627e9677e290 Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Tue, 18 Aug 2020 19:10:38 +0200
Subject: [PATCH] chg: [crawlers manager] show setings

---
 bin/lib/crawlers.py                          |  57 ++++-
 var/www/blueprints/crawler_splash.py         |  15 +-
 .../settings_splash_crawler.html             | 216 ++++++++++++++++++
 var/www/templates/crawler/menu_sidebar.html  |   2 +-
 4 files changed, 285 insertions(+), 5 deletions(-)
 create mode 100644 var/www/templates/crawler/crawler_splash/settings_splash_crawler.html

diff --git a/bin/lib/crawlers.py b/bin/lib/crawlers.py
index 6448843d..13c8759a 100755
--- a/bin/lib/crawlers.py
+++ b/bin/lib/crawlers.py
@@ -667,6 +667,13 @@ def get_splash_manager_url(reload=False): # TODO: add in db config
 def get_splash_api_key(reload=False): # TODO: add in db config
     return splash_api_key
 
+def get_hidden_splash_api_key(): # TODO: add in db config
+    key = get_splash_api_key()
+    if len(key)==41:
+        return f'{key[:4]}*********************************{key[-4:]}'
+    else:
+        return None
+
 def get_splash_url_from_manager_url(splash_manager_url, splash_port):
     url = urlparse(splash_manager_url)
     host = url.netloc.split(':', 1)[0]
@@ -780,7 +787,23 @@ def get_splash_name_by_url(splash_url):
 def get_splash_crawler_type(splash_name):
     return r_serv_onion.hget('splash:metadata:{}'.format(splash_name), 'crawler_type')
 
-def get_all_splash_by_proxy(proxy_name):
+def get_splash_crawler_description(splash_name):
+    return r_serv_onion.hget('splash:metadata:{}'.format(splash_name), 'description')
+
+def get_splash_crawler_metadata(splash_name):
+    dict_splash = {}
+    dict_splash['proxy'] = get_splash_proxy(splash_name)
+    dict_splash['type'] = get_splash_crawler_type(splash_name)
+    dict_splash['description'] = get_splash_crawler_description(splash_name)
+    return dict_splash
+
+def get_all_splash_crawler_metadata():
+    dict_splash = {}
+    for splash_name in get_all_splash():
+        dict_splash[splash_name] = get_splash_crawler_metadata(splash_name)
+    return dict_splash
+
+def get_all_splash_by_proxy(proxy_name, r_list=False):
     res = r_serv_onion.smembers('proxy:splash:{}'.format(proxy_name))
     if res:
         if r_list:
@@ -816,6 +839,36 @@ def delete_all_proxies():
     for proxy_name in get_all_proxies():
         delete_proxy(proxy_name)
 
+def get_proxy_host(proxy_name):
+    return r_serv_onion.hget('proxy:metadata:{}'.format(proxy_name), 'host')
+
+def get_proxy_port(proxy_name):
+    return r_serv_onion.hget('proxy:metadata:{}'.format(proxy_name), 'port')
+
+def get_proxy_type(proxy_name):
+    return r_serv_onion.hget('proxy:metadata:{}'.format(proxy_name), 'type')
+
+def get_proxy_crawler_type(proxy_name):
+    return r_serv_onion.hget('proxy:metadata:{}'.format(proxy_name), 'crawler_type')
+
+def get_proxy_description(proxy_name):
+    return r_serv_onion.hget('proxy:metadata:{}'.format(proxy_name), 'description')
+
+def get_proxy_metadata(proxy_name):
+    meta_dict = {}
+    meta_dict['host'] = get_proxy_host(proxy_name)
+    meta_dict['port'] = get_proxy_port(proxy_name)
+    meta_dict['type'] = get_proxy_type(proxy_name)
+    meta_dict['crawler_type'] = get_proxy_crawler_type(proxy_name)
+    meta_dict['description'] = get_proxy_description(proxy_name)
+    return meta_dict
+
+def get_all_proxies_metadata():
+    all_proxy_dict = {}
+    for proxy_name in get_all_proxies():
+        all_proxy_dict[proxy_name] = get_proxy_metadata(proxy_name)
+    return all_proxy_dict
+
 def set_proxy_used_in_discovery(proxy_name, value):
     r_serv_onion.hset('splash:metadata:{}'.format(splash_name), 'discovery_queue', value)
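As a quick illustration of the hunk above, here is a minimal standalone sketch (not AIL code) of the masking rule in get_hidden_splash_api_key(): a 41-character key keeps its first and last four characters, the middle is replaced by asterisks, and any other length yields None. The helper name and sample keys below are invented for illustration.

```python
# Minimal sketch (not AIL code) of the masking rule used by get_hidden_splash_api_key().
# The patch writes the asterisks as a literal string; 33 of them keep the masked
# value at the original 41-character length.
def hide_api_key(key):
    if len(key) == 41:
        return f'{key[:4]}{"*" * 33}{key[-4:]}'
    return None

print(hide_api_key('A' * 41))     # 'AAAA', then 33 asterisks, then 'AAAA'
print(hide_api_key('too-short'))  # None
```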
@@ -851,6 +904,7 @@ def load_all_splash_containers():
             r_serv_onion.set('splash:map:url:name:{}'.format(splash_url), splash_name)
 
 def load_all_proxy():
+    delete_all_proxies()
     all_proxies = get_all_splash_manager_proxies()
     for proxy_name in all_proxies:
         proxy_dict = all_proxies[proxy_name]
@@ -861,6 +915,7 @@ def load_all_proxy():
         description = all_proxies[proxy_name].get('description', None)
         if description:
             r_serv_onion.hset('proxy:metadata:{}'.format(proxy_name), 'description', description)
+        r_serv_onion.sadd('all_proxy', proxy_name)
 
 def reload_splash_and_proxies_list():
     if ping_splash_manager():
diff --git a/var/www/blueprints/crawler_splash.py b/var/www/blueprints/crawler_splash.py
index 5d9324ed..8bc9c1b1 100644
--- a/var/www/blueprints/crawler_splash.py
+++ b/var/www/blueprints/crawler_splash.py
@@ -428,11 +428,20 @@ def crawler_cookiejar_cookie_json_add_post():
 
     return redirect(url_for('crawler_splash.crawler_cookiejar_cookie_add', cookiejar_uuid=cookiejar_uuid))
 
-@crawler_splash.route('/crawler/cookiejar/cookie/json_add_post', methods=['GET'])
+@crawler_splash.route('/crawler/settings', methods=['GET'])
 @login_required
 @login_analyst
 def crawler_splash_setings():
-
-    return render_template("settings_splash_crawler.html", cookiejar_uuid=True, cookie_uuid=False)
+    all_proxies = crawlers.get_all_proxies_metadata()
+    all_splash = crawlers.get_all_splash_crawler_metadata()
+
+    splash_manager_url = crawlers.get_splash_manager_url()
+    api_key = crawlers.get_hidden_splash_api_key()
+    is_manager_connected = crawlers.ping_splash_manager()
+
+    return render_template("settings_splash_crawler.html",
+                            is_manager_connected=is_manager_connected,
+                            splash_manager_url=splash_manager_url, api_key=api_key,
+                            all_splash=all_splash, all_proxies=all_proxies)
 
 ## - - ##
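To make the new view's data flow easier to follow, the dictionary below approximates the context that crawler_splash_setings() hands to settings_splash_crawler.html. Only the dictionary keys come from the patch (the metadata helpers added in bin/lib/crawlers.py); every name, URL, and value is an invented placeholder.

```python
# Hypothetical example of the template context assembled by the /crawler/settings view
# (names, URL, and key are made up; only the keys mirror the patch).
example_context = {
    'is_manager_connected': True,
    'splash_manager_url': 'http://127.0.0.1:6400',  # placeholder manager URL
    'api_key': 'abcd' + '*' * 33 + 'wxyz',          # masked form, as from get_hidden_splash_api_key()
    'all_splash': {                                 # shape of get_all_splash_crawler_metadata()
        'splash_tor_1': {
            'proxy': 'default_tor',
            'type': 'tor',
            'description': 'onion crawler',
        },
    },
    'all_proxies': {                                # shape of get_all_proxies_metadata()
        'default_tor': {
            'host': '127.0.0.1',
            'port': '9050',
            'type': 'SOCKS5',
            'crawler_type': 'tor',
            'description': 'example Tor proxy',
        },
    },
}
```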
diff --git a/var/www/templates/crawler/crawler_splash/settings_splash_crawler.html b/var/www/templates/crawler/crawler_splash/settings_splash_crawler.html
new file mode 100644
index 00000000..c1f8204a
--- /dev/null
+++ b/var/www/templates/crawler/crawler_splash/settings_splash_crawler.html
@@ -0,0 +1,216 @@
+    AIL-Framework
+
+    {% include 'nav_bar.html' %}
+
+        {% include 'crawler/menu_sidebar.html' %}
+
+        {%if not is_manager_connected%}
+            {% include 'crawler/crawler_disabled.html' %}
+        {%endif%}
+
+        {% if is_manager_connected %}
+            Connected
+        {% else %}
+            Error
+        {% endif %}
+        Splash Crawler Manager
+
+        Splash Manager URL   {{splash_manager_url}}
+        API Key              {{api_key}}
+
+        All Splash Crawlers:
+            Splash name | Proxy | Crawler type | Description
+            {% for splash_name in all_splash %}
+                {{splash_name}}
+                {{all_splash[splash_name]['proxy']}}
+                {%if all_splash[splash_name]['type']=='tor'%}
+                {%else%}
+                {%endif%}
+                {{all_splash[splash_name]['type']}}
+                {{all_splash[splash_name]['description']}}
+            {% endfor %}
+
+        All Proxies:
+            Proxy name | Host | Port | Type | Crawler Type | Description
+            {% for proxy_name in all_proxies %}
+                {{proxy_name}}
+                {{all_proxies[proxy_name]['host']}}
+                {{all_proxies[proxy_name]['port']}}
+                {{all_proxies[proxy_name]['type']}}
+                {%if all_proxies[proxy_name]['crawler_type']=='tor'%}
+                {%else%}
+                {%endif%}
+                {{all_proxies[proxy_name]['crawler_type']}}
+                {{all_proxies[proxy_name]['description']}}
+            {% endfor %}
+
+        Crawlers Settings
diff --git a/var/www/templates/crawler/menu_sidebar.html b/var/www/templates/crawler/menu_sidebar.html
index 66a5f4f5..d3ed9170 100644
--- a/var/www/templates/crawler/menu_sidebar.html
+++ b/var/www/templates/crawler/menu_sidebar.html
@@ -44,7 +44,7 @@
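Finally, a small self-contained approximation of how the "All Proxies" rows iterate over that context. This is not the actual template markup, only a plain-text Jinja2 sketch with invented proxy values; the field accesses match the expressions in the new template.

```python
# Plain-text Jinja2 sketch (not the real template) of the per-proxy fields
# rendered by the "All Proxies" table in settings_splash_crawler.html.
from jinja2 import Template

proxy_rows = Template(
    "{% for proxy_name in all_proxies %}"
    "{{proxy_name}} | {{all_proxies[proxy_name]['host']}}:{{all_proxies[proxy_name]['port']}}"
    " | {{all_proxies[proxy_name]['type']}} | {{all_proxies[proxy_name]['crawler_type']}}"
    " | {{all_proxies[proxy_name]['description']}}\n"
    "{% endfor %}"
)

print(proxy_rows.render(all_proxies={
    'default_tor': {'host': '127.0.0.1', 'port': '9050', 'type': 'SOCKS5',
                    'crawler_type': 'tor', 'description': 'invented example proxy'},
}))
```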