From c2885589cf6cd57b6082d471f88352fcbf2d871d Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Thu, 7 Feb 2019 17:22:44 +0100
Subject: [PATCH] chg: [UI] basic navbar + sidebar + refractor

---
 bin/Crawler.py                               | 118 +++++++++---------
 .../hiddenServices/Flask_hiddenServices.py   |   4 +
 .../templates/Crawler_index.html             |  82 ++++++++++++
 var/www/templates/nav_bar.html               |  46 +++++++
 var/www/update_thirdparty.sh                 |   6 +-
 5 files changed, 192 insertions(+), 64 deletions(-)
 create mode 100644 var/www/modules/hiddenServices/templates/Crawler_index.html
 create mode 100644 var/www/templates/nav_bar.html

diff --git a/bin/Crawler.py b/bin/Crawler.py
index e1591d55..278ecc05 100755
--- a/bin/Crawler.py
+++ b/bin/Crawler.py
@@ -16,6 +16,47 @@ sys.path.append(os.environ['AIL_BIN'])
 from Helper import Process
 from pubsublogger import publisher
 
+# ======== GLOBAL VARIABLES ========
+publisher.port = 6380
+publisher.channel = "Script"
+
+config_section = 'Crawler'
+
+# Setup the I/O queues
+p = Process(config_section)
+
+accepted_services = ['onion', 'regular']
+
+dic_regex = {}
+dic_regex['onion'] = "((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.onion)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
+re.compile(dic_regex['onion'])
+dic_regex['i2p'] = "((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.i2p)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
+re.compile(dic_regex['i2p'])
+dic_regex['regular'] = dic_regex['i2p']
+
+faup = Faup()
+
+PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes"))
+
+r_serv_metadata = redis.StrictRedis(
+    host=p.config.get("ARDB_Metadata", "host"),
+    port=p.config.getint("ARDB_Metadata", "port"),
+    db=p.config.getint("ARDB_Metadata", "db"),
+    decode_responses=True)
+
+r_cache = redis.StrictRedis(
+    host=p.config.get("Redis_Cache", "host"),
+    port=p.config.getint("Redis_Cache", "port"),
+    db=p.config.getint("Redis_Cache", "db"),
+    decode_responses=True)
+
+r_onion = redis.StrictRedis(
+    host=p.config.get("ARDB_Onion", "host"),
+    port=p.config.getint("ARDB_Onion", "port"),
+    db=p.config.getint("ARDB_Onion", "db"),
+    decode_responses=True)
+
+# ======== FUNCTIONS ========
 def decode_val(value):
     if value is not None:
         value = value.decode()
@@ -105,7 +146,7 @@ def crawl_onion(url, domain, date, date_month, message):
             r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Crawling')
             exit(1)
 
-
+# ======== MAIN ========
 if __name__ == '__main__':
 
     if len(sys.argv) != 3:
@@ -119,83 +160,38 @@ if __name__ == '__main__':
     if mode == 'automatic':
         type_hidden_service = 'onion'
 
-    publisher.port = 6380
-    publisher.channel = "Script"
-
-    publisher.info("Script Crawler started")
-
-    config_section = 'Crawler'
-
-    # Setup the I/O queues
-    p = Process(config_section)
-
-    accepted_services = ['onion', 'regular']
-
-    dic_regex = {}
-    dic_regex['onion'] = "((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.onion)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
-    re.compile(dic_regex['onion'])
-    dic_regex['i2p'] = "((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.i2p)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
-    re.compile(dic_regex['i2p'])
-    dic_regex['regular'] = dic_regex['i2p']
-
-
-    url_onion = "((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.onion)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
-    re.compile(url_onion)
-    url_i2p = "((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.i2p)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
-    re.compile(url_i2p)
-
-    if type_hidden_service == 'onion':
-        regex_hidden_service = url_onion
-    elif type_hidden_service == 'i2p':
-        regex_hidden_service = url_i2p
-    elif type_hidden_service == 'regular':
-        regex_hidden_service = url_i2p
-    else:
+    # verify crawler type (type_hidden_service)
+    if type_hidden_service not in accepted_services:
         print('incorrect crawler type: {}'.format(type_hidden_service))
         exit(0)
+    else:
+        publisher.info("Script Crawler started")
+
+    # load domains blacklist
+    load_type_blacklist(type_hidden_service)
 
     splash_url = '{}:{}'.format( p.config.get("Crawler", "splash_url_onion"), splash_port)
     print('splash url: {}'.format(splash_url))
 
     crawler_depth_limit = p.config.getint("Crawler", "crawler_depth_limit")
 
-    faup = Faup()
-
-    PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes"))
-
-    r_serv_metadata = redis.StrictRedis(
-        host=p.config.get("ARDB_Metadata", "host"),
-        port=p.config.getint("ARDB_Metadata", "port"),
-        db=p.config.getint("ARDB_Metadata", "db"),
-        decode_responses=True)
-
-    r_cache = redis.StrictRedis(
-        host=p.config.get("Redis_Cache", "host"),
-        port=p.config.getint("Redis_Cache", "port"),
-        db=p.config.getint("Redis_Cache", "db"),
-        decode_responses=True)
-
-    r_onion = redis.StrictRedis(
-        host=p.config.get("ARDB_Onion", "host"),
-        port=p.config.getint("ARDB_Onion", "port"),
-        db=p.config.getint("ARDB_Onion", "db"),
-        decode_responses=True)
 
     # Crawler status
     r_cache.sadd('all_crawler:{}'.format(type_hidden_service), splash_port)
     r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Waiting')
     r_cache.hset('metadata_crawler:{}'.format(splash_port), 'started_time', datetime.datetime.now().strftime("%Y/%m/%d - %H:%M.%S"))
 
-    # load domains blacklist
-    load_type_blacklist(type_hidden_service)
-
     while True:
 
-        # Priority Queue - Recovering the streamed message informations.
-        message = r_onion.spop('{}_crawler_priority_queue'.format(type_hidden_service))
+        if mode == 'automatic':
+            # Priority Queue - Recovering the streamed message informations.
+            message = r_onion.spop('{}_crawler_priority_queue'.format(type_hidden_service))
 
-        if message is None:
-            # Recovering the streamed message informations.
-            message = r_onion.spop('{}_crawler_queue'.format(type_hidden_service))
+            if message is None:
+                # Recovering the streamed message informations.
+                message = r_onion.spop('{}_crawler_queue'.format(type_hidden_service))
+        else:
+            pass
 
         if message is not None:
diff --git a/var/www/modules/hiddenServices/Flask_hiddenServices.py b/var/www/modules/hiddenServices/Flask_hiddenServices.py
index cc977976..965255fb 100644
--- a/var/www/modules/hiddenServices/Flask_hiddenServices.py
+++ b/var/www/modules/hiddenServices/Flask_hiddenServices.py
@@ -75,6 +75,10 @@ def get_onion_status(domain, date):
         return False
 
 # ============= ROUTES ==============
+@hiddenServices.route("/hiddenServices/2", methods=['GET'])
+def hiddenServices_page_test():
+    return render_template("Crawler_index.html")
+
 @hiddenServices.route("/hiddenServices/", methods=['GET'])
 def hiddenServices_page():
     last_onions = r_serv_onion.lrange('last_onion', 0 ,-1)
diff --git a/var/www/modules/hiddenServices/templates/Crawler_index.html b/var/www/modules/hiddenServices/templates/Crawler_index.html
new file mode 100644
index 00000000..5b2137ff
--- /dev/null
+++ b/var/www/modules/hiddenServices/templates/Crawler_index.html
@@ -0,0 +1,82 @@
[... 82 added lines of HTML: an "AIL-Framework" page shell with CSS/JS includes, {% include 'nav_bar.html' %}, and an empty placeholder table; the markup is not preserved in this copy of the patch ...]
diff --git a/var/www/templates/nav_bar.html b/var/www/templates/nav_bar.html
new file mode 100644
index 00000000..d10c6cea
--- /dev/null
+++ b/var/www/templates/nav_bar.html
@@ -0,0 +1,46 @@
[... 46 added lines of HTML: the shared navigation bar template included by Crawler_index.html; the markup is not preserved in this copy of the patch ...]
diff --git a/var/www/update_thirdparty.sh b/var/www/update_thirdparty.sh
index 01a73136..e03d2af7 100755
--- a/var/www/update_thirdparty.sh
+++ b/var/www/update_thirdparty.sh
@@ -31,9 +31,9 @@ unzip temp/d3_${D3_JS_VERSION}.zip -d temp/
 unzip temp/moment_2.22.2.zip -d temp/
 unzip temp/daterangepicker_v0.18.0.zip -d temp/
 
-mv temp/bootstrap-${BOOTSTRAP_VERSION}-dist/js/bootstrap.min.js ./static/js/
-mv temp/bootstrap-${BOOTSTRAP_VERSION}-dist/css/bootstrap.min.css ./static/css/
-mv temp/bootstrap-${BOOTSTRAP_VERSION}-dist/css/bootstrap.min.css.map ./static/css/
+mv temp/bootstrap-${BOOTSTRAP_VERSION}-dist/js/bootstrap.min.js ./static/js/bootstrap4.min.js
+mv temp/bootstrap-${BOOTSTRAP_VERSION}-dist/css/bootstrap.min.css ./static/css/bootstrap4.min.css
+mv temp/bootstrap-${BOOTSTRAP_VERSION}-dist/css/bootstrap.min.css.map ./static/css/bootstrap4.min.css.map
 
 mv temp/startbootstrap-sb-admin-${SBADMIN_VERSION} temp/sb-admin
 mv temp/startbootstrap-sb-admin-2-${SBADMIN_VERSION} temp/sb-admin-2