From 82e6df4b94d7d5c5ee6f819d78da4574ce37eafd Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Fri, 28 Sep 2018 15:23:27 +0200
Subject: [PATCH] chg: [Crawler] domains stats + logs + clean

---
 bin/Crawler.py                                | 29 ++----
 pip3_packages_requirement.txt                 |  3 --
 .../hiddenServices/Flask_hiddenServices.py    |  9 +++-
 .../templates/hiddenServices.html             | 45 ++++++++++++++++++-
 var/www/modules/showpaste/Flask_showpaste.py  |  1 -
 5 files changed, 55 insertions(+), 32 deletions(-)

diff --git a/bin/Crawler.py b/bin/Crawler.py
index 30d3ffb2..9ebff043 100755
--- a/bin/Crawler.py
+++ b/bin/Crawler.py
@@ -14,10 +14,6 @@ sys.path.append(os.environ['AIL_BIN'])
 from Helper import Process
 from pubsublogger import publisher
 
-def signal_handler(sig, frame):
-    sys.exit(0)
-
 def on_error_send_message_back_in_queue(type_hidden_service, domain, message):
     # send this msg back in the queue
     if not r_onion.sismember('{}_domain_crawler_queue'.format(type_hidden_service), domain):
@@ -34,9 +30,10 @@ def crawl_onion(url, domain, date, date_month, message):
     try:
         r = requests.get(splash_url , timeout=30.0)
     except Exception:
-        ## FIXME: # TODO: relaunch docker or send error message
+        # TODO: relaunch docker or send error message
         on_error_send_message_back_in_queue(type_hidden_service, domain, message)
+        publisher.error('{} SPLASH DOWN'.format(splash_url))
         print('--------------------------------------')
         print('         \033[91m DOCKER SPLASH DOWN\033[0m')
         print('          {} DOWN'.format(splash_url))
@@ -54,6 +51,7 @@ def crawl_onion(url, domain, date, date_month, message):
         # error: splash:Connection to proxy refused
         if 'Connection to proxy refused' in output:
             on_error_send_message_back_in_queue(type_hidden_service, domain, message)
+            publisher.error('{} SPLASH, PROXY DOWN OR BAD CONFIGURATION'.format(splash_url))
             print('------------------------------------------------------------------------')
             print('         \033[91m SPLASH: Connection to proxy refused')
             print('')
@@ -114,8 +112,6 @@ if __name__ == '__main__':
 
     PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes"))
 
-    #signal.signal(signal.SIGINT, signal_handler)
-
     r_serv_metadata = redis.StrictRedis(
         host=p.config.get("ARDB_Metadata", "host"),
         port=p.config.getint("ARDB_Metadata", "port"),
@@ -136,26 +132,15 @@ if __name__ == '__main__':
 
     while True:
 
-        # Recovering the streamed message informations. http://eepsites.i2p
+        # Recovering the streamed message information.
         message = r_onion.spop('{}_crawler_queue'.format(type_hidden_service))
 
-        # # FIXME: remove
-        if message is None:
-            print('get ardb message')
-            message = r_onion.spop('mess_onion')
-
         if message is not None:
 
             splitted = message.split(';')
             if len(splitted) == 2:
                 url, paste = splitted
                 paste = paste.replace(PASTES_FOLDER+'/', '')
 
-                '''
-                if not '.onion' in url:
-                    print('not onion')
-                    continue
-                '''
-
                 url_list = re.findall(regex_hidden_service, url)[0]
                 if url_list[1] == '':
@@ -238,12 +223,6 @@ if __name__ == '__main__':
                 r_onion.lpush('last_{}'.format(type_hidden_service), domain)
                 r_onion.ltrim('last_{}'.format(type_hidden_service), 0, 15)
 
-                #send all crawled domain past
-                #msg = domain
-                #p.populate_set_out(msg, 'DomainSubject')
-
-                #time.sleep(30)
-
             else:
                 continue
         else:
diff --git a/pip3_packages_requirement.txt b/pip3_packages_requirement.txt
index cc1d0543..dd447d5c 100644
--- a/pip3_packages_requirement.txt
+++ b/pip3_packages_requirement.txt
@@ -58,9 +58,6 @@ pycountry
 # To fetch Onion urls
 PySocks
 
-#extract subject
-newspaper3k
-
 # decompress files
 sflock
diff --git a/var/www/modules/hiddenServices/Flask_hiddenServices.py b/var/www/modules/hiddenServices/Flask_hiddenServices.py
index 9613a7e5..64c23f65 100644
--- a/var/www/modules/hiddenServices/Flask_hiddenServices.py
+++ b/var/www/modules/hiddenServices/Flask_hiddenServices.py
@@ -68,6 +68,13 @@ def hiddenServices_page():
     last_onions = r_serv_onion.lrange('last_onion', 0 ,-1)
     list_onion = []
 
+    now = datetime.datetime.now()
+    date = '{}{}{}'.format(now.strftime("%Y"), now.strftime("%m"), now.strftime("%d"))
+    statDomains = {}
+    statDomains['domains_up'] = r_serv_onion.scard('onion_up:{}'.format(date))
+    statDomains['domains_down'] = r_serv_onion.scard('onion_down:{}'.format(date))
+    statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down']
+
     for onion in last_onions:
         metadata_onion = {}
         metadata_onion['domain'] = onion
@@ -83,7 +90,7 @@ def hiddenServices_page():
             metadata_onion['status_icon'] = 'fa-times-circle'
         list_onion.append(metadata_onion)
 
-    return render_template("hiddenServices.html", last_onions=list_onion)
+    return render_template("hiddenServices.html", last_onions=list_onion, statDomains=statDomains)
 
 @hiddenServices.route("/hiddenServices/onion_domain", methods=['GET'])
 def onion_domain():
diff --git a/var/www/modules/hiddenServices/templates/hiddenServices.html b/var/www/modules/hiddenServices/templates/hiddenServices.html
index 1784aa72..292a70d9 100644
--- a/var/www/modules/hiddenServices/templates/hiddenServices.html
+++ b/var/www/modules/hiddenServices/templates/hiddenServices.html
@@ -80,10 +80,50 @@
+          <div class="panel panel-info" style="text-align:center;">
+            <div class="panel-heading">
+              Domains Crawled Today
+            </div>
+            <table class="table">
+              <tbody>
+                <tr>
+                  <td>
+                    <div style="color:Green;">
+                      <i class="fa fa-check-circle fa-2x"></i>
+                      Domains UP
+                    </div>
+                  </td>
+                  <td>
+                    <div style="color:Green;">
+                      {{ statDomains['domains_up'] }}
+                    </div>
+                  </td>
+                </tr>
+                <tr>
+                  <td>
+                    <div style="color:Red;">
+                      <i class="fa fa-times-circle fa-2x"></i>
+                      Domains DOWN
+                    </div>
+                  </td>
+                  <td>
+                    <div style="color:Red;">
+                      {{ statDomains['domains_down'] }}
+                    </div>
+                  </td>
+                </tr>
+                <tr>
+                  <td>Crawled Domains</td>
+                  <td>{{ statDomains['total'] }}</td>
+                </tr>
+              </tbody>
+            </table>
+          </div>
@@ -125,7 +165,8 @@ function create_line_chart(id, url){ var line = d3.line() .x(function(d) { return x(d.date); - }).y(function(d) { + }) + .y(function(d) { return y(d.value); }); var svg_line = d3.select('#'+id).append('svg') diff --git a/var/www/modules/showpaste/Flask_showpaste.py b/var/www/modules/showpaste/Flask_showpaste.py index f79239a3..4912e7b0 100644 --- a/var/www/modules/showpaste/Flask_showpaste.py +++ b/var/www/modules/showpaste/Flask_showpaste.py @@ -5,7 +5,6 @@ Flask functions and routes for the trending modules page ''' import redis -import os import json import os import flask