chg: [Crawler] domains stats + logs + clean

commit 82e6df4b94 (parent ecb2857151)
Author: Terrtia
Date: 2018-09-28 15:23:27 +02:00
GPG key ID: 1E1B1F50D84613D0 (no known key found for this signature in database)

5 changed files with 55 additions and 32 deletions

View file

@@ -14,10 +14,6 @@ sys.path.append(os.environ['AIL_BIN'])
 from Helper import Process
 from pubsublogger import publisher
 
-def signal_handler(sig, frame):
-    sys.exit(0)
-
 def on_error_send_message_back_in_queue(type_hidden_service, domain, message):
     # send this msg back in the queue
     if not r_onion.sismember('{}_domain_crawler_queue'.format(type_hidden_service), domain):
@@ -34,9 +30,10 @@ def crawl_onion(url, domain, date, date_month, message):
         try:
             r = requests.get(splash_url , timeout=30.0)
         except Exception:
-            ## FIXME: # TODO: relaunch docker or send error message
+            # TODO: relaunch docker or send error message
             on_error_send_message_back_in_queue(type_hidden_service, domain, message)
+            publisher.error('{} SPASH DOWN'.format(splash_url))
             print('--------------------------------------')
             print(' \033[91m DOCKER SPLASH DOWN\033[0m')
             print(' {} DOWN'.format(splash_url))
@@ -54,6 +51,7 @@ def crawl_onion(url, domain, date, date_month, message):
             # error: splash:Connection to proxy refused
             if 'Connection to proxy refused' in output:
                 on_error_send_message_back_in_queue(type_hidden_service, domain, message)
+                publisher.error('{} SPASH, PROXY DOWN OR BAD CONFIGURATION'.format(splash_url))
                 print('------------------------------------------------------------------------')
                 print(' \033[91m SPLASH: Connection to proxy refused')
                 print('')
@@ -114,8 +112,6 @@ if __name__ == '__main__':
     PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes"))
 
-    #signal.signal(signal.SIGINT, signal_handler)
-
     r_serv_metadata = redis.StrictRedis(
         host=p.config.get("ARDB_Metadata", "host"),
         port=p.config.getint("ARDB_Metadata", "port"),
@@ -136,26 +132,15 @@ if __name__ == '__main__':
     while True:
 
-        # Recovering the streamed message informations. http://eepsites.i2p
+        # Recovering the streamed message informations.
         message = r_onion.spop('{}_crawler_queue'.format(type_hidden_service))
 
-        # # FIXME: remove
-        if message is None:
-            print('get ardb message')
-            message = r_onion.spop('mess_onion')
-
         if message is not None:
 
             splitted = message.split(';')
             if len(splitted) == 2:
                 url, paste = splitted
                 paste = paste.replace(PASTES_FOLDER+'/', '')
-                '''
-                if not '.onion' in url:
-                    print('not onion')
-                    continue
-                '''
 
                 url_list = re.findall(regex_hidden_service, url)[0]
                 if url_list[1] == '':
@@ -238,12 +223,6 @@ if __name__ == '__main__':
                     r_onion.lpush('last_{}'.format(type_hidden_service), domain)
                     r_onion.ltrim('last_{}'.format(type_hidden_service), 0, 15)
 
-                    #send all crawled domain past
-                    #msg = domain
-                    #p.populate_set_out(msg, 'DomainSubject')
-
-                    #time.sleep(30)
 
             else:
                 continue
         else:
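The publisher.error() calls above are the "logs" half of this commit: until now, a dead Splash container or an unreachable Tor proxy was only printed to the crawler's console and never reached AIL's centralized logging. Below is a minimal sketch of the resulting pattern, assuming AIL's conventional pubsublogger setup; the port/channel values, the Redis connection, and the check_splash wrapper are illustrative assumptions, while the queue key names follow the diff.

import redis
import requests
from pubsublogger import publisher

publisher.port = 6380          # assumed AIL default, not from this commit
publisher.channel = 'Script'   # assumed AIL default, not from this commit

# illustrative connection; the real crawler builds it from its config file
r_onion = redis.StrictRedis(host='localhost', port=6382, db=0, decode_responses=True)

def on_error_send_message_back_in_queue(type_hidden_service, domain, message):
    # re-queue the domain and its message so the crawl is retried later;
    # the sismember guard matches the diff, the sadd body is a sketch
    if not r_onion.sismember('{}_domain_crawler_queue'.format(type_hidden_service), domain):
        r_onion.sadd('{}_domain_crawler_queue'.format(type_hidden_service), domain)
        r_onion.sadd('{}_crawler_queue'.format(type_hidden_service), message)

def check_splash(splash_url, type_hidden_service, domain, message):
    try:
        requests.get(splash_url, timeout=30.0)
    except Exception:
        # the failure is now logged centrally instead of only printed
        on_error_send_message_back_in_queue(type_hidden_service, domain, message)
        publisher.error('{} SPASH DOWN'.format(splash_url))  # sic, wording kept from the diff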

View file

@@ -58,9 +58,6 @@ pycountry
 # To fetch Onion urls
 PySocks
 
-#extract subject
-newspaper3k
-
 # decompress files
 sflock

View file

@@ -68,6 +68,13 @@ def hiddenServices_page():
     last_onions = r_serv_onion.lrange('last_onion', 0 ,-1)
     list_onion = []
 
+    now = datetime.datetime.now()
+    date = '{}{}{}'.format(now.strftime("%Y"), now.strftime("%m"), now.strftime("%d"))
+    statDomains = {}
+    statDomains['domains_up'] = r_serv_onion.scard('onion_up:{}'.format(date))
+    statDomains['domains_down'] = r_serv_onion.scard('onion_down:{}'.format(date))
+    statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down']
+
     for onion in last_onions:
         metadata_onion = {}
         metadata_onion['domain'] = onion
@@ -83,7 +90,7 @@ def hiddenServices_page():
             metadata_onion['status_icon'] = 'fa-times-circle'
         list_onion.append(metadata_onion)
 
-    return render_template("hiddenServices.html", last_onions=list_onion)
+    return render_template("hiddenServices.html", last_onions=list_onion, statDomains=statDomains)
 
 @hiddenServices.route("/hiddenServices/onion_domain", methods=['GET'])
 def onion_domain():
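The new stats rely on a key-naming convention visible above: the crawler records every domain it visits in a per-day Redis set, onion_up:YYYYMMDD or onion_down:YYYYMMDD, so the page gets the day's totals with SCARD in O(1) instead of iterating members. Here is a self-contained sketch of that convention; the host/port and the .onion names are placeholders. Note that the three chained strftime() calls in the diff produce the same string as a single strftime("%Y%m%d").

import datetime
import redis

r = redis.StrictRedis(host='localhost', port=6382, db=0, decode_responses=True)
date = datetime.datetime.now().strftime("%Y%m%d")  # e.g. '20180928'

# the crawler side: record each classified domain in today's set
r.sadd('onion_up:{}'.format(date), 'example2acqwe3f4.onion')    # hypothetical domain
r.sadd('onion_down:{}'.format(date), 'offline5zxw6yq7o.onion')  # hypothetical domain

# the Flask side: SCARD returns the set cardinality without fetching members
statDomains = {
    'domains_up': r.scard('onion_up:{}'.format(date)),
    'domains_down': r.scard('onion_down:{}'.format(date)),
}
statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down']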

View file

@@ -80,10 +80,50 @@
         </div>
       </div>
+      </div>
     </div>
     <div class="col-md-6">
+      <div class="panel panel-info">
+        <div class="panel-heading">
+          <i class="fa fa-eye-slash"></i> Domains Crawled Today
+        </div>
+        <table class="table table-hover table-striped">
+          <tbody>
+            <tr>
+              <td>
+                <div style="color:Green; display:inline-block">
+                  <i class="fa fa-check-circle fa-2x"></i>
+                  Domains UP
+                </div>
+              </td>
+              <td>
+                <div style="color:Green; display:inline-block">
+                  {{ statDomains['domains_up'] }}
+                </div>
+              </td>
+            </tr>
+            <tr>
+              <td>
+                <div style="color:Red; display:inline-block">
+                  <i class="fa fa-times-circle fa-2x"></i>
+                  Domains DOWN
+                </div>
+              </td>
+              <td>
+                <div style="color:Red; display:inline-block">
+                  {{ statDomains['domains_down'] }}
+                </div>
+              </td>
+            </tr>
+            <tr>
+              <td>Crawled Domains</td>
+              <td>{{ statDomains['total'] }}</td>
+            </tr>
+          </tbody>
+        </table>
+      </div>
     </div>
   </div>
@@ -125,7 +165,8 @@ function create_line_chart(id, url){
     var line = d3.line()
       .x(function(d) {
         return x(d.date);
-      }).y(function(d) {
+      })
+      .y(function(d) {
         return y(d.value);
       });
     var svg_line = d3.select('#'+id).append('svg')
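The statDomains dict reaches the Jinja2 expressions in this table through the extra keyword argument added to render_template in the Flask diff above: any keyword passed there becomes a template variable. A toy illustration of that flow (the route and dummy values are assumptions, and it presupposes a templates/hiddenServices.html on disk):

from flask import Flask, render_template

app = Flask(__name__)

@app.route("/hiddenServices/")
def hiddenServices_page():
    # dummy values standing in for the Redis-backed counters
    statDomains = {'domains_up': 3, 'domains_down': 2, 'total': 5}
    # every keyword below is visible in the template,
    # e.g. {{ statDomains['domains_up'] }}
    return render_template("hiddenServices.html", statDomains=statDomains)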

View file

@@ -5,7 +5,6 @@
     Flask functions and routes for the trending modules page
 '''
 import redis
-import os
 import json
 import os
 import flask