chg: [Crawler UI Tags] add tag by day + add crawler status + UI onion blacklist

This commit is contained in:
Terrtia 2019-02-19 11:41:45 +01:00
parent 32acbef4e9
commit da78d0552d
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
5 changed files with 450 additions and 53 deletions

View file

@ -66,8 +66,6 @@ def load_type_blacklist(type_service):
# load domains blacklist
try:
with open(os.path.join(os.environ['AIL_BIN'],'/torcrawler/blacklist_{}.txt'.format(type_service)), 'r') as f:
# # TODO: # FIXME: remove this
r_onion.delete('blacklist_{}'.format(type_service))
lines = f.read().splitlines()
for line in lines:
r_onion.sadd('blacklist_{}'.format(type_service), line)
@ -176,7 +174,9 @@ if __name__ == '__main__':
crawler_depth_limit = p.config.getint("Crawler", "crawler_depth_limit")
# Crawler status
r_cache.sadd('all_crawler:{}'.format(type_hidden_service), splash_port)
r_cache.sadd('all_crawler:{}'.format(splash_port)
r_cache.sadd('all_crawler:{}:{}'.format(mode, type_hidden_service), splash_port)
r_cache.hset('metadata_crawler:{}'.format(splash_port), 'mode', mode)
r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Waiting')
r_cache.hset('metadata_crawler:{}'.format(splash_port), 'started_time', datetime.datetime.now().strftime("%Y/%m/%d - %H:%M.%S"))
@ -296,6 +296,11 @@ if __name__ == '__main__':
# update list, last crawled sites
r_onion.lpush('last_{}'.format(type_hidden_service), domain)
r_onion.ltrim('last_{}'.format(type_hidden_service), 0, 15)
# manual
else:
# update list, last crawled sites
r_onion.lpush('last_crawled_manual', domain)
r_onion.ltrim('last_crawled_manual', 0, 15)
#update crawler status
r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Waiting')

View file

@ -241,6 +241,10 @@ class Paste(object):
def _get_p_date(self):
    """Return the value held in this object's ``p_date`` attribute."""
    return self.p_date
# used
def get_p_date(self):
    """Public accessor: return the value held in ``self.p_date``."""
    return self.p_date
def _get_p_size(self):
    """Return the value held in this object's ``p_size`` attribute."""
    return self.p_size

View file

@ -8,6 +8,7 @@ import redis
import datetime
import sys
import os
from pyfaup.faup import Faup
from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for
from Date import Date
@ -27,6 +28,8 @@ PASTES_FOLDER = Flask_config.PASTES_FOLDER
hiddenServices = Blueprint('hiddenServices', __name__, template_folder='templates')
faup = Faup()
# ============ FUNCTIONS ============
def one():
    """Return the integer constant 1."""
    return 1
@ -68,11 +71,73 @@ def unpack_paste_tags(p_tags):
l_tags.append( (tag, complete_tag) )
return l_tags
def is_valid_onion_domain(onion_domain):
    """Check that ``onion_domain`` is a bare onion domain.

    The value is parsed with the module-level ``faup`` instance and accepted
    only when its TLD is ``onion`` and it carries no scheme, no port and no
    query string.

    :param onion_domain: candidate domain; may be ``None`` or empty when it
        comes from a missing request argument.
    :return: ``True`` when the value is a bare onion domain, else ``False``.
    """
    # A missing/empty value can never be valid; reject it before it reaches
    # the URL parser.
    if not onion_domain:
        return False

    faup.decode(onion_domain)
    domain_unpack = faup.get()

    # The TLD was originally compared against bytes only; str is accepted as
    # well. NOTE(review): confirm which type the installed pyfaup returns.
    if domain_unpack['tld'] not in (b'onion', 'onion'):
        return False
    return all(domain_unpack[part] is None
               for part in ('scheme', 'port', 'query_string'))
def get_onion_status(domain, date):
    """Tell whether ``domain`` belongs to the ``onion_up:<date>`` set.

    :param domain: domain to look up.
    :param date: string appended to the ``onion_up:`` key prefix.
    :return: ``True`` if the domain is a member of that set, else ``False``.
    """
    up_key = 'onion_up:' + date
    return bool(r_serv_onion.sismember(up_key, domain))
def get_domain_type(domain):
    """Classify ``domain`` as ``'onion'`` or ``'regular'``.

    A domain is an onion when its host part ends with ``.onion`` (an optional
    ``:port`` suffix is ignored). The legacy form whose last ``:``-separated
    segment is exactly ``onion`` is still recognised.

    :param domain: domain string.
    :return: ``'onion'`` or ``'regular'``.
    """
    segments = domain.split(':')
    # Legacy check first: previously only "<something>:onion" matched, which
    # meant a plain "abc.onion" was reported as 'regular'.
    if segments[-1] == 'onion' or segments[0].lower().endswith('.onion'):
        return 'onion'
    return 'regular'
def get_last_crawled_domains_metadata(list_domains_crawled, date=None, type=None):
    """Build the display metadata for a list of crawled domains.

    For every domain the ``last_check`` and ``first_seen`` fields are read
    from the ``<type>_metadata:<domain>`` hash, and the UP/DOWN status is
    derived from membership in the ``<type>_up:<last_check>`` set.

    :param list_domains_crawled: iterable of domains.
    :param date: unused; kept (now optional) so existing positional callers
        and callers that omit it both work.
    :param type: domain type used to build the Redis keys. When ``None`` the
        type is resolved for each domain with ``get_domain_type``.
    :return: list of dicts with the keys ``domain``, ``last_check``,
        ``first_seen``, ``status_text``, ``status_color`` and ``status_icon``.
    """
    list_crawled_metadata = []
    for domain in list_domains_crawled:
        # Resolve the type per domain. Assigning to ``type`` itself would make
        # the first domain's type stick for every following domain.
        domain_type = get_domain_type(domain) if type is None else type
        metadata_key = '{}_metadata:{}'.format(domain_type, domain)

        last_check = r_serv_onion.hget(metadata_key, 'last_check')
        if last_check is None:
            last_check = '********'
        first_seen = r_serv_onion.hget(metadata_key, 'first_seen')
        if first_seen is None:
            first_seen = '********'

        metadata_domain = {
            'domain': domain,
            'last_check': last_check,
            'first_seen': first_seen,
        }
        if r_serv_onion.sismember('{}_up:{}'.format(domain_type, last_check), domain):
            metadata_domain['status_text'] = 'UP'
            metadata_domain['status_color'] = 'Green'
            metadata_domain['status_icon'] = 'fa-check-circle'
        else:
            metadata_domain['status_text'] = 'DOWN'
            metadata_domain['status_color'] = 'Red'
            metadata_domain['status_icon'] = 'fa-times-circle'
        list_crawled_metadata.append(metadata_domain)
    return list_crawled_metadata
def get_crawler_splash_status(mode, type):
    """Collect the status of every crawler registered for ``mode``/``type``.

    Crawler identifiers are read from the ``all_crawler:<mode>:<type>`` set
    and their details from the matching ``metadata_crawler:<crawler>`` hash.

    :param mode: crawler mode used in the set key (e.g. ``'automatic'``).
    :param type: service type used in the set key (e.g. ``'onion'``).
    :return: list of dicts with the keys ``crawler_info``,
        ``crawling_domain``, ``status_info`` and ``status``; ``status`` is
        ``True`` when ``status_info`` is ``'Waiting'`` or ``'Crawling'``.
    """
    crawler_metadata = []
    all_crawlers = r_cache.smembers('all_crawler:{}:{}'.format(mode, type))
    for crawler in all_crawlers:
        metadata_key = 'metadata_crawler:{}'.format(crawler)
        crawling_domain = r_cache.hget(metadata_key, 'crawling_domain')
        started_time = r_cache.hget(metadata_key, 'started_time')
        status_info = r_cache.hget(metadata_key, 'status')
        crawler_metadata.append({
            'crawler_info': '{} - {}'.format(crawler, started_time),
            'crawling_domain': crawling_domain,
            'status_info': status_info,
            'status': status_info == 'Waiting' or status_info == 'Crawling',
        })
    # Only real crawlers are reported: the hard-coded "8050"/"8051" sample
    # rows that used to be appended here showed up as phantom crawlers.
    return crawler_metadata
# ============= ROUTES ==============
@hiddenServices.route("/hiddenServices/2", methods=['GET'])
@ -80,36 +145,44 @@ def hiddenServices_page_test():
return render_template("Crawler_index.html")
@hiddenServices.route("/crawlers/crawler_splash_onion", methods=['GET'])
def crawler_splash_onion():
    """Render the automatic onion crawler page.

    Gathers today's up/down/queue counters, the metadata of the domains in
    the ``last_onion`` list and the status of the automatic onion crawlers,
    then renders ``Crawler_Splash_onion.html``.
    """
    last_onions = r_serv_onion.lrange('last_onion', 0, -1)
    date = datetime.datetime.now().strftime("%Y%m%d")

    statDomains = {}
    statDomains['domains_up'] = r_serv_onion.scard('onion_up:{}'.format(date))
    statDomains['domains_down'] = r_serv_onion.scard('onion_down:{}'.format(date))
    statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down']
    statDomains['domains_queue'] = r_serv_onion.scard('onion_domain_crawler_queue')

    # The per-domain metadata comes from the shared helper; the inline loop
    # that built the same list by hand was dead code (its result was
    # overwritten by this call).
    list_onion = get_last_crawled_domains_metadata(last_onions, date, type='onion')
    crawler_metadata = get_crawler_splash_status('automatic', 'onion')

    # YYYYMMDD -> YYYY-MM-DD for the date pickers in the template.
    date_string = '{}-{}-{}'.format(date[0:4], date[4:6], date[6:8])
    return render_template("Crawler_Splash_onion.html", last_onions=list_onion, statDomains=statDomains,
                           crawler_metadata=crawler_metadata, date_from=date_string, date_to=date_string)
@hiddenServices.route("/crawlers/manual_splash_crawler", methods=['GET'])
def manual_splash_crawler():
now = datetime.datetime.now()
date = '{}{}{}'.format(now.strftime("%Y"), now.strftime("%m"), now.strftime("%d"))
# Stats
# user request == CHECK
# preconf crawlers == ?????
#################################################################################
statDomains = {}
#statDomains['domains_up'] = r_serv_onion.scard('onion_up:{}'.format(date))
#statDomains['domains_down'] = r_serv_onion.scard('onion_down:{}'.format(date))
#statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down']
#statDomains['domains_queue'] = r_serv_onion.scard('onion_domain_crawler_queue')
####################################################################################
last_crawled = r_serv_onion.lrange('last_crawled_manual', 0 ,-1)
list_crawled = get_last_crawled_domains_metadata(last_crawled)
crawler_metadata=[]
all_onion_crawler = r_cache.smembers('all_crawler:onion')
@ -125,16 +198,81 @@ def hiddenServices_page_l():
crawler_metadata.append({'crawler_info': crawler_info, 'crawling_domain': crawling_domain, 'status_info': status_info, 'status': status})
date_string = '{}-{}-{}'.format(date[0:4], date[4:6], date[6:8])
return render_template("Crawler_Splash_onion.html", last_onions=list_onion, statDomains=statDomains,
return render_template("Crawler_Splash_onion.html", last_crawled=list_crawled, statDomains=statDomains,
crawler_metadata=crawler_metadata, date_from=date_string, date_to=date_string)
@hiddenServices.route("/crawlers/blacklisted_onion", methods=['GET'])
def blacklisted_onion():
    """Render one page of the onion blacklist.

    Query arguments:

    * ``page``: 1-based page number; missing, non-numeric or out-of-range
      values are clamped to the valid range.
    * ``blacklist_onion`` / ``unblacklist_onion``: optional integer status
      codes forwarded to the template (set by the redirecting views).

    Each page shows two columns of 1000 entries, i.e. 2000 entries per page.
    """
    # ``type=int`` falls back to the default on a missing or non-numeric
    # value instead of raising ValueError (previously an HTTP 500).
    blacklist_status = request.args.get('blacklist_onion', type=int)
    unblacklist_status = request.args.get('unblacklist_onion', type=int)
    page = request.args.get('page', default=1, type=int)

    column_size = 1000
    page_size = 2 * column_size

    # Redis sets are unordered: sort so a given page always shows the same
    # entries across requests.
    list_blacklisted = sorted(r_serv_onion.smembers('blacklist_onion'))

    # Integer ceiling division; always expose at least one (possibly empty) page.
    nb_page_max = max(1, (len(list_blacklisted) + page_size - 1) // page_size)
    page = min(max(page, 1), nb_page_max)

    # Advance by a full page (both columns) so consecutive pages never overlap.
    start = page_size * (page - 1)
    middle = start + column_size
    list_blacklisted_1 = list_blacklisted[start:middle]
    list_blacklisted_2 = list_blacklisted[middle:start + page_size]

    return render_template("blacklisted_onion.html", list_blacklisted_1=list_blacklisted_1, list_blacklisted_2=list_blacklisted_2,
                           page=page, nb_page_max=nb_page_max,
                           blacklist_onion=blacklist_status, unblacklist_onion=unblacklist_status)
@hiddenServices.route("/crawler/blacklist_onion", methods=['GET'])
def blacklist_onion():
    """Add the ``onion`` query argument to the blacklist, then redirect.

    Redirects to the blacklist page with ``blacklist_onion`` set to:

    * ``0``: the address is missing or not a valid onion domain,
    * ``1``: the address was added,
    * ``2``: the address was already blacklisted.
    """
    # NOTE(review): this changes state on a GET request; consider POST.
    onion = request.args.get('onion')
    # Fall back to page 1 on a missing or non-numeric value.
    page = request.args.get('page', default=1, type=int)

    if not onion or not is_valid_onion_domain(onion):
        status = 0
    elif r_serv_onion.sadd('blacklist_onion', onion) == 0:
        # SADD returns 0 when the member was already in the set.
        status = 2
    else:
        status = 1
    # Always redirect: returning nothing (as happened for page=0) makes
    # Flask answer with an HTTP 500.
    return redirect(url_for('hiddenServices.blacklisted_onion', page=page, blacklist_onion=status))
@hiddenServices.route("/crawler/unblacklist_onion", methods=['GET'])
def unblacklist_onion():
    """Remove the ``onion`` query argument from the blacklist, then redirect.

    Redirects to the blacklist page with ``unblacklist_onion`` set to:

    * ``0``: the address is missing or not a valid onion domain,
    * ``1``: the address was removed,
    * ``2``: the address was not blacklisted.
    """
    # NOTE(review): this changes state on a GET request; consider POST.
    onion = request.args.get('onion')
    # Fall back to page 1 on a missing or non-numeric value.
    page = request.args.get('page', default=1, type=int)

    if not onion or not is_valid_onion_domain(onion):
        status = 0
    elif r_serv_onion.srem('blacklist_onion', onion) == 0:
        # SREM returns 0 when the member was not in the set.
        status = 2
    else:
        status = 1
    # Always redirect: returning nothing (as happened for page=0) makes
    # Flask answer with an HTTP 500.
    return redirect(url_for('hiddenServices.blacklisted_onion', page=page, unblacklist_onion=status))
@hiddenServices.route("/hiddenServices/", methods=['GET'])
def hiddenServices_page():
last_onions = r_serv_onion.lrange('last_onion', 0 ,-1)
list_onion = []
now = datetime.datetime.now()
date = '{}{}{}'.format(now.strftime("%Y"), now.strftime("%m"), now.strftime("%d"))
date = now.strftime("%Y%m%d")
statDomains = {}
statDomains['domains_up'] = r_serv_onion.scard('onion_up:{}'.format(date))
statDomains['domains_down'] = r_serv_onion.scard('onion_down:{}'.format(date))

View file

@ -69,14 +69,13 @@
</li>
<li class="nav-item">
<a class="nav-link active" href="#">
<i class="fas fa-search"></i>
<i class="fas fa-sync"></i>
Automatic Onion Crawler
</a>
</li>
<li class="nav-item">
<a class="nav-link" href="#">
<i class="fas fa-search"></i>
<i class="fas fa-clock"></i>
Manual Splash Crawler
</a>
</li>
@ -116,6 +115,10 @@
</table>
</div>
<a href="{{ url_for('hiddenServices.blacklisted_onion') }}">
<button type="button" class="btn btn-outline-danger">Show Blacklisted Onion</button>
</a>
</div>
<div class="col-12 col-xl-6">
@ -178,8 +181,30 @@
<div id="barchart_type">
</div>
<!-- Crawlers Status card: one table row per entry of crawler_metadata passed by the view -->
<div class="card mt-1 mb-1">
<div class="card-header text-white bg-dark">
Crawlers Status
</div>
<div class="card-body px-0 py-0 ">
<table class="table">
<!-- This tbody carries the id tbody_crawler_info -->
<tbody id="tbody_crawler_info">
{% for crawler in crawler_metadata %}
<tr>
<td>
<!-- Green check icon when the crawler status flag is set, red cross otherwise -->
<i class="fas fa-{%if crawler['status']%}check{%else%}times{%endif%}-circle" style="color:{%if crawler['status']%}Green{%else%}Red{%endif%};"></i> {{crawler['crawler_info']}}
</td>
<td>
{{crawler['crawling_domain']}}
</td>
<!-- Status text uses the same green/red colouring as the icon -->
<td style="color:{%if crawler['status']%}Green{%else%}Red{%endif%};">
{{crawler['status_info']}}
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
</div>
</div>
@ -283,7 +308,7 @@ function refresh_list_crawled(){
}
var newCell = newRow.insertCell(0);
newCell.innerHTML = "<td><i class=\"fa fa-"+icon+"-circle fa-2x\" style=\"color:"+text_color+";\"></i>"+crawler['crawler_info']+"</td>";
newCell.innerHTML = "<td><i class=\"fa fa-"+icon+"-circle\" style=\"color:"+text_color+";\"></i>"+crawler['crawler_info']+"</td>";
newCell = newRow.insertCell(1);
newCell.innerHTML = "<td><a target=\"_blank\" href=\"{{ url_for('hiddenServices.onion_domain') }}?onion_domain="+crawler['crawling_domain']+"\">"+crawler['crawling_domain']+"</a></td>";

View file

@ -0,0 +1,225 @@
<!DOCTYPE html>
<html>
<head>
<title>AIL-Framework</title>
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png')}}">
<!-- Core CSS: Bootstrap 4, Font Awesome and the DataTables Bootstrap theme, all served from the static folder -->
<!-- NOTE(review): the page markup uses "fas" icon classes; confirm the bundled font-awesome.min.css provides them -->
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/dataTables.bootstrap.min.css') }}" rel="stylesheet">
<!-- JS: jQuery is loaded first because the DataTables scripts and the inline page script rely on $ -->
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
<script src="{{ url_for('static', filename='js/jquery.dataTables.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/dataTables.bootstrap.min.js')}}"></script>
</head>
<body>
{% include 'nav_bar.html' %}
<div class="container-fluid">
<div class="row">
<!-- Sidebar: Splash Crawlers navigation column -->
<div class="col-12 col-lg-2 p-0 bg-light border-right">
<nav class="navbar navbar-expand navbar-light bg-light flex-md-column flex-row align-items-start py-2">
<h5 class="d-flex text-muted w-100">
<span>Splash Crawlers </span>
<!-- Placeholder link: the plus icon points to "#" -->
<a class="ml-auto" href="#">
<i class="fas fa-plus-circle ml-auto"></i>
</a>
</h5>
<ul class="nav flex-md-column flex-row navbar-nav justify-content-between w-100">
<li class="nav-item">
<!-- Placeholder link: Dashboard points to "#" -->
<a class="nav-link" href="#">
<i class="fas fa-search"></i>
<span>Dashboard</span>
</a>
</li>
<li class="nav-item">
<!-- Links to the crawler_splash_onion view of the hiddenServices blueprint -->
<a class="nav-link" href="{{ url_for('hiddenServices.crawler_splash_onion') }}">
<i class="fas fa-sync"></i>
Automatic Onion Crawler
</a>
</li>
<li class="nav-item">
<!-- Placeholder link: Manual Splash Crawler points to "#" -->
<a class="nav-link" href="#">
<i class="fas fa-clock"></i>
Manual Splash Crawler
</a>
</li>
</ul>
</nav>
</div>
<div class="col-12 col-lg-10">
<div class="card-deck justify-content-center mx-0">
<div class="card border-dark mt-2">
<div class="card-header bg-dark text-white">
Blacklisted Onions
</div>
<div class="card-body text-dark">
<!-- Blacklist / Unblacklist forms.
     Status codes passed back by the views: 1 = done, 2 = no change needed, anything else = invalid address.
     The typed address is passed through encodeURIComponent so characters such as "&", "#" or spaces
     cannot truncate or alter the query string. -->
<div class="row">
  <div class="col-12 col-md-6">
    <div class="card text-center border-danger">
      <div class="card-body text-danger">
        <h5 class="card-title">Blacklist Onion</h5>
        <input type="text" class="form-control {%if blacklist_onion is not none %}{%if blacklist_onion==1 %}is-valid{% else %}is-invalid{%endif%}{%endif%}" id="blacklist_onion_input" placeholder="Onion Address">
        <div class="invalid-feedback">
          {%if blacklist_onion==2 %}
            This Onion is already blacklisted
          {% else %}
            Incorrect Onion address
          {% endif %}
        </div>
        <div class="valid-feedback">
          Onion Blacklisted
        </div>
        <button type="button" class="btn btn-danger mt-2" onclick="window.location.href ='{{ url_for('hiddenServices.blacklist_onion') }}?redirect=0&onion='+encodeURIComponent($('#blacklist_onion_input').val());">Blacklist Onion</button>
      </div>
    </div>
  </div>
  <div class="col-12 col-md-6 mt-4 mt-md-0">
    <div class="card text-center border-success">
      <div class="card-body">
        <h5 class="card-title">Unblacklist Onion</h5>
        <input type="text" class="form-control {%if unblacklist_onion is not none %}{%if unblacklist_onion==1 %}is-valid{% else %}is-invalid{%endif%}{%endif%}" id="unblacklist_onion_input" placeholder="Onion Address">
        <div class="invalid-feedback">
          {%if unblacklist_onion==2 %}
            This Onion is not blacklisted
          {% else %}
            Incorrect Onion address
          {% endif %}
        </div>
        <div class="valid-feedback">
          Onion Unblacklisted
        </div>
        <button type="button" class="btn btn-outline-secondary mt-2" onclick="window.location.href ='{{ url_for('hiddenServices.unblacklist_onion') }}?redirect=0&onion='+encodeURIComponent($('#unblacklist_onion_input').val());">Unblacklist Onion</button>
      </div>
    </div>
  </div>
</div>
<!-- Two side-by-side tables (myTable_1 / myTable_2), one per half of the current page.
     Links are built with url_for keyword arguments so page and onion are URL-encoded,
     and the action is a link styled as a button rather than a button nested in a link. -->
<div class="row mt-4">
  {% for table_id, onions in [('myTable_1', list_blacklisted_1), ('myTable_2', list_blacklisted_2)] %}
  <div class="col-12 col-xl-6">
    <table class="table table-striped table-bordered table-hover" id="{{ table_id }}">
      <thead class="thead-dark">
        <tr>
          <th style="max-width: 800px;">Onion</th>
          <th style="max-width: 800px;">Unblacklist Onion</th>
        </tr>
      </thead>
      <tbody>
        {% for onion in onions %}
        <tr>
          <td>{{onion}}</td>
          <td>
            <a class="btn btn-outline-danger" role="button" href="{{ url_for('hiddenServices.unblacklist_onion', page=page, onion=onion) }}">UnBlacklist Onion</a>
          </td>
        </tr>
        {% endfor %}
      </tbody>
    </table>
  </div>
  {% endfor %}
</div>
</div>
</div>
</div>
<!-- Pagination: Previous / window of pages around the current one / Next.
     Previous and Next are flagged aria-disabled (and removed from the tab order) only on the
     first and last page respectively; page numbers are passed as url_for keyword arguments. -->
<div class="d-flex justify-content-center">
  <nav class="mt-4" aria-label="Blacklisted onions pagination">
    <ul class="pagination">
      <li class="page-item {%if page==1%}disabled{%endif%}">
        <a class="page-link" href="{{ url_for('hiddenServices.blacklisted_onion', page=page-1) }}"{%if page==1%} tabindex="-1" aria-disabled="true"{%endif%}>Previous</a>
      </li>

      {%if page>3%}
        <li class="page-item"><a class="page-link" href="{{ url_for('hiddenServices.blacklisted_onion', page=1) }}">1</a></li>
        <li class="page-item disabled"><a class="page-link" aria-disabled="true" href="#">...</a></li>
        <li class="page-item"><a class="page-link" href="{{ url_for('hiddenServices.blacklisted_onion', page=page-1) }}">{{page-1}}</a></li>
        <li class="page-item active"><a class="page-link" href="{{ url_for('hiddenServices.blacklisted_onion', page=page) }}">{{page}}</a></li>
      {%else%}
        {%if page>2%}<li class="page-item"><a class="page-link" href="{{ url_for('hiddenServices.blacklisted_onion', page=page-2) }}">{{page-2}}</a></li>{%endif%}
        {%if page>1%}<li class="page-item"><a class="page-link" href="{{ url_for('hiddenServices.blacklisted_onion', page=page-1) }}">{{page-1}}</a></li>{%endif%}
        <li class="page-item active"><a class="page-link" href="{{ url_for('hiddenServices.blacklisted_onion', page=page) }}">{{page}}</a></li>
      {%endif%}

      {%if nb_page_max-page>3%}
        <li class="page-item"><a class="page-link" href="{{ url_for('hiddenServices.blacklisted_onion', page=page+1) }}">{{page+1}}</a></li>
        <li class="page-item disabled"><a class="page-link" aria-disabled="true" href="#">...</a></li>
        <li class="page-item"><a class="page-link" href="{{ url_for('hiddenServices.blacklisted_onion', page=nb_page_max) }}">{{nb_page_max}}</a></li>
      {%else%}
        {%if nb_page_max-page>2%}<li class="page-item"><a class="page-link" href="{{ url_for('hiddenServices.blacklisted_onion', page=nb_page_max-2) }}">{{nb_page_max-2}}</a></li>{%endif%}
        {%if nb_page_max-page>1%}<li class="page-item"><a class="page-link" href="{{ url_for('hiddenServices.blacklisted_onion', page=nb_page_max-1) }}">{{nb_page_max-1}}</a></li>{%endif%}
        {%if nb_page_max-page>0%}<li class="page-item"><a class="page-link" href="{{ url_for('hiddenServices.blacklisted_onion', page=nb_page_max) }}">{{nb_page_max}}</a></li>{%endif%}
      {%endif%}

      <li class="page-item {%if page==nb_page_max%}disabled{%endif%}">
        <a class="page-link" href="{{ url_for('hiddenServices.blacklisted_onion', page=page+1) }}"{%if page==nb_page_max%} tabindex="-1" aria-disabled="true"{%endif%}>Next</a>
      </li>
    </ul>
  </nav>
</div>
</div>
</div>
</div>
</body>
<script>
  // Global handle on the most recently initialised DataTable
  // (after start-up this is the one attached to #myTable_2).
  var table;

  $(document).ready(function () {
    // Both tables use the same options: sort on the first column, ascending.
    ['#myTable_1', '#myTable_2'].forEach(function (selector) {
      table = $(selector).DataTable({
        "order": [[ 0, "asc" ]]
      });
    });
  });
</script>