mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-10 00:28:22 +00:00
chg: [UI] basic navbar + sidebar + refractor
This commit is contained in:
parent
516238025f
commit
c2885589cf
5 changed files with 192 additions and 64 deletions
118
bin/Crawler.py
118
bin/Crawler.py
|
@ -16,6 +16,47 @@ sys.path.append(os.environ['AIL_BIN'])
|
||||||
from Helper import Process
|
from Helper import Process
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
|
# ======== GLOBAL VARIABLES ========
|
||||||
|
publisher.port = 6380
|
||||||
|
publisher.channel = "Script"
|
||||||
|
|
||||||
|
config_section = 'Crawler'
|
||||||
|
|
||||||
|
# Setup the I/O queues
|
||||||
|
p = Process(config_section)
|
||||||
|
|
||||||
|
accepted_services = ['onion', 'regular']
|
||||||
|
|
||||||
|
dic_regex = {}
|
||||||
|
dic_regex['onion'] = "((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.onion)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
|
||||||
|
re.compile(dic_regex['onion'])
|
||||||
|
dic_regex['i2p'] = "((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.i2p)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
|
||||||
|
re.compile(dic_regex['i2p'])
|
||||||
|
dic_regex['regular'] = dic_regex['i2p']
|
||||||
|
|
||||||
|
faup = Faup()
|
||||||
|
|
||||||
|
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes"))
|
||||||
|
|
||||||
|
r_serv_metadata = redis.StrictRedis(
|
||||||
|
host=p.config.get("ARDB_Metadata", "host"),
|
||||||
|
port=p.config.getint("ARDB_Metadata", "port"),
|
||||||
|
db=p.config.getint("ARDB_Metadata", "db"),
|
||||||
|
decode_responses=True)
|
||||||
|
|
||||||
|
r_cache = redis.StrictRedis(
|
||||||
|
host=p.config.get("Redis_Cache", "host"),
|
||||||
|
port=p.config.getint("Redis_Cache", "port"),
|
||||||
|
db=p.config.getint("Redis_Cache", "db"),
|
||||||
|
decode_responses=True)
|
||||||
|
|
||||||
|
r_onion = redis.StrictRedis(
|
||||||
|
host=p.config.get("ARDB_Onion", "host"),
|
||||||
|
port=p.config.getint("ARDB_Onion", "port"),
|
||||||
|
db=p.config.getint("ARDB_Onion", "db"),
|
||||||
|
decode_responses=True)
|
||||||
|
|
||||||
|
# ======== FUNCTIONS ========
|
||||||
def decode_val(value):
|
def decode_val(value):
|
||||||
if value is not None:
|
if value is not None:
|
||||||
value = value.decode()
|
value = value.decode()
|
||||||
|
@ -105,7 +146,7 @@ def crawl_onion(url, domain, date, date_month, message):
|
||||||
r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Crawling')
|
r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Crawling')
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
|
# ======== MAIN ========
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
if len(sys.argv) != 3:
|
if len(sys.argv) != 3:
|
||||||
|
@ -119,83 +160,38 @@ if __name__ == '__main__':
|
||||||
if mode == 'automatic':
|
if mode == 'automatic':
|
||||||
type_hidden_service = 'onion'
|
type_hidden_service = 'onion'
|
||||||
|
|
||||||
publisher.port = 6380
|
# verify crawler type (type_hidden_service)
|
||||||
publisher.channel = "Script"
|
if type_hidden_service not in accepted_services:
|
||||||
|
|
||||||
publisher.info("Script Crawler started")
|
|
||||||
|
|
||||||
config_section = 'Crawler'
|
|
||||||
|
|
||||||
# Setup the I/O queues
|
|
||||||
p = Process(config_section)
|
|
||||||
|
|
||||||
accepted_services = ['onion', 'regular']
|
|
||||||
|
|
||||||
dic_regex = {}
|
|
||||||
dic_regex['onion'] = "((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.onion)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
|
|
||||||
re.compile(dic_regex['onion'])
|
|
||||||
dic_regex['i2p'] = "((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.i2p)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
|
|
||||||
re.compile(dic_regex['i2p'])
|
|
||||||
dic_regex['regular'] = dic_regex['i2p']
|
|
||||||
|
|
||||||
|
|
||||||
url_onion = "((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.onion)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
|
|
||||||
re.compile(url_onion)
|
|
||||||
url_i2p = "((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.i2p)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
|
|
||||||
re.compile(url_i2p)
|
|
||||||
|
|
||||||
if type_hidden_service == 'onion':
|
|
||||||
regex_hidden_service = url_onion
|
|
||||||
elif type_hidden_service == 'i2p':
|
|
||||||
regex_hidden_service = url_i2p
|
|
||||||
elif type_hidden_service == 'regular':
|
|
||||||
regex_hidden_service = url_i2p
|
|
||||||
else:
|
|
||||||
print('incorrect crawler type: {}'.format(type_hidden_service))
|
print('incorrect crawler type: {}'.format(type_hidden_service))
|
||||||
exit(0)
|
exit(0)
|
||||||
|
else:
|
||||||
|
publisher.info("Script Crawler started")
|
||||||
|
|
||||||
|
# load domains blacklist
|
||||||
|
load_type_blacklist(type_hidden_service)
|
||||||
|
|
||||||
splash_url = '{}:{}'.format( p.config.get("Crawler", "splash_url_onion"), splash_port)
|
splash_url = '{}:{}'.format( p.config.get("Crawler", "splash_url_onion"), splash_port)
|
||||||
print('splash url: {}'.format(splash_url))
|
print('splash url: {}'.format(splash_url))
|
||||||
|
|
||||||
crawler_depth_limit = p.config.getint("Crawler", "crawler_depth_limit")
|
crawler_depth_limit = p.config.getint("Crawler", "crawler_depth_limit")
|
||||||
faup = Faup()
|
|
||||||
|
|
||||||
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes"))
|
|
||||||
|
|
||||||
r_serv_metadata = redis.StrictRedis(
|
|
||||||
host=p.config.get("ARDB_Metadata", "host"),
|
|
||||||
port=p.config.getint("ARDB_Metadata", "port"),
|
|
||||||
db=p.config.getint("ARDB_Metadata", "db"),
|
|
||||||
decode_responses=True)
|
|
||||||
|
|
||||||
r_cache = redis.StrictRedis(
|
|
||||||
host=p.config.get("Redis_Cache", "host"),
|
|
||||||
port=p.config.getint("Redis_Cache", "port"),
|
|
||||||
db=p.config.getint("Redis_Cache", "db"),
|
|
||||||
decode_responses=True)
|
|
||||||
|
|
||||||
r_onion = redis.StrictRedis(
|
|
||||||
host=p.config.get("ARDB_Onion", "host"),
|
|
||||||
port=p.config.getint("ARDB_Onion", "port"),
|
|
||||||
db=p.config.getint("ARDB_Onion", "db"),
|
|
||||||
decode_responses=True)
|
|
||||||
|
|
||||||
# Crawler status
|
# Crawler status
|
||||||
r_cache.sadd('all_crawler:{}'.format(type_hidden_service), splash_port)
|
r_cache.sadd('all_crawler:{}'.format(type_hidden_service), splash_port)
|
||||||
r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Waiting')
|
r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Waiting')
|
||||||
r_cache.hset('metadata_crawler:{}'.format(splash_port), 'started_time', datetime.datetime.now().strftime("%Y/%m/%d - %H:%M.%S"))
|
r_cache.hset('metadata_crawler:{}'.format(splash_port), 'started_time', datetime.datetime.now().strftime("%Y/%m/%d - %H:%M.%S"))
|
||||||
|
|
||||||
# load domains blacklist
|
|
||||||
load_type_blacklist(type_hidden_service)
|
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
|
|
||||||
# Priority Queue - Recovering the streamed message informations.
|
if mode == 'automatic':
|
||||||
message = r_onion.spop('{}_crawler_priority_queue'.format(type_hidden_service))
|
# Priority Queue - Recovering the streamed message informations.
|
||||||
|
message = r_onion.spop('{}_crawler_priority_queue'.format(type_hidden_service))
|
||||||
|
|
||||||
if message is None:
|
if message is None:
|
||||||
# Recovering the streamed message informations.
|
# Recovering the streamed message informations.
|
||||||
message = r_onion.spop('{}_crawler_queue'.format(type_hidden_service))
|
message = r_onion.spop('{}_crawler_queue'.format(type_hidden_service))
|
||||||
|
else:
|
||||||
|
pass
|
||||||
|
|
||||||
if message is not None:
|
if message is not None:
|
||||||
|
|
||||||
|
|
|
@ -75,6 +75,10 @@ def get_onion_status(domain, date):
|
||||||
return False
|
return False
|
||||||
# ============= ROUTES ==============
|
# ============= ROUTES ==============
|
||||||
|
|
||||||
|
@hiddenServices.route("/hiddenServices/2", methods=['GET'])
|
||||||
|
def hiddenServices_page_test():
|
||||||
|
return render_template("Crawler_index.html")
|
||||||
|
|
||||||
@hiddenServices.route("/hiddenServices/", methods=['GET'])
|
@hiddenServices.route("/hiddenServices/", methods=['GET'])
|
||||||
def hiddenServices_page():
|
def hiddenServices_page():
|
||||||
last_onions = r_serv_onion.lrange('last_onion', 0 ,-1)
|
last_onions = r_serv_onion.lrange('last_onion', 0 ,-1)
|
||||||
|
|
82
var/www/modules/hiddenServices/templates/Crawler_index.html
Normal file
82
var/www/modules/hiddenServices/templates/Crawler_index.html
Normal file
|
@ -0,0 +1,82 @@
|
||||||
|
<!DOCTYPE html>
|
||||||
|
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>AIL-Framework</title>
|
||||||
|
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png')}}">
|
||||||
|
<!-- Core CSS -->
|
||||||
|
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
|
||||||
|
<link href="{{ url_for('static', filename='font-awesome/css/font-awesome.css') }}" rel="stylesheet">
|
||||||
|
|
||||||
|
<!-- JS -->
|
||||||
|
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
|
||||||
|
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
|
||||||
|
|
||||||
|
<style>
|
||||||
|
|
||||||
|
</style>
|
||||||
|
|
||||||
|
|
||||||
|
</head>
|
||||||
|
|
||||||
|
<body>
|
||||||
|
|
||||||
|
{% include 'nav_bar.html' %}
|
||||||
|
|
||||||
|
<div class="container-fluid">
|
||||||
|
<div class="row">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<nav class="col-md-2 d-none d-md-block bg-light sidebar border-right">
|
||||||
|
<div class="sidebar-sticky">
|
||||||
|
<ul class="nav flex-column">
|
||||||
|
<li class="nav-item">
|
||||||
|
<a class="nav-link active text-dark" href="#">
|
||||||
|
<i class="fa fa-search"></i>
|
||||||
|
Dashboard <span class="sr-only">(current)</span>
|
||||||
|
</a>
|
||||||
|
</li>
|
||||||
|
<li class="nav-item">
|
||||||
|
<a class="nav-link text-secondary" href="#">
|
||||||
|
<i class="fa fa-search"></i>
|
||||||
|
Automatic Splash Crawler
|
||||||
|
</a>
|
||||||
|
</li>
|
||||||
|
<li class="nav-item">
|
||||||
|
<a class="nav-link text-secondary" href="#">
|
||||||
|
<i class="fa fa-search"></i>
|
||||||
|
Manual Splash Crawler
|
||||||
|
</a>
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
</nav>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<div >
|
||||||
|
<pre>
|
||||||
|
--------------
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
--------------
|
||||||
|
|
||||||
|
</pre>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
</body>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
$(document).ready(function(){
|
||||||
|
$("#page-Crawler").addClass("active");
|
||||||
|
});
|
||||||
|
</script>
|
46
var/www/templates/nav_bar.html
Normal file
46
var/www/templates/nav_bar.html
Normal file
|
@ -0,0 +1,46 @@
|
||||||
|
<nav class="navbar navbar-expand-xl navbar-dark bg-dark">
|
||||||
|
<a class="navbar-brand" href="{{ url_for('dashboard.index') }}">
|
||||||
|
<img src="{{ url_for('static', filename='image/ail-icon.png')}}" alt="AIL" style="width:80px;">
|
||||||
|
</a>
|
||||||
|
<button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbarSupportedContent" aria-controls="navbarSupportedContent" aria-expanded="false" aria-label="Toggle navigation">
|
||||||
|
<span class="navbar-toggler-icon"></span>
|
||||||
|
</button>
|
||||||
|
|
||||||
|
|
||||||
|
<div class="collapse navbar-collapse" id="navbarSupportedContent">
|
||||||
|
<ul class="navbar-nav">
|
||||||
|
<li class="nav-item">
|
||||||
|
<a class="nav-link mr-3" href="{{ url_for('dashboard.index') }}">Home <span class="sr-only">(current)</span></a>
|
||||||
|
</li>
|
||||||
|
<li class="nav-item">
|
||||||
|
<a class="nav-link" href="#" aria-disabled="true">Submit</a>
|
||||||
|
</li>
|
||||||
|
<li class="nav-item">
|
||||||
|
<a class="nav-link" href="#" aria-disabled="true">Browse Pastes</a>
|
||||||
|
</li>
|
||||||
|
<li class="nav-item">
|
||||||
|
<a class="nav-link" href="#" aria-disabled="true">Leaks Hunter</a>
|
||||||
|
</li>
|
||||||
|
<li class="nav-item">
|
||||||
|
<a class="nav-link" id="page-Crawler" href="#" tabindex="-1" href="#" aria-disabled="true">Crawlers</a>
|
||||||
|
</li>
|
||||||
|
<li class="nav-item">
|
||||||
|
<a class="nav-link" href="#" aria-disabled="true">Decoded</a>
|
||||||
|
</li>
|
||||||
|
<li class="nav-item">
|
||||||
|
<a class="nav-link" href="#" aria-disabled="true">Statistics</a>
|
||||||
|
</li>
|
||||||
|
<li class="nav-item">
|
||||||
|
<a class="nav-link" href="#" aria-disabled="true">Options</a>
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<form class="form-inline my-2 my-lg-0 ml-auto justify-content-center">
|
||||||
|
<div class="form-group">
|
||||||
|
<input class="form-control mr-sm-2" type="search" id="global_search" placeholder="Search" aria-label="Search" aria-describedby="advanced_search">
|
||||||
|
<small id="advanced_search" class="form-text text-muted">Advanced Search</small>
|
||||||
|
</div>
|
||||||
|
<button class="btn btn-outline-info my-2 my-sm-0" type="submit"><i class="fa fa-search"></i></button>
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
</nav>
|
|
@ -31,9 +31,9 @@ unzip temp/d3_${D3_JS_VERSION}.zip -d temp/
|
||||||
unzip temp/moment_2.22.2.zip -d temp/
|
unzip temp/moment_2.22.2.zip -d temp/
|
||||||
unzip temp/daterangepicker_v0.18.0.zip -d temp/
|
unzip temp/daterangepicker_v0.18.0.zip -d temp/
|
||||||
|
|
||||||
mv temp/bootstrap-${BOOTSTRAP_VERSION}-dist/js/bootstrap.min.js ./static/js/
|
mv temp/bootstrap-${BOOTSTRAP_VERSION}-dist/js/bootstrap.min.js ./static/js/bootstrap4.min.js
|
||||||
mv temp/bootstrap-${BOOTSTRAP_VERSION}-dist/css/bootstrap.min.css ./static/css/
|
mv temp/bootstrap-${BOOTSTRAP_VERSION}-dist/css/bootstrap.min.css ./static/css/bootstrap4.min.css
|
||||||
mv temp/bootstrap-${BOOTSTRAP_VERSION}-dist/css/bootstrap.min.css.map ./static/css/
|
mv temp/bootstrap-${BOOTSTRAP_VERSION}-dist/css/bootstrap.min.css.map ./static/css/bootstrap4.min.css.map
|
||||||
|
|
||||||
mv temp/startbootstrap-sb-admin-${SBADMIN_VERSION} temp/sb-admin
|
mv temp/startbootstrap-sb-admin-${SBADMIN_VERSION} temp/sb-admin
|
||||||
mv temp/startbootstrap-sb-admin-2-${SBADMIN_VERSION} temp/sb-admin-2
|
mv temp/startbootstrap-sb-admin-2-${SBADMIN_VERSION} temp/sb-admin-2
|
||||||
|
|
Loading…
Reference in a new issue