Mirror of https://github.com/ail-project/ail-framework.git (synced 2024-11-13 01:58:22 +00:00)

Commit c0d72e7d2a (parent 7b32d7f34e)
chg: [Crawler UI] Crawler major refactor (end) + basic UI for manual crawler

4 changed files with 310 additions and 23 deletions
@@ -30,6 +30,14 @@ def load_blacklist(service_type):
     except Exception:
         pass

+def update_auto_crawler():
+    current_epoch = int(time.time())
+    list_to_crawl = redis_crawler.zrangebyscore('crawler_auto_queue', '-inf', current_epoch)
+    for elem_to_crawl in list_to_crawl:
+        mess, type = elem_to_crawl.rsplit(';', 1)
+        redis_crawler.sadd('{}_crawler_priority_queue'.format(type), mess)
+        redis_crawler.zrem('crawler_auto_queue', elem_to_crawl)
+
 # Extract info form url (url, domain, domain url, ...)
 def unpack_url(url):
     to_crawl = {}
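The new update_auto_crawler() helper drains every member of the 'crawler_auto_queue' sorted set whose score (a Unix epoch) is already due and hands it back to the per-service priority queue, which makes scheduled re-crawls visible to the main loop. A minimal standalone sketch of the same pattern, assuming a local Redis and redis-py 3.x; the key names follow the diff, the URL and delay are hypothetical:

import time
import redis

r = redis.Redis(host='localhost', port=6379, db=0, decode_responses=True)

# schedule a (hypothetical) crawl 10 seconds from now; the member is "message;service_type"
r.zadd('crawler_auto_queue', {'http://example.onion;onion': int(time.time()) + 10})

def update_auto_crawler():
    # every member whose score (epoch) is <= now is due
    current_epoch = int(time.time())
    for elem_to_crawl in r.zrangebyscore('crawler_auto_queue', '-inf', current_epoch):
        mess, service_type = elem_to_crawl.rsplit(';', 1)
        # hand the message to the priority queue polled by the crawler loop
        r.sadd('{}_crawler_priority_queue'.format(service_type), mess)
        r.zrem('crawler_auto_queue', elem_to_crawl)

update_auto_crawler()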
@@ -76,14 +84,14 @@ def get_elem_to_crawl(rotation_mode):
     for service_type in rotation_mode:
         message = redis_crawler.spop('{}_crawler_priority_queue'.format(service_type))
         if message is not None:
-            domain_service_type = type_service
+            domain_service_type = service_type
             break
     #load_normal_queue
     if message is None:
         for service_type in rotation_mode:
             message = redis_crawler.spop('{}_crawler_queue'.format(service_type))
             if message is not None:
-                domain_service_type = type_service
+                domain_service_type = service_type
                 break

     if message:
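Besides fixing the undefined name (type_service is now the loop variable service_type), this hunk shows the selection order: for each service type the priority queue (manual and auto requests) is polled before the regular discovery queue. A reduced, hypothetical helper illustrating that rotation under the same key-naming assumption:

def get_next_message(redis_crawler, rotation_mode=('onion', 'regular')):
    # 1) priority queues first: manual and auto requests jump the line
    for service_type in rotation_mode:
        message = redis_crawler.spop('{}_crawler_priority_queue'.format(service_type))
        if message is not None:
            return service_type, message
    # 2) otherwise fall back to the regular discovery queues
    for service_type in rotation_mode:
        message = redis_crawler.spop('{}_crawler_queue'.format(service_type))
        if message is not None:
            return service_type, message
    return None, None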
@@ -109,6 +117,10 @@ def get_crawler_config(redis_server, mode, service_type, domain):
             crawler_options[option] = config[option]
         else:
             crawler_options[option] = default_crawler_config[option]
+    if mode == 'auto':
+        crawler_options['time'] = int(config['time'])
+    elif mode == 'manual':
+        redis_server.delete('crawler_config:{}:{}:{}'.format(mode, service_type, domain))
     return crawler_options

 def load_crawler_config(service_type, domain, paste, date):
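get_crawler_config() now also carries the re-crawl interval when the request came from the auto queue, and deletes the one-shot Redis key once a manual request has been read. A minimal sketch of that merge-then-cleanup pattern, assuming the options were stored as JSON under crawler_config:<mode>:<service_type>:<domain>; the default values below are hypothetical, not taken from the source:

import json

DEFAULT_CRAWLER_CONFIG = {'html': 1, 'har': 1, 'png': 1,
                          'depth_limit': 1, 'closespider_pagecount': 1}

def get_crawler_config(redis_server, mode, service_type, domain):
    raw = redis_server.get('crawler_config:{}:{}:{}'.format(mode, service_type, domain))
    config = json.loads(raw) if raw else {}

    # take the user-supplied value when present, the default otherwise
    crawler_options = {}
    for option in DEFAULT_CRAWLER_CONFIG:
        crawler_options[option] = config.get(option, DEFAULT_CRAWLER_CONFIG[option])

    if mode == 'auto':
        # seconds between two automatic crawls of this domain
        crawler_options['time'] = int(config['time'])
    elif mode == 'manual':
        # a manual request is consumed exactly once
        redis_server.delete('crawler_config:{}:{}:{}'.format(mode, service_type, domain))
    return crawler_options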
@@ -239,12 +251,12 @@ def search_potential_source_domain(type_service, domain):

 if __name__ == '__main__':

-    if len(sys.argv) != 3:
-        print('usage:', 'Crawler.py', 'mode', 'splash_port')
+    if len(sys.argv) != 2:
+        print('usage:', 'Crawler.py', 'splash_port')
         exit(1)
     ##################################################
     #mode = sys.argv[1]
-    splash_port = sys.argv[2]
+    splash_port = sys.argv[1]

     rotation_mode = ['onion', 'regular']
     default_proto_map = {'http': 80, 'https': 443}
@@ -303,13 +315,11 @@ if __name__ == '__main__':

     while True:

+        update_auto_crawler()
+
         to_crawl = get_elem_to_crawl(rotation_mode)
         if to_crawl:
-            print(to_crawl)
-            print(to_crawl['url'])
             url_data = unpack_url(to_crawl['url'])
-            print('url')
-            print(url_data)
             # remove domain from queue
             redis_crawler.srem('{}_domain_crawler_queue'.format(to_crawl['type_service']), url_data['domain'])

@@ -328,14 +338,15 @@ if __name__ == '__main__':
                     'date_month': datetime.datetime.now().strftime("%Y%m"),
                     'epoch': int(time.time())}

+            # Update crawler status type
+            r_cache.sadd('{}_crawlers'.format(to_crawl['type_service']), splash_port)

             crawler_config = load_crawler_config(to_crawl['type_service'], url_data['domain'], to_crawl['paste'], date)
-            print(crawler_config)
             # check if default crawler
-            #if not crawler_config['requested']:
-            #    # Auto crawl only if service not up this month
-            #    if redis_crawler.sismember('month_{}_up:{}'.format(to_crawl['type_service'], date['date_month']), url_data['domain']):
-            #        continue
+            if not crawler_config['requested']:
+                # Auto crawl only if service not up this month
+                if redis_crawler.sismember('month_{}_up:{}'.format(to_crawl['type_service'], date['date_month']), url_data['domain']):
+                    continue

             set_crawled_domain_metadata(to_crawl['type_service'], date, url_data['domain'], to_crawl['paste'])

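While a domain is being crawled, the worker now adds its Splash port to a per-service set ({type}_crawlers); the UI's get_crawler_splash_status() iterates that set and reads each metadata_crawler:<port> hash. A minimal sketch of this status bookkeeping, with hypothetical values; the started_time format below is illustrative, not taken from the source:

import datetime
import redis

r_cache = redis.Redis(host='localhost', port=6379, db=0, decode_responses=True)

splash_port = 8050        # hypothetical Splash instance handled by this worker
service_type = 'onion'
domain = 'example.onion'  # hypothetical domain being crawled

# mark the worker as active and record what it is doing
r_cache.sadd('{}_crawlers'.format(service_type), splash_port)
r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Crawling')
r_cache.hset('metadata_crawler:{}'.format(splash_port), 'crawling_domain', domain)
r_cache.hset('metadata_crawler:{}'.format(splash_port), 'started_time',
             datetime.datetime.now().strftime('%Y/%m/%d - %H:%M.%S'))

# ... crawl runs here ...

# back to idle: clear the domain and drop the worker from the active set
r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Waiting')
r_cache.hdel('metadata_crawler:{}'.format(splash_port), 'crawling_domain')
r_cache.srem('{}_crawlers'.format(service_type), splash_port)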
@@ -379,14 +390,20 @@ if __name__ == '__main__':
             ############################

             # update list, last crawled domains
-            redis_crawler.lpush('last_{}'.format(to_crawl['type_service']), url_data['domain'])
+            redis_crawler.lpush('last_{}'.format(to_crawl['type_service']), '{};{}'.format(url_data['domain'], date['epoch']))
             redis_crawler.ltrim('last_{}'.format(to_crawl['type_service']), 0, 15)

             #update crawler status
             r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Waiting')
             r_cache.hdel('metadata_crawler:{}'.format(splash_port), 'crawling_domain')

-            time.sleep(60)
+            # Update crawler status type
+            r_cache.srem('{}_crawlers'.format(to_crawl['type_service']), splash_port)
+
+            # add next auto Crawling in queue:
+            if to_crawl['paste'] == 'auto':
+                redis_crawler.zadd('crawler_auto_queue', int(time.time()+crawler_config['crawler_options']['time']) , '{};{}'.format(to_crawl['original_message'], to_crawl['type_service']))
+
         else:
             print(' Blacklisted Domain')
             print()
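Two end-of-crawl behaviours change here: the last_{type} list now stores 'domain;epoch' pairs so the UI can show when each domain was last crawled, and a job that came from the auto queue re-schedules itself by writing its original message back into crawler_auto_queue with a score of now plus the configured interval. A minimal sketch of both steps, with hypothetical values and redis-py 3.x:

import time
import redis

redis_crawler = redis.Redis(host='localhost', port=6379, db=0, decode_responses=True)

# hypothetical values for illustration
domain = 'example.onion'
epoch = int(time.time())
service_type = 'onion'
original_message = 'http://example.onion;/some/paste/path'
recrawl_delay = 3600  # corresponds to crawler_config['crawler_options']['time']

# keep the 16 most recently crawled domains, tagged with the crawl epoch
redis_crawler.lpush('last_{}'.format(service_type), '{};{}'.format(domain, epoch))
redis_crawler.ltrim('last_{}'.format(service_type), 0, 15)

# an automatic job re-queues itself: due again at now + recrawl_delay
redis_crawler.zadd('crawler_auto_queue',
                   {'{};{}'.format(original_message, service_type): int(time.time()) + recrawl_delay})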
@@ -237,7 +237,7 @@ function launching_crawler {
     sleep 0.1

     for ((i=first_port;i<=last_port;i++)); do
-        screen -S "Crawler_AIL" -X screen -t "onion_crawler:$i" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Crawler.py onion $i; read x"
+        screen -S "Crawler_AIL" -X screen -t "onion_crawler:$i" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Crawler.py $i; read x"
         sleep 0.1
     done

@@ -8,6 +8,7 @@ import redis
 import datetime
 import sys
 import os
+import json
 from pyfaup.faup import Faup
 from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for

@@ -94,13 +95,16 @@ def get_domain_type(domain):

 def get_last_crawled_domains_metadata(list_domains_crawled, date, type=None):
     list_crawled_metadata = []
-    for domain in list_domains_crawled:
+    for domain_epoch in list_domains_crawled:
+        domain, epoch = domain_epoch.rsplit(';', 1)
         metadata_domain = {}
         # get Domain type
         if type is None:
             type = get_domain_type(domain)

         metadata_domain['domain'] = domain
+        metadata_domain['epoch'] = epoch
+        print(epoch)
         metadata_domain['last_check'] = r_serv_onion.hget('{}_metadata:{}'.format(type, domain), 'last_check')
         if metadata_domain['last_check'] is None:
             metadata_domain['last_check'] = '********'
@@ -118,9 +122,9 @@ def get_last_crawled_domains_metadata(list_domains_crawled, date, type=None):
         list_crawled_metadata.append(metadata_domain)
     return list_crawled_metadata

-def get_crawler_splash_status(mode, type):
+def get_crawler_splash_status(type):
     crawler_metadata = []
-    all_crawlers = r_cache.smembers('all_crawler:{}:{}'.format(mode, type))
+    all_crawlers = r_cache.smembers('{}_crawlers'.format(type))
     for crawler in all_crawlers:
         crawling_domain = r_cache.hget('metadata_crawler:{}'.format(crawler), 'crawling_domain')
         started_time = r_cache.hget('metadata_crawler:{}'.format(crawler), 'started_time')
@@ -132,10 +136,21 @@ def get_crawler_splash_status(mode, type):
             status=False
         crawler_metadata.append({'crawler_info': crawler_info, 'crawling_domain': crawling_domain, 'status_info': status_info, 'status': status})

-    crawler_metadata.append({'crawler_info': '8050 - 2019/02/18 - 16:49.54', 'crawling_domain': 'test', 'status_info': 'Crawling', 'status': True})
-    crawler_metadata.append({'crawler_info': '8051 - 2019/02/18 - 16:49.54', 'crawling_domain': 'test', 'status_info': 'Crawling', 'status': True})
     return crawler_metadata

+def create_crawler_config(mode, service_type, crawler_config, domain):
+    print(crawler_config)
+    if mode == 'manual':
+        r_cache.set('crawler_config:{}:{}:{}'.format(mode, service_type, domain), json.dumps(crawler_config))
+    elif mode == 'auto':
+        r_serv_onion.set('crawler_config:{}:{}:{}'.format(mode, service_type, domain), json.dumps(crawler_config))
+
+def send_url_to_crawl_in_queue(mode, service_type, url):
+    r_serv_onion.sadd('{}_crawler_priority_queue'.format(service_type), '{};{}'.format(url, mode))
+    # add auto crawled url for user UI
+    if mode == 'auto':
+        r_serv_onion.sadd('auto_crawler_url:{}'.format(service_type), url)
+
 # ============= ROUTES ==============

 @hiddenServices.route("/hiddenServices/2", methods=['GET'])
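The two new helpers split responsibilities: create_crawler_config() stores the user options as JSON (in the cache DB for a one-shot manual crawl, in the onion DB for a persistent auto crawl), and send_url_to_crawl_in_queue() pushes 'url;mode' onto the priority queue that Crawler.py polls. A minimal usage sketch with hypothetical Redis handles and values, assuming redis-py 3.x:

import json
import redis

# hypothetical handles standing in for AIL's r_cache / r_serv_onion
r_cache = redis.Redis(host='localhost', port=6379, db=0, decode_responses=True)
r_serv_onion = redis.Redis(host='localhost', port=6379, db=1, decode_responses=True)

def create_crawler_config(mode, service_type, crawler_config, domain):
    key = 'crawler_config:{}:{}:{}'.format(mode, service_type, domain)
    if mode == 'manual':
        r_cache.set(key, json.dumps(crawler_config))        # consumed once by the crawler
    elif mode == 'auto':
        r_serv_onion.set(key, json.dumps(crawler_config))   # persists between re-crawls

def send_url_to_crawl_in_queue(mode, service_type, url):
    r_serv_onion.sadd('{}_crawler_priority_queue'.format(service_type), '{};{}'.format(url, mode))
    if mode == 'auto':
        # remembered so the UI can list the URLs under automatic crawling
        r_serv_onion.sadd('auto_crawler_url:{}'.format(service_type), url)

# queue a one-shot manual crawl of a hypothetical hidden service
create_crawler_config('manual', 'onion', {'depth_limit': 1, 'closespider_pagecount': 1}, 'example.onion')
send_url_to_crawl_in_queue('manual', 'onion', 'http://example.onion')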
@@ -160,7 +175,7 @@ def crawler_splash_onion():
     statDomains['domains_queue'] = r_serv_onion.scard('onion_domain_crawler_queue')

     list_onion = get_last_crawled_domains_metadata(last_onions, date, type='onion')
-    crawler_metadata = get_crawler_splash_status('automatic', 'onion')
+    crawler_metadata = get_crawler_splash_status('onion')

     date_string = '{}-{}-{}'.format(date[0:4], date[4:6], date[6:8])
     return render_template("Crawler_Splash_onion.html", last_onions=list_onion, statDomains=statDomains,
@@ -267,6 +282,81 @@ def unblacklist_onion():
     else:
         return redirect(url_for('hiddenServices.blacklisted_onion', page=page, unblacklist_onion=0))

+@hiddenServices.route("/crawlers/create_spider_splash", methods=['POST'])
+def create_spider_splash():
+    url = request.form.get('url_to_crawl')
+    automatic = request.form.get('crawler_type')
+    crawler_time = request.form.get('crawler_epoch')
+    #html = request.form.get('html_content_id')
+    screenshot = request.form.get('screenshot')
+    har = request.form.get('har')
+    depth_limit = request.form.get('depth_limit')
+    max_pages = request.form.get('max_pages')
+
+    # validate url
+    if url is None or url=='' or url=='\n':
+        return 'incorrect url'
+
+    crawler_config = {}
+
+    # verify user input
+    if automatic:
+        automatic = True
+    else:
+        automatic = False
+    if not screenshot:
+        crawler_config['png'] = 0
+    if not har:
+        crawler_config['har'] = 0
+
+    # verify user input
+    if depth_limit:
+        try:
+            depth_limit = int(depth_limit)
+            if depth_limit < 0:
+                return 'incorrect depth_limit'
+            else:
+                crawler_config['depth_limit'] = depth_limit
+        except:
+            return 'incorrect depth_limit'
+    if max_pages:
+        try:
+            max_pages = int(max_pages)
+            if max_pages < 1:
+                return 'incorrect max_pages'
+            else:
+                crawler_config['closespider_pagecount'] = max_pages
+        except:
+            return 'incorrect max_pages'
+
+    # get service_type
+    faup.decode(url)
+    unpack_url = faup.get()
+    domain = unpack_url['domain'].decode()
+    if unpack_url['tld'] == b'onion':
+        service_type = 'onion'
+    else:
+        service_type = 'regular'
+
+    if automatic:
+        mode = 'auto'
+        try:
+            crawler_time = int(crawler_time)
+            if crawler_time < 0:
+                return 'incorrect epoch'
+            else:
+                crawler_config['time'] = crawler_time
+        except:
+            return 'incorrect epoch'
+    else:
+        mode = 'manual'
+        epoch = None
+
+    create_crawler_config(mode, service_type, crawler_config, domain)
+    send_url_to_crawl_in_queue(mode, service_type, url)
+
+    return redirect(url_for('hiddenServices.manual'))
+
 @hiddenServices.route("/hiddenServices/", methods=['GET'])
 def hiddenServices_page():
     last_onions = r_serv_onion.lrange('last_onion', 0 ,-1)
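The new /crawlers/create_spider_splash endpoint is what the manual-crawler form posts to: it validates the fields, uses faup to pick 'onion' or 'regular' from the TLD, stores the options via create_crawler_config() and queues the URL via send_url_to_crawl_in_queue(). A hedged sketch of driving the endpoint from a script instead of the form; host, port and target URL are hypothetical, and the instance is assumed to be reachable without additional authentication:

import requests  # third-party HTTP client, assumed available

# hypothetical AIL Flask instance
AIL_URL = 'http://127.0.0.1:7000'

# field names match the template: url_to_crawl, crawler_type, crawler_epoch,
# screenshot, har, depth_limit, max_pages
form = {
    'url_to_crawl': 'http://example.onion',  # hypothetical target
    'crawler_type': 'True',                  # present => automatic mode
    'crawler_epoch': '3600',                 # seconds between two auto crawls
    'screenshot': 'True',
    'har': 'True',
    'depth_limit': '1',
    'max_pages': '10',
}

resp = requests.post('{}/crawlers/create_spider_splash'.format(AIL_URL),
                     data=form, allow_redirects=False)
print(resp.status_code)  # expect a redirect on success, an error string otherwise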
@@ -0,0 +1,180 @@
+<!DOCTYPE html>
+
+<html>
+<head>
+  <title>AIL-Framework</title>
+  <link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png')}}">
+  <!-- Core CSS -->
+  <link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
+  <link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
+  <link href="{{ url_for('static', filename='css/daterangepicker.min.css') }}" rel="stylesheet">
+
+  <!-- JS -->
+  <script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
+  <script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
+  <script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
+  <script language="javascript" src="{{ url_for('static', filename='js/moment.min.js') }}"></script>
+  <script language="javascript" src="{{ url_for('static', filename='js/jquery.daterangepicker.min.js') }}"></script>
+  <script language="javascript" src="{{ url_for('static', filename='js/d3.min.js') }}"></script>
+
+</head>
+
+<body>
+
+  {% include 'nav_bar.html' %}
+
+  <div class="container-fluid">
+    <div class="row">
+
+      <div class="col-12 col-lg-2 p-0 bg-light border-right">
+
+
+        <nav class="navbar navbar-expand navbar-light bg-light flex-md-column flex-row align-items-start py-2">
+          <h5 class="d-flex text-muted w-100">
+            <span>Splash Crawlers </span>
+            <a class="ml-auto" href="#">
+              <i class="fas fa-plus-circle ml-auto"></i>
+            </a>
+          </h5>
+          <ul class="nav flex-md-column flex-row navbar-nav justify-content-between w-100"> <!--nav-pills-->
+            <li class="nav-item">
+              <a class="nav-link" href="#">
+                <i class="fas fa-search"></i>
+                <span>Dashboard</span>
+              </a>
+            </li>
+            <li class="nav-item">
+              <a class="nav-link active" href="#">
+                <i class="fas fa-sync"></i>
+                Automatic Onion Crawler
+              </a>
+            </li>
+            <li class="nav-item">
+              <a class="nav-link" href="#">
+                <i class="fas fa-clock"></i>
+                Manual Splash Crawler
+              </a>
+            </li>
+          </ul>
+        </nav>
+      </div>
+
+      <div class="col-12 col-lg-10">
+
+
+        <div class="card text-white bg-dark mb-3 mt-1">
+          <div class="card-header">
+            <h5 class="card-title">Crawl a Domain</h5>
+          </div>
+          <div class="card-body">
+            <p class="card-text">Enter a domain and choose what kind of data you want.</p>
+            <form action="{{ url_for('hiddenServices.create_spider_splash') }}" method='post'>
+              <div class="row">
+                <div class="col-12 col-lg-6">
+                  <div class="input-group" id="date-range-from">
+                    <input type="text" class="form-control" id="url_to_crawl" name="url_to_crawl" placeholder="Address or Domain">
+                  </div>
+                  <div class="d-flex mt-1">
+                    <i class="fas fa-user-ninja mt-1"></i> Manual
+                    <div class="custom-control custom-switch">
+                      <input class="custom-control-input" type="checkbox" name="crawler_type" value="True" id="crawler_type">
+                      <label class="custom-control-label" for="crawler_type">
+                        <i class="fas fa-clock"></i> Automatic
+                      </label>
+                    </div>
+                  </div>
+                  <div class="input-group mt-2 mb-2" id="crawler_epoch_input">
+                    <div class="input-group-prepend">
+                      <span class="input-group-text bg-light"><i class="fas fa-clock"></i> </span>
+                    </div>
+                    <input class="form-control" type="number" id="crawler_epoch" value="3600" name="crawler_epoch" required>
+                    <div class="input-group-append">
+                      <span class="input-group-text">Time (seconds) between each crawling</span>
+                    </div>
+                  </div>
+                </div>
+
+
+                <div class="col-12 col-lg-6 mt-2 mt-lg-0">
+
+                  <div class="row">
+                    <div class="col-12 col-xl-6">
+                      <div class="custom-control custom-switch">
+                        <input class="custom-control-input" type="checkbox" name="html_content" value="True" id="html_content_id" checked disabled>
+                        <label class="custom-control-label" for="html_content_id">
+                          <i class="fab fa-html5"></i> HTML
+                        </label>
+                      </div>
+                      <div class="custom-control custom-switch mt-1">
+                        <input class="custom-control-input" type="checkbox" name="screenshot" value="True" id="screenshot_id">
+                        <label class="custom-control-label" for="screenshot_id">
+                          <i class="fas fa-image"></i> Screenshot
+                        </label>
+                      </div>
+                      <div class="custom-control custom-switch mt-1">
+                        <input class="custom-control-input" type="checkbox" name="har" value="True" id="har_id">
+                        <label class="custom-control-label" for="har_id">
+                          <i class="fas fa-file"></i> HAR
+                        </label>
+                      </div>
+                    </div>
+                    <div class="col-12 col-xl-6">
+                      <div class="input-group form-group mb-0">
+                        <div class="input-group-prepend">
+                          <span class="input-group-text bg-light"><i class="fas fa-water"></i></span>
+                        </div>
+                        <input class="form-control" type="number" id="depth_limit" name="depth_limit" value="0" required>
+                        <div class="input-group-append">
+                          <span class="input-group-text">Depth Limit</span>
+                        </div>
+                      </div>
+                      <div class="input-group mt-2">
+                        <div class="input-group-prepend">
+                          <span class="input-group-text bg-light"><i class="fas fa-copy"></i> </span>
+                        </div>
+                        <input class="form-control" type="number" id="max_pages" name="max_pages" value="1" required>
+                        <div class="input-group-append">
+                          <span class="input-group-text">Max Pages</span>
+                        </div>
+                      </div>
+                    </div>
+
+                  </div>
+                </div>
+              </div>
+              <button class="btn btn-primary mt-2">
+                <i class="fas fa-spider"></i> Send to Spider
+              </button>
+            <form>
+          </div>
+        </div>
+
+
+      </div>
+
+    </div>
+  </div>
+
+
+</body>
+
+<script>
+var chart = {};
+$(document).ready(function(){
+  $("#page-Crawler").addClass("active");
+  manual_crawler_input_controler();
+
+  $('#crawler_type').change(function () {
+    manual_crawler_input_controler();
+  });
+});
+
+function manual_crawler_input_controler() {
+  if($('#crawler_type').is(':checked')){
+    $("#crawler_epoch_input").show();
+  }else{
+    $("#crawler_epoch_input").hide();
+  }
+}
+
+</script>