mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-22 22:27:17 +00:00
chg: [crawler] add test + relaunch crawlers + major fixs
This commit is contained in:
parent
8754350d39
commit
c0be210d2c
7 changed files with 379 additions and 152 deletions
|
@ -29,7 +29,8 @@ if __name__ == '__main__':
|
||||||
is_manager_connected = crawlers.reload_splash_and_proxies_list()
|
is_manager_connected = crawlers.reload_splash_and_proxies_list()
|
||||||
print(is_manager_connected)
|
print(is_manager_connected)
|
||||||
if is_manager_connected:
|
if is_manager_connected:
|
||||||
crawlers.relaunch_crawlers()
|
if crawlers.test_ail_crawlers():
|
||||||
|
crawlers.relaunch_crawlers()
|
||||||
last_check = int(time.time())
|
last_check = int(time.time())
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
|
@ -45,7 +46,8 @@ if __name__ == '__main__':
|
||||||
is_manager_connected = crawlers.reload_splash_and_proxies_list()
|
is_manager_connected = crawlers.reload_splash_and_proxies_list()
|
||||||
if is_manager_connected:
|
if is_manager_connected:
|
||||||
print('reload proxies and splash list')
|
print('reload proxies and splash list')
|
||||||
crawlers.relaunch_crawlers()
|
if crawlers.test_ail_crawlers():
|
||||||
|
crawlers.relaunch_crawlers()
|
||||||
session_uuid = current_session_uuid
|
session_uuid = current_session_uuid
|
||||||
if not is_manager_connected:
|
if not is_manager_connected:
|
||||||
print('Error, Can\'t connect to Splash manager')
|
print('Error, Can\'t connect to Splash manager')
|
||||||
|
|
|
@ -16,6 +16,8 @@ import sys
|
||||||
import time
|
import time
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
|
import subprocess
|
||||||
|
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
@ -25,6 +27,9 @@ from pyfaup.faup import Faup
|
||||||
import requests
|
import requests
|
||||||
requests.packages.urllib3.disable_warnings(requests.packages.urllib3.exceptions.InsecureRequestWarning)
|
requests.packages.urllib3.disable_warnings(requests.packages.urllib3.exceptions.InsecureRequestWarning)
|
||||||
|
|
||||||
|
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
|
||||||
|
import git_status
|
||||||
|
|
||||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
|
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
|
||||||
import ConfigLoader
|
import ConfigLoader
|
||||||
|
|
||||||
|
@ -429,6 +434,19 @@ def get_splash_crawler_status(spash_url):
|
||||||
status=False
|
status=False
|
||||||
return {'crawler_info': crawler_info, 'crawling_domain': crawling_domain, 'status_info': status_info, 'status': status, 'type': crawler_type}
|
return {'crawler_info': crawler_info, 'crawling_domain': crawling_domain, 'status_info': status_info, 'status': status, 'type': crawler_type}
|
||||||
|
|
||||||
|
def set_current_crawler_status(splash_url, status, started_time=False, crawled_domain=None, crawler_type=None):
    """Record the current state of one Splash crawler in the cache.

    The 'status' field of the ``metadata_crawler:<splash_url>`` hash is always
    written; every other field is only written when its argument is truthy.

    :param splash_url: crawler identifier used to build the hash key
    :param status: status label, e.g. 'Waiting' or 'Error'
    :param started_time: when truthy, stamp 'started_time' with the current local time
    :param crawled_domain: value stored in the 'crawling_domain' field
    :param crawler_type: value stored in the 'type' field
    """
    # TODO: get crawler type if None
    metadata_key = 'metadata_crawler:{}'.format(splash_url)

    # Status: ['Waiting', 'Error', ...]
    r_cache.hset(metadata_key, 'status', status)

    if started_time:
        launch_stamp = datetime.now().strftime("%Y/%m/%d - %H:%M.%S")
        r_cache.hset(metadata_key, 'started_time', launch_stamp)

    # Optional fields, written in the same order as before: type, then domain.
    optional_fields = (('type', crawler_type), ('crawling_domain', crawled_domain))
    for field_name, field_value in optional_fields:
        if field_value:
            r_cache.hset(metadata_key, field_name, field_value)

    # TODO: add splash_url to the 'all_splash_crawlers' set in fct: create_ail_crawler
|
||||||
|
|
||||||
def get_stats_last_crawled_domains(crawler_types, date):
|
def get_stats_last_crawled_domains(crawler_types, date):
|
||||||
statDomains = {}
|
statDomains = {}
|
||||||
for crawler_type in crawler_types:
|
for crawler_type in crawler_types:
|
||||||
|
@ -1014,6 +1032,20 @@ def get_all_splash_by_proxy(proxy_name, r_list=False):
|
||||||
else:
|
else:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
def get_all_splash_name_by_crawler_type(crawler_type):
    """Return the names of every Splash whose crawler type equals *crawler_type*.

    :param crawler_type: crawler type to match
    :return: list of matching Splash names (empty when none match)
    """
    return [name for name in get_all_splash()
            if get_splash_crawler_type(name) == crawler_type]
|
||||||
|
|
||||||
|
def get_all_splash_url_by_crawler_type(crawler_type):
    """Return the URLs of every Splash whose crawler type equals *crawler_type*.

    :param crawler_type: crawler type to match
    :return: flat list of Splash URLs, grouped by Splash name in lookup order
    """
    matching_urls = []
    for name in get_all_splash_name_by_crawler_type(crawler_type):
        # One Splash name can expose several URLs; keep them all.
        matching_urls.extend(get_splash_all_url(name, r_list=True))
    return matching_urls
|
||||||
|
|
||||||
def delete_all_splash_containers():
|
def delete_all_splash_containers():
|
||||||
for splash_name in get_all_splash():
|
for splash_name in get_all_splash():
|
||||||
delete_splash_container(splash_name)
|
delete_splash_container(splash_name)
|
||||||
|
@ -1140,7 +1172,106 @@ def launch_ail_splash_crawler(splash_url, script_options=''):
|
||||||
screen.create_screen(screen_name)
|
screen.create_screen(screen_name)
|
||||||
screen.launch_uniq_windows_script(screen_name, splash_url, dir_project, script_location, script_name, script_options=script_options, kill_previous_windows=True)
|
screen.launch_uniq_windows_script(screen_name, splash_url, dir_project, script_location, script_name, script_options=script_options, kill_previous_windows=True)
|
||||||
|
|
||||||
|
def is_test_ail_crawlers_successful():
    """Tell whether the last stored crawler test result is a success.

    :return: True only when the 'success' field of 'crawler:tor:test' equals 'True'
    """
    stored_flag = r_serv_onion.hget('crawler:tor:test', 'success')
    return stored_flag == 'True'
|
||||||
|
|
||||||
|
def get_test_ail_crawlers_message():
    """Return the message stored with the last crawler test result.

    :return: the 'message' field of the 'crawler:tor:test' hash
    """
    last_message = r_serv_onion.hget('crawler:tor:test', 'message')
    return last_message
|
||||||
|
|
||||||
|
def save_test_ail_crawlers_result(test_success, message):
    """Persist the outcome of the crawler test.

    Writes the 'success' and 'message' fields of the 'crawler:tor:test' hash.

    :param test_success: truthy when the test passed
    :param message: text describing the outcome
    """
    # Store the flag as the literal string 'True' / 'False':
    # is_test_ail_crawlers_successful() compares against 'True', and recent
    # redis-py releases raise DataError when handed a raw bool.
    success_flag = str(bool(test_success))
    r_serv_onion.hset('crawler:tor:test', 'success', success_flag)
    r_serv_onion.hset('crawler:tor:test', 'message', message)
|
||||||
|
|
||||||
|
def test_ail_crawlers():
    """Run a test crawl through the first available Tor Splash.

    Steps:
      1. ping the Splash manager;
      2. pick the first Splash URL registered for the 'tor' crawler type;
      3. check that this Splash answers over HTTP (30s timeout);
      4. store a test crawl request in the cache and run ``tor_crawler.py``
         on it in a child process;
      5. interpret the child's exit code and output.

    Every failure is recorded with save_test_ail_crawlers_result() before
    returning.

    :return: True when the child process exits with code 0 and its output does
             not report a refused proxy connection, False otherwise
    """
    # # TODO: test regular domain
    if not ping_splash_manager():
        manager_url = get_splash_manager_url()
        error_message = f'Error: Can\'t connect to AIL Splash Manager, http://{manager_url}'
        print(error_message)
        save_test_ail_crawlers_result(False, error_message)
        return False

    tor_splash_urls = get_all_splash_url_by_crawler_type('tor')
    if not tor_splash_urls:
        error_message = 'Error: No Tor Splash Launched'
        print(error_message)
        save_test_ail_crawlers_result(False, error_message)
        return False
    splash_url = tor_splash_urls[0]

    commit_id = git_status.get_last_commit_id_from_local()
    crawler_options = {'html': True,
                       'har': False,
                       'png': False,
                       'depth_limit': 0,
                       'closespider_pagecount': 100,
                       'cookiejar_uuid': None,
                       'user_agent': commit_id + '-AIL SPLASH CRAWLER'}
    # Take a single timestamp so day and month cannot disagree around midnight.
    now = datetime.now()
    date = {'date_day': now.strftime("%Y%m%d"),
            'date_month': now.strftime("%Y%m"),
            'epoch': int(time.time())}
    crawler_config = {'splash_url': f'http://{splash_url}',
                      'service_type': 'onion',
                      'url': 'http://eswpccgr5xyovsahffkehgleqthrasfpfdblwbs4lstd345dwq5qumqd.onion',
                      'domain': 'eswpccgr5xyovsahffkehgleqthrasfpfdblwbs4lstd345dwq5qumqd.onion',
                      'port': 80,
                      'original_item': None,
                      'item': None,
                      'crawler_options': crawler_options,
                      'date': date,
                      'requested': 'test'}

    ## CHECK IF SPLASH AVAILABLE ##
    try:
        requests.get(f'http://{splash_url}', timeout=30.0)
    except Exception:
        # Any failure to reach the Splash container counts as "not available".
        error_message = f'Error: Can\'t connect to Splash Docker, http://{splash_url}'
        print(error_message)
        save_test_ail_crawlers_result(False, error_message)
        return False
    ## -- ##

    ## LAUNCH CRAWLER, TEST MODE ##
    set_current_crawler_status(splash_url, 'CRAWLER TEST', started_time=True, crawled_domain='TEST DOMAIN', crawler_type='onion')
    UUID = str(uuid.uuid4())
    r_cache.set('crawler_request:{}'.format(UUID), json.dumps(crawler_config))

    tor_crawler_script = os.path.join(os.environ['AIL_BIN'], 'torcrawler/tor_crawler.py')
    process = subprocess.Popen(["python", tor_crawler_script, UUID],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    # communicate() drains both pipes while waiting. Polling the exit code
    # without reading can block forever once the child fills a pipe buffer,
    # and each stream can only be read once.
    raw_stdout, raw_stderr = process.communicate()
    output = raw_stdout.decode(errors='replace')
    stderr = raw_stderr.decode(errors='replace')

    if process.returncode != 0:
        # ERROR: the crawler process itself failed
        error = f'-stderr-\n{stderr}\n-stdout-\n{output}'
        print(error)
        save_test_ail_crawlers_result(False, error)
        # Do not leave the crawler flagged as 'CRAWLER TEST' after a crash.
        set_current_crawler_status(splash_url, 'Error')
        return False

    # Scrapy-Splash ERRORS
    if stderr:
        print(f'stderr: {stderr}')
        save_test_ail_crawlers_result(False, f'Error: {stderr}')

    # error: splash:Connection to proxy refused
    if 'Connection to proxy refused' in output:
        print('{} SPASH, PROXY DOWN OR BAD CONFIGURATION'.format(splash_url))
        save_test_ail_crawlers_result(False, 'SPASH, PROXY DOWN OR BAD CONFIGURATION')
        set_current_crawler_status(splash_url, 'Error')
        return False

    # NOTE(review): as in the previous implementation, a clean exit is reported
    # to the caller as a pass even when stderr was not empty (the stored test
    # result keeps the error text) - confirm whether this case should instead
    # return False.
    set_current_crawler_status(splash_url, 'Waiting')
    return True
|
||||||
## -- ##
|
## -- ##
|
||||||
|
|
||||||
#### ---- ####
|
#### ---- ####
|
||||||
|
@ -1151,5 +1282,7 @@ def launch_ail_splash_crawler(splash_url, script_options=''):
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
res = get_splash_manager_version()
|
res = get_splash_manager_version()
|
||||||
#res = restart_splash_docker('127.0.0.1:8050', 'default_splash_tor')
|
res = test_ail_crawlers()
|
||||||
|
res = is_test_ail_crawlers_successful()
|
||||||
print(res)
|
print(res)
|
||||||
|
print(get_test_ail_crawlers_message())
|
||||||
|
|
|
@ -81,7 +81,7 @@ function main(splash, args)
|
||||||
html = splash:html(),
|
html = splash:html(),
|
||||||
png = splash:png{render_all=true},
|
png = splash:png{render_all=true},
|
||||||
cookies = splash:get_cookies(),
|
cookies = splash:get_cookies(),
|
||||||
last_url = splash:url()
|
last_url = splash:url(),
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
"""
|
"""
|
||||||
|
@ -174,35 +174,54 @@ class TorSplashCrawler():
|
||||||
def parse(self,response):
|
def parse(self,response):
|
||||||
#print(response.headers)
|
#print(response.headers)
|
||||||
#print(response.status)
|
#print(response.status)
|
||||||
|
#print(response.meta)
|
||||||
|
#print(response.data) # # TODO: handle lua script error
|
||||||
|
#{'type': 'ScriptError', 'info': {'error': "'}' expected (to close '{' at line 47) near 'error_retry'",
|
||||||
|
#'message': '[string "..."]:53: \'}\' expected (to close \'{\' at line 47) near \'error_retry\'',
|
||||||
|
#'type': 'LUA_INIT_ERROR', 'source': '[string "..."]', 'line_number': 53},
|
||||||
|
#'error': 400, 'description': 'Error happened while executing Lua script'}
|
||||||
if response.status == 504:
|
if response.status == 504:
|
||||||
# no response
|
# no response
|
||||||
#print('504 detected')
|
#print('504 detected')
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# LUA ERROR # # TODO: print/display errors
|
# LUA ERROR # # TODO: logs errors
|
||||||
elif 'error' in response.data:
|
elif 'error' in response.data:
|
||||||
if(response.data['error'] == 'network99'):
|
if(response.data['error'] == 'network99'):
|
||||||
## splash restart ##
|
## splash restart ##
|
||||||
error_retry = request.meta.get('error_retry', 0)
|
error_retry = response.meta.get('error_retry', 0)
|
||||||
if error_retry < 3:
|
if error_retry < 3:
|
||||||
error_retry += 1
|
error_retry += 1
|
||||||
url= request.meta['current_url']
|
url = response.data['last_url']
|
||||||
father = request.meta['father']
|
father = response.meta['father']
|
||||||
|
|
||||||
self.logger.error('Splash, ResponseNeverReceived for %s, retry in 10s ...', url)
|
self.logger.error('Splash, ResponseNeverReceived for %s, retry in 10s ...', url)
|
||||||
time.sleep(10)
|
time.sleep(10)
|
||||||
|
if 'cookies' in response.data:
|
||||||
|
all_cookies = response.data['cookies'] # # TODO: use initial cookie ?????
|
||||||
|
else:
|
||||||
|
all_cookies = []
|
||||||
|
l_cookies = self.build_request_arg(all_cookies)
|
||||||
yield SplashRequest(
|
yield SplashRequest(
|
||||||
url,
|
url,
|
||||||
self.parse,
|
self.parse,
|
||||||
errback=self.errback_catcher,
|
errback=self.errback_catcher,
|
||||||
endpoint='execute',
|
endpoint='execute',
|
||||||
cache_args=['lua_source'],
|
dont_filter=True,
|
||||||
meta={'father': father, 'current_url': url, 'error_retry': error_retry},
|
meta={'father': father, 'current_url': url, 'error_retry': error_retry},
|
||||||
args=self.build_request_arg(response.cookiejar)
|
args=l_cookies
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
|
if self.requested_mode == 'test':
|
||||||
|
crawlers.save_test_ail_crawlers_result(False, 'Connection to proxy refused')
|
||||||
print('Connection to proxy refused')
|
print('Connection to proxy refused')
|
||||||
|
elif response.data['error'] == 'network3':
|
||||||
|
if self.requested_mode == 'test':
|
||||||
|
crawlers.save_test_ail_crawlers_result(False, 'HostNotFoundError: the remote host name was not found (invalid hostname)')
|
||||||
|
print('HostNotFoundError: the remote host name was not found (invalid hostname)')
|
||||||
else:
|
else:
|
||||||
|
if self.requested_mode == 'test':
|
||||||
|
crawlers.save_test_ail_crawlers_result(False, response.data['error'])
|
||||||
print(response.data['error'])
|
print(response.data['error'])
|
||||||
|
|
||||||
elif response.status != 200:
|
elif response.status != 200:
|
||||||
|
@ -213,6 +232,17 @@ class TorSplashCrawler():
|
||||||
#elif crawlers.is_redirection(self.domains[0], response.data['last_url']):
|
#elif crawlers.is_redirection(self.domains[0], response.data['last_url']):
|
||||||
# pass # ignore response
|
# pass # ignore response
|
||||||
else:
|
else:
|
||||||
|
## TEST MODE ##
|
||||||
|
if self.requested_mode == 'test':
|
||||||
|
if 'It works!' in response.data['html']:
|
||||||
|
print(response.data['html'])
|
||||||
|
#print('success')
|
||||||
|
crawlers.save_test_ail_crawlers_result(True, 'It works!')
|
||||||
|
else:
|
||||||
|
print('TEST ERROR')
|
||||||
|
crawlers.save_test_ail_crawlers_result(False, 'TEST ERROR')
|
||||||
|
return
|
||||||
|
## -- ##
|
||||||
|
|
||||||
item_id = crawlers.create_item_id(self.item_dir, self.domains[0])
|
item_id = crawlers.create_item_id(self.item_dir, self.domains[0])
|
||||||
self.save_crawled_item(item_id, response.data['html'])
|
self.save_crawled_item(item_id, response.data['html'])
|
||||||
|
|
|
@ -45,5 +45,9 @@ if __name__ == '__main__':
|
||||||
|
|
||||||
redis_cache.delete('crawler_request:{}'.format(uuid))
|
redis_cache.delete('crawler_request:{}'.format(uuid))
|
||||||
|
|
||||||
crawler = TorSplashCrawler(splash_url, crawler_options)
|
try:
|
||||||
crawler.crawl(splash_url, service_type, crawler_options, date, requested_mode, url, domain, port, cookies, original_item)
|
crawler = TorSplashCrawler(splash_url, crawler_options)
|
||||||
|
crawler.crawl(splash_url, service_type, crawler_options, date, requested_mode, url, domain, port, cookies, original_item)
|
||||||
|
except Exception as e:
|
||||||
|
print(e)
|
||||||
|
print(e, file=sys.stderr)
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
pymisp
|
pymisp
|
||||||
|
d4-pyclient
|
||||||
|
|
||||||
thehive4py
|
thehive4py
|
||||||
|
|
||||||
|
|
|
@ -504,18 +504,22 @@ def crawler_cookiejar_cookie_json_add_post():
|
||||||
def crawler_splash_setings():
|
def crawler_splash_setings():
|
||||||
all_proxies = crawlers.get_all_proxies_metadata()
|
all_proxies = crawlers.get_all_proxies_metadata()
|
||||||
all_splash = crawlers.get_all_splash_crawler_metadata()
|
all_splash = crawlers.get_all_splash_crawler_metadata()
|
||||||
nb_crawlers_to_launch = crawlers.get_nb_crawlers_to_launch()
|
|
||||||
|
|
||||||
splash_manager_url = crawlers.get_splash_manager_url()
|
splash_manager_url = crawlers.get_splash_manager_url()
|
||||||
api_key = crawlers.get_hidden_splash_api_key()
|
api_key = crawlers.get_hidden_splash_api_key()
|
||||||
is_manager_connected = crawlers.get_splash_manager_connection_metadata(force_ping=True)
|
is_manager_connected = crawlers.get_splash_manager_connection_metadata(force_ping=True)
|
||||||
|
|
||||||
|
nb_crawlers_to_launch = crawlers.get_nb_crawlers_to_launch()
|
||||||
crawler_full_config = Config_DB.get_full_config_by_section('crawler')
|
crawler_full_config = Config_DB.get_full_config_by_section('crawler')
|
||||||
|
is_crawler_working = crawlers.is_test_ail_crawlers_successful()
|
||||||
|
crawler_error_mess = crawlers.get_test_ail_crawlers_message()
|
||||||
|
|
||||||
return render_template("settings_splash_crawler.html",
|
return render_template("settings_splash_crawler.html",
|
||||||
is_manager_connected=is_manager_connected,
|
is_manager_connected=is_manager_connected,
|
||||||
splash_manager_url=splash_manager_url, api_key=api_key,
|
splash_manager_url=splash_manager_url, api_key=api_key,
|
||||||
nb_crawlers_to_launch=nb_crawlers_to_launch,
|
|
||||||
all_splash=all_splash, all_proxies=all_proxies,
|
all_splash=all_splash, all_proxies=all_proxies,
|
||||||
|
nb_crawlers_to_launch=nb_crawlers_to_launch,
|
||||||
|
is_crawler_working=is_crawler_working,
|
||||||
|
crawler_error_mess=crawler_error_mess,
|
||||||
crawler_full_config=crawler_full_config)
|
crawler_full_config=crawler_full_config)
|
||||||
|
|
||||||
@crawler_splash.route('/crawler/settings/crawler_manager', methods=['GET', 'POST'])
|
@crawler_splash.route('/crawler/settings/crawler_manager', methods=['GET', 'POST'])
|
||||||
|
@ -555,4 +559,18 @@ def crawler_splash_setings_crawlers_to_lauch():
|
||||||
return render_template("settings_edit_crawlers_to_launch.html",
|
return render_template("settings_edit_crawlers_to_launch.html",
|
||||||
nb_crawlers_to_launch=nb_crawlers_to_launch)
|
nb_crawlers_to_launch=nb_crawlers_to_launch)
|
||||||
|
|
||||||
|
@crawler_splash.route('/crawler/settings/test_crawler', methods=['GET'])
@login_required
@login_admin
def crawler_splash_setings_test_crawler():
    """Run the crawler test, then send the admin back to the Splash settings page."""
    crawlers.test_ail_crawlers()
    settings_page = url_for('crawler_splash.crawler_splash_setings')
    return redirect(settings_page)
|
||||||
|
|
||||||
|
@crawler_splash.route('/crawler/settings/relaunch_crawler', methods=['GET'])
@login_required
@login_admin
def crawler_splash_setings_relaunch_crawler():
    """Relaunch the crawlers, then send the admin back to the Splash settings page."""
    crawlers.relaunch_crawlers()
    settings_page = url_for('crawler_splash.crawler_splash_setings')
    return redirect(settings_page)
|
||||||
|
|
||||||
## - - ##
|
## - - ##
|
||||||
|
|
|
@ -90,11 +90,165 @@
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div {%if not is_manager_connected%}class="hidden"{%endif%}>
|
||||||
|
|
||||||
|
<div class="card border-secondary mb-4">
|
||||||
|
<div class="card-body text-dark">
|
||||||
|
<h5 class="card-title">All Splash Crawlers:</h5>
|
||||||
|
<table class="table table-striped">
|
||||||
|
<thead class="bg-info text-white">
|
||||||
|
<th>
|
||||||
|
Splash name
|
||||||
|
</th>
|
||||||
|
<th>
|
||||||
|
Proxy
|
||||||
|
</th>
|
||||||
|
<th>
|
||||||
|
Crawler type
|
||||||
|
</th>
|
||||||
|
<th>
|
||||||
|
Description
|
||||||
|
</th>
|
||||||
|
<th></th>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for splash_name in all_splash %}
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
{{splash_name}}
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
{{all_splash[splash_name]['proxy']}}
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
{%if all_splash[splash_name]['type']=='tor'%}
|
||||||
|
<i class="fas fa-user-secret"></i>
|
||||||
|
{%else%}
|
||||||
|
<i class="fab fa-html5"></i>
|
||||||
|
{%endif%}
|
||||||
|
{{all_splash[splash_name]['type']}}
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
{{all_splash[splash_name]['description']}}
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
<div class="d-flex justify-content-end">
|
||||||
|
<!-- <button class="btn btn-outline-dark px-1 py-0">
|
||||||
|
<i class="fas fa-pencil-alt"></i>
|
||||||
|
</button> -->
|
||||||
|
</div>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="card border-secondary">
|
||||||
|
<div class="card-body text-dark">
|
||||||
|
<h5 class="card-title">All Proxies:</h5>
|
||||||
|
<table class="table table-striped">
|
||||||
|
<thead class="bg-info text-white">
|
||||||
|
<th>
|
||||||
|
Proxy name
|
||||||
|
</th>
|
||||||
|
<th>
|
||||||
|
Host
|
||||||
|
</th>
|
||||||
|
<th>
|
||||||
|
Port
|
||||||
|
</th>
|
||||||
|
<th>
|
||||||
|
Type
|
||||||
|
</th>
|
||||||
|
<th>
|
||||||
|
Crawler Type
|
||||||
|
</th>
|
||||||
|
<th>
|
||||||
|
Description
|
||||||
|
</th>
|
||||||
|
<th></th>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for proxy_name in all_proxies %}
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
{{proxy_name}}
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
{{all_proxies[proxy_name]['host']}}
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
{{all_proxies[proxy_name]['port']}}
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
{{all_proxies[proxy_name]['type']}}
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
{%if all_proxies[proxy_name]['crawler_type']=='tor'%}
|
||||||
|
<i class="fas fa-user-secret"></i>
|
||||||
|
{%else%}
|
||||||
|
<i class="fab fa-html5"></i>
|
||||||
|
{%endif%}
|
||||||
|
{{all_proxies[proxy_name]['crawler_type']}}
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
{{all_proxies[proxy_name]['description']}}
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
<div class="d-flex justify-content-end">
|
||||||
|
<!-- <button class="btn btn-outline-dark px-1 py-0">
|
||||||
|
<i class="fas fa-pencil-alt"></i>
|
||||||
|
</button> -->
|
||||||
|
</div>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<div {%if not is_manager_connected%}class="hidden"{%endif%}>
|
<div class="card mb-3 mt-1">
|
||||||
|
<div class="card-header bg-dark text-white">
|
||||||
|
<span class="badge badge-pill badge-light flex-row-reverse float-right">
|
||||||
|
{% if is_crawler_working %}
|
||||||
|
<div style="color:Green;">
|
||||||
|
<i class="fas fa-check-circle fa-2x"></i>
|
||||||
|
{{crawler_error_mess}}
|
||||||
|
</div>
|
||||||
|
{% else %}
|
||||||
|
<div style="color:Red;">
|
||||||
|
<i class="fas fa-times-circle fa-2x"></i>
|
||||||
|
Error
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
</span>
|
||||||
|
<h4>Crawlers</h4>
|
||||||
|
</div>
|
||||||
|
<div class="card-body">
|
||||||
|
|
||||||
<div class="card border-secondary mb-4">
|
<pre class="bg-dark text-white">
|
||||||
|
----------------------------
|
||||||
|
- TOR CRAWLER TEST OUTPUT: -
|
||||||
|
----------------------------
|
||||||
|
|
||||||
|
{{crawler_error_mess}}
|
||||||
|
</pre>
|
||||||
|
<a href="{{ url_for('crawler_splash.crawler_splash_setings_test_crawler') }}">
|
||||||
|
<button type="button" class="btn btn-primary">
|
||||||
|
ReRun Test <i class="fas fa-rocket"></i>
|
||||||
|
</button>
|
||||||
|
</a>
|
||||||
|
|
||||||
|
<div class="card border-secondary my-4">
|
||||||
<div class="card-body text-dark">
|
<div class="card-body text-dark">
|
||||||
<h5 class="card-title">Number of Crawlers to Launch:</h5>
|
<h5 class="card-title">Number of Crawlers to Launch:</h5>
|
||||||
<table class="table table-sm">
|
<table class="table table-sm">
|
||||||
|
@ -112,175 +266,60 @@
|
||||||
Edit number of crawlers to launch <i class="fas fa-pencil-alt"></i>
|
Edit number of crawlers to launch <i class="fas fa-pencil-alt"></i>
|
||||||
</button>
|
</button>
|
||||||
</a>
|
</a>
|
||||||
</div>
|
<a href="{{ url_for('crawler_splash.crawler_splash_setings_relaunch_crawler') }}">
|
||||||
</div>
|
<button type="button" class="btn btn-danger">
|
||||||
|
ReLaunch Crawlers <i class="fas fa-redo"></i>
|
||||||
<div class="card border-secondary mb-4">
|
</button>
|
||||||
<div class="card-body text-dark">
|
</a>
|
||||||
<h5 class="card-title">All Splash Crawlers:</h5>
|
|
||||||
<table class="table table-striped">
|
|
||||||
<thead class="bg-info text-white">
|
|
||||||
<th>
|
|
||||||
Splash name
|
|
||||||
</th>
|
|
||||||
<th>
|
|
||||||
Proxy
|
|
||||||
</th>
|
|
||||||
<th>
|
|
||||||
Crawler type
|
|
||||||
</th>
|
|
||||||
<th>
|
|
||||||
Description
|
|
||||||
</th>
|
|
||||||
<th></th>
|
|
||||||
</thead>
|
|
||||||
<tbody>
|
|
||||||
{% for splash_name in all_splash %}
|
|
||||||
<tr>
|
|
||||||
<td>
|
|
||||||
{{splash_name}}
|
|
||||||
</td>
|
|
||||||
<td>
|
|
||||||
{{all_splash[splash_name]['proxy']}}
|
|
||||||
</td>
|
|
||||||
<td>
|
|
||||||
{%if all_splash[splash_name]['type']=='tor'%}
|
|
||||||
<i class="fas fa-user-secret"></i>
|
|
||||||
{%else%}
|
|
||||||
<i class="fab fa-html5">
|
|
||||||
{%endif%}
|
|
||||||
{{all_splash[splash_name]['type']}}
|
|
||||||
</td>
|
|
||||||
<td>
|
|
||||||
{{all_splash[splash_name]['description']}}
|
|
||||||
</td>
|
|
||||||
<td>
|
|
||||||
<div class="d-flex justify-content-end">
|
|
||||||
<button class="btn btn-outline-dark px-1 py-0">
|
|
||||||
<i class="fas fa-pencil-alt"></i>
|
|
||||||
</button>
|
|
||||||
</div>
|
|
||||||
</td>
|
|
||||||
</tr>
|
|
||||||
{% endfor %}
|
|
||||||
</tbody>
|
|
||||||
</table>
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="card border-secondary">
|
<div class="card border-secondary">
|
||||||
<div class="card-body text-dark">
|
<div class="card-body text-dark">
|
||||||
<h5 class="card-title">All Proxies:</h5>
|
<h5 class="card-title">Crawlers Settings:</h5>
|
||||||
<table class="table table-striped">
|
|
||||||
|
<table class="table table-striped table-hover">
|
||||||
<thead class="bg-info text-white">
|
<thead class="bg-info text-white">
|
||||||
<th>
|
<th>
|
||||||
Proxy name
|
Key
|
||||||
</th>
|
|
||||||
<th>
|
|
||||||
Host
|
|
||||||
</th>
|
|
||||||
<th>
|
|
||||||
Port
|
|
||||||
</th>
|
|
||||||
<th>
|
|
||||||
Type
|
|
||||||
</th>
|
|
||||||
<th>
|
|
||||||
Crawler Type
|
|
||||||
</th>
|
</th>
|
||||||
<th>
|
<th>
|
||||||
Description
|
Description
|
||||||
</th>
|
</th>
|
||||||
|
<th>
|
||||||
|
Value
|
||||||
|
</th>
|
||||||
<th></th>
|
<th></th>
|
||||||
</thead>
|
</thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
{% for proxy_name in all_proxies %}
|
{% for config_field in crawler_full_config %}
|
||||||
<tr>
|
<tr>
|
||||||
<td>
|
<td>
|
||||||
{{proxy_name}}
|
{{config_field}}
|
||||||
</td>
|
</td>
|
||||||
<td>
|
<td>
|
||||||
{{all_proxies[proxy_name]['host']}}
|
{{crawler_full_config[config_field]['info']}}
|
||||||
</td>
|
</td>
|
||||||
<td>
|
<td>
|
||||||
{{all_proxies[proxy_name]['port']}}
|
{{crawler_full_config[config_field]['value']}}
|
||||||
</td>
|
|
||||||
<td>
|
|
||||||
{{all_proxies[proxy_name]['type']}}
|
|
||||||
</td>
|
|
||||||
<td>
|
|
||||||
{%if all_proxies[proxy_name]['crawler_type']=='tor'%}
|
|
||||||
<i class="fas fa-user-secret"></i>
|
|
||||||
{%else%}
|
|
||||||
<i class="fab fa-html5">
|
|
||||||
{%endif%}
|
|
||||||
{{all_proxies[proxy_name]['crawler_type']}}
|
|
||||||
</td>
|
|
||||||
<td>
|
|
||||||
{{all_proxies[proxy_name]['description']}}
|
|
||||||
</td>
|
</td>
|
||||||
<td>
|
<td>
|
||||||
<div class="d-flex justify-content-end">
|
<div class="d-flex justify-content-end">
|
||||||
<button class="btn btn-outline-dark px-1 py-0">
|
<!-- <button class="btn btn-outline-dark px-1 py-0">
|
||||||
<i class="fas fa-pencil-alt"></i>
|
<i class="fas fa-pencil-alt"></i>
|
||||||
</button>
|
</button> -->
|
||||||
</div>
|
</div>
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
</tbody>
|
</tbody>
|
||||||
</table>
|
</table>
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
|
||||||
|
|
||||||
<div class="card mb-3 mt-1">
|
|
||||||
<div class="card-header bg-dark text-white">
|
|
||||||
<h4>Crawlers Settings</h4>
|
|
||||||
</div>
|
|
||||||
<div class="card-body">
|
|
||||||
|
|
||||||
<table class="table table-striped table-hover">
|
|
||||||
<thead class="bg-info text-white">
|
|
||||||
<th>
|
|
||||||
Key
|
|
||||||
</th>
|
|
||||||
<th>
|
|
||||||
Description
|
|
||||||
</th>
|
|
||||||
<th>
|
|
||||||
Value
|
|
||||||
</th>
|
|
||||||
<th></th>
|
|
||||||
</thead>
|
|
||||||
<tbody>
|
|
||||||
{% for config_field in crawler_full_config %}
|
|
||||||
<tr>
|
|
||||||
<td>
|
|
||||||
{{config_field}}
|
|
||||||
</td>
|
|
||||||
<td>
|
|
||||||
{{crawler_full_config[config_field]['info']}}
|
|
||||||
</td>
|
|
||||||
<td>
|
|
||||||
{{crawler_full_config[config_field]['value']}}
|
|
||||||
</td>
|
|
||||||
<td>
|
|
||||||
<div class="d-flex justify-content-end">
|
|
||||||
<button class="btn btn-outline-dark px-1 py-0">
|
|
||||||
<i class="fas fa-pencil-alt"></i>
|
|
||||||
</button>
|
|
||||||
</div>
|
|
||||||
</td>
|
|
||||||
</tr>
|
|
||||||
{% endfor %}
|
|
||||||
</tbody>
|
|
||||||
</table>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
Loading…
Reference in a new issue