chg: [Crawler] css + limit splash RAM

This commit is contained in:
Terrtia 2018-09-27 16:47:48 +02:00
parent e357dce59b
commit ecb2857151
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
7 changed files with 28 additions and 27 deletions

View file

@@ -201,6 +201,7 @@ function launching_scripts {
 }
 function launching_crawler {
+    if [[ ! $iscrawler ]]; then
     CONFIG=$AIL_BIN/packages/config.cfg
     lport=$(awk '/^\[Crawler\]/{f=1} f==1&&/^splash_onion_port/{print $3;exit}' "${CONFIG}")
@@ -223,6 +224,9 @@ function launching_crawler {
     done
     echo -e $GREEN"\t* Launching Crawler_AIL scripts"$DEFAULT
+    else
+        echo -e $RED"\t* A screen is already launched"$DEFAULT
+    fi
 }
 function shutting_down_redis {

View file

@@ -114,7 +114,7 @@ class TorSplashCrawler():
             # down ?
             print('504 detected')
         elif response.status != 200:
-            #print('other: {}'.format(response.status))
+            print('other response: {}'.format(response.status))
             #print(error_log)
             #detect connection to proxy refused
             error_log = (json.loads(response.body.decode()))

View file

@@ -37,7 +37,7 @@ sleep 0.1
 for ((i=0;i<=$((${n} - 1));i++)); do
     port_number=$((${p} + $i))
-    screen -S "Docker_Splash" -X screen -t "docker_splash:$port_number" bash -c 'sudo docker run -p '$port_number':8050 --cpus=1 -v '$f':/etc/splash/proxy-profiles/ --net="bridge" scrapinghub/splash; read x'
+    screen -S "Docker_Splash" -X screen -t "docker_splash:$port_number" bash -c 'sudo docker run -p '$port_number':8050 --cpus=1 --memory=4.5G -v '$f':/etc/splash/proxy-profiles/ --net="bridge" scrapinghub/splash; read x'
     sleep 0.1
     echo "    Splash server launched on port $port_number"
 done

View file

@@ -6,6 +6,7 @@
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
     <title>Hidden Service - AIL</title>
+    <link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png') }}">
     <!-- Core CSS -->
     <link href="{{ url_for('static', filename='css/bootstrap.min.css') }}" rel="stylesheet">

View file

@@ -6,6 +6,7 @@
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
     <title>Show Domain - AIL</title>
+    <link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png') }}">
     <!-- Core CSS -->
     <link href="{{ url_for('static', filename='css/bootstrap.min.css') }}" rel="stylesheet">

View file

@@ -186,7 +186,6 @@ def showpaste(content_range, requested_path):
             crawler_metadata['domain'] = r_serv_metadata.hget('paste_metadata:'+requested_path, 'domain')
             crawler_metadata['paste_father'] = r_serv_metadata.hget('paste_metadata:'+requested_path, 'father')
             crawler_metadata['real_link'] = r_serv_metadata.hget('paste_metadata:'+requested_path,'real_link')
-            crawler_metadata['external_links'] =r_serv_metadata.scard('paste_onion_external_links:'+requested_path)
             crawler_metadata['screenshot'] = paste.get_p_rel_path()
         else:
             crawler_metadata['get_metadata'] = False

View file

@@ -424,9 +424,9 @@
     <div class="col-md-5">
       <div class="row">
-        <div class="panel panel-default">
+        <div class="panel panel-info">
           <div class="panel-heading">
-            <i id="flash-tld" class="glyphicon glyphicon-flash " flash-tld=""></i> Graph
+            <i class="fa fa-eye-slash"></i> Crawled Paste
           </div>
           <table class="table table-hover table-striped">
@@ -443,10 +443,6 @@
               <td>Source link</td>
               <td>{{ crawler_metadata['real_link'] }}</td>
             </tr>
-            <tr>
-              <td>External links</td>
-              <td>{{ crawler_metadata['external_links'] }}</td>
-            </tr>
           </tbody>
         </table>
       </div>