chg: [Crawler] css + limit splash RAM

This commit is contained in:
Terrtia 2018-09-27 16:47:48 +02:00
parent e357dce59b
commit ecb2857151
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
7 changed files with 28 additions and 27 deletions

View file

@ -201,28 +201,32 @@ function launching_scripts {
}
function launching_crawler {
CONFIG=$AIL_BIN/packages/config.cfg
lport=$(awk '/^\[Crawler\]/{f=1} f==1&&/^splash_onion_port/{print $3;exit}' "${CONFIG}")
if [[ ! $iscrawler ]]; then
CONFIG=$AIL_BIN/packages/config.cfg
lport=$(awk '/^\[Crawler\]/{f=1} f==1&&/^splash_onion_port/{print $3;exit}' "${CONFIG}")
IFS='-' read -ra PORTS <<< "$lport"
if [ ${#PORTS[@]} -eq 1 ]
then
first_port=${PORTS[0]}
last_port=${PORTS[0]}
else
first_port=${PORTS[0]}
last_port=${PORTS[1]}
fi
IFS='-' read -ra PORTS <<< "$lport"
if [ ${#PORTS[@]} -eq 1 ]
then
first_port=${PORTS[0]}
last_port=${PORTS[0]}
else
first_port=${PORTS[0]}
last_port=${PORTS[1]}
fi
screen -dmS "Crawler_AIL"
sleep 0.1
for ((i=first_port;i<=last_port;i++)); do
screen -S "Crawler_AIL" -X screen -t "onion_crawler:$i" bash -c 'cd '${AIL_BIN}'; ./Crawler.py onion '$i'; read x'
screen -dmS "Crawler_AIL"
sleep 0.1
done
echo -e $GREEN"\t* Launching Crawler_AIL scripts"$DEFAULT
for ((i=first_port;i<=last_port;i++)); do
screen -S "Crawler_AIL" -X screen -t "onion_crawler:$i" bash -c 'cd '${AIL_BIN}'; ./Crawler.py onion '$i'; read x'
sleep 0.1
done
echo -e $GREEN"\t* Launching Crawler_AIL scripts"$DEFAULT
else
echo -e $RED"\t* A screen is already launched"$DEFAULT
fi
}
function shutting_down_redis {

View file

@ -114,7 +114,7 @@ class TorSplashCrawler():
# down ?
print('504 detected')
elif response.status != 200:
#print('other: {}'.format(response.status))
print('other response: {}'.format(response.status))
#print(error_log)
#detect connection to proxy refused
error_log = (json.loads(response.body.decode()))

View file

@ -37,7 +37,7 @@ sleep 0.1
for ((i=0;i<=$((${n} - 1));i++)); do
port_number=$((${p} + $i))
screen -S "Docker_Splash" -X screen -t "docker_splash:$port_number" bash -c 'sudo docker run -p '$port_number':8050 --cpus=1 -v '$f':/etc/splash/proxy-profiles/ --net="bridge" scrapinghub/splash; read x'
screen -S "Docker_Splash" -X screen -t "docker_splash:$port_number" bash -c 'sudo docker run -p '$port_number':8050 --cpus=1 --memory=4.5G -v '$f':/etc/splash/proxy-profiles/ --net="bridge" scrapinghub/splash; read x'
sleep 0.1
echo " Splash server launched on port $port_number"
done

View file

@ -6,6 +6,7 @@
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Hidden Service - AIL</title>
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png') }}">
<!-- Core CSS -->
<link href="{{ url_for('static', filename='css/bootstrap.min.css') }}" rel="stylesheet">

View file

@ -6,6 +6,7 @@
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Show Domain - AIL</title>
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png') }}">
<!-- Core CSS -->
<link href="{{ url_for('static', filename='css/bootstrap.min.css') }}" rel="stylesheet">

View file

@ -186,7 +186,6 @@ def showpaste(content_range, requested_path):
crawler_metadata['domain'] = r_serv_metadata.hget('paste_metadata:'+requested_path, 'domain')
crawler_metadata['paste_father'] = r_serv_metadata.hget('paste_metadata:'+requested_path, 'father')
crawler_metadata['real_link'] = r_serv_metadata.hget('paste_metadata:'+requested_path,'real_link')
crawler_metadata['external_links'] =r_serv_metadata.scard('paste_onion_external_links:'+requested_path)
crawler_metadata['screenshot'] = paste.get_p_rel_path()
else:
crawler_metadata['get_metadata'] = False

View file

@ -424,9 +424,9 @@
<div class="col-md-5">
<div class="row">
<div class="panel panel-default">
<div class="panel panel-info">
<div class="panel-heading">
<i id="flash-tld" class="glyphicon glyphicon-flash " flash-tld=""></i> Graph
<i class="fa fa-eye-slash"></i> Crawled Paste
</div>
<table class="table table-hover table-striped">
@ -443,10 +443,6 @@
<td>Source link</td>
<td>{{ crawler_metadata['real_link'] }}</td>
</tr>
<tr>
<td>External links</td>
<td>{{ crawler_metadata['external_links'] }}</td>
</tr>
</tbody>
</table>
</div>