mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-10 00:28:22 +00:00
chg: [Crawler] css + limit splash RAM
This commit is contained in:
parent
e357dce59b
commit
ecb2857151
7 changed files with 28 additions and 27 deletions
|
@@ -201,28 +201,32 @@ function launching_scripts {
|
|||
}
|
||||
|
||||
function launching_crawler {
|
||||
CONFIG=$AIL_BIN/packages/config.cfg
|
||||
lport=$(awk '/^\[Crawler\]/{f=1} f==1&&/^splash_onion_port/{print $3;exit}' "${CONFIG}")
|
||||
if [[ ! $iscrawler ]]; then
|
||||
CONFIG=$AIL_BIN/packages/config.cfg
|
||||
lport=$(awk '/^\[Crawler\]/{f=1} f==1&&/^splash_onion_port/{print $3;exit}' "${CONFIG}")
|
||||
|
||||
IFS='-' read -ra PORTS <<< "$lport"
|
||||
if [ ${#PORTS[@]} -eq 1 ]
|
||||
then
|
||||
first_port=${PORTS[0]}
|
||||
last_port=${PORTS[0]}
|
||||
else
|
||||
first_port=${PORTS[0]}
|
||||
last_port=${PORTS[1]}
|
||||
fi
|
||||
IFS='-' read -ra PORTS <<< "$lport"
|
||||
if [ ${#PORTS[@]} -eq 1 ]
|
||||
then
|
||||
first_port=${PORTS[0]}
|
||||
last_port=${PORTS[0]}
|
||||
else
|
||||
first_port=${PORTS[0]}
|
||||
last_port=${PORTS[1]}
|
||||
fi
|
||||
|
||||
screen -dmS "Crawler_AIL"
|
||||
sleep 0.1
|
||||
|
||||
for ((i=first_port;i<=last_port;i++)); do
|
||||
screen -S "Crawler_AIL" -X screen -t "onion_crawler:$i" bash -c 'cd '${AIL_BIN}'; ./Crawler.py onion '$i'; read x'
|
||||
screen -dmS "Crawler_AIL"
|
||||
sleep 0.1
|
||||
done
|
||||
|
||||
echo -e $GREEN"\t* Launching Crawler_AIL scripts"$DEFAULT
|
||||
for ((i=first_port;i<=last_port;i++)); do
|
||||
screen -S "Crawler_AIL" -X screen -t "onion_crawler:$i" bash -c 'cd '${AIL_BIN}'; ./Crawler.py onion '$i'; read x'
|
||||
sleep 0.1
|
||||
done
|
||||
|
||||
echo -e $GREEN"\t* Launching Crawler_AIL scripts"$DEFAULT
|
||||
else
|
||||
echo -e $RED"\t* A screen is already launched"$DEFAULT
|
||||
fi
|
||||
}
|
||||
|
||||
function shutting_down_redis {
|
||||
|
|
|
@@ -114,7 +114,7 @@ class TorSplashCrawler():
|
|||
# down ?
|
||||
print('504 detected')
|
||||
elif response.status != 200:
|
||||
#print('other: {}'.format(response.status))
|
||||
print('other response: {}'.format(response.status))
|
||||
#print(error_log)
|
||||
#detect connection to proxy refused
|
||||
error_log = (json.loads(response.body.decode()))
|
||||
|
|
|
@@ -37,7 +37,7 @@ sleep 0.1
|
|||
|
||||
for ((i=0;i<=$((${n} - 1));i++)); do
|
||||
port_number=$((${p} + $i))
|
||||
screen -S "Docker_Splash" -X screen -t "docker_splash:$port_number" bash -c 'sudo docker run -p '$port_number':8050 --cpus=1 -v '$f':/etc/splash/proxy-profiles/ --net="bridge" scrapinghub/splash; read x'
|
||||
screen -S "Docker_Splash" -X screen -t "docker_splash:$port_number" bash -c 'sudo docker run -p '$port_number':8050 --cpus=1 --memory=4.5G -v '$f':/etc/splash/proxy-profiles/ --net="bridge" scrapinghub/splash; read x'
|
||||
sleep 0.1
|
||||
echo " Splash server launched on port $port_number"
|
||||
done
|
||||
|
|
|
@@ -6,6 +6,7 @@
|
|||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>Hidden Service - AIL</title>
|
||||
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png') }}">
|
||||
|
||||
<!-- Core CSS -->
|
||||
<link href="{{ url_for('static', filename='css/bootstrap.min.css') }}" rel="stylesheet">
|
||||
|
|
|
@@ -6,6 +6,7 @@
|
|||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>Show Domain - AIL</title>
|
||||
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png') }}">
|
||||
|
||||
<!-- Core CSS -->
|
||||
<link href="{{ url_for('static', filename='css/bootstrap.min.css') }}" rel="stylesheet">
|
||||
|
|
|
@@ -186,7 +186,6 @@ def showpaste(content_range, requested_path):
|
|||
crawler_metadata['domain'] = r_serv_metadata.hget('paste_metadata:'+requested_path, 'domain')
|
||||
crawler_metadata['paste_father'] = r_serv_metadata.hget('paste_metadata:'+requested_path, 'father')
|
||||
crawler_metadata['real_link'] = r_serv_metadata.hget('paste_metadata:'+requested_path,'real_link')
|
||||
crawler_metadata['external_links'] =r_serv_metadata.scard('paste_onion_external_links:'+requested_path)
|
||||
crawler_metadata['screenshot'] = paste.get_p_rel_path()
|
||||
else:
|
||||
crawler_metadata['get_metadata'] = False
|
||||
|
|
|
@@ -424,9 +424,9 @@
|
|||
|
||||
<div class="col-md-5">
|
||||
<div class="row">
|
||||
<div class="panel panel-default">
|
||||
<div class="panel panel-info">
|
||||
<div class="panel-heading">
|
||||
<i id="flash-tld" class="glyphicon glyphicon-flash " flash-tld=""></i> Graph
|
||||
<i class="fa fa-eye-slash"></i> Crawled Paste
|
||||
</div>
|
||||
|
||||
<table class="table table-hover table-striped">
|
||||
|
@@ -443,10 +443,6 @@
|
|||
<td>Source link</td>
|
||||
<td>{{ crawler_metadata['real_link'] }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>External links</td>
|
||||
<td>{{ crawler_metadata['external_links'] }}</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
|
Loading…
Reference in a new issue