mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-10 08:38:28 +00:00
fix: [crawler] option to disable screenshots and har
This commit is contained in:
parent
d6897904d6
commit
d20ae35548
3 changed files with 21 additions and 8 deletions
|
@@ -377,14 +377,15 @@ def api_create_cookie(user_id, cookiejar_uuid, cookie_dict):
|
|||
|
||||
#### CRAWLER TASK ####
|
||||
def create_crawler_task(url, screenshot=True, har=True, depth_limit=1, max_pages=100, auto_crawler=False, crawler_delta=3600, cookiejar_uuid=None, user_agent=None):
|
||||
|
||||
crawler_config = {}
|
||||
crawler_config['depth_limit'] = depth_limit
|
||||
crawler_config['closespider_pagecount'] = max_pages
|
||||
|
||||
if screenshot:
|
||||
crawler_config['screenshot'] = True
|
||||
crawler_config['png'] = True
|
||||
else:
|
||||
crawler_config['screenshot'] = False
|
||||
crawler_config['png'] = False
|
||||
if har:
|
||||
crawler_config['har'] = True
|
||||
else:
|
||||
|
|
|
@@ -219,13 +219,13 @@ class TorSplashCrawler():
|
|||
all_cookies = []
|
||||
|
||||
# SCREENSHOT
|
||||
if 'png' in response.data:
|
||||
if 'png' in response.data and self.png:
|
||||
sha256_string = Screenshot.save_crawled_screeshot(response.data['png'], 5000000, f_save=self.requested_mode)
|
||||
if sha256_string:
|
||||
Screenshot.save_item_relationship(sha256_string, item_id)
|
||||
Screenshot.save_domain_relationship(sha256_string, self.domains[0])
|
||||
# HAR
|
||||
if 'har' in response.data:
|
||||
if 'har' in response.data and self.har:
|
||||
crawlers.save_har(self.har_dir, item_id, response.data['har'])
|
||||
|
||||
le = LinkExtractor(allow_domains=self.domains, unique=True)
|
||||
|
|
|
@@ -1,5 +1,7 @@
|
|||
#!/bin/bash
|
||||
|
||||
issplashed=`screen -ls | egrep '[0-9]+.Docker_Splash' | cut -d. -f1`
|
||||
|
||||
usage() { echo "Usage: sudo $0 [-f <config_absolute_path>] [-p <port_start>] [-n <number_of_splash_servers>]" 1>&2;
|
||||
echo " -f: absolute path to splash docker proxy-profiles directory (used for proxy configuration)";
|
||||
echo " -p: number of the first splash server port number. This number is incremented for the others splash server";
|
||||
|
@@ -34,10 +36,6 @@ while getopts ":p:f:n:u:" o; do
|
|||
done
|
||||
shift $((OPTIND-1))
|
||||
|
||||
if [ -z "${u}" ]; then
|
||||
u=3000;
|
||||
fi
|
||||
|
||||
if [ -z "${p}" ] || [ -z "${f}" ] || [ -z "${n}" ]; then
|
||||
usage;
|
||||
fi
|
||||
|
@@ -47,6 +45,11 @@ DEFAULT="\\033[0;39m"
|
|||
GREEN="\\033[1;32m"
|
||||
WHITE="\\033[0;02m"
|
||||
|
||||
if [ "$EUID" -ne 0 ]; then
|
||||
echo -e $RED"\t* Please run as root or sudo.\n"$DEFAULT
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -d "${f}" ]; then
|
||||
printf "$RED\n Error -f, proxy-profiles directory: $WHITE${f}$RED not found\n$DEFAULT Please check if you enter the correct path\n"
|
||||
exit 1
|
||||
|
@@ -57,6 +60,15 @@ if [ ! -f "${f}default.ini" ]; then
|
|||
exit 1
|
||||
fi
|
||||
|
||||
if [[ $issplashed ]]; then
|
||||
echo -e $RED"\t* A screen is already launched, please kill it before creating another one."$DEFAULT
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ -z "${u}" ]; then
|
||||
u=3000;
|
||||
fi
|
||||
|
||||
screen -dmS "Docker_Splash"
|
||||
sleep 0.1
|
||||
|
||||
|
|
Loading…
Reference in a new issue