mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-30 01:37:17 +00:00
fix: [crawler] option to disable screenshots and har
This commit is contained in:
parent
d6897904d6
commit
d20ae35548
3 changed files with 21 additions and 8 deletions
|
@ -377,14 +377,15 @@ def api_create_cookie(user_id, cookiejar_uuid, cookie_dict):
|
||||||
|
|
||||||
#### CRAWLER TASK ####
|
#### CRAWLER TASK ####
|
||||||
def create_crawler_task(url, screenshot=True, har=True, depth_limit=1, max_pages=100, auto_crawler=False, crawler_delta=3600, cookiejar_uuid=None, user_agent=None):
|
def create_crawler_task(url, screenshot=True, har=True, depth_limit=1, max_pages=100, auto_crawler=False, crawler_delta=3600, cookiejar_uuid=None, user_agent=None):
|
||||||
|
|
||||||
crawler_config = {}
|
crawler_config = {}
|
||||||
crawler_config['depth_limit'] = depth_limit
|
crawler_config['depth_limit'] = depth_limit
|
||||||
crawler_config['closespider_pagecount'] = max_pages
|
crawler_config['closespider_pagecount'] = max_pages
|
||||||
|
|
||||||
if screenshot:
|
if screenshot:
|
||||||
crawler_config['screenshot'] = True
|
crawler_config['png'] = True
|
||||||
else:
|
else:
|
||||||
crawler_config['screenshot'] = False
|
crawler_config['png'] = False
|
||||||
if har:
|
if har:
|
||||||
crawler_config['har'] = True
|
crawler_config['har'] = True
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -219,13 +219,13 @@ class TorSplashCrawler():
|
||||||
all_cookies = []
|
all_cookies = []
|
||||||
|
|
||||||
# SCREENSHOT
|
# SCREENSHOT
|
||||||
if 'png' in response.data:
|
if 'png' in response.data and self.png:
|
||||||
sha256_string = Screenshot.save_crawled_screeshot(response.data['png'], 5000000, f_save=self.requested_mode)
|
sha256_string = Screenshot.save_crawled_screeshot(response.data['png'], 5000000, f_save=self.requested_mode)
|
||||||
if sha256_string:
|
if sha256_string:
|
||||||
Screenshot.save_item_relationship(sha256_string, item_id)
|
Screenshot.save_item_relationship(sha256_string, item_id)
|
||||||
Screenshot.save_domain_relationship(sha256_string, self.domains[0])
|
Screenshot.save_domain_relationship(sha256_string, self.domains[0])
|
||||||
# HAR
|
# HAR
|
||||||
if 'har' in response.data:
|
if 'har' in response.data and self.har:
|
||||||
crawlers.save_har(self.har_dir, item_id, response.data['har'])
|
crawlers.save_har(self.har_dir, item_id, response.data['har'])
|
||||||
|
|
||||||
le = LinkExtractor(allow_domains=self.domains, unique=True)
|
le = LinkExtractor(allow_domains=self.domains, unique=True)
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
|
issplashed=`screen -ls | egrep '[0-9]+.Docker_Splash' | cut -d. -f1`
|
||||||
|
|
||||||
usage() { echo "Usage: sudo $0 [-f <config_absolute_path>] [-p <port_start>] [-n <number_of_splash_servers>]" 1>&2;
|
usage() { echo "Usage: sudo $0 [-f <config_absolute_path>] [-p <port_start>] [-n <number_of_splash_servers>]" 1>&2;
|
||||||
echo " -f: absolute path to splash docker proxy-profiles directory (used for proxy configuration)";
|
echo " -f: absolute path to splash docker proxy-profiles directory (used for proxy configuration)";
|
||||||
echo " -p: number of the first splash server port number. This number is incremented for the others splash server";
|
echo " -p: number of the first splash server port number. This number is incremented for the others splash server";
|
||||||
|
@ -34,10 +36,6 @@ while getopts ":p:f:n:u:" o; do
|
||||||
done
|
done
|
||||||
shift $((OPTIND-1))
|
shift $((OPTIND-1))
|
||||||
|
|
||||||
if [ -z "${u}" ]; then
|
|
||||||
u=3000;
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ -z "${p}" ] || [ -z "${f}" ] || [ -z "${n}" ]; then
|
if [ -z "${p}" ] || [ -z "${f}" ] || [ -z "${n}" ]; then
|
||||||
usage;
|
usage;
|
||||||
fi
|
fi
|
||||||
|
@ -47,6 +45,11 @@ DEFAULT="\\033[0;39m"
|
||||||
GREEN="\\033[1;32m"
|
GREEN="\\033[1;32m"
|
||||||
WHITE="\\033[0;02m"
|
WHITE="\\033[0;02m"
|
||||||
|
|
||||||
|
if [ "$EUID" -ne 0 ]; then
|
||||||
|
echo -e $RED"\t* Please run as root or sudo.\n"$DEFAULT
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
if [ ! -d "${f}" ]; then
|
if [ ! -d "${f}" ]; then
|
||||||
printf "$RED\n Error -f, proxy-profiles directory: $WHITE${f}$RED not found\n$DEFAULT Please check if you enter the correct path\n"
|
printf "$RED\n Error -f, proxy-profiles directory: $WHITE${f}$RED not found\n$DEFAULT Please check if you enter the correct path\n"
|
||||||
exit 1
|
exit 1
|
||||||
|
@ -57,6 +60,15 @@ if [ ! -f "${f}default.ini" ]; then
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [[ $issplashed ]]; then
|
||||||
|
echo -e $RED"\t* A screen is already launched, please kill it before creating another one."$DEFAULT
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ -z "${u}" ]; then
|
||||||
|
u=3000;
|
||||||
|
fi
|
||||||
|
|
||||||
screen -dmS "Docker_Splash"
|
screen -dmS "Docker_Splash"
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue