fix: [crawler] option to disable screenshots and har

This commit is contained in:
Terrtia 2020-06-04 16:05:32 +02:00
parent d6897904d6
commit d20ae35548
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
3 changed files with 21 additions and 8 deletions

View file

@ -377,14 +377,15 @@ def api_create_cookie(user_id, cookiejar_uuid, cookie_dict):
#### CRAWLER TASK #### #### CRAWLER TASK ####
def create_crawler_task(url, screenshot=True, har=True, depth_limit=1, max_pages=100, auto_crawler=False, crawler_delta=3600, cookiejar_uuid=None, user_agent=None): def create_crawler_task(url, screenshot=True, har=True, depth_limit=1, max_pages=100, auto_crawler=False, crawler_delta=3600, cookiejar_uuid=None, user_agent=None):
crawler_config = {} crawler_config = {}
crawler_config['depth_limit'] = depth_limit crawler_config['depth_limit'] = depth_limit
crawler_config['closespider_pagecount'] = max_pages crawler_config['closespider_pagecount'] = max_pages
if screenshot: if screenshot:
crawler_config['screenshot'] = True crawler_config['png'] = True
else: else:
crawler_config['screenshot'] = False crawler_config['png'] = False
if har: if har:
crawler_config['har'] = True crawler_config['har'] = True
else: else:

View file

@ -219,13 +219,13 @@ class TorSplashCrawler():
all_cookies = [] all_cookies = []
# SCREENSHOT # SCREENSHOT
if 'png' in response.data: if 'png' in response.data and self.png:
sha256_string = Screenshot.save_crawled_screeshot(response.data['png'], 5000000, f_save=self.requested_mode) sha256_string = Screenshot.save_crawled_screeshot(response.data['png'], 5000000, f_save=self.requested_mode)
if sha256_string: if sha256_string:
Screenshot.save_item_relationship(sha256_string, item_id) Screenshot.save_item_relationship(sha256_string, item_id)
Screenshot.save_domain_relationship(sha256_string, self.domains[0]) Screenshot.save_domain_relationship(sha256_string, self.domains[0])
# HAR # HAR
if 'har' in response.data: if 'har' in response.data and self.har:
crawlers.save_har(self.har_dir, item_id, response.data['har']) crawlers.save_har(self.har_dir, item_id, response.data['har'])
le = LinkExtractor(allow_domains=self.domains, unique=True) le = LinkExtractor(allow_domains=self.domains, unique=True)

View file

@ -1,5 +1,7 @@
#!/bin/bash #!/bin/bash
issplashed=`screen -ls | egrep '[0-9]+.Docker_Splash' | cut -d. -f1`
usage() { echo "Usage: sudo $0 [-f <config_absolute_path>] [-p <port_start>] [-n <number_of_splash_servers>]" 1>&2; usage() { echo "Usage: sudo $0 [-f <config_absolute_path>] [-p <port_start>] [-n <number_of_splash_servers>]" 1>&2;
echo " -f: absolute path to splash docker proxy-profiles directory (used for proxy configuration)"; echo " -f: absolute path to splash docker proxy-profiles directory (used for proxy configuration)";
echo " -p: number of the first splash server port number. This number is incremented for the others splash server"; echo " -p: number of the first splash server port number. This number is incremented for the others splash server";
@ -34,10 +36,6 @@ while getopts ":p:f:n:u:" o; do
done done
shift $((OPTIND-1)) shift $((OPTIND-1))
if [ -z "${u}" ]; then
u=3000;
fi
if [ -z "${p}" ] || [ -z "${f}" ] || [ -z "${n}" ]; then if [ -z "${p}" ] || [ -z "${f}" ] || [ -z "${n}" ]; then
usage; usage;
fi fi
@ -47,6 +45,11 @@ DEFAULT="\\033[0;39m"
GREEN="\\033[1;32m" GREEN="\\033[1;32m"
WHITE="\\033[0;02m" WHITE="\\033[0;02m"
if [ "$EUID" -ne 0 ]; then
echo -e $RED"\t* Please run as root or sudo.\n"$DEFAULT
exit 1
fi
if [ ! -d "${f}" ]; then if [ ! -d "${f}" ]; then
printf "$RED\n Error -f, proxy-profiles directory: $WHITE${f}$RED not found\n$DEFAULT Please check if you enter the correct path\n" printf "$RED\n Error -f, proxy-profiles directory: $WHITE${f}$RED not found\n$DEFAULT Please check if you enter the correct path\n"
exit 1 exit 1
@ -57,6 +60,15 @@ if [ ! -f "${f}default.ini" ]; then
exit 1 exit 1
fi fi
if [[ $issplashed ]]; then
echo -e $RED"\t* A screen is already launched, please kill it before creating another one."$DEFAULT
exit 1
fi
if [ -z "${u}" ]; then
u=3000;
fi
screen -dmS "Docker_Splash" screen -dmS "Docker_Splash"
sleep 0.1 sleep 0.1