diff --git a/bin/lib/crawlers.py b/bin/lib/crawlers.py index 46a0a0cd..f30aac07 100755 --- a/bin/lib/crawlers.py +++ b/bin/lib/crawlers.py @@ -377,14 +377,15 @@ def api_create_cookie(user_id, cookiejar_uuid, cookie_dict): #### CRAWLER TASK #### def create_crawler_task(url, screenshot=True, har=True, depth_limit=1, max_pages=100, auto_crawler=False, crawler_delta=3600, cookiejar_uuid=None, user_agent=None): + crawler_config = {} crawler_config['depth_limit'] = depth_limit crawler_config['closespider_pagecount'] = max_pages if screenshot: - crawler_config['screenshot'] = True + crawler_config['png'] = True else: - crawler_config['screenshot'] = False + crawler_config['png'] = False if har: crawler_config['har'] = True else: diff --git a/bin/torcrawler/TorSplashCrawler.py b/bin/torcrawler/TorSplashCrawler.py index 60fc103a..41f45acb 100644 --- a/bin/torcrawler/TorSplashCrawler.py +++ b/bin/torcrawler/TorSplashCrawler.py @@ -219,13 +219,13 @@ class TorSplashCrawler(): all_cookies = [] # SCREENSHOT - if 'png' in response.data: + if 'png' in response.data and self.png: sha256_string = Screenshot.save_crawled_screeshot(response.data['png'], 5000000, f_save=self.requested_mode) if sha256_string: Screenshot.save_item_relationship(sha256_string, item_id) Screenshot.save_domain_relationship(sha256_string, self.domains[0]) # HAR - if 'har' in response.data: + if 'har' in response.data and self.har: crawlers.save_har(self.har_dir, item_id, response.data['har']) le = LinkExtractor(allow_domains=self.domains, unique=True) diff --git a/bin/torcrawler/launch_splash_crawler.sh b/bin/torcrawler/launch_splash_crawler.sh index dcbe7bee..87884a57 100755 --- a/bin/torcrawler/launch_splash_crawler.sh +++ b/bin/torcrawler/launch_splash_crawler.sh @@ -1,5 +1,7 @@ #!/bin/bash +issplashed=`screen -ls | egrep '[0-9]+.Docker_Splash' | cut -d. -f1` + usage() { echo "Usage: sudo $0 [-f ] [-p ] [-n ]" 1>&2; echo " -f: absolute path to splash docker proxy-profiles directory (used for proxy configuration)"; echo " -p: number of the first splash server port number. This number is incremented for the others splash server"; @@ -34,10 +36,6 @@ while getopts ":p:f:n:u:" o; do done shift $((OPTIND-1)) -if [ -z "${u}" ]; then - u=3000; -fi - if [ -z "${p}" ] || [ -z "${f}" ] || [ -z "${n}" ]; then usage; fi @@ -47,6 +45,11 @@ DEFAULT="\\033[0;39m" GREEN="\\033[1;32m" WHITE="\\033[0;02m" +if [ "$EUID" -ne 0 ]; then + echo -e $RED"\t* Please run as root or sudo.\n"$DEFAULT + exit 1 +fi + if [ ! -d "${f}" ]; then printf "$RED\n Error -f, proxy-profiles directory: $WHITE${f}$RED not found\n$DEFAULT Please check if you enter the correct path\n" exit 1 @@ -57,6 +60,15 @@ if [ ! -f "${f}default.ini" ]; then exit 1 fi +if [[ $issplashed ]]; then + echo -e $RED"\t* A screen is already launched, please kill it before creating another one."$DEFAULT + exit 1 +fi + +if [ -z "${u}" ]; then + u=3000; +fi + screen -dmS "Docker_Splash" sleep 0.1