From 11d537e2eb028910748990c5592ab614d4cf91d2 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Fri, 8 Jan 2021 17:37:18 +0100 Subject: [PATCH] chg: [screenshot + har directory] add option to change screenshots directory --- bin/lib/ConfigLoader.py | 9 +++++++++ bin/lib/Screenshot.py | 2 +- bin/packages/HiddenServices.py | 7 +++---- bin/packages/Item.py | 5 +++-- bin/torcrawler/TorSplashCrawler.py | 8 ++++++-- configs/core.cfg.sample | 9 ++++++++- update/v1.5/Update-ARDB_Onions_screenshots.py | 2 +- var/www/modules/Flask_config.py | 2 +- 8 files changed, 32 insertions(+), 12 deletions(-) diff --git a/bin/lib/ConfigLoader.py b/bin/lib/ConfigLoader.py index c244b2e5..262a44bd 100755 --- a/bin/lib/ConfigLoader.py +++ b/bin/lib/ConfigLoader.py @@ -41,6 +41,15 @@ class ConfigLoader(object): db=self.cfg.getint(redis_name, "db"), decode_responses=decode_responses ) + def get_files_directory(self, key_name): + directory_path = self.cfg.get('Directories', key_name) + # full path + if directory_path[0] == '/': + return directory_path + else: + directory_path = os.path.join(os.environ['AIL_HOME'], directory_path) + return directory_path + def get_config_str(self, section, key_name): return self.cfg.get(section, key_name) diff --git a/bin/lib/Screenshot.py b/bin/lib/Screenshot.py index 46141e30..3f198f52 100755 --- a/bin/lib/Screenshot.py +++ b/bin/lib/Screenshot.py @@ -20,7 +20,7 @@ import ConfigLoader config_loader = ConfigLoader.ConfigLoader() r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") -SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot"), 'screenshot') +SCREENSHOT_FOLDER = config_loader.get_files_directory('screenshot') config_loader = None # get screenshot relative path diff --git a/bin/packages/HiddenServices.py b/bin/packages/HiddenServices.py index 8ed7372b..7b0c444a 100755 --- a/bin/packages/HiddenServices.py +++ b/bin/packages/HiddenServices.py @@ -60,15 +60,14 @@ class HiddenServices(object): self.paste_directory = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) self.paste_crawled_directory = os.path.join(self.paste_directory, config_loader.get_config_str("Directories", "crawled")) self.paste_crawled_directory_name = config_loader.get_config_str("Directories", "crawled") - self.screenshot_directory = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot")) - self.screenshot_directory_screenshot = os.path.join(self.screenshot_directory, 'screenshot') + self.screenshot_directory = config_loader.get_files_directory('screenshot') elif type == 'i2p': self.paste_directory = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot")) - self.screenshot_directory = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot")) + self.screenshot_directory = config_loader.get_files_directory('screenshot') else: ## TODO: # FIXME: add error pass - + config_loader = None #def remove_absolute_path_link(self, key, value): diff --git a/bin/packages/Item.py b/bin/packages/Item.py index e2b08f7d..36a236e0 100755 --- a/bin/packages/Item.py +++ b/bin/packages/Item.py @@ -34,7 +34,8 @@ PASTES_FOLDER = os.path.join(os.path.realpath(PASTES_FOLDER), '') r_cache = config_loader.get_redis_conn("Redis_Cache") r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") -screenshot_directory = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot")) +screenshot_directory = config_loader.get_files_directory('screenshot') +har_directory = config_loader.get_files_directory('har') config_loader = None @@ -388,7 +389,7 @@ def get_item_screenshot(item_id): return '' def get_item_har_name(item_id): - har_path = os.path.join(screenshot_directory, item_id) + '.json' + har_path = os.path.join(har_directory, item_id) + '.json' if os.path.isfile(har_path): return har_path else: diff --git a/bin/torcrawler/TorSplashCrawler.py b/bin/torcrawler/TorSplashCrawler.py index 13e6aaa6..17438d60 100644 --- a/bin/torcrawler/TorSplashCrawler.py +++ b/bin/torcrawler/TorSplashCrawler.py @@ -26,7 +26,7 @@ sys.path.append(os.environ['AIL_BIN']) from Helper import Process sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib')) -#import ConfigLoader +import ConfigLoader import Screenshot import crawlers @@ -133,7 +133,11 @@ class TorSplashCrawler(): config_section = 'Crawler' self.p = Process(config_section) self.item_dir = os.path.join(self.p.config.get("Directories", "crawled"), date_str ) - self.har_dir = os.path.join(os.environ['AIL_HOME'], self.p.config.get("Directories", "crawled_screenshot"), date_str ) + + config_loader = ConfigLoader.ConfigLoader() + self.har_dir = os.path.join(config_loader.get_files_directory('har') , date_str ) + config_loader = None + self.r_serv_log_submit = redis.StrictRedis( host=self.p.config.get("Redis_Log_submit", "host"), port=self.p.config.getint("Redis_Log_submit", "port"), diff --git a/configs/core.cfg.sample b/configs/core.cfg.sample index 8a7b86a7..9dd00e3b 100644 --- a/configs/core.cfg.sample +++ b/configs/core.cfg.sample @@ -4,7 +4,8 @@ dicofilters = Dicos pastes = PASTES hash = HASHS crawled = crawled -crawled_screenshot = CRAWLED_SCREENSHOT +har = CRAWLED_SCREENSHOT +screenshot = CRAWLED_SCREENSHOT/screenshot wordtrending_csv = var/www/static/csv/wordstrendingdata wordsfile = files/wordfile @@ -221,6 +222,11 @@ host = localhost port = 6382 db = 10 +[Kvrocks_Meta] +host = localhost +port = 6383 +db = 0 + [Url] cc_critical = DE @@ -278,6 +284,7 @@ default_crawler_closespider_pagecount = 50 default_crawler_user_agent = Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0 splash_url = http://127.0.0.1 splash_port = 8050-8052 +domain_proxy = onion.foundation [IP] # list of comma-separated CIDR that you wish to be alerted for. e.g: diff --git a/update/v1.5/Update-ARDB_Onions_screenshots.py b/update/v1.5/Update-ARDB_Onions_screenshots.py index 3327878f..41f17c4a 100755 --- a/update/v1.5/Update-ARDB_Onions_screenshots.py +++ b/update/v1.5/Update-ARDB_Onions_screenshots.py @@ -33,7 +33,7 @@ if __name__ == '__main__': config_loader = ConfigLoader.ConfigLoader() SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot")) - NEW_SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot"), 'screenshot') + NEW_SCREENSHOT_FOLDER = config_loader.get_files_directory('screenshot') PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/' diff --git a/var/www/modules/Flask_config.py b/var/www/modules/Flask_config.py index a1a8de6b..7d11e484 100644 --- a/var/www/modules/Flask_config.py +++ b/var/www/modules/Flask_config.py @@ -100,7 +100,7 @@ dict_update_description = {'v1.5':{'nb_background_update': 5, 'update_warning_me UPLOAD_FOLDER = os.path.join(os.environ['AIL_FLASK'], 'submitted') PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/' -SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot"), 'screenshot') +SCREENSHOT_FOLDER = config_loader.get_files_directory('screenshot') REPO_ORIGIN = 'https://github.com/ail-project/ail-framework.git'