From 056bad7a49a87c59a42c72a40f8446969199c370 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Tue, 17 Dec 2019 15:13:36 +0100 Subject: [PATCH] chg: [screenshot correlation + v2.6] add screenshot-domain correlation + v2.6 update --- bin/lib/Correlate_object.py | 21 +++- bin/lib/Domain.py | 11 ++ bin/lib/Screenshot.py | 104 ++++++++++++++++++ bin/packages/Item.py | 16 +++ bin/torcrawler/TorSplashCrawler.py | 3 + bin/update-background.py | 20 ++++ update/v2.6/Update.py | 34 ++++++ update/v2.6/Update.sh | 39 +++++++ update/v2.6/Update_screenshots.py | 95 ++++++++++++++++ var/www/blueprints/correlation.py | 3 + var/www/modules/Flask_config.py | 4 +- .../correlation/legend_graph_correlation.html | 14 +++ .../correlation/show_correlation.html | 4 + 13 files changed, 363 insertions(+), 5 deletions(-) create mode 100755 bin/lib/Screenshot.py create mode 100755 update/v2.6/Update.py create mode 100755 update/v2.6/Update.sh create mode 100755 update/v2.6/Update_screenshots.py diff --git a/bin/lib/Correlate_object.py b/bin/lib/Correlate_object.py index b6799b6b..df483bbc 100755 --- a/bin/lib/Correlate_object.py +++ b/bin/lib/Correlate_object.py @@ -12,6 +12,7 @@ sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) import ConfigLoader import Decoded import Domain +import Screenshot sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) import Pgp @@ -26,7 +27,7 @@ def get_all_correlation_names(): ''' Return a list of all available correlations ''' - return ['pgp', 'cryptocurrency', 'decoded'] + return ['pgp', 'cryptocurrency', 'decoded', 'screenshot'] def get_all_correlation_objects(): ''' @@ -45,6 +46,8 @@ def exist_object(object_type, correlation_id, type_id=None): return Pgp.pgp._exist_corelation_field(type_id, correlation_id) elif object_type == 'cryptocurrency': return Cryptocurrency.cryptocurrency._exist_corelation_field(type_id, correlation_id) + elif object_type == 'screenshot': + return Screenshot.exist_screenshot(correlation_id) else: return False @@ 
-59,6 +62,8 @@ def get_object_metadata(object_type, correlation_id, type_id=None): return Pgp.pgp.get_metadata(type_id, correlation_id) elif object_type == 'cryptocurrency': return Cryptocurrency.cryptocurrency.get_metadata(type_id, correlation_id) + elif object_type == 'screenshot': + return Screenshot.get_metadata(correlation_id) def get_object_correlation(object_type, value, correlation_names, correlation_objects, requested_correl_type=None): if object_type == 'domain': @@ -71,7 +76,8 @@ def get_object_correlation(object_type, value, correlation_names, correlation_ob return Pgp.pgp.get_correlation_all_object(requested_correl_type, value, correlation_objects=correlation_objects) elif object_type == 'cryptocurrency': return Cryptocurrency.cryptocurrency.get_correlation_all_object(requested_correl_type, value, correlation_objects=correlation_objects) - + elif object_type == 'screenshot': + return Screenshot.get_screenshot_correlated_object(value, correlation_objects) return {} def get_correlation_node_icon(correlation_name, correlation_type=None, value=None): @@ -130,6 +136,10 @@ def get_correlation_node_icon(correlation_name, correlation_type=None, value=Non else: icon_text = '\uf249' + elif correlation_name == 'screenshot': + node_color = '#E1F5DF' + icon_text = '\uf03e' + elif correlation_name == 'domain': node_radius = 5 node_color = '#3DA760' @@ -162,6 +172,9 @@ def get_item_url(correlation_name, value, correlation_type=None): elif correlation_name == 'decoded': endpoint = 'correlation.show_correlation' url = url_for(endpoint, object_type="decoded", correlation_id=value) + elif correlation_name == 'screenshot': + endpoint = 'correlation.show_correlation' + url = url_for(endpoint, object_type="screenshot", correlation_id=value) elif correlation_name == 'domain': endpoint = 'crawler_splash.showDomain' url = url_for(endpoint, domain=value) @@ -241,7 +254,7 @@ def get_graph_node_object_correlation(object_type, root_value, mode, correlation if mode=="inter": 
nodes.add(correl_node_id) links.add((root_node_id, correl_node_id)) - if correl in ('decoded', 'domain', 'paste'): + if correl in ('decoded', 'screenshot', 'domain', 'paste'): for correl_val in root_correlation[correl]: correl_node_id = create_node_id(correl, correl_val) @@ -254,7 +267,7 @@ def get_graph_node_object_correlation(object_type, root_value, mode, correlation res = get_object_correlation(correl, correl_val, correlation_names, correlation_objects) if res: for corr_obj in res: - if corr_obj in ('decoded', 'domain', 'paste'): + if corr_obj in ('decoded', 'domain', 'paste', 'screenshot'): for correl_key_val in res[corr_obj]: #filter root value if correl_key_val == root_value: diff --git a/bin/lib/Domain.py b/bin/lib/Domain.py index 54fd678a..38fc406f 100755 --- a/bin/lib/Domain.py +++ b/bin/lib/Domain.py @@ -26,6 +26,7 @@ cryptocurrency = Cryptocurrency.cryptocurrency sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) import ConfigLoader import Correlate_object +import Screenshot config_loader = ConfigLoader.ConfigLoader() r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") @@ -432,6 +433,14 @@ def get_domain_decoded(domain): ''' return Decoded.get_domain_decoded_item(domain) +def get_domain_screenshot(domain): + ''' + Return all screenshots of a given domain.
+ + :param domain: crawled domain + ''' + return Screenshot.get_domain_screenshot(domain) + def get_domain_all_correlation(domain, correlation_names=[], get_nb=False): ''' @@ -453,6 +462,8 @@ def get_domain_all_correlation(domain, correlation_names=[], get_nb=False): res = get_domain_pgp(domain, get_nb=get_nb) elif correlation_name=='decoded': res = get_domain_decoded(domain) + elif correlation_name=='screenshot': + res = get_domain_screenshot(domain) else: res = None # add correllation to dict diff --git a/bin/lib/Screenshot.py b/bin/lib/Screenshot.py new file mode 100755 index 00000000..72456e46 --- /dev/null +++ b/bin/lib/Screenshot.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import sys +import redis + + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages')) +import Item +import Date + + +import ConfigLoader + +config_loader = ConfigLoader.ConfigLoader() +r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") +r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") +SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot"), 'screenshot') +config_loader = None + +# get screenshot relative path +def get_screenshot_rel_path(sha256_string, add_extension=False): + screenshot_path = os.path.join(sha256_string[0:2], sha256_string[2:4], sha256_string[4:6], sha256_string[6:8], sha256_string[8:10], sha256_string[10:12], sha256_string[12:]) + if add_extension: + screenshot_path = screenshot_path + '.png' + return screenshot_path + +def exist_screenshot(sha256_string): + screenshot_path = os.path.join(SCREENSHOT_FOLDER, get_screenshot_rel_path(sha256_string, add_extension=True)) + return os.path.isfile(screenshot_path) + +def get_metadata(sha256_string): + metadata_dict = {} + metadata_dict['sha256'] = sha256_string + return metadata_dict + + +def get_screenshot_items_list(sha256_string): + res = 
r_serv_onion.smembers('screenshot:{}'.format(sha256_string)) + if res: + return list(res) + else: + return [] + +def get_item_screenshot_list(item_id): + ''' + Return the screenshot of a given item id, as a list (empty if the item has none). + + :param item_id: item id + ''' + screenshot = r_serv_metadata.hget('paste_metadata:{}'.format(item_id), 'screenshot') + if screenshot: + return [screenshot] + else: + return [] + +def get_domain_screenshot(domain): + ''' + Return all screenshots of a given domain. + + :param domain: crawled domain + ''' + res = r_serv_onion.smembers('domain_screenshot:{}'.format(domain)) + if res: + return list(res) + else: + return [] + +def get_screenshot_domain(sha256_string): + ''' + Return all domains of a given screenshot. + + :param sha256_string: screenshot sha256 + ''' + res = r_serv_onion.smembers('screenshot_domain:{}'.format(sha256_string)) + if res: + return list(res) + else: + return [] + +def get_screenshot_correlated_object(sha256_string, correlation_objects=[]): + ''' + Return all correlations of a given sha256.
+ + :param sha256_string: sha256 + :type sha256_string: str + + :return: a dict of all correlation for a given sha256 + :rtype: dict + ''' + if correlation_objects is None: + correlation_objects = ['paste', 'domain'] + decoded_correlation = {} + for correlation_object in correlation_objects: + if correlation_object == 'paste': + res = get_screenshot_items_list(sha256_string) + elif correlation_object == 'domain': + res = get_screenshot_domain(sha256_string) + else: + res = None + if res: + decoded_correlation[correlation_object] = res + return decoded_correlation diff --git a/bin/packages/Item.py b/bin/packages/Item.py index e4723a7c..156115b0 100755 --- a/bin/packages/Item.py +++ b/bin/packages/Item.py @@ -17,6 +17,7 @@ sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) import ConfigLoader import Correlate_object import Decoded +import Screenshot config_loader = ConfigLoader.ConfigLoader() PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/' @@ -168,6 +169,14 @@ def get_item_decoded(item_id): ''' return Decoded.get_item_decoded(item_id) +def get_item_all_screenshot(item_id): + ''' + Return all screenshot of a given item. + + :param item_id: item id + ''' + return Screenshot.get_item_screenshot_list(item_id) + def get_item_all_correlation(item_id, correlation_names=[], get_nb=False): ''' Retun all correlation of a given item id.
@@ -188,6 +197,8 @@ def get_item_all_correlation(item_id, correlation_names=[], get_nb=False): res = get_item_pgp(item_id, get_nb=get_nb) elif correlation_name=='decoded': res = get_item_decoded(item_id) + elif correlation_name=='screenshot': + res = get_item_all_screenshot(item_id) else: res = None # add correllation to dict @@ -256,6 +267,11 @@ def is_item_in_domain(domain, item_id): def get_item_domain(item_id): return item_id[19:-36] +def get_domain(item_id): + item_id = item_id.split('/') + item_id = item_id[-1] + return item_id[:-36] + def get_item_children(item_id): return list(r_serv_metadata.smembers('paste_children:{}'.format(item_id))) diff --git a/bin/torcrawler/TorSplashCrawler.py b/bin/torcrawler/TorSplashCrawler.py index 9d20f1f3..67c3cffc 100644 --- a/bin/torcrawler/TorSplashCrawler.py +++ b/bin/torcrawler/TorSplashCrawler.py @@ -201,6 +201,9 @@ class TorSplashCrawler(): self.r_serv_metadata.hset('paste_metadata:{}'.format(relative_filename_paste), 'screenshot', hash) # add sha256 metadata self.r_serv_onion.sadd('screenshot:{}'.format(hash), relative_filename_paste) + # domain map + self.r_serv_onion.sadd('domain_screenshot:{}'.format(domain[0]), hash) + self.r_serv_onion.sadd('screenshot_domain:{}'.format(hash), domain[0]) if 'har' in response.data: dirname = os.path.dirname(filename_har) diff --git a/bin/update-background.py b/bin/update-background.py index 3d38e69e..a75eb60d 100755 --- a/bin/update-background.py +++ b/bin/update-background.py @@ -74,3 +74,23 @@ if __name__ == "__main__": r_serv.delete('ail:current_background_update') r_serv.delete('update:nb_elem_to_convert') r_serv.delete('update:nb_elem_converted') + + if r_serv.sismember('ail:to_update', 'v2.6'): + new_version = 'v2.6' + r_serv.delete('ail:update_error') + r_serv.delete('ail:current_background_script_stat') + r_serv.set('ail:update_in_progress', new_version) + r_serv.set('ail:current_background_update', new_version) + r_serv.set('ail:current_background_script', 'screenshot update') + +
update_file = os.path.join(os.environ['AIL_HOME'], 'update', new_version, 'Update_screenshots.py') + process = subprocess.run(['python' ,update_file]) + + update_progress = r_serv.get('ail:current_background_script_stat') + if update_progress: + if int(update_progress) == 100: + r_serv.delete('ail:update_in_progress') + r_serv.delete('ail:current_background_script') + r_serv.delete('ail:current_background_script_stat') + r_serv.delete('ail:current_background_update') + r_serv.srem('ail:to_update', new_version) diff --git a/update/v2.6/Update.py b/update/v2.6/Update.py new file mode 100755 index 00000000..c8cdb5ac --- /dev/null +++ b/update/v2.6/Update.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import re +import sys +import time +import redis +import datetime + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader + +new_version = 'v2.6' + +if __name__ == '__main__': + + start_deb = time.time() + + config_loader = ConfigLoader.ConfigLoader() + r_serv = config_loader.get_redis_conn("ARDB_DB") + config_loader = None + + #Set current update_in_progress + r_serv.set('ail:update_in_progress', new_version) + r_serv.set('ail:current_background_update', new_version) + + r_serv.sadd('ail:to_update', new_version) + + #Set current ail version + r_serv.set('ail:version', new_version) + + #Set update date of this version + r_serv.hset('ail:update_date', new_version, datetime.datetime.now().strftime("%Y%m%d")) diff --git a/update/v2.6/Update.sh b/update/v2.6/Update.sh new file mode 100755 index 00000000..874bf0ec --- /dev/null +++ b/update/v2.6/Update.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment."
&& exit 1; +[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1; + +export PATH=$AIL_HOME:$PATH +export PATH=$AIL_REDIS:$PATH +export PATH=$AIL_ARDB:$PATH +export PATH=$AIL_BIN:$PATH +export PATH=$AIL_FLASK:$PATH + +GREEN="\\033[1;32m" +DEFAULT="\\033[0;39m" + +echo -e $GREEN"Shutting down AIL ..."$DEFAULT +bash ${AIL_BIN}/LAUNCH.sh -ks +wait + +bash ${AIL_BIN}/LAUNCH.sh -lav & +wait +echo "" + +echo "" +echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT +echo "" +python ${AIL_HOME}/update/v2.6/Update.py +wait +echo "" +echo "" + +echo "" +echo -e $GREEN"Shutting down ARDB ..."$DEFAULT +bash ${AIL_BIN}/LAUNCH.sh -ks +wait + +exit 0 diff --git a/update/v2.6/Update_screenshots.py b/update/v2.6/Update_screenshots.py new file mode 100755 index 00000000..2551b8d0 --- /dev/null +++ b/update/v2.6/Update_screenshots.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import sys +import time +import redis +import datetime + +from hashlib import sha256 + +from pyfaup.faup import Faup + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) +import Item + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader + +def get_all_item(screenshot_sha256): + return r_serv_onion.smembers('screenshot:{}'.format(screenshot_sha256)) + +def sanitize_domain(domain): + faup.decode(domain) + domain_sanitized = faup.get() + return domain_sanitized['domain'].lower() + +def update_db(screenshot_sha256): + screenshot_items = get_all_item(screenshot_sha256) + if screenshot_items: + for item_id in screenshot_items: + item_id = item_id.replace(PASTES_FOLDER+'/', '', 1) # remove root path + domain = Item.get_domain(item_id) + + domain_sanitized = sanitize_domain(domain) + if domain != domain_sanitized: + r_serv_onion.sadd('incorrect_domain', domain) + domain =
domain_sanitized + + #print(item_id) + #print(domain) + + r_serv_onion.sadd('domain_screenshot:{}'.format(domain), screenshot_sha256) + r_serv_onion.sadd('screenshot_domain:{}'.format(screenshot_sha256), domain) + else: + pass + # broken screenshot + r_serv_onion.sadd('broken_screenshot', screenshot_sha256) + + +if __name__ == '__main__': + + start_deb = time.time() + faup = Faup() + + config_loader = ConfigLoader.ConfigLoader() + + PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot"), 'screenshot') + + r_serv_db = config_loader.get_redis_conn("ARDB_DB") + r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") + config_loader = None + + r_serv_db.set('ail:update_in_progress', 'v2.6') + r_serv_db.set('ail:current_background_update', 'v2.6') + + r_serv_db.set('ail:current_background_script_stat', 20) + r_serv_db.set('ail:current_background_script', 'screenshot update') + + nb = 0 + + if os.path.isdir(SCREENSHOT_FOLDER): + for root, dirs, files in os.walk(SCREENSHOT_FOLDER, topdown=False): + #print(dirs) + for name in files: + nb = nb + 1 + screenshot_sha256 = os.path.join(root, name) + screenshot_sha256 = screenshot_sha256[:-4] # remove .png + screenshot_sha256 = screenshot_sha256.replace(SCREENSHOT_FOLDER, '', 1) + screenshot_sha256 = screenshot_sha256.replace('/', '') + update_db(screenshot_sha256) + #print('Screenshot updated: {}'.format(nb)) + r_serv_db.set('ail:current_background_script', 'screenshot updated: {}'.format(nb)) + + r_serv_db.set('ail:current_background_script_stat', 100) + + end = time.time() + print('ALL screenshot updated: {} in {} s'.format(nb, end - start_deb)) + + r_serv_db.delete('ail:update_in_progress') + r_serv_db.delete('ail:current_background_script') + r_serv_db.delete('ail:current_background_script_stat') + r_serv_db.delete('ail:current_background_update') 
+ r_serv_db.srem('ail:to_update', 'v2.6') diff --git a/var/www/blueprints/correlation.py b/var/www/blueprints/correlation.py index ee9a51b0..a6c4ade4 100644 --- a/var/www/blueprints/correlation.py +++ b/var/www/blueprints/correlation.py @@ -147,6 +147,9 @@ def show_correlation(): correl_option = request.form.get('DecodedCheck') if correl_option: correlation_names.append('decoded') + correl_option = request.form.get('ScreenshotCheck') + if correl_option: + correlation_names.append('screenshot') # correlation_objects correl_option = request.form.get('DomainCheck') if correl_option: diff --git a/var/www/modules/Flask_config.py b/var/www/modules/Flask_config.py index 0619b564..fcedd7e7 100644 --- a/var/www/modules/Flask_config.py +++ b/var/www/modules/Flask_config.py @@ -88,7 +88,9 @@ bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info'] dict_update_description = {'v1.5':{'nb_background_update': 5, 'update_warning_message': 'An Update is running on the background. Some informations like Tags, screenshot can be', 'update_warning_message_notice_me': 'missing from the UI.'}, 'v2.4':{'nb_background_update': 1, 'update_warning_message': 'An Update is running on the background. Some informations like Domain Tags/Correlation can be', - 'update_warning_message_notice_me': 'missing from the UI.'} + 'update_warning_message_notice_me': 'missing from the UI.'}, + 'v2.6':{'nb_background_update': 1, 'update_warning_message': 'An Update is running on the background. 
Some informations like Domain Tags/Correlation can be', + 'update_warning_message_notice_me': 'missing from the UI.'} } UPLOAD_FOLDER = os.path.join(os.environ['AIL_FLASK'], 'submitted') diff --git a/var/www/templates/correlation/legend_graph_correlation.html b/var/www/templates/correlation/legend_graph_correlation.html index 9d562b58..45cee03c 100644 --- a/var/www/templates/correlation/legend_graph_correlation.html +++ b/var/www/templates/correlation/legend_graph_correlation.html @@ -7,6 +7,9 @@ Decoded: + + Screenshot: + Pgp: @@ -96,6 +99,17 @@ other types of file + +
+ + + + + + + screenshot +
+
diff --git a/var/www/templates/correlation/show_correlation.html b/var/www/templates/correlation/show_correlation.html index ea8a7cc3..1e65d654 100644 --- a/var/www/templates/correlation/show_correlation.html +++ b/var/www/templates/correlation/show_correlation.html @@ -144,6 +144,10 @@
+
+ + +