From 6165987ec3aea4a2f51e22a353a7d1909ce1b463 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Wed, 10 Apr 2019 15:36:41 +0200 Subject: [PATCH] chg: [DB Migration] add hash dynamic update --- update/v1.4/Update-ARDB_Metadata.py | 28 +++++++++++++++---- update/v1.4/Update-ARDB_Onions.py | 6 ++++ var/www/modules/Flask_config.py | 1 + .../modules/hashDecoded/Flask_hashDecoded.py | 7 +++++ var/www/modules/showpaste/Flask_showpaste.py | 1 + 5 files changed, 37 insertions(+), 6 deletions(-) diff --git a/update/v1.4/Update-ARDB_Metadata.py b/update/v1.4/Update-ARDB_Metadata.py index 2e7fbb3d..7f7951ae 100755 --- a/update/v1.4/Update-ARDB_Metadata.py +++ b/update/v1.4/Update-ARDB_Metadata.py @@ -10,8 +10,7 @@ import configparser def update_hash_item(has_type): #get all hash items: - #all_base64 = r_serv_tag.smembers('infoleak:automatic-detection=\"{}\"'.format(has_type)) - all_hash_items = r_serv_tag.smembers('infoleak:automatic-detection=\"{}\":20190307'.format(has_type)) + all_hash_items = r_serv_tag.smembers('infoleak:automatic-detection=\"{}\"'.format(has_type)) for item_path in all_hash_items: if PASTES_FOLDER in item_path: base64_key = '{}_paste:{}'.format(has_type, item_path) @@ -76,13 +75,30 @@ if __name__ == '__main__': update_hash_item('hexadecimal') # Update onion metadata - #all_crawled_items = r_serv_tag.smembers('infoleak:submission=\"crawler\"') - all_crawled_items = r_serv_tag.smembers('infoleak:submission=\"crawler\":20190227') + all_crawled_items = r_serv_tag.smembers('infoleak:submission=\"crawler\"') for item_path in all_crawled_items: + domain = None if PASTES_FOLDER in item_path: - item_metadata = 'paste_metadata:{}'.format(item_path) + old_item_metadata = 'paste_metadata:{}'.format(item_path) + item_path = item_path.replace(PASTES_FOLDER, '', 1) + new_item_metadata = 'paste_metadata:{}'.format(item_path) ## TODO: catch error - r_serv_metadata.rename(item_metadata, item_metadata.replace(PASTES_FOLDER, '', 1)) + r_serv_metadata.rename(old_item_metadata, 
new_item_metadata) + # update domain port + domain = r_serv_metadata.hget('paste_metadata:{}'.format(item_path), 'domain') + if domain: + r_serv_metadata.hset('paste_metadata:{}'.format(item_path), 'domain', '{}:80'.format(domain)) + super_father = r_serv_metadata.hget('paste_metadata:{}'.format(item_path), 'super_father') + if super_father: + if PASTES_FOLDER in super_father: + r_serv_metadata.hset('paste_metadata:{}'.format(item_path), 'super_father', super_father.replace(PASTES_FOLDER, '', 1)) + father = r_serv_metadata.hget('paste_metadata:{}'.format(item_path), 'father') + if father: + if PASTES_FOLDER in father: + r_serv_metadata.hset('paste_metadata:{}'.format(item_path), 'father', father.replace(PASTES_FOLDER, '', 1)) + + + ###################################################################################################################### ###################################################################################################################### diff --git a/update/v1.4/Update-ARDB_Onions.py b/update/v1.4/Update-ARDB_Onions.py index 0e72dc6f..f88cc3a4 100755 --- a/update/v1.4/Update-ARDB_Onions.py +++ b/update/v1.4/Update-ARDB_Onions.py @@ -121,11 +121,17 @@ if __name__ == '__main__': ''' + # update crawler queue for elem in r_serv_onion.smembers('onion_crawler_queue'): if PASTES_FOLDER in elem: r_serv_onion.srem('onion_crawler_queue', elem) r_serv_onion.sadd('onion_crawler_queue', elem.replace(PASTES_FOLDER, '', 1)) index = index +1 + for elem in r_serv_onion.smembers('onion_crawler_priority_queue'): + if PASTES_FOLDER in elem: + r_serv_onion.srem('onion_crawler_priority_queue', elem) + r_serv_onion.sadd('onion_crawler_priority_queue', elem.replace(PASTES_FOLDER, '', 1)) + index = index +1 ''' diff --git a/var/www/modules/Flask_config.py b/var/www/modules/Flask_config.py index f8c128bc..f516a60b 100644 --- a/var/www/modules/Flask_config.py +++ b/var/www/modules/Flask_config.py @@ -163,6 +163,7 @@ bootstrap_label = ['primary', 'success', 'danger', 'warning', 
'info'] UPLOAD_FOLDER = os.path.join(os.environ['AIL_FLASK'], 'submitted') PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes")) +PASTES_FOLDERS = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes")) + '/' SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "crawled_screenshot")) max_dashboard_logs = int(cfg.get("Flask", "max_dashboard_logs")) diff --git a/var/www/modules/hashDecoded/Flask_hashDecoded.py b/var/www/modules/hashDecoded/Flask_hashDecoded.py index 62a32f75..8f8e7279 100644 --- a/var/www/modules/hashDecoded/Flask_hashDecoded.py +++ b/var/www/modules/hashDecoded/Flask_hashDecoded.py @@ -25,6 +25,7 @@ baseUrl = Flask_config.baseUrl r_serv_metadata = Flask_config.r_serv_metadata vt_enabled = Flask_config.vt_enabled vt_auth = Flask_config.vt_auth +PASTES_FOLDER = Flask_config.PASTES_FOLDERS hashDecoded = Blueprint('hashDecoded', __name__, template_folder='templates') @@ -589,6 +590,12 @@ def hash_graph_node_json(): #get related paste l_pastes = r_serv_metadata.zrange('nb_seen_hash:'+hash, 0, -1) for paste in l_pastes: + # dynamic update + if PASTES_FOLDER in paste: + score = r_serv_metadata.zscore('nb_seen_hash:{}'.format(hash), paste) + r_serv_metadata.zrem('nb_seen_hash:{}'.format(hash), paste) + paste = paste.replace(PASTES_FOLDER, '', 1) + r_serv_metadata.zadd('nb_seen_hash:{}'.format(hash), score, paste) url = paste #nb_seen_in_this_paste = nb_in_file = int(r_serv_metadata.zscore('nb_seen_hash:'+hash, paste)) nb_hash_in_paste = r_serv_metadata.scard('hash_paste:'+paste) diff --git a/var/www/modules/showpaste/Flask_showpaste.py b/var/www/modules/showpaste/Flask_showpaste.py index 276c1eee..a457615e 100644 --- a/var/www/modules/showpaste/Flask_showpaste.py +++ b/var/www/modules/showpaste/Flask_showpaste.py @@ -154,6 +154,7 @@ def showpaste(content_range, requested_path): if r_serv_metadata.scard('hash_paste:'+requested_path) > 0: set_b64 = 
r_serv_metadata.smembers('hash_paste:'+requested_path) for hash in set_b64: + print(requested_path) nb_in_file = int(r_serv_metadata.zscore('nb_seen_hash:'+hash, requested_path)) estimated_type = r_serv_metadata.hget('metadata_hash:'+hash, 'estimated_type') file_type = estimated_type.split('/')[0]