From fc1a04336c0687484210cac3a71bb4ccde8a5a93 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Mon, 15 Apr 2019 11:01:33 +0200 Subject: [PATCH] fix: [dynamic update v1.5] make sure updates are excuted in the correct order + fix nb_seen_hash dynamic update --- update/v1.4/Update-ARDB_Metadata.py | 1 - update/v1.4/Update-ARDB_Tags.py | 126 ++++++++++--------- var/www/modules/showpaste/Flask_showpaste.py | 39 ++++-- 3 files changed, 90 insertions(+), 76 deletions(-) diff --git a/update/v1.4/Update-ARDB_Metadata.py b/update/v1.4/Update-ARDB_Metadata.py index 4fddb110..20cfed72 100755 --- a/update/v1.4/Update-ARDB_Metadata.py +++ b/update/v1.4/Update-ARDB_Metadata.py @@ -19,7 +19,6 @@ def update_hash_item(has_type): if r_serv_metadata.exists(base64_key): new_base64_key = base64_key.replace(PASTES_FOLDER, '', 1) res = r_serv_metadata.renamenx(base64_key, new_base64_key) - print(res) if res == 0: print('same key, double name: {}'.format(item_path)) # fusion diff --git a/update/v1.4/Update-ARDB_Tags.py b/update/v1.4/Update-ARDB_Tags.py index 2f4ea4eb..e76e9ab5 100755 --- a/update/v1.4/Update-ARDB_Tags.py +++ b/update/v1.4/Update-ARDB_Tags.py @@ -57,75 +57,77 @@ if __name__ == '__main__': db=2018, decode_responses=True) - print('Updating ARDB_Tags ...') - index = 0 - start = time.time() + if r_serv.exists('v1.5:onions') and r_serv.exists('v1.5:metadata'): - tags_list = r_serv_tag.smembers('list_tags') - # create temp tags metadata - tag_metadata = {} - for tag in tags_list: - tag_metadata[tag] = {} - tag_metadata[tag]['first_seen'] = r_serv_tag.hget('tag_metadata:{}'.format(tag), 'first_seen') - if tag_metadata[tag]['first_seen'] is None: - tag_metadata[tag]['first_seen'] = 99999999 - else: - tag_metadata[tag]['first_seen'] = int(tag_metadata[tag]['first_seen']) + print('Updating ARDB_Tags ...') + index = 0 + start = time.time() - tag_metadata[tag]['last_seen'] = r_serv_tag.hget('tag_metadata:{}'.format(tag), 'last_seen') - if tag_metadata[tag]['last_seen'] is None: - tag_metadata[tag]['last_seen'] = 0 - else: - tag_metadata[tag]['last_seen'] = int(tag_metadata[tag]['last_seen']) + tags_list = r_serv_tag.smembers('list_tags') + # create temp tags metadata + tag_metadata = {} + for tag in tags_list: + tag_metadata[tag] = {} + tag_metadata[tag]['first_seen'] = r_serv_tag.hget('tag_metadata:{}'.format(tag), 'first_seen') + if tag_metadata[tag]['first_seen'] is None: + tag_metadata[tag]['first_seen'] = 99999999 + else: + tag_metadata[tag]['first_seen'] = int(tag_metadata[tag]['first_seen']) - for tag in tags_list: + tag_metadata[tag]['last_seen'] = r_serv_tag.hget('tag_metadata:{}'.format(tag), 'last_seen') + if tag_metadata[tag]['last_seen'] is None: + tag_metadata[tag]['last_seen'] = 0 + else: + tag_metadata[tag]['last_seen'] = int(tag_metadata[tag]['last_seen']) - all_item = r_serv_tag.smembers(tag) - for item_path in all_item: - splitted_item_path = item_path.split('/') - #print(tag) - #print(item_path) - try: - item_date = int( ''.join([splitted_item_path[-4], splitted_item_path[-3], splitted_item_path[-2]]) ) - except IndexError: + for tag in tags_list: + + all_item = r_serv_tag.smembers(tag) + for item_path in all_item: + splitted_item_path = item_path.split('/') + #print(tag) + #print(item_path) + try: + item_date = int( ''.join([splitted_item_path[-4], splitted_item_path[-3], splitted_item_path[-2]]) ) + except IndexError: + r_serv_tag.srem(tag, item_path) + continue + + # remove absolute path + new_path = item_path.replace(PASTES_FOLDER, '', 1) + if new_path != item_path: + # save in queue absolute path to remove + r_serv_tag.sadd('maj:v1.5:absolute_path_to_rename', item_path) + + # update metadata first_seen + if item_date < tag_metadata[tag]['first_seen']: + tag_metadata[tag]['first_seen'] = item_date + r_serv_tag.hset('tag_metadata:{}'.format(tag), 'first_seen', item_date) + + # update metadata last_seen + if item_date > tag_metadata[tag]['last_seen']: + tag_metadata[tag]['last_seen'] = item_date + last_seen_db = r_serv_tag.hget('tag_metadata:{}'.format(tag), 'last_seen') + if last_seen_db: + if item_date > int(last_seen_db): + r_serv_tag.hset('tag_metadata:{}'.format(tag), 'last_seen', item_date) + else: + tag_metadata[tag]['last_seen'] = last_seen_db + + r_serv_tag.sadd('{}:{}'.format(tag, item_date), new_path) + r_serv_tag.hincrby('daily_tags:{}'.format(item_date), tag, 1) + + # clean db r_serv_tag.srem(tag, item_path) - continue + index = index + 1 - # remove absolute path - new_path = item_path.replace(PASTES_FOLDER, '', 1) - if new_path != item_path: - # save in queue absolute path to remove - r_serv_tag.sadd('maj:v1.5:absolute_path_to_rename', item_path) + #flush browse importante pastes db + r_important_paste_2018.flushdb() + r_important_paste_2019.flushdb() - # update metadata first_seen - if item_date < tag_metadata[tag]['first_seen']: - tag_metadata[tag]['first_seen'] = item_date - r_serv_tag.hset('tag_metadata:{}'.format(tag), 'first_seen', item_date) - - # update metadata last_seen - if item_date > tag_metadata[tag]['last_seen']: - tag_metadata[tag]['last_seen'] = item_date - last_seen_db = r_serv_tag.hget('tag_metadata:{}'.format(tag), 'last_seen') - if last_seen_db: - if item_date > int(last_seen_db): - r_serv_tag.hset('tag_metadata:{}'.format(tag), 'last_seen', item_date) - else: - tag_metadata[tag]['last_seen'] = last_seen_db - - r_serv_tag.sadd('{}:{}'.format(tag, item_date), new_path) - r_serv_tag.hincrby('daily_tags:{}'.format(item_date), tag, 1) - - # clean db - r_serv_tag.srem(tag, item_path) - index = index + 1 - - #flush browse importante pastes db - r_important_paste_2018.flushdb() - r_important_paste_2019.flushdb() - - end = time.time() + end = time.time() - print('Updating ARDB_Tags Done => {} paths: {} s'.format(index, end - start)) + print('Updating ARDB_Tags Done => {} paths: {} s'.format(index, end - start)) - r_serv.set('v1.5:tags', 1) + r_serv.set('v1.5:tags', 1) diff --git a/var/www/modules/showpaste/Flask_showpaste.py b/var/www/modules/showpaste/Flask_showpaste.py index 9ea5f22a..becf86d0 100644 --- a/var/www/modules/showpaste/Flask_showpaste.py +++ b/var/www/modules/showpaste/Flask_showpaste.py @@ -157,13 +157,13 @@ def showpaste(content_range, requested_path): # item list not updated if nb_in_file is None: l_pastes = r_serv_metadata.zrange('nb_seen_hash:'+hash, 0, -1) - for paste in l_pastes: + for paste_name in l_pastes: # dynamic update - if PASTES_FOLDER in paste: - score = r_serv_metadata.zscore('nb_seen_hash:{}'.format(hash), paste) - r_serv_metadata.zrem('nb_seen_hash:{}'.format(hash), paste) - paste = paste.replace(PASTES_FOLDER, '', 1) - r_serv_metadata.zadd('nb_seen_hash:{}'.format(hash), score, paste) + if PASTES_FOLDER in paste_name: + score = r_serv_metadata.zscore('nb_seen_hash:{}'.format(hash), paste_name) + r_serv_metadata.zrem('nb_seen_hash:{}'.format(hash), paste_name) + paste_name = paste_name.replace(PASTES_FOLDER, '', 1) + r_serv_metadata.zadd('nb_seen_hash:{}'.format(hash), score, paste_name) nb_in_file = r_serv_metadata.zscore('nb_seen_hash:'+hash, requested_path) nb_in_file = int(nb_in_file) estimated_type = r_serv_metadata.hget('metadata_hash:'+hash, 'estimated_type') @@ -282,7 +282,8 @@ def show_item_min(requested_path , content_range=0): p_hashtype_list = [] - l_tags = r_serv_metadata.smembers('tag:'+requested_path) + print(requested_path) + l_tags = r_serv_metadata.smembers('tag:'+relative_path) if relative_path is not None: l_tags.union( r_serv_metadata.smembers('tag:'+relative_path) ) item_info['tags'] = l_tags @@ -291,10 +292,22 @@ def show_item_min(requested_path , content_range=0): l_64 = [] # load hash files - if r_serv_metadata.scard('hash_paste:'+requested_path) > 0: - set_b64 = r_serv_metadata.smembers('hash_paste:'+requested_path) + if r_serv_metadata.scard('hash_paste:'+relative_path) > 0: + set_b64 = r_serv_metadata.smembers('hash_paste:'+relative_path) for hash in set_b64: - nb_in_file = int(r_serv_metadata.zscore('nb_seen_hash:'+hash, requested_path)) + nb_in_file = r_serv_metadata.zscore('nb_seen_hash:'+hash, relative_path) + # item list not updated + if nb_in_file is None: + l_pastes = r_serv_metadata.zrange('nb_seen_hash:'+hash, 0, -1) + for paste_name in l_pastes: + # dynamic update + if PASTES_FOLDER in paste_name: + score = r_serv_metadata.zscore('nb_seen_hash:{}'.format(hash), paste_name) + r_serv_metadata.zrem('nb_seen_hash:{}'.format(hash), paste_name) + paste_name = paste_name.replace(PASTES_FOLDER, '', 1) + r_serv_metadata.zadd('nb_seen_hash:{}'.format(hash), score, paste_name) + nb_in_file = r_serv_metadata.zscore('nb_seen_hash:{}'.format(hash), relative_path) + nb_in_file = int(nb_in_file) estimated_type = r_serv_metadata.hget('metadata_hash:'+hash, 'estimated_type') file_type = estimated_type.split('/')[0] # set file icon @@ -326,9 +339,9 @@ def show_item_min(requested_path , content_range=0): crawler_metadata = {} if 'infoleak:submission="crawler"' in l_tags: crawler_metadata['get_metadata'] = True - crawler_metadata['domain'] = r_serv_metadata.hget('paste_metadata:'+requested_path, 'domain') - crawler_metadata['paste_father'] = r_serv_metadata.hget('paste_metadata:'+requested_path, 'father') - crawler_metadata['real_link'] = r_serv_metadata.hget('paste_metadata:'+requested_path,'real_link') + crawler_metadata['domain'] = r_serv_metadata.hget('paste_metadata:'+relative_path, 'domain') + crawler_metadata['paste_father'] = r_serv_metadata.hget('paste_metadata:'+relative_path, 'father') + crawler_metadata['real_link'] = r_serv_metadata.hget('paste_metadata:'+relative_path,'real_link') crawler_metadata['screenshot'] = paste.get_p_rel_path() else: crawler_metadata['get_metadata'] = False