fix: [dynamic update v1.5] make sure updates are executed in the correct order + fix nb_seen_hash dynamic update

This commit is contained in:
Terrtia 2019-04-15 11:01:33 +02:00
parent a3167a740a
commit fc1a04336c
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
3 changed files with 90 additions and 76 deletions

View file

@ -19,7 +19,6 @@ def update_hash_item(has_type):
if r_serv_metadata.exists(base64_key): if r_serv_metadata.exists(base64_key):
new_base64_key = base64_key.replace(PASTES_FOLDER, '', 1) new_base64_key = base64_key.replace(PASTES_FOLDER, '', 1)
res = r_serv_metadata.renamenx(base64_key, new_base64_key) res = r_serv_metadata.renamenx(base64_key, new_base64_key)
print(res)
if res == 0: if res == 0:
print('same key, double name: {}'.format(item_path)) print('same key, double name: {}'.format(item_path))
# fusion # fusion

View file

@ -57,6 +57,8 @@ if __name__ == '__main__':
db=2018, db=2018,
decode_responses=True) decode_responses=True)
if r_serv.exists('v1.5:onions') and r_serv.exists('v1.5:metadata'):
print('Updating ARDB_Tags ...') print('Updating ARDB_Tags ...')
index = 0 index = 0
start = time.time() start = time.time()

View file

@ -157,13 +157,13 @@ def showpaste(content_range, requested_path):
# item list not updated # item list not updated
if nb_in_file is None: if nb_in_file is None:
l_pastes = r_serv_metadata.zrange('nb_seen_hash:'+hash, 0, -1) l_pastes = r_serv_metadata.zrange('nb_seen_hash:'+hash, 0, -1)
for paste in l_pastes: for paste_name in l_pastes:
# dynamic update # dynamic update
if PASTES_FOLDER in paste: if PASTES_FOLDER in paste_name:
score = r_serv_metadata.zscore('nb_seen_hash:{}'.format(hash), paste) score = r_serv_metadata.zscore('nb_seen_hash:{}'.format(hash), paste_name)
r_serv_metadata.zrem('nb_seen_hash:{}'.format(hash), paste) r_serv_metadata.zrem('nb_seen_hash:{}'.format(hash), paste_name)
paste = paste.replace(PASTES_FOLDER, '', 1) paste_name = paste_name.replace(PASTES_FOLDER, '', 1)
r_serv_metadata.zadd('nb_seen_hash:{}'.format(hash), score, paste) r_serv_metadata.zadd('nb_seen_hash:{}'.format(hash), score, paste_name)
nb_in_file = r_serv_metadata.zscore('nb_seen_hash:'+hash, requested_path) nb_in_file = r_serv_metadata.zscore('nb_seen_hash:'+hash, requested_path)
nb_in_file = int(nb_in_file) nb_in_file = int(nb_in_file)
estimated_type = r_serv_metadata.hget('metadata_hash:'+hash, 'estimated_type') estimated_type = r_serv_metadata.hget('metadata_hash:'+hash, 'estimated_type')
@ -282,7 +282,8 @@ def show_item_min(requested_path , content_range=0):
p_hashtype_list = [] p_hashtype_list = []
l_tags = r_serv_metadata.smembers('tag:'+requested_path) print(requested_path)
l_tags = r_serv_metadata.smembers('tag:'+relative_path)
if relative_path is not None: if relative_path is not None:
l_tags.union( r_serv_metadata.smembers('tag:'+relative_path) ) l_tags.union( r_serv_metadata.smembers('tag:'+relative_path) )
item_info['tags'] = l_tags item_info['tags'] = l_tags
@ -291,10 +292,22 @@ def show_item_min(requested_path , content_range=0):
l_64 = [] l_64 = []
# load hash files # load hash files
if r_serv_metadata.scard('hash_paste:'+requested_path) > 0: if r_serv_metadata.scard('hash_paste:'+relative_path) > 0:
set_b64 = r_serv_metadata.smembers('hash_paste:'+requested_path) set_b64 = r_serv_metadata.smembers('hash_paste:'+relative_path)
for hash in set_b64: for hash in set_b64:
nb_in_file = int(r_serv_metadata.zscore('nb_seen_hash:'+hash, requested_path)) nb_in_file = r_serv_metadata.zscore('nb_seen_hash:'+hash, relative_path)
# item list not updated
if nb_in_file is None:
l_pastes = r_serv_metadata.zrange('nb_seen_hash:'+hash, 0, -1)
for paste_name in l_pastes:
# dynamic update
if PASTES_FOLDER in paste_name:
score = r_serv_metadata.zscore('nb_seen_hash:{}'.format(hash), paste_name)
r_serv_metadata.zrem('nb_seen_hash:{}'.format(hash), paste_name)
paste_name = paste_name.replace(PASTES_FOLDER, '', 1)
r_serv_metadata.zadd('nb_seen_hash:{}'.format(hash), score, paste_name)
nb_in_file = r_serv_metadata.zscore('nb_seen_hash:{}'.format(hash), relative_path)
nb_in_file = int(nb_in_file)
estimated_type = r_serv_metadata.hget('metadata_hash:'+hash, 'estimated_type') estimated_type = r_serv_metadata.hget('metadata_hash:'+hash, 'estimated_type')
file_type = estimated_type.split('/')[0] file_type = estimated_type.split('/')[0]
# set file icon # set file icon
@ -326,9 +339,9 @@ def show_item_min(requested_path , content_range=0):
crawler_metadata = {} crawler_metadata = {}
if 'infoleak:submission="crawler"' in l_tags: if 'infoleak:submission="crawler"' in l_tags:
crawler_metadata['get_metadata'] = True crawler_metadata['get_metadata'] = True
crawler_metadata['domain'] = r_serv_metadata.hget('paste_metadata:'+requested_path, 'domain') crawler_metadata['domain'] = r_serv_metadata.hget('paste_metadata:'+relative_path, 'domain')
crawler_metadata['paste_father'] = r_serv_metadata.hget('paste_metadata:'+requested_path, 'father') crawler_metadata['paste_father'] = r_serv_metadata.hget('paste_metadata:'+relative_path, 'father')
crawler_metadata['real_link'] = r_serv_metadata.hget('paste_metadata:'+requested_path,'real_link') crawler_metadata['real_link'] = r_serv_metadata.hget('paste_metadata:'+relative_path,'real_link')
crawler_metadata['screenshot'] = paste.get_p_rel_path() crawler_metadata['screenshot'] = paste.get_p_rel_path()
else: else:
crawler_metadata['get_metadata'] = False crawler_metadata['get_metadata'] = False