mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-26 15:57:16 +00:00
fix: [dynamic update v1.5] make sure updates are executed in the correct order + fix nb_seen_hash dynamic update
This commit is contained in:
parent
a3167a740a
commit
fc1a04336c
3 changed files with 90 additions and 76 deletions
|
@ -19,7 +19,6 @@ def update_hash_item(has_type):
|
|||
if r_serv_metadata.exists(base64_key):
|
||||
new_base64_key = base64_key.replace(PASTES_FOLDER, '', 1)
|
||||
res = r_serv_metadata.renamenx(base64_key, new_base64_key)
|
||||
print(res)
|
||||
if res == 0:
|
||||
print('same key, double name: {}'.format(item_path))
|
||||
# fusion
|
||||
|
|
|
@ -57,75 +57,77 @@ if __name__ == '__main__':
|
|||
db=2018,
|
||||
decode_responses=True)
|
||||
|
||||
print('Updating ARDB_Tags ...')
|
||||
index = 0
|
||||
start = time.time()
|
||||
if r_serv.exists('v1.5:onions') and r_serv.exists('v1.5:metadata'):
|
||||
|
||||
tags_list = r_serv_tag.smembers('list_tags')
|
||||
# create temp tags metadata
|
||||
tag_metadata = {}
|
||||
for tag in tags_list:
|
||||
tag_metadata[tag] = {}
|
||||
tag_metadata[tag]['first_seen'] = r_serv_tag.hget('tag_metadata:{}'.format(tag), 'first_seen')
|
||||
if tag_metadata[tag]['first_seen'] is None:
|
||||
tag_metadata[tag]['first_seen'] = 99999999
|
||||
else:
|
||||
tag_metadata[tag]['first_seen'] = int(tag_metadata[tag]['first_seen'])
|
||||
print('Updating ARDB_Tags ...')
|
||||
index = 0
|
||||
start = time.time()
|
||||
|
||||
tag_metadata[tag]['last_seen'] = r_serv_tag.hget('tag_metadata:{}'.format(tag), 'last_seen')
|
||||
if tag_metadata[tag]['last_seen'] is None:
|
||||
tag_metadata[tag]['last_seen'] = 0
|
||||
else:
|
||||
tag_metadata[tag]['last_seen'] = int(tag_metadata[tag]['last_seen'])
|
||||
tags_list = r_serv_tag.smembers('list_tags')
|
||||
# create temp tags metadata
|
||||
tag_metadata = {}
|
||||
for tag in tags_list:
|
||||
tag_metadata[tag] = {}
|
||||
tag_metadata[tag]['first_seen'] = r_serv_tag.hget('tag_metadata:{}'.format(tag), 'first_seen')
|
||||
if tag_metadata[tag]['first_seen'] is None:
|
||||
tag_metadata[tag]['first_seen'] = 99999999
|
||||
else:
|
||||
tag_metadata[tag]['first_seen'] = int(tag_metadata[tag]['first_seen'])
|
||||
|
||||
for tag in tags_list:
|
||||
tag_metadata[tag]['last_seen'] = r_serv_tag.hget('tag_metadata:{}'.format(tag), 'last_seen')
|
||||
if tag_metadata[tag]['last_seen'] is None:
|
||||
tag_metadata[tag]['last_seen'] = 0
|
||||
else:
|
||||
tag_metadata[tag]['last_seen'] = int(tag_metadata[tag]['last_seen'])
|
||||
|
||||
all_item = r_serv_tag.smembers(tag)
|
||||
for item_path in all_item:
|
||||
splitted_item_path = item_path.split('/')
|
||||
#print(tag)
|
||||
#print(item_path)
|
||||
try:
|
||||
item_date = int( ''.join([splitted_item_path[-4], splitted_item_path[-3], splitted_item_path[-2]]) )
|
||||
except IndexError:
|
||||
for tag in tags_list:
|
||||
|
||||
all_item = r_serv_tag.smembers(tag)
|
||||
for item_path in all_item:
|
||||
splitted_item_path = item_path.split('/')
|
||||
#print(tag)
|
||||
#print(item_path)
|
||||
try:
|
||||
item_date = int( ''.join([splitted_item_path[-4], splitted_item_path[-3], splitted_item_path[-2]]) )
|
||||
except IndexError:
|
||||
r_serv_tag.srem(tag, item_path)
|
||||
continue
|
||||
|
||||
# remove absolute path
|
||||
new_path = item_path.replace(PASTES_FOLDER, '', 1)
|
||||
if new_path != item_path:
|
||||
# save in queue absolute path to remove
|
||||
r_serv_tag.sadd('maj:v1.5:absolute_path_to_rename', item_path)
|
||||
|
||||
# update metadata first_seen
|
||||
if item_date < tag_metadata[tag]['first_seen']:
|
||||
tag_metadata[tag]['first_seen'] = item_date
|
||||
r_serv_tag.hset('tag_metadata:{}'.format(tag), 'first_seen', item_date)
|
||||
|
||||
# update metadata last_seen
|
||||
if item_date > tag_metadata[tag]['last_seen']:
|
||||
tag_metadata[tag]['last_seen'] = item_date
|
||||
last_seen_db = r_serv_tag.hget('tag_metadata:{}'.format(tag), 'last_seen')
|
||||
if last_seen_db:
|
||||
if item_date > int(last_seen_db):
|
||||
r_serv_tag.hset('tag_metadata:{}'.format(tag), 'last_seen', item_date)
|
||||
else:
|
||||
tag_metadata[tag]['last_seen'] = last_seen_db
|
||||
|
||||
r_serv_tag.sadd('{}:{}'.format(tag, item_date), new_path)
|
||||
r_serv_tag.hincrby('daily_tags:{}'.format(item_date), tag, 1)
|
||||
|
||||
# clean db
|
||||
r_serv_tag.srem(tag, item_path)
|
||||
continue
|
||||
index = index + 1
|
||||
|
||||
# remove absolute path
|
||||
new_path = item_path.replace(PASTES_FOLDER, '', 1)
|
||||
if new_path != item_path:
|
||||
# save in queue absolute path to remove
|
||||
r_serv_tag.sadd('maj:v1.5:absolute_path_to_rename', item_path)
|
||||
#flush browse importante pastes db
|
||||
r_important_paste_2018.flushdb()
|
||||
r_important_paste_2019.flushdb()
|
||||
|
||||
# update metadata first_seen
|
||||
if item_date < tag_metadata[tag]['first_seen']:
|
||||
tag_metadata[tag]['first_seen'] = item_date
|
||||
r_serv_tag.hset('tag_metadata:{}'.format(tag), 'first_seen', item_date)
|
||||
|
||||
# update metadata last_seen
|
||||
if item_date > tag_metadata[tag]['last_seen']:
|
||||
tag_metadata[tag]['last_seen'] = item_date
|
||||
last_seen_db = r_serv_tag.hget('tag_metadata:{}'.format(tag), 'last_seen')
|
||||
if last_seen_db:
|
||||
if item_date > int(last_seen_db):
|
||||
r_serv_tag.hset('tag_metadata:{}'.format(tag), 'last_seen', item_date)
|
||||
else:
|
||||
tag_metadata[tag]['last_seen'] = last_seen_db
|
||||
|
||||
r_serv_tag.sadd('{}:{}'.format(tag, item_date), new_path)
|
||||
r_serv_tag.hincrby('daily_tags:{}'.format(item_date), tag, 1)
|
||||
|
||||
# clean db
|
||||
r_serv_tag.srem(tag, item_path)
|
||||
index = index + 1
|
||||
|
||||
#flush browse importante pastes db
|
||||
r_important_paste_2018.flushdb()
|
||||
r_important_paste_2019.flushdb()
|
||||
|
||||
end = time.time()
|
||||
end = time.time()
|
||||
|
||||
|
||||
print('Updating ARDB_Tags Done => {} paths: {} s'.format(index, end - start))
|
||||
print('Updating ARDB_Tags Done => {} paths: {} s'.format(index, end - start))
|
||||
|
||||
r_serv.set('v1.5:tags', 1)
|
||||
r_serv.set('v1.5:tags', 1)
|
||||
|
|
|
@ -157,13 +157,13 @@ def showpaste(content_range, requested_path):
|
|||
# item list not updated
|
||||
if nb_in_file is None:
|
||||
l_pastes = r_serv_metadata.zrange('nb_seen_hash:'+hash, 0, -1)
|
||||
for paste in l_pastes:
|
||||
for paste_name in l_pastes:
|
||||
# dynamic update
|
||||
if PASTES_FOLDER in paste:
|
||||
score = r_serv_metadata.zscore('nb_seen_hash:{}'.format(hash), paste)
|
||||
r_serv_metadata.zrem('nb_seen_hash:{}'.format(hash), paste)
|
||||
paste = paste.replace(PASTES_FOLDER, '', 1)
|
||||
r_serv_metadata.zadd('nb_seen_hash:{}'.format(hash), score, paste)
|
||||
if PASTES_FOLDER in paste_name:
|
||||
score = r_serv_metadata.zscore('nb_seen_hash:{}'.format(hash), paste_name)
|
||||
r_serv_metadata.zrem('nb_seen_hash:{}'.format(hash), paste_name)
|
||||
paste_name = paste_name.replace(PASTES_FOLDER, '', 1)
|
||||
r_serv_metadata.zadd('nb_seen_hash:{}'.format(hash), score, paste_name)
|
||||
nb_in_file = r_serv_metadata.zscore('nb_seen_hash:'+hash, requested_path)
|
||||
nb_in_file = int(nb_in_file)
|
||||
estimated_type = r_serv_metadata.hget('metadata_hash:'+hash, 'estimated_type')
|
||||
|
@ -282,7 +282,8 @@ def show_item_min(requested_path , content_range=0):
|
|||
|
||||
p_hashtype_list = []
|
||||
|
||||
l_tags = r_serv_metadata.smembers('tag:'+requested_path)
|
||||
print(requested_path)
|
||||
l_tags = r_serv_metadata.smembers('tag:'+relative_path)
|
||||
if relative_path is not None:
|
||||
l_tags.union( r_serv_metadata.smembers('tag:'+relative_path) )
|
||||
item_info['tags'] = l_tags
|
||||
|
@ -291,10 +292,22 @@ def show_item_min(requested_path , content_range=0):
|
|||
|
||||
l_64 = []
|
||||
# load hash files
|
||||
if r_serv_metadata.scard('hash_paste:'+requested_path) > 0:
|
||||
set_b64 = r_serv_metadata.smembers('hash_paste:'+requested_path)
|
||||
if r_serv_metadata.scard('hash_paste:'+relative_path) > 0:
|
||||
set_b64 = r_serv_metadata.smembers('hash_paste:'+relative_path)
|
||||
for hash in set_b64:
|
||||
nb_in_file = int(r_serv_metadata.zscore('nb_seen_hash:'+hash, requested_path))
|
||||
nb_in_file = r_serv_metadata.zscore('nb_seen_hash:'+hash, relative_path)
|
||||
# item list not updated
|
||||
if nb_in_file is None:
|
||||
l_pastes = r_serv_metadata.zrange('nb_seen_hash:'+hash, 0, -1)
|
||||
for paste_name in l_pastes:
|
||||
# dynamic update
|
||||
if PASTES_FOLDER in paste_name:
|
||||
score = r_serv_metadata.zscore('nb_seen_hash:{}'.format(hash), paste_name)
|
||||
r_serv_metadata.zrem('nb_seen_hash:{}'.format(hash), paste_name)
|
||||
paste_name = paste_name.replace(PASTES_FOLDER, '', 1)
|
||||
r_serv_metadata.zadd('nb_seen_hash:{}'.format(hash), score, paste_name)
|
||||
nb_in_file = r_serv_metadata.zscore('nb_seen_hash:{}'.format(hash), relative_path)
|
||||
nb_in_file = int(nb_in_file)
|
||||
estimated_type = r_serv_metadata.hget('metadata_hash:'+hash, 'estimated_type')
|
||||
file_type = estimated_type.split('/')[0]
|
||||
# set file icon
|
||||
|
@ -326,9 +339,9 @@ def show_item_min(requested_path , content_range=0):
|
|||
crawler_metadata = {}
|
||||
if 'infoleak:submission="crawler"' in l_tags:
|
||||
crawler_metadata['get_metadata'] = True
|
||||
crawler_metadata['domain'] = r_serv_metadata.hget('paste_metadata:'+requested_path, 'domain')
|
||||
crawler_metadata['paste_father'] = r_serv_metadata.hget('paste_metadata:'+requested_path, 'father')
|
||||
crawler_metadata['real_link'] = r_serv_metadata.hget('paste_metadata:'+requested_path,'real_link')
|
||||
crawler_metadata['domain'] = r_serv_metadata.hget('paste_metadata:'+relative_path, 'domain')
|
||||
crawler_metadata['paste_father'] = r_serv_metadata.hget('paste_metadata:'+relative_path, 'father')
|
||||
crawler_metadata['real_link'] = r_serv_metadata.hget('paste_metadata:'+relative_path,'real_link')
|
||||
crawler_metadata['screenshot'] = paste.get_p_rel_path()
|
||||
else:
|
||||
crawler_metadata['get_metadata'] = False
|
||||
|
|
Loading…
Reference in a new issue