mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-10 08:38:28 +00:00
fix: [dynamic update v1.5] make sure updates are executed in the correct order + fix nb_seen_hash dynamic update
This commit is contained in:
parent
a3167a740a
commit
fc1a04336c
3 changed files with 90 additions and 76 deletions
|
@ -19,7 +19,6 @@ def update_hash_item(has_type):
|
||||||
if r_serv_metadata.exists(base64_key):
|
if r_serv_metadata.exists(base64_key):
|
||||||
new_base64_key = base64_key.replace(PASTES_FOLDER, '', 1)
|
new_base64_key = base64_key.replace(PASTES_FOLDER, '', 1)
|
||||||
res = r_serv_metadata.renamenx(base64_key, new_base64_key)
|
res = r_serv_metadata.renamenx(base64_key, new_base64_key)
|
||||||
print(res)
|
|
||||||
if res == 0:
|
if res == 0:
|
||||||
print('same key, double name: {}'.format(item_path))
|
print('same key, double name: {}'.format(item_path))
|
||||||
# fusion
|
# fusion
|
||||||
|
|
|
@ -57,75 +57,77 @@ if __name__ == '__main__':
|
||||||
db=2018,
|
db=2018,
|
||||||
decode_responses=True)
|
decode_responses=True)
|
||||||
|
|
||||||
print('Updating ARDB_Tags ...')
|
if r_serv.exists('v1.5:onions') and r_serv.exists('v1.5:metadata'):
|
||||||
index = 0
|
|
||||||
start = time.time()
|
|
||||||
|
|
||||||
tags_list = r_serv_tag.smembers('list_tags')
|
print('Updating ARDB_Tags ...')
|
||||||
# create temp tags metadata
|
index = 0
|
||||||
tag_metadata = {}
|
start = time.time()
|
||||||
for tag in tags_list:
|
|
||||||
tag_metadata[tag] = {}
|
|
||||||
tag_metadata[tag]['first_seen'] = r_serv_tag.hget('tag_metadata:{}'.format(tag), 'first_seen')
|
|
||||||
if tag_metadata[tag]['first_seen'] is None:
|
|
||||||
tag_metadata[tag]['first_seen'] = 99999999
|
|
||||||
else:
|
|
||||||
tag_metadata[tag]['first_seen'] = int(tag_metadata[tag]['first_seen'])
|
|
||||||
|
|
||||||
tag_metadata[tag]['last_seen'] = r_serv_tag.hget('tag_metadata:{}'.format(tag), 'last_seen')
|
tags_list = r_serv_tag.smembers('list_tags')
|
||||||
if tag_metadata[tag]['last_seen'] is None:
|
# create temp tags metadata
|
||||||
tag_metadata[tag]['last_seen'] = 0
|
tag_metadata = {}
|
||||||
else:
|
for tag in tags_list:
|
||||||
tag_metadata[tag]['last_seen'] = int(tag_metadata[tag]['last_seen'])
|
tag_metadata[tag] = {}
|
||||||
|
tag_metadata[tag]['first_seen'] = r_serv_tag.hget('tag_metadata:{}'.format(tag), 'first_seen')
|
||||||
|
if tag_metadata[tag]['first_seen'] is None:
|
||||||
|
tag_metadata[tag]['first_seen'] = 99999999
|
||||||
|
else:
|
||||||
|
tag_metadata[tag]['first_seen'] = int(tag_metadata[tag]['first_seen'])
|
||||||
|
|
||||||
for tag in tags_list:
|
tag_metadata[tag]['last_seen'] = r_serv_tag.hget('tag_metadata:{}'.format(tag), 'last_seen')
|
||||||
|
if tag_metadata[tag]['last_seen'] is None:
|
||||||
|
tag_metadata[tag]['last_seen'] = 0
|
||||||
|
else:
|
||||||
|
tag_metadata[tag]['last_seen'] = int(tag_metadata[tag]['last_seen'])
|
||||||
|
|
||||||
all_item = r_serv_tag.smembers(tag)
|
for tag in tags_list:
|
||||||
for item_path in all_item:
|
|
||||||
splitted_item_path = item_path.split('/')
|
all_item = r_serv_tag.smembers(tag)
|
||||||
#print(tag)
|
for item_path in all_item:
|
||||||
#print(item_path)
|
splitted_item_path = item_path.split('/')
|
||||||
try:
|
#print(tag)
|
||||||
item_date = int( ''.join([splitted_item_path[-4], splitted_item_path[-3], splitted_item_path[-2]]) )
|
#print(item_path)
|
||||||
except IndexError:
|
try:
|
||||||
|
item_date = int( ''.join([splitted_item_path[-4], splitted_item_path[-3], splitted_item_path[-2]]) )
|
||||||
|
except IndexError:
|
||||||
|
r_serv_tag.srem(tag, item_path)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# remove absolute path
|
||||||
|
new_path = item_path.replace(PASTES_FOLDER, '', 1)
|
||||||
|
if new_path != item_path:
|
||||||
|
# save in queue absolute path to remove
|
||||||
|
r_serv_tag.sadd('maj:v1.5:absolute_path_to_rename', item_path)
|
||||||
|
|
||||||
|
# update metadata first_seen
|
||||||
|
if item_date < tag_metadata[tag]['first_seen']:
|
||||||
|
tag_metadata[tag]['first_seen'] = item_date
|
||||||
|
r_serv_tag.hset('tag_metadata:{}'.format(tag), 'first_seen', item_date)
|
||||||
|
|
||||||
|
# update metadata last_seen
|
||||||
|
if item_date > tag_metadata[tag]['last_seen']:
|
||||||
|
tag_metadata[tag]['last_seen'] = item_date
|
||||||
|
last_seen_db = r_serv_tag.hget('tag_metadata:{}'.format(tag), 'last_seen')
|
||||||
|
if last_seen_db:
|
||||||
|
if item_date > int(last_seen_db):
|
||||||
|
r_serv_tag.hset('tag_metadata:{}'.format(tag), 'last_seen', item_date)
|
||||||
|
else:
|
||||||
|
tag_metadata[tag]['last_seen'] = last_seen_db
|
||||||
|
|
||||||
|
r_serv_tag.sadd('{}:{}'.format(tag, item_date), new_path)
|
||||||
|
r_serv_tag.hincrby('daily_tags:{}'.format(item_date), tag, 1)
|
||||||
|
|
||||||
|
# clean db
|
||||||
r_serv_tag.srem(tag, item_path)
|
r_serv_tag.srem(tag, item_path)
|
||||||
continue
|
index = index + 1
|
||||||
|
|
||||||
# remove absolute path
|
#flush browse importante pastes db
|
||||||
new_path = item_path.replace(PASTES_FOLDER, '', 1)
|
r_important_paste_2018.flushdb()
|
||||||
if new_path != item_path:
|
r_important_paste_2019.flushdb()
|
||||||
# save in queue absolute path to remove
|
|
||||||
r_serv_tag.sadd('maj:v1.5:absolute_path_to_rename', item_path)
|
|
||||||
|
|
||||||
# update metadata first_seen
|
end = time.time()
|
||||||
if item_date < tag_metadata[tag]['first_seen']:
|
|
||||||
tag_metadata[tag]['first_seen'] = item_date
|
|
||||||
r_serv_tag.hset('tag_metadata:{}'.format(tag), 'first_seen', item_date)
|
|
||||||
|
|
||||||
# update metadata last_seen
|
|
||||||
if item_date > tag_metadata[tag]['last_seen']:
|
|
||||||
tag_metadata[tag]['last_seen'] = item_date
|
|
||||||
last_seen_db = r_serv_tag.hget('tag_metadata:{}'.format(tag), 'last_seen')
|
|
||||||
if last_seen_db:
|
|
||||||
if item_date > int(last_seen_db):
|
|
||||||
r_serv_tag.hset('tag_metadata:{}'.format(tag), 'last_seen', item_date)
|
|
||||||
else:
|
|
||||||
tag_metadata[tag]['last_seen'] = last_seen_db
|
|
||||||
|
|
||||||
r_serv_tag.sadd('{}:{}'.format(tag, item_date), new_path)
|
|
||||||
r_serv_tag.hincrby('daily_tags:{}'.format(item_date), tag, 1)
|
|
||||||
|
|
||||||
# clean db
|
|
||||||
r_serv_tag.srem(tag, item_path)
|
|
||||||
index = index + 1
|
|
||||||
|
|
||||||
#flush browse importante pastes db
|
|
||||||
r_important_paste_2018.flushdb()
|
|
||||||
r_important_paste_2019.flushdb()
|
|
||||||
|
|
||||||
end = time.time()
|
|
||||||
|
|
||||||
|
|
||||||
print('Updating ARDB_Tags Done => {} paths: {} s'.format(index, end - start))
|
print('Updating ARDB_Tags Done => {} paths: {} s'.format(index, end - start))
|
||||||
|
|
||||||
r_serv.set('v1.5:tags', 1)
|
r_serv.set('v1.5:tags', 1)
|
||||||
|
|
|
@ -157,13 +157,13 @@ def showpaste(content_range, requested_path):
|
||||||
# item list not updated
|
# item list not updated
|
||||||
if nb_in_file is None:
|
if nb_in_file is None:
|
||||||
l_pastes = r_serv_metadata.zrange('nb_seen_hash:'+hash, 0, -1)
|
l_pastes = r_serv_metadata.zrange('nb_seen_hash:'+hash, 0, -1)
|
||||||
for paste in l_pastes:
|
for paste_name in l_pastes:
|
||||||
# dynamic update
|
# dynamic update
|
||||||
if PASTES_FOLDER in paste:
|
if PASTES_FOLDER in paste_name:
|
||||||
score = r_serv_metadata.zscore('nb_seen_hash:{}'.format(hash), paste)
|
score = r_serv_metadata.zscore('nb_seen_hash:{}'.format(hash), paste_name)
|
||||||
r_serv_metadata.zrem('nb_seen_hash:{}'.format(hash), paste)
|
r_serv_metadata.zrem('nb_seen_hash:{}'.format(hash), paste_name)
|
||||||
paste = paste.replace(PASTES_FOLDER, '', 1)
|
paste_name = paste_name.replace(PASTES_FOLDER, '', 1)
|
||||||
r_serv_metadata.zadd('nb_seen_hash:{}'.format(hash), score, paste)
|
r_serv_metadata.zadd('nb_seen_hash:{}'.format(hash), score, paste_name)
|
||||||
nb_in_file = r_serv_metadata.zscore('nb_seen_hash:'+hash, requested_path)
|
nb_in_file = r_serv_metadata.zscore('nb_seen_hash:'+hash, requested_path)
|
||||||
nb_in_file = int(nb_in_file)
|
nb_in_file = int(nb_in_file)
|
||||||
estimated_type = r_serv_metadata.hget('metadata_hash:'+hash, 'estimated_type')
|
estimated_type = r_serv_metadata.hget('metadata_hash:'+hash, 'estimated_type')
|
||||||
|
@ -282,7 +282,8 @@ def show_item_min(requested_path , content_range=0):
|
||||||
|
|
||||||
p_hashtype_list = []
|
p_hashtype_list = []
|
||||||
|
|
||||||
l_tags = r_serv_metadata.smembers('tag:'+requested_path)
|
print(requested_path)
|
||||||
|
l_tags = r_serv_metadata.smembers('tag:'+relative_path)
|
||||||
if relative_path is not None:
|
if relative_path is not None:
|
||||||
l_tags.union( r_serv_metadata.smembers('tag:'+relative_path) )
|
l_tags.union( r_serv_metadata.smembers('tag:'+relative_path) )
|
||||||
item_info['tags'] = l_tags
|
item_info['tags'] = l_tags
|
||||||
|
@ -291,10 +292,22 @@ def show_item_min(requested_path , content_range=0):
|
||||||
|
|
||||||
l_64 = []
|
l_64 = []
|
||||||
# load hash files
|
# load hash files
|
||||||
if r_serv_metadata.scard('hash_paste:'+requested_path) > 0:
|
if r_serv_metadata.scard('hash_paste:'+relative_path) > 0:
|
||||||
set_b64 = r_serv_metadata.smembers('hash_paste:'+requested_path)
|
set_b64 = r_serv_metadata.smembers('hash_paste:'+relative_path)
|
||||||
for hash in set_b64:
|
for hash in set_b64:
|
||||||
nb_in_file = int(r_serv_metadata.zscore('nb_seen_hash:'+hash, requested_path))
|
nb_in_file = r_serv_metadata.zscore('nb_seen_hash:'+hash, relative_path)
|
||||||
|
# item list not updated
|
||||||
|
if nb_in_file is None:
|
||||||
|
l_pastes = r_serv_metadata.zrange('nb_seen_hash:'+hash, 0, -1)
|
||||||
|
for paste_name in l_pastes:
|
||||||
|
# dynamic update
|
||||||
|
if PASTES_FOLDER in paste_name:
|
||||||
|
score = r_serv_metadata.zscore('nb_seen_hash:{}'.format(hash), paste_name)
|
||||||
|
r_serv_metadata.zrem('nb_seen_hash:{}'.format(hash), paste_name)
|
||||||
|
paste_name = paste_name.replace(PASTES_FOLDER, '', 1)
|
||||||
|
r_serv_metadata.zadd('nb_seen_hash:{}'.format(hash), score, paste_name)
|
||||||
|
nb_in_file = r_serv_metadata.zscore('nb_seen_hash:{}'.format(hash), relative_path)
|
||||||
|
nb_in_file = int(nb_in_file)
|
||||||
estimated_type = r_serv_metadata.hget('metadata_hash:'+hash, 'estimated_type')
|
estimated_type = r_serv_metadata.hget('metadata_hash:'+hash, 'estimated_type')
|
||||||
file_type = estimated_type.split('/')[0]
|
file_type = estimated_type.split('/')[0]
|
||||||
# set file icon
|
# set file icon
|
||||||
|
@ -326,9 +339,9 @@ def show_item_min(requested_path , content_range=0):
|
||||||
crawler_metadata = {}
|
crawler_metadata = {}
|
||||||
if 'infoleak:submission="crawler"' in l_tags:
|
if 'infoleak:submission="crawler"' in l_tags:
|
||||||
crawler_metadata['get_metadata'] = True
|
crawler_metadata['get_metadata'] = True
|
||||||
crawler_metadata['domain'] = r_serv_metadata.hget('paste_metadata:'+requested_path, 'domain')
|
crawler_metadata['domain'] = r_serv_metadata.hget('paste_metadata:'+relative_path, 'domain')
|
||||||
crawler_metadata['paste_father'] = r_serv_metadata.hget('paste_metadata:'+requested_path, 'father')
|
crawler_metadata['paste_father'] = r_serv_metadata.hget('paste_metadata:'+relative_path, 'father')
|
||||||
crawler_metadata['real_link'] = r_serv_metadata.hget('paste_metadata:'+requested_path,'real_link')
|
crawler_metadata['real_link'] = r_serv_metadata.hget('paste_metadata:'+relative_path,'real_link')
|
||||||
crawler_metadata['screenshot'] = paste.get_p_rel_path()
|
crawler_metadata['screenshot'] = paste.get_p_rel_path()
|
||||||
else:
|
else:
|
||||||
crawler_metadata['get_metadata'] = False
|
crawler_metadata['get_metadata'] = False
|
||||||
|
|
Loading…
Reference in a new issue