chg: [DB] remove absolute path

This commit is contained in:
Terrtia 2019-04-10 17:47:40 +02:00
parent d44acea04d
commit e83174327a
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
5 changed files with 55 additions and 39 deletions

View file

@ -119,9 +119,7 @@ Redis and ARDB overview
```
ARDB overview
---------------------------
ARDB_DB
* DB 1 - Curve
* DB 2 - TermFreq
----------------------------------------- TERM ----------------------------------------
@ -141,30 +139,23 @@ ARDB_DB
SET - 'TrackedNotifications' term/set
* DB 3 - Trending
* DB 4 - Sentiment
----------------------------------------- SENTIMENT ------------------------------------
SET - 'Provider_set' Provider
KEY - 'UniqID' INT
SET - provider_timestamp UniqID
SET - UniqID avg_score
* DB 5 - TermCred
* DB 6 - Tags
----------------------------------------------------------------------------------------
SET - tag paste*
DB 5 - TermCred
----------------------------------------------------------------------------------------
* DB 7 - Metadata:
----------------------------------------------------------------------------------------
SET - 'tag:' + paste tag
----------------------------------------------------------------------------------------
----------------------------------------- BASE64 ----------------------------------------
@ -186,26 +177,12 @@ ARDB_DB
SET - 'hash_base64_all_type' hash_type *
SET - 'hash_binary_all_type' hash_type *
SET - 'hash_paste:'+paste hash *
SET - 'base64_paste:'+paste hash *
SET - 'binary_paste:'+paste hash *
ZADD - 'hash_date:'+20180622 hash * nb_seen_this_day
ZADD - 'base64_date:'+20180622 hash * nb_seen_this_day
ZADD - 'binary_date:'+20180622 hash * nb_seen_this_day
ZADD - 'nb_seen_hash:'+hash paste * nb_seen_in_paste
ZADD - 'base64_hash:'+hash paste * nb_seen_in_paste
ZADD - 'binary_hash:'+hash paste * nb_seen_in_paste
ZADD - 'base64_type:'+type date nb_seen
ZADD - 'binary_type:'+type date nb_seen
GET - 'base64_decoded:'+date nb_decoded
GET - 'binary_decoded:'+date nb_decoded
* DB 8 - Statistics
* DB 9 - Onion:
----------------------------------------------------------------------------------------

View file

@ -10,8 +10,10 @@ import configparser
def update_hash_item(has_type):
#get all hash items:
all_base64 = r_serv_tag.smembers('infoleak:automatic-detection=\"{}\"'.format(has_type))
#all_base64 = r_serv_tag.smembers('infoleak:automatic-detection=\"{}\"'.format(has_type))
all_hash_items = r_serv_tag.smembers('infoleak:automatic-detection=\"{}\":20180925'.format(has_type))
for item_path in all_hash_items:
item_path = '/home/aurelien/git/python3/AIL-framework/PASTES/archive/pastebin.com_pro/2018/09/25/Fu9akJaz.gz'
if PASTES_FOLDER in item_path:
base64_key = '{}_paste:{}'.format(has_type, item_path)
hash_key = 'hash_paste:{}'.format(item_path)

View file

@ -87,8 +87,8 @@ if __name__ == '__main__':
for date_history in all_onion_history:
pass
#print('onion_history:{}:{}'.format(onion_domain, date_history))
#r_serv_onion.delete('onion_history:{}:{}'.format(onion_domain, date_history))
#r_serv_onion.delete('onion_history:{}'.format(onion_domain))
r_serv_onion.delete('onion_history:{}:{}'.format(onion_domain, date_history))
r_serv_onion.delete('onion_history:{}'.format(onion_domain))
# clean up domain
all_domain_up = r_serv_onion.smembers('full_onion_up')
@ -105,19 +105,19 @@ if __name__ == '__main__':
item_father = item_father[0]
#print(item_father)
# delete old history
#r_serv_onion.delete('onion_history:{}:{}'.format(onion_domain, date_history))
r_serv_onion.delete('onion_history:{}:{}'.format(onion_domain, date_history))
# create new history
root_key = get_domain_root_from_paste_childrens(item_father, onion_domain)
if root_key:
#r_serv_onion.zadd('crawler_history_onion:{}:80'.format(onion_domain), get_date_epoch(date_history), root_key)
r_serv_onion.zadd('crawler_history_onion:{}:80'.format(onion_domain), get_date_epoch(date_history), root_key)
print('crawler_history_onion:{}:80 {} {}'.format(onion_domain, get_date_epoch(date_history), root_key))
#update service metadata: paste_parent
#r_serv_onion.hset('onion_metadata:{}'.format(onion_domain), 'paste_parent', root_key)
r_serv_onion.hset('onion_metadata:{}'.format(onion_domain), 'paste_parent', root_key)
#r_serv_onion.delete('onion_history:{}'.format(onion_domain))
r_serv_onion.delete('onion_history:{}'.format(onion_domain))
#r_serv_onion.hset('onion_metadata:{}'.format(onion_domain), 'ports', '80')
#r_serv_onion.hdel('onion_metadata:{}'.format(onion_domain), 'last_seen')
r_serv_onion.hset('onion_metadata:{}'.format(onion_domain), 'ports', '80')
r_serv_onion.hdel('onion_metadata:{}'.format(onion_domain), 'last_seen')
'''

View file

@ -7,6 +7,14 @@ import time
import redis
import configparser
def tags_key_fusion(old_item_path_key, new_item_path_key):
    """Merge the tag set stored at *old_item_path_key* into *new_item_path_key*.

    Every member of the old Redis set is transferred to the new set; the old
    key ends up empty (Redis removes empty sets automatically).
    NOTE(review): relies on the module-level `r_serv_metadata` connection.
    """
    print('fusion:')
    print(old_item_path_key)
    print(new_item_path_key)
    # SMOVE atomically removes the member from the source set and adds it to
    # the destination — same net effect as the sadd + srem pair.
    for tag_member in r_serv_metadata.smembers(old_item_path_key):
        r_serv_metadata.smove(old_item_path_key, new_item_path_key, tag_member)
if __name__ == '__main__':
start_deb = time.time()
@ -115,6 +123,24 @@ if __name__ == '__main__':
r_important_paste_2018.flushdb()
r_important_paste_2019.flushdb()
# update item metadata tags:
# rename every 'tag:<absolute_path>' key queued in 'maj:v1.5:absolute_path_to_rename'
# to its PASTES_FOLDER-relative equivalent, merging sets when the target exists.
tag_not_updated = True
total_to_update = r_serv_tag.scard('maj:v1.5:absolute_path_to_rename')
nb_updated = 0
while tag_not_updated:
    item_path = r_serv_tag.spop('maj:v1.5:absolute_path_to_rename')
    if item_path is None:
        # set was already empty (nothing queued): stop instead of
        # crashing on None.replace() below
        break
    old_tag_item_key = 'tag:{}'.format(item_path)
    # strip only the leading PASTES_FOLDER prefix from the path
    new_item_path = item_path.replace(PASTES_FOLDER, '', 1)
    new_tag_item_key = 'tag:{}'.format(new_item_path)
    # RENAMENX returns 0 when the target key already exists;
    # in that case merge the two tag sets instead of renaming
    res = r_serv_metadata.renamenx(old_tag_item_key, new_tag_item_key)
    if res == 0:
        tags_key_fusion(old_tag_item_key, new_tag_item_key)
    nb_updated += 1
    if r_serv_tag.scard('maj:v1.5:absolute_path_to_rename') == 0:
        tag_not_updated = False  # bug fix: was lowercase 'false' (NameError at runtime)
    else:
        print('{}/{} Tags updated'.format(nb_updated, total_to_update))
end = time.time()

View file

@ -153,8 +153,19 @@ def showpaste(content_range, requested_path):
if r_serv_metadata.scard('hash_paste:'+requested_path) > 0:
set_b64 = r_serv_metadata.smembers('hash_paste:'+requested_path)
for hash in set_b64:
print(requested_path)
nb_in_file = int(r_serv_metadata.zscore('nb_seen_hash:'+hash, requested_path))
nb_in_file = r_serv_metadata.zscore('nb_seen_hash:'+hash, requested_path)
# item list not updated
if nb_in_file is None:
l_pastes = r_serv_metadata.zrange('nb_seen_hash:'+hash, 0, -1)
for paste in l_pastes:
# dynamic update
if PASTES_FOLDER in paste:
score = r_serv_metadata.zscore('nb_seen_hash:{}'.format(hash), paste)
r_serv_metadata.zrem('nb_seen_hash:{}'.format(hash), paste)
paste = paste.replace(PASTES_FOLDER, '', 1)
r_serv_metadata.zadd('nb_seen_hash:{}'.format(hash), score, paste)
nb_in_file = r_serv_metadata.zscore('nb_seen_hash:'+hash, requested_path)
nb_in_file = int(nb_in_file)
estimated_type = r_serv_metadata.hget('metadata_hash:'+hash, 'estimated_type')
file_type = estimated_type.split('/')[0]
# set file icon