chg: [update DB] add migration script
commit 0519b4a437, parent f6d7d2ae16
4 changed files with 506 additions and 1 deletion
OVERVIEW.md (22 changed lines)
@@ -67,12 +67,32 @@ Redis and ARDB overview
 | | father | **item father** |
 | | domain | **crawled domain**:**domain port** |
+
+##### Set:
+| Key | Field |
+| ------ | ------ |
+| tag:**item path** | **tag** |
+| | |
+| paste_children:**item path** | **item path** |
+| | |
+| hash_paste:**item path** | **hash** |
+| base64_paste:**item path** | **hash** |
+| hexadecimal_paste:**item path** | **hash** |
+| binary_paste:**item path** | **hash** |
+
+##### Zset:
+| Key | Field | Value |
+| ------ | ------ | ------ |
+| nb_seen_hash:**hash** | **item** | **nb_seen** |
+| base64_hash:**hash** | **item** | **nb_seen** |
+| binary_hash:**hash** | **item** | **nb_seen** |
+| hexadecimal_hash:**hash** | **item** | **nb_seen** |
 
 ## DB9 - Crawler:
 
 ##### Hset:
 | Key | Field | Value |
 | ------ | ------ | ------ |
-| **service type**:**domain** | first_seen | **date** |
+| **service type**_metadata:**domain** | first_seen | **date** |
 | | last_check | **date** |
 | | ports | **port**;**port**;**port** ... |
 | | paste_parent | **parent last crawling (can be auto or manual)** |
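The tables above document a key layout, not an API, so a short read-only sketch may help. The following is illustrative only: the connection parameters, item path, and hash below are placeholders rather than values from this commit, and it assumes the redis-py client that AIL already uses.

import redis

# Hypothetical connection values; use the ARDB_Metadata section of your config.cfg.
r = redis.StrictRedis(host='localhost', port=6382, db=1, decode_responses=True)

item = 'archive/pastebin.com_pro/2019/03/07/example.gz'   # placeholder item path

# Set  tag:<item path> -> tags attached to one item
print(r.smembers('tag:{}'.format(item)))

# Set  hash_paste:<item path> -> hashes decoded from one item
print(r.smembers('hash_paste:{}'.format(item)))

# Zset nb_seen_hash:<hash> -> items in which the hash was seen, scored by nb_seen
for item_path, nb_seen in r.zscan_iter('nb_seen_hash:0123abcd'):   # placeholder hash
    print(item_path, int(nb_seen))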
update/v1.4/Update-ARDB_Metadata.py (new executable file, 228 lines)
@@ -0,0 +1,228 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*

import os
import sys
import time
import redis
import configparser


def update_hash_item(has_type):
    # get all hash items:
    #all_base64 = r_serv_tag.smembers('infoleak:automatic-detection=\"{}\"'.format(has_type))
    all_hash_items = r_serv_tag.smembers('infoleak:automatic-detection=\"{}\":20190307'.format(has_type))
    for item_path in all_hash_items:
        if PASTES_FOLDER in item_path:
            base64_key = '{}_paste:{}'.format(has_type, item_path)
            hash_key = 'hash_paste:{}'.format(item_path)

            ## TODO: catch error
            if r_serv_metadata.exists(base64_key):
                res = r_serv_metadata.renamenx(base64_key, base64_key.replace(PASTES_FOLDER, '', 1))
                ## TODO: key merge
                if not res:
                    print('same key, double name: {}'.format(item_path))

            if r_serv_metadata.exists(hash_key):
                ## TODO: catch error
                res = r_serv_metadata.renamenx(hash_key, hash_key.replace(PASTES_FOLDER, '', 1))
                ## TODO: key merge
                if not res:
                    print('same key, double name: {}'.format(item_path))

if __name__ == '__main__':

    start_deb = time.time()

    configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
    if not os.path.exists(configfile):
        raise Exception('Unable to find the configuration file. \
                        Did you set environment variables? \
                        Or activate the virtualenv.')
    cfg = configparser.ConfigParser()
    cfg.read(configfile)

    PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes")) + '/'

    r_serv_metadata = redis.StrictRedis(
        host=cfg.get("ARDB_Metadata", "host"),
        port=cfg.getint("ARDB_Metadata", "port"),
        db=cfg.getint("ARDB_Metadata", "db"),
        decode_responses=True)

    r_serv_tag = redis.StrictRedis(
        host=cfg.get("ARDB_Tags", "host"),
        port=cfg.getint("ARDB_Tags", "port"),
        db=cfg.getint("ARDB_Tags", "db"),
        decode_responses=True)

    r_serv_onion = redis.StrictRedis(
        host=cfg.get("ARDB_Onion", "host"),
        port=cfg.getint("ARDB_Onion", "port"),
        db=cfg.getint("ARDB_Onion", "db"),
        decode_responses=True)

    ## Update metadata ##
    print('Updating ARDB_Metadata ...')
    index = 0
    start = time.time()

    # Update base64
    update_hash_item('base64')
    # Update binary
    update_hash_item('binary')
    # Update hexadecimal
    update_hash_item('hexadecimal')

    # Update onion metadata
    #all_crawled_items = r_serv_tag.smembers('infoleak:submission=\"crawler\"')
    all_crawled_items = r_serv_tag.smembers('infoleak:submission=\"crawler\":20190227')
    for item_path in all_crawled_items:
        if PASTES_FOLDER in item_path:
            item_metadata = 'paste_metadata:{}'.format(item_path)
            ## TODO: catch error
            r_serv_metadata.rename(item_metadata, item_metadata.replace(PASTES_FOLDER, '', 1))

    ######################################################################################################################
    ######################################################################################################################
    ######################################################################################################################
    ######################################################################################################################
    ######################################################################################################################
    ######################################################################################################################
    '''
    string_keys_to_rename = ['misp_events:{}*'.format(PASTES_FOLDER), 'hive_cases:{}*'.format(PASTES_FOLDER)]
    for key_to_rename in string_keys_to_rename:

        keys_to_rename = []
        for key in r_serv_metadata.scan_iter(key_to_rename):
            new_key = key.replace(PASTES_FOLDER, '', 1)
            keys_to_rename.append((key, new_key))
            index = index + 1
        for key, new_key in keys_to_rename:
            r_serv_metadata.rename(key, new_key)

        keys_to_rename = None

    set_keys_to_rename = ['tag:{}*'.format(PASTES_FOLDER), 'paste_regular_external_links:{}*'.format(PASTES_FOLDER), 'paste_onion_external_links:{}*'.format(PASTES_FOLDER), 'paste_children:{}*'.format(PASTES_FOLDER)]
    for key_to_rename in set_keys_to_rename:

        keys_to_remove = []
        keys_to_rename = []
        for key in r_serv_metadata.scan_iter(key_to_rename):
            new_key = key.replace(PASTES_FOLDER, '', 1)
            # a set with this key already exists
            if r_serv_metadata.exists(new_key):
                # save data
                for new_key_value in r_serv_metadata.smembers(key):
                    r_serv_metadata.sadd(new_key, new_key_value)
                keys_to_remove.append(key)
            else:
                keys_to_rename.append((key, new_key))
            index = index + 1
        for key in keys_to_remove:
            r_serv_metadata.delete(key)
        for key, new_key in keys_to_rename:
            r_serv_metadata.rename(key, new_key)

        keys_to_remove = None
        keys_to_rename = None


    zset_keys_to_rename = ['nb_seen_hash:*', 'base64_hash:*', 'binary_hash:*']
    for key_to_rename in zset_keys_to_rename:

        keys_to_remove = []
        zkeys_to_remove = []
        keys_to_add = []
        for key in r_serv_metadata.scan_iter(key_to_rename):
            temp = []
            for zset_key, value in r_serv_metadata.zscan_iter(key, '*{}*'.format(PASTES_FOLDER)):
                new_key = zset_key.replace(PASTES_FOLDER, '', 1)
                index = index + 1
                temp.append((key, zset_key))
                keys_to_add.append((key, new_key, value))
            if 0 < len(temp) < r_serv_metadata.zcard(key):
                zkeys_to_remove.extend(temp)
            else:
                keys_to_remove.append(key)
        for key in keys_to_remove:
            r_serv_metadata.delete(key)
        for key, zset_key in zkeys_to_remove:
            r_serv_metadata.zrem(key, zset_key)
        for key, new_key, value in keys_to_add:
            r_serv_metadata.zincrby(key, new_key, int(value))
        keys_to_remove = None
        zkeys_to_remove = None
        keys_to_add = None

    set_keys_to_rename = ['paste_children:*']
    for key_to_rename in set_keys_to_rename:
        keys_to_remove = []
        skeys_to_remove = []
        keys_to_add = []
        for key in r_serv_metadata.scan_iter(key_to_rename):
            temp = []
            for set_key in r_serv_metadata.sscan_iter(key, '*{}*'.format(PASTES_FOLDER)):
                new_key = set_key.replace(PASTES_FOLDER, '', 1)
                index = index + 1
                temp.append((key, set_key))
                keys_to_add.append((key, new_key))
            if 0 < len(temp) < r_serv_metadata.scard(key):
                skeys_to_remove.extend(temp)
            else:
                keys_to_remove.append(key)
        for key in keys_to_remove:
            r_serv_metadata.delete(key)
        for key, set_key in skeys_to_remove:
            r_serv_metadata.srem(key, set_key)
        for key, new_key in keys_to_add:
            r_serv_metadata.sadd(key, new_key)
        keys_to_remove = None
        skeys_to_remove = None
        keys_to_add = None

    hset_keys_to_rename = ['paste_metadata:{}*'.format(PASTES_FOLDER)]
    for key_to_rename in hset_keys_to_rename:

        keys_to_rename = []
        for key in r_serv_metadata.scan_iter(key_to_rename):
            new_key = key.replace(PASTES_FOLDER, '', 1)
            # a hset with this key already exists
            if r_serv_metadata.exists(new_key):
                pass
            else:
                keys_to_rename.append((key, new_key))
            index = index + 1
        for key, new_key in keys_to_rename:
            r_serv_metadata.rename(key, new_key)
        keys_to_rename = None

    # to verify 120/100 try with scan
    hset_keys_to_rename = ['paste_metadata:*']
    for key_to_rename in hset_keys_to_rename:
        for key in r_serv_metadata.scan_iter(key_to_rename):
            father = r_serv_metadata.hget(key, 'father')
            super_father = r_serv_metadata.hget(key, 'super_father')

            if father:
                if PASTES_FOLDER in father:
                    index = index + 1
                    r_serv_metadata.hdel(key, 'father')
                    r_serv_metadata.hset(key, 'father', father.replace(PASTES_FOLDER, '', 1))

            if super_father:
                if PASTES_FOLDER in super_father:
                    index = index + 1
                    r_serv_metadata.hdel(key, 'super_father')
                    r_serv_metadata.hset(key, 'super_father', super_father.replace(PASTES_FOLDER, '', 1))

    keys_to_rename = None
    '''


    end = time.time()

    print('Updating ARDB_Metadata Done => {} paths: {} s'.format(index, end - start))
    print()
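RENAMENX refuses to overwrite an existing destination key, which is what triggers the 'same key, double name' message above, so stale absolute-path keys can survive the migration. A minimal post-migration check, sketched under the assumption of the same config.cfg layout; check_leftovers and the scanned patterns are illustrative, not part of this commit:

import os
import configparser
import redis

def check_leftovers(r_serv, pattern, prefix):
    # Count keys matching `pattern` that still embed the absolute paste prefix.
    leftovers = [key for key in r_serv.scan_iter(pattern) if prefix in key]
    print('{}: {} key(s) still carry the absolute path'.format(pattern, len(leftovers)))
    return leftovers

cfg = configparser.ConfigParser()
cfg.read(os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg'))
prefix = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes")) + '/'

r_serv_metadata = redis.StrictRedis(host=cfg.get("ARDB_Metadata", "host"),
                                    port=cfg.getint("ARDB_Metadata", "port"),
                                    db=cfg.getint("ARDB_Metadata", "db"),
                                    decode_responses=True)

# Patterns chosen to mirror the keys touched by the script above.
for pattern in ('hash_paste:*', 'base64_paste:*', 'paste_metadata:*'):
    check_leftovers(r_serv_metadata, pattern, prefix)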
update/v1.4/Update-ARDB_Onions.py (new executable file, 136 lines)
@@ -0,0 +1,136 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*

import os
import sys
import time
import redis
import datetime
import configparser


def substract_date(date_from, date_to):
    date_from = datetime.date(int(date_from[0:4]), int(date_from[4:6]), int(date_from[6:8]))
    date_to = datetime.date(int(date_to[0:4]), int(date_to[4:6]), int(date_to[6:8]))
    delta = date_to - date_from # timedelta
    l_date = []
    for i in range(delta.days + 1):
        date = date_from + datetime.timedelta(i)
        l_date.append(date.strftime('%Y%m%d'))
    return l_date

def get_date_epoch(date):
    return int(datetime.datetime(int(date[0:4]), int(date[4:6]), int(date[6:8])).timestamp())

def get_domain_root_from_paste_childrens(item_father, domain):
    item_children = r_serv_metadata.smembers('paste_children:{}'.format(item_father))
    domain_root = ''
    for item_path in item_children:
        # remove absolute_path
        if PASTES_FOLDER in item_path:
            #r_serv_metadata.srem('paste_children:{}'.format(item_father), item_path)
            item_path = item_path.replace(PASTES_FOLDER, '', 1)
            #r_serv_metadata.sadd('paste_children:{}'.format(item_father), item_path)
        if domain in item_path:
            domain_root = item_path
    return domain_root


if __name__ == '__main__':

    start_deb = time.time()

    configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
    if not os.path.exists(configfile):
        raise Exception('Unable to find the configuration file. \
                        Did you set environment variables? \
                        Or activate the virtualenv.')
    cfg = configparser.ConfigParser()
    cfg.read(configfile)

    PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes")) + '/'

    r_serv_metadata = redis.StrictRedis(
        host=cfg.get("ARDB_Metadata", "host"),
        port=cfg.getint("ARDB_Metadata", "port"),
        db=cfg.getint("ARDB_Metadata", "db"),
        decode_responses=True)

    r_serv_tag = redis.StrictRedis(
        host=cfg.get("ARDB_Tags", "host"),
        port=cfg.getint("ARDB_Tags", "port"),
        db=cfg.getint("ARDB_Tags", "db"),
        decode_responses=True)

    r_serv_onion = redis.StrictRedis(
        host=cfg.get("ARDB_Onion", "host"),
        port=cfg.getint("ARDB_Onion", "port"),
        db=cfg.getint("ARDB_Onion", "db"),
        decode_responses=True)

    ## Update Onion ##
    print('Updating ARDB_Onion ...')
    index = 0
    start = time.time()

    # clean down domain from db
    date_from = '20180929'
    date_today = datetime.date.today().strftime("%Y%m%d")
    for date in substract_date(date_from, date_today):

        onion_down = r_serv_onion.smembers('onion_down:{}'.format(date))
        #print(onion_down)
        for onion_domain in onion_down:
            if not r_serv_onion.sismember('full_onion_up', onion_domain):
                # delete history
                all_onion_history = r_serv_onion.lrange('onion_history:{}'.format(onion_domain), 0, -1)
                if all_onion_history:
                    for date_history in all_onion_history:
                        pass
                        #print('onion_history:{}:{}'.format(onion_domain, date_history))
                        #r_serv_onion.delete('onion_history:{}:{}'.format(onion_domain, date_history))
                    #r_serv_onion.delete('onion_history:{}'.format(onion_domain))

    # clean up domain
    all_domain_up = r_serv_onion.smembers('full_onion_up')
    for onion_domain in all_domain_up:
        # delete history
        all_onion_history = r_serv_onion.lrange('onion_history:{}'.format(onion_domain), 0, -1)
        if all_onion_history:
            for date_history in all_onion_history:
                print('--------')
                print('onion_history:{}:{}'.format(onion_domain, date_history))
                #item_father = r_serv_onion.lpop('onion_history:{}:{}'.format(onion_domain, date_history))
                item_father = r_serv_onion.lrange('onion_history:{}:{}'.format(onion_domain, date_history), 0, 0)
                print('item_father: {}'.format(item_father))
                item_father = item_father[0]
                #print(item_father)
                # delete old history
                #r_serv_onion.delete('onion_history:{}:{}'.format(onion_domain, date_history))
                # create new history
                root_key = get_domain_root_from_paste_childrens(item_father, onion_domain)
                if root_key:
                    #r_serv_onion.zadd('crawler_history_onion:{}:80'.format(onion_domain), get_date_epoch(date_history), root_key)
                    print('crawler_history_onion:{}:80 {} {}'.format(onion_domain, get_date_epoch(date_history), root_key))
                    # update service metadata: paste_parent
                    #r_serv_onion.hset('onion_metadata:{}'.format(onion_domain), 'paste_parent', root_key)

            #r_serv_onion.delete('onion_history:{}'.format(onion_domain))

        #r_serv_onion.hset('onion_metadata:{}'.format(onion_domain), 'ports', '80')
        #r_serv_onion.hdel('onion_metadata:{}'.format(onion_domain), 'last_seen')


    '''
    for elem in r_serv_onion.smembers('onion_crawler_queue'):
        if PASTES_FOLDER in elem:
            r_serv_onion.srem('onion_crawler_queue', elem)
            r_serv_onion.sadd('onion_crawler_queue', elem.replace(PASTES_FOLDER, '', 1))
            index = index + 1
    '''


    end = time.time()
    print('Updating ARDB_Onion Done => {} paths: {} s'.format(index, end - start))
    print()
    print('Done in {} s'.format(end - start_deb))
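As a sanity check on the date helper above: substract_date enumerates every day from date_from to date_to inclusive, so an interactive session (with made-up dates) would look like:

>>> substract_date('20190225', '20190228')
['20190225', '20190226', '20190227', '20190228']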
update/v1.4/Update-ARDB_Tags.py (new executable file, 121 lines)
@@ -0,0 +1,121 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*

import os
import sys
import time
import redis
import configparser


if __name__ == '__main__':

    start_deb = time.time()

    configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
    if not os.path.exists(configfile):
        raise Exception('Unable to find the configuration file. \
                        Did you set environment variables? \
                        Or activate the virtualenv.')
    cfg = configparser.ConfigParser()
    cfg.read(configfile)

    PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes")) + '/'

    r_serv_metadata = redis.StrictRedis(
        host=cfg.get("ARDB_Metadata", "host"),
        port=cfg.getint("ARDB_Metadata", "port"),
        db=cfg.getint("ARDB_Metadata", "db"),
        decode_responses=True)

    r_serv_tag = redis.StrictRedis(
        host=cfg.get("ARDB_Tags", "host"),
        port=cfg.getint("ARDB_Tags", "port"),
        db=cfg.getint("ARDB_Tags", "db"),
        decode_responses=True)

    r_serv_onion = redis.StrictRedis(
        host=cfg.get("ARDB_Onion", "host"),
        port=cfg.getint("ARDB_Onion", "port"),
        db=cfg.getint("ARDB_Onion", "db"),
        decode_responses=True)

    r_important_paste_2018 = redis.StrictRedis(
        host=cfg.get("ARDB_Metadata", "host"),
        port=cfg.getint("ARDB_Metadata", "port"),
        db=2018,
        decode_responses=True)

    r_important_paste_2019 = redis.StrictRedis(
        host=cfg.get("ARDB_Metadata", "host"),
        port=cfg.getint("ARDB_Metadata", "port"),
        db=2019,
        decode_responses=True)

    print('Updating ARDB_Tags ...')
    index = 0
    start = time.time()

    tags_list = r_serv_tag.smembers('list_tags')
    # create temp tags metadata
    tag_metadata = {}
    for tag in tags_list:
        tag_metadata[tag] = {}
        tag_metadata[tag]['first_seen'] = r_serv_tag.hget('tag_metadata:{}'.format(tag), 'first_seen')
        if tag_metadata[tag]['first_seen'] is None:
            tag_metadata[tag]['first_seen'] = 99999999
        else:
            tag_metadata[tag]['first_seen'] = int(tag_metadata[tag]['first_seen'])

        tag_metadata[tag]['last_seen'] = r_serv_tag.hget('tag_metadata:{}'.format(tag), 'last_seen')
        if tag_metadata[tag]['last_seen'] is None:
            tag_metadata[tag]['last_seen'] = 0
        else:
            tag_metadata[tag]['last_seen'] = int(tag_metadata[tag]['last_seen'])

    for tag in tags_list:

        all_item = r_serv_tag.smembers(tag)
        for item_path in all_item:
            splitted_item_path = item_path.split('/')
            #print(tag)
            #print(item_path)
            item_date = int(''.join([splitted_item_path[-4], splitted_item_path[-3], splitted_item_path[-2]]))

            # remove absolute path
            new_path = item_path.replace(PASTES_FOLDER, '', 1)
            if new_path != item_path:
                # save in queue absolute path to remove
                r_serv_tag.sadd('maj:v1.5:absolute_path_to_rename', item_path)

            # update metadata first_seen
            if item_date < tag_metadata[tag]['first_seen']:
                tag_metadata[tag]['first_seen'] = item_date
                r_serv_tag.hset('tag_metadata:{}'.format(tag), 'first_seen', item_date)

            # update metadata last_seen
            if item_date > tag_metadata[tag]['last_seen']:
                tag_metadata[tag]['last_seen'] = item_date
                r_serv_tag.hset('tag_metadata:{}'.format(tag), 'last_seen', item_date)

            r_serv_tag.sadd('{}:{}'.format(tag, item_date), new_path)
            r_serv_tag.hincrby('daily_tags:{}'.format(item_date), tag, 1)

            # clean db
            r_serv_tag.srem(tag, item_path)
            index = index + 1

    # flush browsed important pastes db
    r_important_paste_2018.flushdb()
    r_important_paste_2019.flushdb()

    end = time.time()

    print('Updating ARDB_Tags Done => {} paths: {} s'.format(index, end - start))
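After this migration, tagged items live in per-date sets (tag:date, holding relative paths) and per-day counters (daily_tags:date). A minimal sketch of reading them back, reusing the r_serv_tag connection above; the tag and date are placeholders:

tag = 'infoleak:automatic-detection="base64"'   # placeholder tag
date = '20190307'                               # placeholder YYYYMMDD date

# items carrying `tag` on `date` (relative paths after the migration)
for item_path in r_serv_tag.smembers('{}:{}'.format(tag, date)):
    print(item_path)

# number of items per tag for that day
print(r_serv_tag.hget('daily_tags:{}'.format(date), tag))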