mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-22 22:27:17 +00:00
chg: [DB] update items tags metadata
This commit is contained in:
parent
e83174327a
commit
3cc614a1ad
6 changed files with 99 additions and 42 deletions
|
@ -59,6 +59,8 @@ class HiddenServices(object):
|
||||||
db=cfg.getint("ARDB_Metadata", "db"),
|
db=cfg.getint("ARDB_Metadata", "db"),
|
||||||
decode_responses=True)
|
decode_responses=True)
|
||||||
|
|
||||||
|
self.PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes")) + '/'
|
||||||
|
|
||||||
self.domain = domain
|
self.domain = domain
|
||||||
self.type = type
|
self.type = type
|
||||||
self.port = port
|
self.port = port
|
||||||
|
@ -76,9 +78,16 @@ class HiddenServices(object):
|
||||||
## TODO: # FIXME: add error
|
## TODO: # FIXME: add error
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def remove_absolute_path_link(self, key, value):
|
#def remove_absolute_path_link(self, key, value):
|
||||||
print(key)
|
# print(key)
|
||||||
print(value)
|
# print(value)
|
||||||
|
|
||||||
|
def update_item_path_children(self, key, children):
    """Rewrite a child item path stored with the absolute pastes folder.

    If ``children`` contains ``self.PASTES_FOLDER``, the first occurrence of
    that folder is stripped from the path, the old member is removed from the
    set stored at ``key`` in ``self.r_serv_metadata`` and the shortened path
    is added in its place.

    :param key: name of the set holding the child item paths
    :param children: child item path, as read from that set
    :return: the shortened path when a rewrite happened, otherwise
        ``children`` unchanged
    """
    # Nothing to migrate: the stored path does not contain the pastes folder.
    if self.PASTES_FOLDER not in children:
        return children

    relative_path = children.replace(self.PASTES_FOLDER, '', 1)
    # Swap the old member for the shortened one inside the same set.
    self.r_serv_metadata.srem(key, children)
    self.r_serv_metadata.sadd(key, relative_path)
    return relative_path
|
||||||
|
|
||||||
def get_origin_paste_name(self):
|
def get_origin_paste_name(self):
|
||||||
origin_item = self.r_serv_onion.hget('onion_metadata:{}'.format(self.domain), 'paste_parent')
|
origin_item = self.r_serv_onion.hget('onion_metadata:{}'.format(self.domain), 'paste_parent')
|
||||||
|
@ -106,7 +115,6 @@ class HiddenServices(object):
|
||||||
# need to remove it
|
# need to remove it
|
||||||
else:
|
else:
|
||||||
p_tags = self.r_serv_metadata.smembers('tag:{}'.format(os.path.join(self.paste_directory, item)))
|
p_tags = self.r_serv_metadata.smembers('tag:{}'.format(os.path.join(self.paste_directory, item)))
|
||||||
print(p_tags)
|
|
||||||
for tag in p_tags:
|
for tag in p_tags:
|
||||||
self.tags[tag] = self.tags.get(tag, 0) + 1
|
self.tags[tag] = self.tags.get(tag, 0) + 1
|
||||||
|
|
||||||
|
@ -158,8 +166,10 @@ class HiddenServices(object):
|
||||||
if father is None:
|
if father is None:
|
||||||
return []
|
return []
|
||||||
l_crawled_pastes = []
|
l_crawled_pastes = []
|
||||||
paste_childrens = self.r_serv_metadata.smembers('paste_children:{}'.format(father))
|
key = 'paste_children:{}'.format(father)
|
||||||
|
paste_childrens = self.r_serv_metadata.smembers(key)
|
||||||
for children in paste_childrens:
|
for children in paste_childrens:
|
||||||
|
children = self.update_item_path_children(key, children)
|
||||||
if self.domain in children:
|
if self.domain in children:
|
||||||
l_crawled_pastes.append(children)
|
l_crawled_pastes.append(children)
|
||||||
self.update_domain_tags(children)
|
self.update_domain_tags(children)
|
||||||
|
@ -174,8 +184,8 @@ class HiddenServices(object):
|
||||||
else:
|
else:
|
||||||
key = os.path.join(self.paste_directory, item)
|
key = os.path.join(self.paste_directory, item)
|
||||||
link = self.r_serv_metadata.hget('paste_metadata:{}'.format(key), 'real_link')
|
link = self.r_serv_metadata.hget('paste_metadata:{}'.format(key), 'real_link')
|
||||||
if link:
|
#if link:
|
||||||
self.remove_absolute_path_link(key, link)
|
#self.remove_absolute_path_link(key, link)
|
||||||
|
|
||||||
return link
|
return link
|
||||||
|
|
||||||
|
|
|
@ -85,19 +85,19 @@ if __name__ == '__main__':
|
||||||
item_path = item_path.replace(PASTES_FOLDER, '', 1)
|
item_path = item_path.replace(PASTES_FOLDER, '', 1)
|
||||||
new_item_metadata = 'paste_metadata:{}'.format(item_path)
|
new_item_metadata = 'paste_metadata:{}'.format(item_path)
|
||||||
## TODO: catch error
|
## TODO: catch error
|
||||||
r_serv_metadata.rename(old_item_metadata, new_item_metadata)
|
res = r_serv_metadata.renamenx(old_item_metadata, new_item_metadata)
|
||||||
# update domain port
|
# update domain port
|
||||||
domain = r_serv_metadata.hget('paste_metadata:{}'.format(item_path), 'domain')
|
domain = r_serv_metadata.hget(new_item_metadata, 'domain')
|
||||||
if domain:
|
if domain:
|
||||||
r_serv_metadata.hset('paste_metadata:{}'.format(item_path), 'domain', '{}:80'.format(domain))
|
r_serv_metadata.hset(new_item_metadata, 'domain', '{}:80'.format(domain))
|
||||||
super_father = r_serv_metadata.hget('paste_metadata:{}'.format(item_path), 'super_father')
|
super_father = r_serv_metadata.hget(new_item_metadata, 'super_father')
|
||||||
if super_father:
|
if super_father:
|
||||||
if PASTES_FOLDER in super_father:
|
if PASTES_FOLDER in super_father:
|
||||||
r_serv_metadata.hset('paste_metadata:{}'.format(item_path), 'super_father', super_father.replace(PASTES_FOLDER, '', 1))
|
r_serv_metadata.hset(new_item_metadata, 'super_father', super_father.replace(PASTES_FOLDER, '', 1))
|
||||||
father = r_serv_metadata.hget('paste_metadata:{}'.format(item_path), 'father')
|
father = r_serv_metadata.hget(new_item_metadata, 'father')
|
||||||
if father:
|
if father:
|
||||||
if PASTES_FOLDER in father:
|
if PASTES_FOLDER in father:
|
||||||
r_serv_metadata.hset('paste_metadata:{}'.format(item_path), 'father', father.replace(PASTES_FOLDER, '', 1))
|
r_serv_metadata.hset(new_item_metadata, 'father', father.replace(PASTES_FOLDER, '', 1))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -47,12 +47,6 @@ if __name__ == '__main__':
|
||||||
db=cfg.getint("ARDB_Onion", "db"),
|
db=cfg.getint("ARDB_Onion", "db"),
|
||||||
decode_responses=True)
|
decode_responses=True)
|
||||||
|
|
||||||
r_serv_onion = redis.StrictRedis(
|
|
||||||
host=cfg.get("ARDB_Onion", "host"),
|
|
||||||
port=cfg.getint("ARDB_Onion", "port"),
|
|
||||||
db=cfg.getint("ARDB_Onion", "db"),
|
|
||||||
decode_responses=True)
|
|
||||||
|
|
||||||
r_important_paste_2018 = redis.StrictRedis(
|
r_important_paste_2018 = redis.StrictRedis(
|
||||||
host=cfg.get("ARDB_Metadata", "host"),
|
host=cfg.get("ARDB_Metadata", "host"),
|
||||||
port=cfg.getint("ARDB_Metadata", "port"),
|
port=cfg.getint("ARDB_Metadata", "port"),
|
||||||
|
@ -123,24 +117,6 @@ if __name__ == '__main__':
|
||||||
r_important_paste_2018.flushdb()
|
r_important_paste_2018.flushdb()
|
||||||
r_important_paste_2019.flushdb()
|
r_important_paste_2019.flushdb()
|
||||||
|
|
||||||
#update item metadata tags
|
|
||||||
tag_not_updated = True
|
|
||||||
total_to_update = r_serv_tag.scard('maj:v1.5:absolute_path_to_rename')
|
|
||||||
nb_updated = 0
|
|
||||||
while tag_not_updated:
|
|
||||||
item_path = r_serv_tag.spop('maj:v1.5:absolute_path_to_rename')
|
|
||||||
old_tag_item_key = 'tag:{}'.format(item_path)
|
|
||||||
new_item_path = item_path.replace(PASTES_FOLDER, '', 1)
|
|
||||||
new_tag_item_key = 'tag:{}'.format(new_item_path)
|
|
||||||
res = r_serv_metadata.renamenx(old_tag_item_key, new_tag_item_key)
|
|
||||||
if res == 0:
|
|
||||||
tags_key_fusion(old_tag_item_key, new_tag_item_key)
|
|
||||||
nb_updated += 1
|
|
||||||
if r_serv_tag.scard('maj:v1.5:absolute_path_to_rename') == 0:
|
|
||||||
tag_not_updated = false
|
|
||||||
else:
|
|
||||||
print('{}/{} Tags updated'.format(nb_updated, total_to_update))
|
|
||||||
|
|
||||||
end = time.time()
|
end = time.time()
|
||||||
|
|
||||||
|
|
||||||
|
|
70
update/v1.4/Update-ARDB_Tags_background.py
Executable file
70
update/v1.4/Update-ARDB_Tags_background.py
Executable file
|
@ -0,0 +1,70 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import redis
|
||||||
|
import configparser
|
||||||
|
|
||||||
|
def tags_key_fusion(old_item_path_key, new_item_path_key):
    """Merge the tag set stored under one key into another key.

    Every member of the set at ``old_item_path_key`` is added to the set at
    ``new_item_path_key`` and then removed from the old set, using the
    module-level ``r_serv_metadata`` connection. Both key names are printed
    before the merge starts.

    :param old_item_path_key: key of the set to drain
    :param new_item_path_key: key of the set receiving the members
    """
    # Merging a set into itself would SADD (no-op) and then SREM each member,
    # leaving the set empty: there is nothing to move, so leave it untouched.
    if old_item_path_key == new_item_path_key:
        return

    print('fusion:')
    print(old_item_path_key)
    print(new_item_path_key)
    for tag in r_serv_metadata.smembers(old_item_path_key):
        # Add first, remove second, so a tag is never absent from both sets.
        r_serv_metadata.sadd(new_item_path_key, tag)
        r_serv_metadata.srem(old_item_path_key, tag)
|
||||||
|
|
||||||
|
if __name__ == '__main__':

    # Locate and load the AIL configuration file.
    configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
    if not os.path.exists(configfile):
        raise Exception('Unable to find the configuration file. \
                        Did you set environment variables? \
                        Or activate the virtualenv.')
    cfg = configparser.ConfigParser()
    cfg.read(configfile)

    # Absolute prefix that has to be stripped from the stored item paths.
    PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes")) + '/'

    r_serv_metadata = redis.StrictRedis(
        host=cfg.get("ARDB_Metadata", "host"),
        port=cfg.getint("ARDB_Metadata", "port"),
        db=cfg.getint("ARDB_Metadata", "db"),
        decode_responses=True)

    r_serv_tag = redis.StrictRedis(
        host=cfg.get("ARDB_Tags", "host"),
        port=cfg.getint("ARDB_Tags", "port"),
        db=cfg.getint("ARDB_Tags", "db"),
        decode_responses=True)

    print('Updating ARDB_Tags ...')
    start = time.time()

    # update item metadata tags
    rename_queue = 'maj:v1.5:absolute_path_to_rename'
    total_to_update = r_serv_tag.scard(rename_queue)
    nb_updated = 0

    while True:
        item_path = r_serv_tag.spop(rename_queue)
        # spop returns None once the set is empty (including when it was
        # empty from the start or drained by another process meanwhile).
        if item_path is None:
            break

        new_item_path = item_path.replace(PASTES_FOLDER, '', 1)
        # A path without the absolute prefix maps onto its own key; renaming
        # or merging a key onto itself must be skipped or its tags are lost.
        if new_item_path != item_path:
            old_tag_item_key = 'tag:{}'.format(item_path)
            new_tag_item_key = 'tag:{}'.format(new_item_path)
            # NOTE(review): renamenx presumably errors when the old key does
            # not exist - confirm every queued item has a 'tag:' set.
            res = r_serv_metadata.renamenx(old_tag_item_key, new_tag_item_key)
            if res == 0:
                # Destination already exists: merge the old tags into it.
                tags_key_fusion(old_tag_item_key, new_tag_item_key)

        nb_updated += 1
        # Guard the division: the initial count can be 0 if items were
        # queued after it was read.
        if total_to_update > 0:
            progress = int((nb_updated * 100) / total_to_update)
            print('{}/{} Tags updated {}%'.format(nb_updated, total_to_update, progress))

    end = time.time()

    print('Updating ARDB_Tags Done: {} s'.format(end - start))
|
|
@ -175,7 +175,6 @@ def get_crawler_splash_status(type):
|
||||||
return crawler_metadata
|
return crawler_metadata
|
||||||
|
|
||||||
def create_crawler_config(mode, service_type, crawler_config, domain):
|
def create_crawler_config(mode, service_type, crawler_config, domain):
|
||||||
print(crawler_config)
|
|
||||||
if mode == 'manual':
|
if mode == 'manual':
|
||||||
r_cache.set('crawler_config:{}:{}:{}'.format(mode, service_type, domain), json.dumps(crawler_config))
|
r_cache.set('crawler_config:{}:{}:{}'.format(mode, service_type, domain), json.dumps(crawler_config))
|
||||||
elif mode == 'auto':
|
elif mode == 'auto':
|
||||||
|
@ -559,8 +558,10 @@ def show_domain():
|
||||||
|
|
||||||
h = HiddenServices(domain, type, port=port)
|
h = HiddenServices(domain, type, port=port)
|
||||||
item_core = h.get_domain_crawled_core_item(epoch=epoch)
|
item_core = h.get_domain_crawled_core_item(epoch=epoch)
|
||||||
epoch = item_core['epoch']
|
if item_core:
|
||||||
l_pastes = h.get_last_crawled_pastes(item_root=item_core['root_item'])
|
l_pastes = h.get_last_crawled_pastes(item_root=item_core['root_item'])
|
||||||
|
else:
|
||||||
|
l_pastes = []
|
||||||
dict_links = h.get_all_links(l_pastes)
|
dict_links = h.get_all_links(l_pastes)
|
||||||
if l_pastes:
|
if l_pastes:
|
||||||
status = True
|
status = True
|
||||||
|
|
|
@ -63,7 +63,7 @@
|
||||||
{% for domain in domains_by_day[date] %}
|
{% for domain in domains_by_day[date] %}
|
||||||
<tr>
|
<tr>
|
||||||
<td>
|
<td>
|
||||||
<a target="_blank" href="{{ url_for('hiddenServices.onion_domain') }}?onion_domain={{ domain }}">{{ domain }}</a>
|
<a target="_blank" href="{{ url_for('hiddenServices.show_domain') }}?domain={{ domain }}">{{ domain }}</a>
|
||||||
<div>
|
<div>
|
||||||
{% for tag in domain_metadata[domain]['tags'] %}
|
{% for tag in domain_metadata[domain]['tags'] %}
|
||||||
<a href="{{ url_for('Tags.get_tagged_paste') }}?ltags={{ tag }}">
|
<a href="{{ url_for('Tags.get_tagged_paste') }}?ltags={{ tag }}">
|
||||||
|
|
Loading…
Reference in a new issue