chg: [whosh index] add data retention fct

This commit is contained in:
Terrtia 2020-06-30 10:42:10 +02:00
parent f20df89446
commit 25420005e7
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0

103
bin/lib/index_whoosh.py Executable file
View file

@ -0,0 +1,103 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import redis
from shutil import rmtree
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
config_loader = ConfigLoader.ConfigLoader()
INDEX_PATH = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Indexer", "path"))
all_index_file = os.path.join(INDEX_PATH, 'all_index.txt')
config_loader = None
def get_first_index_name():
with open(all_index_file) as f:
first_index = f.readline().replace('\n', '')
return first_index
def get_last_index_name():
with open(all_index_file) as f:
for line in f: # # FIXME: replace by tail ?
pass
last_index = line.replace('\n', '')
return last_index
def get_all_index():
all_index = []
with open(all_index_file) as f:
for line in f:
line = line.replace('\n', '')
if line:
all_index.append(line)
return all_index
def get_index_full_path(index_name):
return os.path.join(INDEX_PATH, index_name)
# remove empty line
def check_index_list_integrity():
with open(all_index_file, 'r') as f:
lines = f.readlines()
with open(all_index_file, 'w') as f:
for line in lines:
if line != '\n':
f.write(line)
def _remove_index_name_from_all_index(index_name):
with open(all_index_file, 'r') as f:
lines = f.readlines()
with open(all_index_file, 'w') as f:
for line in lines:
if line.replace('\n', '') != index_name:
f.write(line)
def delete_index_by_name(index_name):
index_path = get_index_full_path(index_name)
index_path = os.path.realpath(index_path)
# incorrect filename
if not os.path.commonprefix([index_path, INDEX_PATH]) == INDEX_PATH:
raise Exception('Path traversal detected {}'.format(index_path))
if not os.path.isdir(index_path):
print('Error: The index directory {} doesn\'t exist'.format(index_path))
return None
res = rmtree(index_path)
_remove_index_name_from_all_index(index_name)
def delete_first_index():
index_name = get_first_index_name()
delete_index_by_name(index_name)
def delete_last_index():
index_name = get_last_index_name()
delete_index_by_name(index_name)
#### DATA RETENTION ####
#keep time most recent index
def delete_older_index_by_time(int_time):
all_index = get_all_index()
if all_index:
if int(all_index[-1]) > int_time: # make sure to keep one files
for index_name in all_index:
if int(index_name) < int_time:
print('deleting index {} ...'.format(index_name))
delete_index_by_name(index_name)
# keep x most recent index
def delete_older_index(number_of_index_to_keep):
if number_of_index_to_keep > 1:
all_index = get_all_index()
if len(get_all_index()) > number_of_index_to_keep:
for index_name in all_index[0:-number_of_index_to_keep]:
print('deleting index {} ...'.format(index_name))
delete_index_by_name(index_name)
##-- DATA RETENTION --##
# if __name__ == '__main__':
# delete_older_index(3)