#!/usr/bin/env python3
# -*-coding:UTF-8 -*

# Provenance: bin/lib/index_whoosh.py, created by commit
# 25420005e7b92fdbf217046e78ff482be9544681 (Terrtia, Tue, 30 Jun 2020),
# subject "chg: [whosh index] add data retention fct".

"""Listing and deletion helpers for the index directories (data retention).

Index directories live under ``INDEX_PATH`` and their names are listed, one
per line, in ``all_index.txt`` inside that directory.  The retention helpers
treat the listing order as oldest-first (they keep the trailing entries).
"""

import os
import sys
import redis

from shutil import rmtree

sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader

config_loader = ConfigLoader.ConfigLoader()
INDEX_PATH = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Indexer", "path"))
all_index_file = os.path.join(INDEX_PATH, 'all_index.txt')
config_loader = None


def get_first_index_name():
    """Return the first non-empty name listed in ``all_index.txt``.

    Blank lines are skipped, matching ``get_all_index``.  Returns an empty
    string when the file lists no index at all.
    """
    with open(all_index_file) as f:
        for line in f:
            index_name = line.rstrip('\n')
            if index_name:
                return index_name
    return ''


def get_last_index_name():
    """Return the last non-empty name listed in ``all_index.txt``.

    Blank lines are skipped, matching ``get_all_index``.  Returns an empty
    string when the file lists no index at all (instead of failing on an
    unbound loop variable).
    """
    last_index = ''
    with open(all_index_file) as f:
        for line in f:  # FIXME: replace by tail ?
            index_name = line.rstrip('\n')
            if index_name:
                last_index = index_name
    return last_index


def get_all_index():
    """Return every non-empty name listed in ``all_index.txt``, in file order."""
    with open(all_index_file) as f:
        stripped = (line.rstrip('\n') for line in f)
        return [index_name for index_name in stripped if index_name]


def get_index_full_path(index_name):
    """Return the path of ``index_name`` joined under ``INDEX_PATH``."""
    return os.path.join(INDEX_PATH, index_name)


# remove empty line
def check_index_list_integrity():
    """Rewrite ``all_index.txt`` without its empty lines."""
    with open(all_index_file, 'r') as f:
        lines = f.readlines()
    with open(all_index_file, 'w') as f:
        f.writelines(line for line in lines if line != '\n')


def _remove_index_name_from_all_index(index_name):
    """Rewrite ``all_index.txt`` without the line(s) equal to ``index_name``."""
    with open(all_index_file, 'r') as f:
        lines = f.readlines()
    with open(all_index_file, 'w') as f:
        f.writelines(line for line in lines if line.rstrip('\n') != index_name)


def delete_index_by_name(index_name):
    """Delete the directory of ``index_name`` and drop it from ``all_index.txt``.

    Returns None in every case.  Nothing is deleted (an error is printed)
    when ``index_name`` is empty or when its directory does not exist.

    Raises:
        Exception: if the resolved path is not strictly inside the index
            root (path traversal, or a name resolving to the root itself).
    """
    # An empty name would resolve to the index root and wipe every index.
    if not index_name:
        print('Error: empty index name')
        return None

    # Resolve both sides so symlinks and '..' segments cannot skew the check.
    index_root = os.path.realpath(INDEX_PATH)
    index_path = os.path.realpath(get_index_full_path(index_name))

    # commonpath compares whole path components; commonprefix is
    # character-based and would accept a sibling such as '<root>_other'.
    try:
        inside_root = os.path.commonpath([index_path, index_root]) == index_root
    except ValueError:
        # Raised e.g. for paths on different drives: not inside the root.
        inside_root = False

    # incorrect filename
    if not inside_root or index_path == index_root:
        raise Exception('Path traversal detected {}'.format(index_path))
    if not os.path.isdir(index_path):
        print('Error: The index directory {} doesn\'t exist'.format(index_path))
        return None

    rmtree(index_path)
    _remove_index_name_from_all_index(index_name)


def delete_first_index():
    """Delete the index named by the first entry of ``all_index.txt``."""
    index_name = get_first_index_name()
    delete_index_by_name(index_name)


def delete_last_index():
    """Delete the index named by the last entry of ``all_index.txt``."""
    index_name = get_last_index_name()
    delete_index_by_name(index_name)


#### DATA RETENTION ####

# keep time most recent index
def delete_older_index_by_time(int_time):
    """Delete every listed index whose integer name is below ``int_time``.

    Index names are parsed with ``int()``; a non-numeric name raises
    ValueError.  Nothing is deleted unless the last listed index is greater
    than ``int_time``, so at least that one always survives.
    NOTE(review): names look like timestamps/dates - confirm the unit
    expected for ``int_time`` against the indexer that creates them.
    """
    all_index = get_all_index()
    if not all_index:
        return
    # make sure to keep one index
    if int(all_index[-1]) <= int_time:
        return
    for index_name in all_index:
        if int(index_name) < int_time:
            print('deleting index {} ...'.format(index_name))
            delete_index_by_name(index_name)


# keep x most recent index
def delete_older_index(number_of_index_to_keep):
    """Keep the last ``number_of_index_to_keep`` listed indexes, delete the rest.

    Does nothing when ``number_of_index_to_keep`` is 1 or less, or when the
    list does not hold more entries than the number to keep.
    """
    if number_of_index_to_keep <= 1:
        return
    all_index = get_all_index()
    # Reuse the list already read instead of parsing the file a second time.
    if len(all_index) > number_of_index_to_keep:
        for index_name in all_index[:-number_of_index_to_keep]:
            print('deleting index {} ...'.format(index_name))
            delete_index_by_name(index_name)

##-- DATA RETENTION --##

# if __name__ == '__main__':
#     delete_older_index(3)