Added backward support + renamed index with timestamp

This commit is contained in:
Mokaddem 2017-03-15 11:51:35 +01:00
parent 381e72ee99
commit 4005b72180
3 changed files with 55 additions and 22 deletions

View file

@ -25,15 +25,23 @@ INDEX_SIZE_THRESHOLD = 500 #Mb
TIME_WAIT = 1.0 #sec TIME_WAIT = 1.0 #sec
# return in bytes # return in bytes
def check_index_size(indexnum): def check_index_size(indexname):
global baseindexpath global baseindexpath
the_index_name = "index_"+str(indexnum) if indexnum != 0 else "old_index" the_index_name = indexname if indexname != "0" else "old_index"
the_index_name = os.path.join(baseindexpath, the_index_name) the_index_name = join(baseindexpath, the_index_name)
cur_sum = 0 cur_sum = 0
for root, dirs, files in os.walk(the_index_name): for root, dirs, files in os.walk(the_index_name):
cur_sum += sum(getsize(join(root, name)) for name in files) cur_sum += sum(getsize(join(root, name)) for name in files)
return cur_sum return cur_sum
def move_index_into_old_index_folder(baseindexpath):
    """Move every entry of *baseindexpath* into its ``old_index`` sub-folder.

    Used for backward compatibility: an index directory that predates the
    index register is relocated as a whole under ``<baseindexpath>/old_index``.

    The ``old_index`` folder is created when missing and reused when it
    already exists. Filesystem calls are used instead of shell commands so
    that paths containing spaces or shell metacharacters are handled
    correctly.

    :param baseindexpath: directory holding the index files to relocate.
    :raises OSError: if ``old_index`` cannot be created or listed.
    :raises shutil.Error: if an entry cannot be moved (e.g. name clash).
    """
    # Local imports: only the names already used by this function (os, join)
    # are assumed to be available at module level.
    import errno
    import shutil

    old_index_path = join(baseindexpath, "old_index")
    try:
        os.mkdir(old_index_path)
    except OSError as e:
        # An already existing old_index folder is fine, anything else is not.
        if e.errno != errno.EEXIST:
            raise

    for entry in os.listdir(baseindexpath):
        # Never try to move the destination folder into itself.
        if entry != "old_index":
            shutil.move(join(baseindexpath, entry),
                        join(old_index_path, entry))
if __name__ == "__main__": if __name__ == "__main__":
publisher.port = 6380 publisher.port = 6380
@ -44,9 +52,9 @@ if __name__ == "__main__":
p = Process(config_section) p = Process(config_section)
# Indexer configuration - index dir and schema setup # Indexer configuration - index dir and schema setup
baseindexpath = os.path.join(os.environ['AIL_HOME'], baseindexpath = join(os.environ['AIL_HOME'],
p.config.get("Indexer", "path")) p.config.get("Indexer", "path"))
indexRegister_path = os.path.join(os.environ['AIL_HOME'], indexRegister_path = join(os.environ['AIL_HOME'],
p.config.get("Indexer", "register")) p.config.get("Indexer", "register"))
indexertype = p.config.get("Indexer", "type") indexertype = p.config.get("Indexer", "type")
if indexertype == "whoosh": if indexertype == "whoosh":
@ -57,23 +65,33 @@ if __name__ == "__main__":
os.mkdir(baseindexpath) os.mkdir(baseindexpath)
# create the index register if not present # create the index register if not present
if not os.path.isfile(indexRegister_path): time_now = int(time.time())
if not os.path.isfile(indexRegister_path): #index are not organised
#move all files to old_index folder
move_index_into_old_index_folder(baseindexpath)
#create all_index.txt
with open(indexRegister_path, 'w') as f: with open(indexRegister_path, 'w') as f:
f.write("1") f.write(str(time_now))
#create dir
os.system("mkdir "+join(baseindexpath, str(time_now)))
with open(indexRegister_path, "r") as f: with open(indexRegister_path, "r") as f:
allIndex = f.read() allIndex = f.read()
allIndex = allIndex.split(',') allIndex = allIndex.split(',') # format [time1,time2]
allIndex.sort() allIndex.sort()
indexnum = int(allIndex[-1])
indexpath = os.path.join(baseindexpath, "index_"+str(indexnum)) try:
indexname = allIndex[-1].strip('\n\r')
except IndexError as e:
indexname = time_now
indexpath = join(baseindexpath, str(indexname))
if not exists_in(indexpath): if not exists_in(indexpath):
ix = create_in(indexpath, schema) ix = create_in(indexpath, schema)
else: else:
ix = open_dir(indexpath) ix = open_dir(indexpath)
last_refresh = time.time() last_refresh = time_now
# LOGGING # # LOGGING #
publisher.info("ZMQ Indexer is Running") publisher.info("ZMQ Indexer is Running")
@ -90,17 +108,19 @@ if __name__ == "__main__":
continue continue
docpath = message.split(" ", -1)[-1] docpath = message.split(" ", -1)[-1]
paste = PST.get_p_content() paste = PST.get_p_content()
print "Indexing :", docpath print "Indexing - "+indexname+" :", docpath
if time.time() - last_refresh > TIME_WAIT: #avoid calculating the index's size at each message if time.time() - last_refresh > TIME_WAIT: #avoid calculating the index's size at each message
last_refresh = time.time() last_refresh = time.time()
if check_index_size(indexnum) > INDEX_SIZE_THRESHOLD*(1000*1000): if check_index_size(indexname) > INDEX_SIZE_THRESHOLD*(1000*1000):
indexpath = os.path.join(baseindexpath, "index_"+str(indexnum+1)) timestamp = int(time.time())
ix = create_in(indexpath, schema, indexname=str(indexnum+1)) indexpath = join(baseindexpath, str(timestamp))
ix = create_in(indexpath, schema)
indexname = str(timestamp)
## Correctly handle the file ## Correctly handle the file
with open(indexRegister_path, "a") as f: with open(indexRegister_path, "a") as f:
f.write(","+str(indexnum)) f.write(","+str(timestamp))
if indexertype == "whoosh": if indexertype == "whoosh":

View file

@ -7,6 +7,7 @@
import redis import redis
import json import json
import os import os
import datetime
import flask import flask
from flask import Flask, render_template, jsonify, request from flask import Flask, render_template, jsonify, request
@ -33,10 +34,13 @@ indexRegister_path = os.path.join(os.environ['AIL_HOME'],
def get_current_index(): def get_current_index():
with open(indexRegister_path, "r") as f: with open(indexRegister_path, "r") as f:
allIndex = f.read() allIndex = f.read()
allIndex = allIndex.split(',') allIndex = allIndex.split(',') # format [time1,time2]
allIndex.sort() allIndex.sort()
indexnum = int(allIndex[-1]) try:
indexpath = os.path.join(baseindexpath, "index_"+str(indexnum)) indexname = allIndex[-1].strip('\n\r')
except IndexError as e:
indexname = "no-index"
indexpath = os.path.join(baseindexpath, indexname)
return indexpath return indexpath
def get_index_list(selected_index=""): def get_index_list(selected_index=""):
@ -44,23 +48,32 @@ def get_index_list(selected_index=""):
for dirs in os.listdir(baseindexpath): for dirs in os.listdir(baseindexpath):
if os.path.isdir(os.path.join(baseindexpath, dirs)): if os.path.isdir(os.path.join(baseindexpath, dirs)):
value = dirs value = dirs
name = dirs + " - " + \ name = to_iso_date(dirs) + " - " + \
str(get_dir_size(dirs) / (1000*1000)) + " Mb " + \ str(get_dir_size(dirs) / (1000*1000)) + " Mb " + \
"(" + str(get_item_count(dirs)) + " Items" + ")" "(" + str(get_item_count(dirs)) + " Items" + ")"
flag = dirs==selected_index.split('/')[-1] flag = dirs==selected_index.split('/')[-1]
index_list.append([ value, name, flag]) index_list.append([ value, name, flag])
return index_list return index_list
def get_dir_size(directory): def get_dir_size(directory):
cur_sum = 0 cur_sum = 0
for directory, subdirs, files in os.walk(os.path.join(baseindexpath,directory)): for directory, subdirs, files in os.walk(os.path.join(baseindexpath,directory)):
cur_sum += sum(os.path.getsize(os.path.join(directory, name)) for name in files) try:
cur_sum += sum(os.path.getsize(os.path.join(directory, name)) for name in files)
except OSError as e: #File disappeared
pass
return cur_sum return cur_sum
def get_item_count(dirs): def get_item_count(dirs):
ix = index.open_dir(os.path.join(baseindexpath, dirs)) ix = index.open_dir(os.path.join(baseindexpath, dirs))
return ix.doc_count_all() return ix.doc_count_all()
def to_iso_date(timestamp):
    """Return a display label for an index directory name.

    Directory names that are integer Unix timestamps are rendered as a
    ``YYYY-MM-DD`` date in local time. Any other name (``"old_index"``, or a
    folder that is not named after a timestamp) is returned unchanged, so
    that one unexpected directory cannot break the whole index listing.

    :param timestamp: directory name, normally a Unix timestamp as a string.
    :return: the ISO date string, or *timestamp* itself when it is not a
        usable timestamp.
    """
    try:
        moment = datetime.datetime.fromtimestamp(int(timestamp))
    except (ValueError, OverflowError, OSError):
        # Not numeric, or outside the range the platform can convert.
        return timestamp
    return moment.date().isoformat()
# ============ ROUTES ============ # ============ ROUTES ============

View file

@ -92,7 +92,7 @@
<div class="row"> <div class="row">
<div class="col-md-12"> <div class="col-md-12">
<strong style="">Index: </strong> <strong style="">Index: </strong>
<select class="form-control" id="index_name" style="display: inline-block; margin-bottom: 5px; width: 25%"> <select class="form-control" id="index_name" style="display: inline-block; margin-bottom: 5px; width: 30%">
{% for indexElem in index_list %} {% for indexElem in index_list %}
<option {% if indexElem[2] %} selected="selected" {% endif %} value="{{ indexElem[0] }}" >{{ indexElem[1] }}</option> <option {% if indexElem[2] %} selected="selected" {% endif %} value="{{ indexElem[0] }}" >{{ indexElem[1] }}</option>
{% endfor %} {% endfor %}