mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-12-03 11:17:17 +00:00
Indexer now creates a new index_dir when the current one becomes too large (search in index not updated yet)
This commit is contained in:
parent
3c9cdac39a
commit
248469d61e
1 changed files with 51 additions and 7 deletions
|
@ -16,9 +16,24 @@ from pubsublogger import publisher
|
||||||
from whoosh.index import create_in, exists_in, open_dir
|
from whoosh.index import create_in, exists_in, open_dir
|
||||||
from whoosh.fields import Schema, TEXT, ID
|
from whoosh.fields import Schema, TEXT, ID
|
||||||
import os
|
import os
|
||||||
|
from os.path import join, getsize
|
||||||
|
|
||||||
from Helper import Process
|
from Helper import Process
|
||||||
|
|
||||||
|
# Config variable
|
||||||
|
INDEX_SIZE_THRESHOLD = 500 #Mb
|
||||||
|
TIME_WAIT = 1.0 #sec
|
||||||
|
|
||||||
|
# return in bytes
|
||||||
|
def check_index_size(indexnum):
    """Return the total size, in bytes, of the files in one index directory.

    ``indexnum`` 0 selects the "old_index" directory; any other value
    selects "index_<indexnum>". The directory is resolved under the
    module-level ``baseindexpath`` and walked recursively, adding up the
    size of every file found. A directory that does not exist yields 0,
    because the walk then produces no entries.
    """
    global baseindexpath
    if indexnum != 0:
        dir_name = "index_" + str(indexnum)
    else:
        dir_name = "old_index"
    index_dir = os.path.join(baseindexpath, dir_name)

    total_bytes = 0
    for parent, _subdirs, filenames in os.walk(index_dir):
        for filename in filenames:
            total_bytes += getsize(join(parent, filename))
    return total_bytes
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
publisher.port = 6380
|
publisher.port = 6380
|
||||||
|
@ -29,19 +44,36 @@ if __name__ == "__main__":
|
||||||
p = Process(config_section)
|
p = Process(config_section)
|
||||||
|
|
||||||
# Indexer configuration - index dir and schema setup
|
# Indexer configuration - index dir and schema setup
|
||||||
indexpath = os.path.join(os.environ['AIL_HOME'],
|
baseindexpath = os.path.join(os.environ['AIL_HOME'],
|
||||||
p.config.get("Indexer", "path"))
|
p.config.get("Indexer", "path"))
|
||||||
|
indexRegister_path = os.path.join(os.environ['AIL_HOME'],
|
||||||
|
p.config.get("Indexer", "register"))
|
||||||
indexertype = p.config.get("Indexer", "type")
|
indexertype = p.config.get("Indexer", "type")
|
||||||
if indexertype == "whoosh":
|
if indexertype == "whoosh":
|
||||||
schema = Schema(title=TEXT(stored=True), path=ID(stored=True,
|
schema = Schema(title=TEXT(stored=True), path=ID(stored=True,
|
||||||
unique=True),
|
unique=True),
|
||||||
content=TEXT)
|
content=TEXT)
|
||||||
if not os.path.exists(indexpath):
|
if not os.path.exists(baseindexpath):
|
||||||
os.mkdir(indexpath)
|
os.mkdir(baseindexpath)
|
||||||
if not exists_in(indexpath):
|
|
||||||
ix = create_in(indexpath, schema)
|
# create the index register if not present
|
||||||
else:
|
if not os.path.isfile(indexRegister_path):
|
||||||
ix = open_dir(indexpath)
|
with open(indexRegister_path, 'w') as f:
|
||||||
|
f.write("1")
|
||||||
|
|
||||||
|
with open(indexRegister_path, "r") as f:
|
||||||
|
allIndex = f.read()
|
||||||
|
allIndex = allIndex.split(',')
|
||||||
|
allIndex.sort()
|
||||||
|
indexnum = int(allIndex[-1])
|
||||||
|
|
||||||
|
indexpath = os.path.join(baseindexpath, "index_"+str(indexnum))
|
||||||
|
if not exists_in(indexpath):
|
||||||
|
ix = create_in(indexpath, schema)
|
||||||
|
else:
|
||||||
|
ix = open_dir(indexpath)
|
||||||
|
|
||||||
|
last_refresh = time.time()
|
||||||
|
|
||||||
# LOGGING #
|
# LOGGING #
|
||||||
publisher.info("ZMQ Indexer is Running")
|
publisher.info("ZMQ Indexer is Running")
|
||||||
|
@ -59,6 +91,18 @@ if __name__ == "__main__":
|
||||||
docpath = message.split(" ", -1)[-1]
|
docpath = message.split(" ", -1)[-1]
|
||||||
paste = PST.get_p_content()
|
paste = PST.get_p_content()
|
||||||
print "Indexing :", docpath
|
print "Indexing :", docpath
|
||||||
|
|
||||||
|
|
||||||
|
if time.time() - last_refresh > TIME_WAIT: #avoid calculating the index's size at each message
|
||||||
|
last_refresh = time.time()
|
||||||
|
if check_index_size(indexnum) > INDEX_SIZE_THRESHOLD*(1000*1000):
|
||||||
|
indexpath = os.path.join(baseindexpath, "index_"+str(indexnum+1))
|
||||||
|
ix = create_in(indexpath, schema, indexname=str(indexnum+1))
|
||||||
|
## Correctly handle the file
|
||||||
|
with open(indexRegister_path, "a") as f:
|
||||||
|
f.write(","+str(indexnum))
|
||||||
|
|
||||||
|
|
||||||
if indexertype == "whoosh":
|
if indexertype == "whoosh":
|
||||||
indexwriter = ix.writer()
|
indexwriter = ix.writer()
|
||||||
indexwriter.update_document(
|
indexwriter.update_document(
|
||||||
|
|
Loading…
Reference in a new issue