mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-27 08:17:17 +00:00
commit
3eeaefa38a
5 changed files with 198 additions and 28 deletions
|
@ -15,10 +15,28 @@ from pubsublogger import publisher
|
||||||
|
|
||||||
from whoosh.index import create_in, exists_in, open_dir
|
from whoosh.index import create_in, exists_in, open_dir
|
||||||
from whoosh.fields import Schema, TEXT, ID
|
from whoosh.fields import Schema, TEXT, ID
|
||||||
|
import shutil
|
||||||
import os
|
import os
|
||||||
|
from os.path import join, getsize
|
||||||
|
|
||||||
from Helper import Process
|
from Helper import Process
|
||||||
|
|
||||||
|
# Config variable
|
||||||
|
TIME_WAIT = 60*15 #sec
|
||||||
|
|
||||||
|
# return in bytes
|
||||||
|
def check_index_size(baseindexpath, indexname):
    """Return the total size, in bytes, of one index directory.

    The directory ``join(baseindexpath, indexname)`` is walked recursively
    and the size of every file found below it is added up.

    Args:
        baseindexpath: Directory that contains the index directories.
        indexname: Name of the index directory. It is converted with
            ``str`` so that an integer timestamp is accepted as well as
            a string.

    Returns:
        The cumulated size in bytes. ``os.walk`` yields nothing for a
        missing directory, so the result is 0 in that case.
    """
    the_index_name = join(baseindexpath, str(indexname))
    cur_sum = 0
    for root, dirs, files in os.walk(the_index_name):
        for name in files:
            try:
                cur_sum += getsize(join(root, name))
            except OSError:
                # The file disappeared between the directory listing and
                # the size lookup: ignore this file only and keep the
                # sizes already accumulated.
                continue
    return cur_sum
|
||||||
|
|
||||||
|
def move_index_into_old_index_folder(baseindexpath):
    """Move every entry of ``baseindexpath`` into its ``old_index`` sub-folder.

    Args:
        baseindexpath: Directory whose content has to be moved. An entry
            named ``old_index`` is left where it is.

    The ``old_index`` folder is created when it is missing, but only if
    there is at least one entry to move, so an empty base directory stays
    empty.
    """
    old_index_path = join(baseindexpath, "old_index")
    to_move = [name for name in os.listdir(baseindexpath)
               if name != "old_index"]
    if not to_move:
        return

    # shutil.move() cannot place a regular file into a directory that does
    # not exist yet, so the destination folder must be present beforehand.
    if not os.path.isdir(old_index_path):
        os.makedirs(old_index_path)

    for cur_file in to_move:
        shutil.move(join(baseindexpath, cur_file),
                    join(old_index_path, cur_file))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
publisher.port = 6380
|
publisher.port = 6380
|
||||||
|
@ -29,20 +47,51 @@ if __name__ == "__main__":
|
||||||
p = Process(config_section)
|
p = Process(config_section)
|
||||||
|
|
||||||
# Indexer configuration - index dir and schema setup
|
# Indexer configuration - index dir and schema setup
|
||||||
indexpath = os.path.join(os.environ['AIL_HOME'],
|
baseindexpath = join(os.environ['AIL_HOME'],
|
||||||
p.config.get("Indexer", "path"))
|
p.config.get("Indexer", "path"))
|
||||||
|
indexRegister_path = join(os.environ['AIL_HOME'],
|
||||||
|
p.config.get("Indexer", "register"))
|
||||||
indexertype = p.config.get("Indexer", "type")
|
indexertype = p.config.get("Indexer", "type")
|
||||||
|
INDEX_SIZE_THRESHOLD = int(p.config.get("Indexer", "index_max_size"))
|
||||||
if indexertype == "whoosh":
|
if indexertype == "whoosh":
|
||||||
schema = Schema(title=TEXT(stored=True), path=ID(stored=True,
|
schema = Schema(title=TEXT(stored=True), path=ID(stored=True,
|
||||||
unique=True),
|
unique=True),
|
||||||
content=TEXT)
|
content=TEXT)
|
||||||
if not os.path.exists(indexpath):
|
if not os.path.exists(baseindexpath):
|
||||||
os.mkdir(indexpath)
|
os.mkdir(baseindexpath)
|
||||||
|
|
||||||
|
# create the index register if not present
|
||||||
|
time_now = int(time.time())
|
||||||
|
if not os.path.isfile(indexRegister_path): #index are not organised
|
||||||
|
print("Indexes are not organized")
|
||||||
|
print("moving all files in folder 'old_index' ")
|
||||||
|
#move all files to old_index folder
|
||||||
|
move_index_into_old_index_folder(baseindexpath)
|
||||||
|
print("Creating new index")
|
||||||
|
#create all_index.txt
|
||||||
|
with open(indexRegister_path, 'w') as f:
|
||||||
|
f.write(str(time_now))
|
||||||
|
#create dir
|
||||||
|
os.mkdir(join(baseindexpath, str(time_now)))
|
||||||
|
|
||||||
|
with open(indexRegister_path, "r") as f:
|
||||||
|
allIndex = f.read()
|
||||||
|
allIndex = allIndex.split() # format [time1\ntime2]
|
||||||
|
allIndex.sort()
|
||||||
|
|
||||||
|
try:
|
||||||
|
indexname = allIndex[-1].strip('\n\r')
|
||||||
|
except IndexError as e:
|
||||||
|
indexname = time_now
|
||||||
|
|
||||||
|
indexpath = join(baseindexpath, str(indexname))
|
||||||
if not exists_in(indexpath):
|
if not exists_in(indexpath):
|
||||||
ix = create_in(indexpath, schema)
|
ix = create_in(indexpath, schema)
|
||||||
else:
|
else:
|
||||||
ix = open_dir(indexpath)
|
ix = open_dir(indexpath)
|
||||||
|
|
||||||
|
last_refresh = time_now
|
||||||
|
|
||||||
# LOGGING #
|
# LOGGING #
|
||||||
publisher.info("ZMQ Indexer is Running")
|
publisher.info("ZMQ Indexer is Running")
|
||||||
|
|
||||||
|
@ -58,7 +107,24 @@ if __name__ == "__main__":
|
||||||
continue
|
continue
|
||||||
docpath = message.split(" ", -1)[-1]
|
docpath = message.split(" ", -1)[-1]
|
||||||
paste = PST.get_p_content()
|
paste = PST.get_p_content()
|
||||||
print "Indexing :", docpath
|
print "Indexing - "+indexname+" :", docpath
|
||||||
|
|
||||||
|
|
||||||
|
if time.time() - last_refresh > TIME_WAIT: #avoid calculating the index's size at each message
|
||||||
|
last_refresh = time.time()
|
||||||
|
if check_index_size(baseindexpath, indexname) >= INDEX_SIZE_THRESHOLD*(1000*1000):
|
||||||
|
timestamp = int(time.time())
|
||||||
|
print("Creating new index", timestamp)
|
||||||
|
indexpath = join(baseindexpath, str(timestamp))
|
||||||
|
indexname = str(timestamp)
|
||||||
|
#update all_index
|
||||||
|
with open(indexRegister_path, "a") as f:
|
||||||
|
f.write(str(timestamp))
|
||||||
|
#create new dir
|
||||||
|
os.mkdir(indexpath)
|
||||||
|
ix = create_in(indexpath, schema)
|
||||||
|
|
||||||
|
|
||||||
if indexertype == "whoosh":
|
if indexertype == "whoosh":
|
||||||
indexwriter = ix.writer()
|
indexwriter = ix.writer()
|
||||||
indexwriter.update_document(
|
indexwriter.update_document(
|
||||||
|
|
|
@ -123,6 +123,8 @@ cc_tld = r'\.de$'
|
||||||
[Indexer]
|
[Indexer]
|
||||||
type = whoosh
|
type = whoosh
|
||||||
path = indexdir
|
path = indexdir
|
||||||
|
#size in MB (1 MB = 1000*1000 bytes)
|
||||||
|
index_max_size = 2000
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
|
|
|
@ -7,10 +7,14 @@
|
||||||
import redis
|
import redis
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
import datetime
|
||||||
import flask
|
import flask
|
||||||
from flask import Flask, render_template, jsonify, request
|
from flask import Flask, render_template, jsonify, request
|
||||||
|
|
||||||
import Paste
|
import Paste
|
||||||
|
from whoosh import index
|
||||||
|
from whoosh.fields import Schema, TEXT, ID
|
||||||
|
from whoosh.qparser import QueryParser
|
||||||
|
|
||||||
# ============ VARIABLES ============
|
# ============ VARIABLES ============
|
||||||
import Flask_config
|
import Flask_config
|
||||||
|
@ -20,7 +24,62 @@ cfg = Flask_config.cfg
|
||||||
r_serv_pasteName = Flask_config.r_serv_pasteName
|
r_serv_pasteName = Flask_config.r_serv_pasteName
|
||||||
max_preview_char = Flask_config.max_preview_char
|
max_preview_char = Flask_config.max_preview_char
|
||||||
max_preview_modal = Flask_config.max_preview_modal
|
max_preview_modal = Flask_config.max_preview_modal
|
||||||
|
|
||||||
|
|
||||||
|
baseindexpath = os.path.join(os.environ['AIL_HOME'], cfg.get("Indexer", "path"))
|
||||||
|
indexRegister_path = os.path.join(os.environ['AIL_HOME'],
|
||||||
|
cfg.get("Indexer", "register"))
|
||||||
|
|
||||||
# ============ FUNCTIONS ============
|
# ============ FUNCTIONS ============
|
||||||
|
def get_current_index():
    """Return the path of the newest index listed in the index register.

    The register file (``indexRegister_path``) is read and split on
    whitespace; the entry that sorts last is taken as the current index
    name. When the register holds no entry, the name ``"no-index"`` is
    used instead.

    Returns:
        ``baseindexpath`` joined with the selected index name.
    """
    with open(indexRegister_path, "r") as register:
        index_names = register.read().split()  # format [time1\ntime2]

    if index_names:
        # The greatest string is the one a sort would place last.
        current_name = max(index_names).strip('\n\r')
    else:
        current_name = "no-index"

    return os.path.join(baseindexpath, current_name)
|
||||||
|
|
||||||
|
def get_index_list(selected_index=""):
    """Build the list of available indexes for the index selector.

    Every sub-directory of ``baseindexpath`` produces one
    ``[value, name, flag]`` entry:

    * ``value`` is the directory name,
    * ``name`` is a label made of the date, the size and the item count,
    * ``flag`` is True when the directory name equals the last
      ``/``-separated component of ``selected_index``.

    Args:
        selected_index: Path (or name) of the index to flag as selected.

    Returns:
        The entries sorted by directory name in descending order, with
        the ``old_index`` entry, when there is one, placed last.
    """
    listed = []
    old_index_entry = []
    for entry in os.listdir(baseindexpath):
        if not os.path.isdir(os.path.join(baseindexpath, entry)):
            continue

        date_part = to_iso_date(entry)
        size_part = str(get_dir_size(entry) / (1000*1000))
        count_part = str(get_item_count(entry))
        label = date_part + " - " + size_part + " Mb " + "(" + count_part + " Items" + ")"
        is_selected = entry == selected_index.split('/')[-1]

        row = [entry, label, is_selected]
        if entry == "old_index":
            # Kept aside so that it can be appended after the sort.
            old_index_entry = row
        else:
            listed.append(row)

    listed.sort(reverse=True, key=lambda row: row[0])
    if len(old_index_entry) != 0:
        listed.append(old_index_entry)
    return listed
|
||||||
|
|
||||||
|
def get_dir_size(directory):
    """Return the total size, in bytes, of an index directory.

    Args:
        directory: Name of the directory, relative to ``baseindexpath``.

    Returns:
        The sum of the sizes of all files found while walking the
        directory recursively. A file that can no longer be stat'ed is
        skipped on its own, so the other files of the same folder are
        still counted.
    """
    cur_sum = 0
    # A separate loop name keeps the ``directory`` parameter intact.
    for root, subdirs, files in os.walk(os.path.join(baseindexpath, directory)):
        for name in files:
            try:
                cur_sum += os.path.getsize(os.path.join(root, name))
            except OSError:  # File disappeared
                continue
    return cur_sum
|
||||||
|
|
||||||
|
def get_item_count(dirs):
    """Return the document count of the index stored in directory ``dirs``.

    Args:
        dirs: Name of the index directory, relative to ``baseindexpath``.

    Returns:
        The value reported by ``doc_count_all()`` on the opened index.
    """
    index_dir = os.path.join(baseindexpath, dirs)
    return index.open_dir(index_dir).doc_count_all()
|
||||||
|
|
||||||
|
def to_iso_date(timestamp):
    """Convert an index directory name to a ``YYYY-MM-DD`` date string.

    Args:
        timestamp: Directory name, expected to be a Unix timestamp in
            seconds written as a string (an int is accepted too).

    Returns:
        The local date of the timestamp as ``YYYY-MM-DD``. Names that
        are not usable timestamps -- ``"old_index"`` or any other
        non-numeric or out-of-range value -- are returned unchanged (as
        a string) so that an unexpected directory does not break the
        listing.
    """
    if timestamp == "old_index":
        return "old_index"
    try:
        moment = datetime.datetime.fromtimestamp(int(timestamp))
    except (ValueError, OverflowError, OSError):
        # Not a number, or a number outside the range the platform
        # can convert: show the raw name instead of failing.
        return str(timestamp)
    # str(datetime) is "YYYY-MM-DD HH:MM:SS"; keep the date part only.
    return str(moment).split()[0]
|
||||||
|
|
||||||
|
|
||||||
# ============ ROUTES ============
|
# ============ ROUTES ============
|
||||||
|
@ -34,8 +93,15 @@ def search():
|
||||||
c = [] #preview of the paste content
|
c = [] #preview of the paste content
|
||||||
paste_date = []
|
paste_date = []
|
||||||
paste_size = []
|
paste_size = []
|
||||||
|
index_name = request.form['index_name']
|
||||||
num_elem_to_get = 50
|
num_elem_to_get = 50
|
||||||
|
|
||||||
|
# select correct index
|
||||||
|
if index_name is None or index_name == "0":
|
||||||
|
selected_index = get_current_index()
|
||||||
|
else:
|
||||||
|
selected_index = os.path.join(baseindexpath, index_name)
|
||||||
|
|
||||||
# Search filename
|
# Search filename
|
||||||
for path in r_serv_pasteName.smembers(q[0]):
|
for path in r_serv_pasteName.smembers(q[0]):
|
||||||
r.append(path)
|
r.append(path)
|
||||||
|
@ -49,13 +115,9 @@ def search():
|
||||||
paste_size.append(paste._get_p_size())
|
paste_size.append(paste._get_p_size())
|
||||||
|
|
||||||
# Search full line
|
# Search full line
|
||||||
from whoosh import index
|
|
||||||
from whoosh.fields import Schema, TEXT, ID
|
|
||||||
schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT)
|
schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT)
|
||||||
|
|
||||||
indexpath = os.path.join(os.environ['AIL_HOME'], cfg.get("Indexer", "path"))
|
ix = index.open_dir(selected_index)
|
||||||
ix = index.open_dir(indexpath)
|
|
||||||
from whoosh.qparser import QueryParser
|
|
||||||
with ix.searcher() as searcher:
|
with ix.searcher() as searcher:
|
||||||
query = QueryParser("content", ix.schema).parse(" ".join(q))
|
query = QueryParser("content", ix.schema).parse(" ".join(q))
|
||||||
results = searcher.search_page(query, 1, pagelen=num_elem_to_get)
|
results = searcher.search_page(query, 1, pagelen=num_elem_to_get)
|
||||||
|
@ -72,7 +134,14 @@ def search():
|
||||||
results = searcher.search(query)
|
results = searcher.search(query)
|
||||||
num_res = len(results)
|
num_res = len(results)
|
||||||
|
|
||||||
return render_template("search.html", r=r, c=c, query=request.form['query'], paste_date=paste_date, paste_size=paste_size, char_to_display=max_preview_modal, num_res=num_res)
|
index_min = 1
|
||||||
|
index_max = len(get_index_list())
|
||||||
|
return render_template("search.html", r=r, c=c,
|
||||||
|
query=request.form['query'], paste_date=paste_date,
|
||||||
|
paste_size=paste_size, char_to_display=max_preview_modal,
|
||||||
|
num_res=num_res, index_min=index_min, index_max=index_max,
|
||||||
|
index_list=get_index_list(selected_index)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@app.route("/get_more_search_result", methods=['POST'])
|
@app.route("/get_more_search_result", methods=['POST'])
|
||||||
|
@ -81,20 +150,23 @@ def get_more_search_result():
|
||||||
q = []
|
q = []
|
||||||
q.append(query)
|
q.append(query)
|
||||||
page_offset = int(request.form['page_offset'])
|
page_offset = int(request.form['page_offset'])
|
||||||
|
index_name = request.form['index_name']
|
||||||
num_elem_to_get = 50
|
num_elem_to_get = 50
|
||||||
|
|
||||||
|
# select correct index
|
||||||
|
if index_name is None or index_name == "0":
|
||||||
|
selected_index = get_current_index()
|
||||||
|
else:
|
||||||
|
selected_index = os.path.join(baseindexpath, index_name)
|
||||||
|
|
||||||
path_array = []
|
path_array = []
|
||||||
preview_array = []
|
preview_array = []
|
||||||
date_array = []
|
date_array = []
|
||||||
size_array = []
|
size_array = []
|
||||||
|
|
||||||
from whoosh import index
|
|
||||||
from whoosh.fields import Schema, TEXT, ID
|
|
||||||
schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT)
|
schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT)
|
||||||
|
|
||||||
indexpath = os.path.join(os.environ['AIL_HOME'], cfg.get("Indexer", "path"))
|
ix = index.open_dir(selected_index)
|
||||||
ix = index.open_dir(indexpath)
|
|
||||||
from whoosh.qparser import QueryParser
|
|
||||||
with ix.searcher() as searcher:
|
with ix.searcher() as searcher:
|
||||||
query = QueryParser("content", ix.schema).parse(" ".join(q))
|
query = QueryParser("content", ix.schema).parse(" ".join(q))
|
||||||
results = searcher.search_page(query, page_offset, num_elem_to_get)
|
results = searcher.search_page(query, page_offset, num_elem_to_get)
|
||||||
|
@ -113,7 +185,6 @@ def get_more_search_result():
|
||||||
to_return["preview_array"] = preview_array
|
to_return["preview_array"] = preview_array
|
||||||
to_return["date_array"] = date_array
|
to_return["date_array"] = date_array
|
||||||
to_return["size_array"] = size_array
|
to_return["size_array"] = size_array
|
||||||
print "len(path_array)="+str(len(path_array))
|
|
||||||
if len(path_array) < num_elem_to_get: #pagelength
|
if len(path_array) < num_elem_to_get: #pagelength
|
||||||
to_return["moreData"] = False
|
to_return["moreData"] = False
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -89,6 +89,16 @@
|
||||||
</div>
|
</div>
|
||||||
<!-- /.panel-heading -->
|
<!-- /.panel-heading -->
|
||||||
<div class="panel-body">
|
<div class="panel-body">
|
||||||
|
<div class="row">
|
||||||
|
<div class="col-md-12">
|
||||||
|
<strong style="">Index: </strong>
|
||||||
|
<select class="form-control" id="index_name" style="display: inline-block; margin-bottom: 5px; width: 30%">
|
||||||
|
{% for indexElem in index_list %}
|
||||||
|
<option {% if indexElem[2] %} selected="selected" {% endif %} value="{{ indexElem[0] }}" >{{ indexElem[1] }}</option>
|
||||||
|
{% endfor %}
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
<table class="table table-striped table-bordered table-hover" id="myTable">
|
<table class="table table-striped table-bordered table-hover" id="myTable">
|
||||||
<thead>
|
<thead>
|
||||||
<tr>
|
<tr>
|
||||||
|
@ -100,16 +110,14 @@
|
||||||
</tr>
|
</tr>
|
||||||
</thead>
|
</thead>
|
||||||
<tbody id="table_body">
|
<tbody id="table_body">
|
||||||
{% set i = 0 %}
|
|
||||||
{% for path in r %}
|
{% for path in r %}
|
||||||
<tr>
|
<tr>
|
||||||
<td>{{ i + 1 }}</td>
|
<td>{{ loop.index0 + 1 }}</td>
|
||||||
<td><a target="_blank" href="{{ url_for('showsavedpaste') }}?paste={{ path }}&num={{ i+1 }}"> {{ path }}</a></td>
|
<td><a target="_blank" href="{{ url_for('showsavedpaste') }}?paste={{ path }}&num={{ loop.index0+1 }}"> {{ path }}</a></td>
|
||||||
<td>{{ paste_date[i] }}</td>
|
<td>{{ paste_date[loop.index0] }}</td>
|
||||||
<td>{{ paste_size[i] }}</td>
|
<td>{{ paste_size[loop.index0] }}</td>
|
||||||
<td><p><span class="glyphicon glyphicon-info-sign" data-toggle="tooltip" data-placement="left" title="{{ c[i] }}"></span> <button type="button" class="btn-link" data-num="{{ i + 1 }}" data-toggle="modal" data-target="#mymodal" data-url="{{ url_for('showsavedpaste') }}?paste={{ path }}&num={{ i+1 }}" data-path="{{ path }}"><span class="fa fa-search-plus"></span></button></p></td>
|
<td><p><span class="glyphicon glyphicon-info-sign" data-toggle="tooltip" data-placement="left" title="{{ c[loop.index0] }}"></span> <button type="button" class="btn-link" data-num="{{ loop.index0 + 1 }}" data-toggle="modal" data-target="#mymodal" data-url="{{ url_for('showsavedpaste') }}?paste={{ path }}&num={{ loop.index0+1 }}" data-path="{{ path }}"><span class="fa fa-search-plus"></span></button></p></td>
|
||||||
</tr>
|
</tr>
|
||||||
{% set i = i + 1 %}
|
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
</tbody>
|
</tbody>
|
||||||
</table>
|
</table>
|
||||||
|
@ -157,6 +165,28 @@
|
||||||
if (init_num_of_elements_in_table == pagelen) {
|
if (init_num_of_elements_in_table == pagelen) {
|
||||||
$("#load_more_json_button1").show();
|
$("#load_more_json_button1").show();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$('#index_name').on('change', function() {
|
||||||
|
var form = document.createElement('form');
|
||||||
|
form.setAttribute("method", 'post');
|
||||||
|
form.setAttribute("action", "{{ url_for('search') }}");
|
||||||
|
|
||||||
|
var input1 = document.createElement('input');
|
||||||
|
input1.setAttribute("type", "hidden");
|
||||||
|
input1.setAttribute("name", "index_name");
|
||||||
|
input1.setAttribute("value", this.value);
|
||||||
|
form.appendChild(input1);
|
||||||
|
|
||||||
|
var input2 = document.createElement('input');
|
||||||
|
input2.setAttribute("type", "hidden");
|
||||||
|
input2.setAttribute("name", "query");
|
||||||
|
input2.setAttribute("value", "{{ query }}");
|
||||||
|
form.appendChild(input2);
|
||||||
|
|
||||||
|
document.body.appendChild(form);
|
||||||
|
form.submit();
|
||||||
|
})
|
||||||
|
|
||||||
});
|
});
|
||||||
|
|
||||||
</script>
|
</script>
|
||||||
|
@ -171,7 +201,7 @@
|
||||||
}
|
}
|
||||||
|
|
||||||
function load_search_50_data() {
|
function load_search_50_data() {
|
||||||
var options = { query: query, page_offset: page_offset };
|
var options = { query: query, page_offset: page_offset, index_name: $("#index_name").val() };
|
||||||
$.post( "{{ url_for('get_more_search_result') }}", options).done(function( data ) {
|
$.post( "{{ url_for('get_more_search_result') }}", options).done(function( data ) {
|
||||||
|
|
||||||
for(i=0; i<data.path_array.length; i++) {
|
for(i=0; i<data.path_array.length; i++) {
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
<div class="input-group custom-search-form">
|
<div class="input-group custom-search-form">
|
||||||
<form action="/search" id="form-search" method=POST>
|
<form action="/search" id="form-search" method=POST>
|
||||||
<input type="text" name="query" class="form-control" placeholder="Search Paste">
|
<input type="text" name="query" class="form-control" placeholder="Search Paste">
|
||||||
|
<input type="hidden" name="index_name" class="form-control" value="0" placeholder="Index Name">
|
||||||
<span class="input-group-btn">
|
<span class="input-group-btn">
|
||||||
<button class="btn btn-default" type="submit">
|
<button class="btn btn-default" type="submit">
|
||||||
<i class="fa fa-search"></i>
|
<i class="fa fa-search"></i>
|
||||||
|
|
Loading…
Reference in a new issue