ail-framework/var/www/modules/search/Flask_search.py

261 lines
8.6 KiB
Python
Raw Normal View History

2018-05-04 11:53:29 +00:00
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
'''
Flask functions and routes for the search page
'''
import redis
import json
import os
import datetime
import flask
from flask import Flask, render_template, jsonify, request, Blueprint
import Paste
from whoosh import index
from whoosh.fields import Schema, TEXT, ID
from whoosh.qparser import QueryParser
# ============ VARIABLES ============
# Application objects and settings are created in Flask_config; they are bound
# to short module-level names here for use by the helpers and routes below.
import Flask_config
app = Flask_config.app  # application object the blueprint is registered on at the end of the file
cfg = Flask_config.cfg  # configuration object, read through cfg.get(section, option)
r_serv_pasteName = Flask_config.r_serv_pasteName  # queried with smembers(query) for paste-name matches
r_serv_metadata = Flask_config.r_serv_metadata  # queried with smembers('tag:' + path) for paste tags
max_preview_char = Flask_config.max_preview_char  # length limit applied to result previews
max_preview_modal = Flask_config.max_preview_modal  # handed to search.html as char_to_display
# Directory below $AIL_HOME whose sub-directories are opened as indexes.
baseindexpath = os.path.join(os.environ['AIL_HOME'], cfg.get("Indexer", "path"))
2018-04-17 14:06:32 +00:00
# File below $AIL_HOME holding whitespace-separated index names; read by
# get_current_index(), which uses the name that sorts last.
indexRegister_path = os.path.join(os.environ['AIL_HOME'],
                                  cfg.get("Indexer", "register"))
# Blueprint that carries the search routes; templates are looked up in 'templates'.
searches = Blueprint('searches', __name__, template_folder='templates')
# ============ FUNCTIONS ============
def get_current_index():
    """Return the path of the registered index whose name sorts last.

    The register file is read as a whitespace-separated list of index
    names. The names are sorted as strings and the last one is joined onto
    the base index directory. When the register lists no name at all, the
    returned path ends in "no-index".
    """
    with open(indexRegister_path, "r") as register:
        registered_names = sorted(register.read().split())

    if registered_names:
        latest_name = registered_names[-1].strip('\n\r')
    else:
        latest_name = "no-index"
    return os.path.join(baseindexpath, latest_name)
def get_index_list(selected_index=""):
    """Describe every index directory found below the base index path.

    Args:
        selected_index: Path of the index currently in use; only its last
            '/'-separated component is compared against directory names.

    Returns:
        A list of ``[directory_name, label, is_selected]`` entries sorted by
        directory name in descending order. The label combines the date,
        the size divided by 1000*1000 and the item count. An "old_index"
        directory, if present, is always placed last.
    """
    regular_entries = []
    old_index_entry = []
    for entry_name in os.listdir(baseindexpath):
        if not os.path.isdir(os.path.join(baseindexpath, entry_name)):
            continue

        date_label = to_iso_date(entry_name)
        size_label = str(get_dir_size(entry_name) / (1000*1000))
        count_label = str(get_item_count(entry_name))
        label = date_label + " - " + size_label + " Mb " + "(" + count_label + " Items" + ")"
        is_selected = entry_name == selected_index.split('/')[-1]

        entry = [entry_name, label, is_selected]
        if entry_name == "old_index":
            # Kept aside so it can be appended after the sorted entries.
            old_index_entry = entry
        else:
            regular_entries.append(entry)

    regular_entries.sort(reverse=True, key=lambda x: x[0])
    if old_index_entry:
        regular_entries.append(old_index_entry)
    return regular_entries
def get_dir_size(directory):
    """Return the total size in bytes of the files below an index directory.

    Args:
        directory: Directory name, joined onto ``baseindexpath``.

    Returns:
        The sum of the sizes of all files found while walking the directory
        tree. A file that cannot be stat'ed (for example because it
        disappeared during the walk) is left out of the total.
    """
    total_size = 0
    for dirpath, _subdirs, filenames in os.walk(os.path.join(baseindexpath, directory)):
        for filename in filenames:
            try:
                total_size += os.path.getsize(os.path.join(dirpath, filename))
            except OSError:  # File disappeared
                # Skip only this file so the remaining files of the same
                # directory still contribute to the total.
                continue
    return total_size
def get_item_count(dirs):
    """Return the document count of the index stored in directory *dirs*.

    The directory name is joined onto ``baseindexpath``, opened as a whoosh
    index, and the value of its ``doc_count_all()`` is returned.
    """
    index_path = os.path.join(baseindexpath, dirs)
    return index.open_dir(index_path).doc_count_all()
def to_iso_date(timestamp):
    """Convert an index directory name into a YYYY-MM-DD date string.

    Args:
        timestamp: Epoch seconds (anything ``int()`` accepts), or the
            literal "old_index".

    Returns:
        "old_index" unchanged, otherwise the local-time calendar date of
        the timestamp.
    """
    if timestamp != "old_index":
        moment = datetime.datetime.fromtimestamp(int(timestamp))
        return moment.date().isoformat()
    return "old_index"
# ============ ROUTES ============
@searches.route("/search", methods=['POST'])
def search():
    """Run a search and render the first page of results.

    Form fields:
        query: Text used both as the key for paste-name matches and as the
            full-text query on the ``content`` field of the selected index.
        index_name: Optional name of an index directory below
            ``baseindexpath``. Missing, empty or "0" selects the index
            returned by get_current_index().

    Returns:
        The rendered ``search.html`` template. ``r``, ``c``, ``paste_date``,
        ``paste_size`` and ``paste_tags`` are parallel lists holding one
        entry per result: name matches first, followed by at most 50
        full-text hits. ``num_res`` counts the full-text matches only.

    Aborts with HTTP 400 when ``index_name`` is not a plain directory name.
    """
    query = request.form['query']
    # .get() so that a missing field falls back to the current index.
    index_name = request.form.get('index_name')
    num_elem_to_get = 50

    # select correct index
    if not index_name or index_name == "0":
        selected_index = get_current_index()
    else:
        # index_name comes from the client: refuse anything that is not a
        # single path component so it cannot point outside baseindexpath.
        if index_name in ('.', '..') or os.path.basename(index_name) != index_name:
            flask.abort(400)
        selected_index = os.path.join(baseindexpath, index_name)

    # Search filename
    # NOTE(review): assumes these members are str like the index hits, since
    # they are concatenated with 'tag:' below -- confirm the client decodes
    # its responses.
    paths = list(r_serv_pasteName.smembers(query))

    # Search full line
    print(selected_index)
    ix = index.open_dir(selected_index)
    with ix.searcher() as searcher:
        parsed_query = QueryParser("content", ix.schema).parse(query)
        page = searcher.search_page(parsed_query, 1, pagelen=num_elem_to_get)
        # Value of the first stored field of each hit.
        # NOTE(review): presumably the paste path; confirm against the
        # schema used by the indexer.
        paths.extend(hit.items()[0][1] for hit in page)
        num_res = len(searcher.search(parsed_query))

    r = []           # complete path
    c = []           # preview of the paste content
    paste_date = []
    paste_size = []
    paste_tags = []
    # A single loop handles name matches and index hits alike, so every
    # result gets an entry in each of the parallel lists (tags included).
    for path in paths:
        paste = Paste.Paste(path)
        r.append(path)

        # Slicing already copes with content shorter than the limit.
        content = paste.get_p_content()
        c.append(content[:max_preview_char])

        # The date string is split as 4 + 2 + remaining characters.
        curr_date = str(paste._get_p_date())
        paste_date.append(curr_date[0:4] + '/' + curr_date[4:6] + '/' + curr_date[6:])
        paste_size.append(paste._get_p_size())

        l_tags = []
        for complete_tag in r_serv_metadata.smembers('tag:' + path):
            parts = complete_tag.split('=')
            if len(parts) > 1:
                if parts[1] != '':
                    # Part after '=' without its first and last character.
                    tag = parts[1][1:-1]
                else:
                    # no value
                    tag = parts[0][1:-1]
            else:
                # use for custom tags
                tag = parts[0]
            l_tags.append((tag, complete_tag))
        paste_tags.append(l_tags)

    bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info', 'dark']

    # Built once: each call walks and opens every index directory.
    index_list = get_index_list(selected_index)
    index_min = 1
    index_max = len(index_list)

    return render_template("search.html", r=r, c=c,
                           query=query, paste_date=paste_date,
                           paste_size=paste_size, char_to_display=max_preview_modal,
                           num_res=num_res, index_min=index_min, index_max=index_max,
                           bootstrap_label=bootstrap_label,
                           paste_tags=paste_tags,
                           index_list=index_list
                           )
@searches.route("/get_more_search_result", methods=['POST'])
def get_more_search_result():
    """Return one further page of full-text search results as JSON.

    Form fields:
        query: Full-text query run on the ``content`` field of the index.
        page_offset: Page number to fetch; converted with ``int()``.
        index_name: Optional name of an index directory below
            ``baseindexpath``. Missing, empty or "0" selects the index
            returned by get_current_index().

    Returns:
        A JSON object with the parallel lists ``path_array``,
        ``preview_array``, ``date_array``, ``size_array`` and ``list_tags``,
        the ``bootstrap_label`` names, and ``moreData``, which is true when
        the page came back full (50 results).

    Aborts with HTTP 400 when ``index_name`` is not a plain directory name.
    """
    query = request.form['query']
    page_offset = int(request.form['page_offset'])
    # .get() so that a missing field falls back to the current index.
    index_name = request.form.get('index_name')
    num_elem_to_get = 50

    # select correct index
    if not index_name or index_name == "0":
        selected_index = get_current_index()
    else:
        # index_name comes from the client: refuse anything that is not a
        # single path component so it cannot point outside baseindexpath.
        if index_name in ('.', '..') or os.path.basename(index_name) != index_name:
            flask.abort(400)
        selected_index = os.path.join(baseindexpath, index_name)

    bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info', 'dark']

    ix = index.open_dir(selected_index)
    with ix.searcher() as searcher:
        parsed_query = QueryParser("content", ix.schema).parse(query)
        results = searcher.search_page(parsed_query, page_offset, num_elem_to_get)
        # Value of the first stored field of each hit.
        # NOTE(review): presumably the paste path; confirm against the
        # schema used by the indexer.
        path_array = [hit.items()[0][1] for hit in results]

    preview_array = []
    date_array = []
    size_array = []
    list_tags = []
    for path in path_array:
        paste = Paste.Paste(path)

        # Slicing already copes with content shorter than the limit.
        content = paste.get_p_content()
        preview_array.append(content[:max_preview_char])

        # The date string is split as 4 + 2 + remaining characters.
        curr_date = str(paste._get_p_date())
        date_array.append(curr_date[0:4] + '/' + curr_date[4:6] + '/' + curr_date[6:])
        size_array.append(paste._get_p_size())

        l_tags = []
        for complete_tag in r_serv_metadata.smembers('tag:' + path):
            parts = complete_tag.split('=')
            if len(parts) > 1:
                if parts[1] != '':
                    # Part after '=' without its first and last character.
                    tag = parts[1][1:-1]
                else:
                    # no value
                    tag = parts[0][1:-1]
            else:
                # use for custom tags
                tag = parts[0]
            l_tags.append(tag)
        list_tags.append(l_tags)

    to_return = {
        "path_array": path_array,
        "preview_array": preview_array,
        "date_array": date_array,
        "size_array": size_array,
        "list_tags": list_tags,
        "bootstrap_label": bootstrap_label,
        # A full page suggests there may be another one to fetch.
        "moreData": len(path_array) >= num_elem_to_get,  # pagelength
    }
    return jsonify(to_return)
# ========= REGISTRATION =========
# Attach the 'searches' blueprint to the application object taken from
# Flask_config so the routes defined in this module are served by it.
app.register_blueprint(searches)