Indexer: Some index statistics added

usage: indexer_lookup.py [-h] [-q Q] [-n] [-t] [-l]

Fulltext search for AIL

optional arguments:
  -h, --help  show this help message and exit
  -q Q        query to lookup (one or more)
  -n          return number of indexed documents
  -t          dump top 500 terms
  -l          dump all terms encountered in indexed documents
This commit is contained in:
Alexandre Dulaunoy 2014-08-11 15:07:12 +02:00
parent f65a94d47b
commit 0a6664ffba

View file

@@ -25,6 +25,7 @@ indexertype = cfg.get("Indexer", "type")
argParser = argparse.ArgumentParser(description='Fulltext search for AIL') argParser = argparse.ArgumentParser(description='Fulltext search for AIL')
argParser.add_argument('-q', action='append', help='query to lookup (one or more)') argParser.add_argument('-q', action='append', help='query to lookup (one or more)')
argParser.add_argument('-n', action='store_true', default=False, help='return numbers of indexed documents') argParser.add_argument('-n', action='store_true', default=False, help='return numbers of indexed documents')
argParser.add_argument('-t', action='store_true', default=False, help='dump top 500 terms')
argParser.add_argument('-l', action='store_true', default=False, help='dump all terms encountered in indexed documents') argParser.add_argument('-l', action='store_true', default=False, help='dump all terms encountered in indexed documents')
args = argParser.parse_args() args = argParser.parse_args()
@@ -46,6 +47,12 @@ if args.l:
print (x) print (x)
exit(0) exit(0)
if args.t:
xr = ix.searcher().reader()
for x in xr.most_frequent_terms("content", number=500, prefix=''):
print (x)
exit(0)
if args.q is None: if args.q is None:
argParser.print_help() argParser.print_help()
exit(1) exit(1)
@@ -54,5 +61,5 @@ with ix.searcher() as searcher:
query = QueryParser("content", ix.schema).parse(" ".join(args.q)) query = QueryParser("content", ix.schema).parse(" ".join(args.q))
results = searcher.search(query, limit=None) results = searcher.search(query, limit=None)
for x in results: for x in results:
print x print (x)