From f3d1ca052e2eb1a004a46fcfdf28b6e2abc7dd23 Mon Sep 17 00:00:00 2001 From: Alexandre Dulaunoy Date: Mon, 11 Aug 2014 14:50:35 +0200 Subject: [PATCH 1/3] Return the number of indexed documents --- bin/tests/indexer_lookup.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/bin/tests/indexer_lookup.py b/bin/tests/indexer_lookup.py index 3b0a1e7a..09ae24c4 100644 --- a/bin/tests/indexer_lookup.py +++ b/bin/tests/indexer_lookup.py @@ -24,6 +24,7 @@ indexertype = cfg.get("Indexer", "type") argParser = argparse.ArgumentParser(description='Fulltext search for AIL') argParser.add_argument('-q', action='append', help='query to lookup (one or more)') +argParser.add_argument('-n', action='store_true', default=False, help='Return numbers of document indexed') args = argParser.parse_args() from whoosh import index @@ -33,6 +34,11 @@ schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT) ix = index.open_dir(indexpath) from whoosh.qparser import QueryParser + +if args.n: + print ix.doc_count_all() + exit(0) + if args.q is None: argParser.print_help() exit(1) From f65a94d47b7af677576c2033296ee49646c0cc55 Mon Sep 17 00:00:00 2001 From: Alexandre Dulaunoy Date: Mon, 11 Aug 2014 14:56:15 +0200 Subject: [PATCH 2/3] -l added -> dumping all terms indexed --- bin/tests/indexer_lookup.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/bin/tests/indexer_lookup.py b/bin/tests/indexer_lookup.py index 09ae24c4..93bbf00d 100644 --- a/bin/tests/indexer_lookup.py +++ b/bin/tests/indexer_lookup.py @@ -24,7 +24,8 @@ indexertype = cfg.get("Indexer", "type") argParser = argparse.ArgumentParser(description='Fulltext search for AIL') argParser.add_argument('-q', action='append', help='query to lookup (one or more)') -argParser.add_argument('-n', action='store_true', default=False, help='Return numbers of document indexed') +argParser.add_argument('-n', action='store_true', default=False, help='return numbers of indexed documents') +argParser.add_argument('-l', action='store_true', default=False, help='dump all terms encountered in indexed documents') args = argParser.parse_args() from whoosh import index @@ -39,6 +40,12 @@ if args.n: print ix.doc_count_all() exit(0) +if args.l: + xr = ix.searcher().reader() + for x in xr.lexicon("content"): + print (x) + exit(0) + if args.q is None: argParser.print_help() exit(1) From 0a6664ffbab58919f6766e2f3ac9d1e3ee49cd66 Mon Sep 17 00:00:00 2001 From: Alexandre Dulaunoy Date: Mon, 11 Aug 2014 15:07:12 +0200 Subject: [PATCH 3/3] Indexer: Some index statistics added usage: indexer_lookup.py [-h] [-q Q] [-n] [-t] [-l] Fulltext search for AIL optional arguments: -h, --help show this help message and exit -q Q query to lookup (one or more) -n return number of indexed documents -t dump top 500 terms -l dump all terms encountered in indexed documents --- bin/tests/indexer_lookup.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/bin/tests/indexer_lookup.py b/bin/tests/indexer_lookup.py index 93bbf00d..305ae236 100644 --- a/bin/tests/indexer_lookup.py +++ b/bin/tests/indexer_lookup.py @@ -25,6 +25,7 @@ indexertype = cfg.get("Indexer", "type") argParser = argparse.ArgumentParser(description='Fulltext search for AIL') argParser.add_argument('-q', action='append', help='query to lookup (one or more)') argParser.add_argument('-n', action='store_true', default=False, help='return numbers of indexed documents') +argParser.add_argument('-t', action='store_true', default=False, help='dump top 500 terms') argParser.add_argument('-l', action='store_true', default=False, help='dump all terms encountered in indexed documents') args = argParser.parse_args() @@ -46,6 +47,12 @@ if args.l: print (x) exit(0) +if args.t: + xr = ix.searcher().reader() + for x in xr.most_frequent_terms("content", number=500, prefix=''): + print (x) + exit(0) + if args.q is None: argParser.print_help() exit(1) @@ -54,5 +61,5 @@ with ix.searcher() as searcher: query = QueryParser("content", ix.schema).parse(" ".join(args.q)) results = searcher.search(query, limit=None) for x in results: - print x + print (x)