#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of AIL framework - Analysis Information Leak framework
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Copyright (c) 2014 Alexandre Dulaunoy - a@foo.be

import ConfigParser
import argparse
import sys

configfile = '../packages/config.cfg'
cfg = ConfigParser.ConfigParser()
cfg.read(configfile)

# Indexer configuration - index dir and schema setup
indexpath = cfg.get("Indexer", "path")
indexertype = cfg.get("Indexer", "type")

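# A minimal sketch (an assumption, not taken from this file) of the [Indexer]
# section that the cfg.get() calls above expect in ../packages/config.cfg:
#
#   [Indexer]
#   type = whoosh
#   path = indexdir
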
argParser = argparse.ArgumentParser(description='Fulltext search for AIL')
argParser.add_argument('-q', action='append', help='query to lookup (one or more)')
argParser.add_argument('-n', action='store_true', default=False, help='return the number of indexed documents')
argParser.add_argument('-t', action='store_true', default=False, help='dump the top 500 terms')
argParser.add_argument('-l', action='store_true', default=False, help='dump all terms encountered in indexed documents')
args = argParser.parse_args()

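# Example invocations (the filename indexer_lookup.py is an assumption; use the
# actual name of this script):
#   python indexer_lookup.py -n                   # number of indexed documents
#   python indexer_lookup.py -t                   # top 500 terms
#   python indexer_lookup.py -q leak -q password  # full-text query
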
from whoosh import index
from whoosh.fields import *
schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT)

ix = index.open_dir(indexpath)

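# For reference, a minimal sketch (an assumption, not part of this script) of
# how documents matching the schema above get written into such an index:
#
#   from whoosh import index
#   ix = index.create_in(indexpath, schema)
#   writer = ix.writer()
#   writer.add_document(title=u"paste name", path=u"/path/to/paste",
#                       content=u"paste content")
#   writer.commit()
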
from whoosh.qparser import QueryParser

# -n: print the total number of indexed documents and exit
if args.n:
    print ix.doc_count_all()
    exit(0)

# -l: dump every term indexed in the content field and exit
if args.l:
    xr = ix.searcher().reader()
    for x in xr.lexicon("content"):
        print (x)
    exit(0)

# -t: dump the 500 most frequent terms of the content field and exit
if args.t:
    xr = ix.searcher().reader()
    for x in xr.most_frequent_terms("content", number=500, prefix=''):
        print (x)
    exit(0)

# No query given: print the usage and exit with an error
if args.q is None:
    argParser.print_help()
    exit(1)

# Parse the query (all -q values joined) and print every matching document
with ix.searcher() as searcher:
    query = QueryParser("content", ix.schema).parse(" ".join(args.q))
    results = searcher.search(query, limit=None)
    for x in results:
        print (x)
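
# Note: each printed Hit shows only the stored fields (title and path),
# assuming the on-disk index was created with the same schema as defined
# above, where the content field is not stored.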