2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2020

04/23/2003: Lucene; Bug; TestPhrasePrefixQuery in 2003.04.21 Build Has Misleading Code?

The TestPhrasePrefixQuery looks like it is searching for "blueberry pi*" and it even seems to work at first glance. However, the test data is not extensive enough to show what is really happening.

The searching technique implemented in TestPhrasePrefixQuery will find not only "blueberry pie" but also "blueberry strudel" if both exist in the documents.

The reason is that the IndexReader.terms(Term termToMatch) method looks for the first term equal or larger than termToMatch and then returns *all* terms from that point in the index to the end.

One potential solution might be something like the following:

String pattern = "pi*";
TermEnum te = ir.terms(new Term("body", pattern));
while (te.term().text().matches(pattern)) {
    termsWithPrefix.add(te.term());
    if (te.next() == false)
        break;
    }
}

Of course, the code above only works with JDK1.4 because of the pattern matching.

Comments?

04/22/2003: Examples for All of Lucene's Query Classes.

The following code has examples how to call each of the various Lucene Query classes.

/*
 * Created on Apr 21, 2003
 *
 */
package com.affy.lucene.examples;

import java.io.IOException;
import java.util.LinkedList;

import org.apache.lucene.analysis.standard.*;
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import org.apache.lucene.search.*;
import org.apache.lucene.store.*;

public class RunAllQueries {

	public static void main(String[] args)
	throws IOException, ParseException {
		RAMDirectory indexStore = new RAMDirectory();
		Query query = null;

		String docs[] = {
			"aaa bbb ccc",
			"aaa ddd eee",
			"aaa ddd fff",
			"aaa dee fff",
			"AAA fff ggg",
			"ggg hhh iii",
			"123 123 1z2",	// document containing z for
					// WildcardQuery example.
			"999 123 123",	// document for fuzzy search.
			"9x9 123 123",	// document for fuzzy search.
			"99 123 123",	// document for fuzzy search.
			"xxx yyy zzz"
		};

		IndexWriter writer = new IndexWriter(indexStore,
			new StandardAnalyzer(), true);
		for (int j = 0; j < docs.length; j++) {
			Document d = new Document();
			d.add(Field.Text("body", docs[j]));
			writer.addDocument(d);
		}
		writer.close();

		IndexReader indexReader = IndexReader.open(indexStore);

		System.out.println("Searchable Documents: "
			 + indexReader.numDocs());
		System.out.println("");
		System.out.println("NOTE: This search is"
			+ " case-insensitive.");
		System.out.println("--------------------------------");
		System.out.println("");

		System.out.println("****** BooleanQuery Example *******");
		System.out.println("FIND DOCUMENTS WITH "
			+ "AAA, GGG, or XXX");
		System.out.println("-------------------------------");
		BooleanQuery bq = new  BooleanQuery();
		Term aaa = new Term("body", "aaa");
		Term ggg = new Term("body", "ggg");
		Term xxx = new Term("body", "xxx");
		bq.add(new TermQuery(aaa), false, false);
		bq.add(new TermQuery(ggg), false, false);
		bq.add(new TermQuery(xxx), false, false);
		RunAllQueries.runQueryAndDisplayResults(indexStore, bq);
		System.out.println("");
		System.out.println("FIND DOCUMENTS WITH AAA or Z*");
		System.out.println("------------------------------------");
		bq = new  BooleanQuery();
		bq.add(new TermQuery(new Term("body", "aaa")), false, false);
		bq.add(new PrefixQuery(new Term("body", "z")), false, false);
		RunAllQueries.runQueryAndDisplayResults(indexStore, bq);

		System.out.println("\n****** DocFreq Example ******");
		System.out.println("Frequency of 'DDD' Term: "
			+ indexReader.docFreq(new Term("body", "ddd")));

		System.out.println("\n****** FuzzyQuery Example *******");
		System.out.println("DOCUMENTS SIMILAR TO 999."
			+ " NOTE THAT 9x9 IS FOUND BUT NOT 99");
		System.out.println("------------------"
			+ "-----------------------------------------");
		query = new FuzzyQuery(new Term("body", "999"));
		RunAllQueries.runQueryAndDisplayResults(indexStore, query);
		System.out.println("\n*** PhrasePrefixQuery Example ****");
		// First, print all searchable terms.
		System.out.println("ALL SEARCHABLE TERMS");
		System.out.println("--------------------");
		LinkedList termsWithPrefix = new LinkedList();
		TermEnum te = indexReader.terms();
		do {
		  if (te.term() != null) {
		    if (te.term().text() != null) {
		      System.out.println("!" + te.term().text() + "!");
		    }
		  }
		} while (te.next());
		// Second, show that terms() returns terms
		// that from the specified term to the end
		// of the term list. So if you look for
		// pi* then paa would not be found. However,
		// phh and saa would be found.
		System.out.println("");
		System.out.println("SEARCHABLE TERMS FROM H to Z");
		System.out.println("---------------------------");
		termsWithPrefix = new LinkedList();
		te = indexReader.terms(new Term("body", "h*"));
		do {
			System.out.println(te.term().text());
		} while (te.next());
		// Use pattern-matching to find terms that start
		// with the letter h.
		System.out.println("");
		System.out.println("SEARCHABLE TERMS STARTING WITH D");
		System.out.println("--------------------------------");
		termsWithPrefix = new LinkedList();
		String pattern = "d*";
		te = indexReader.terms(new Term("body", pattern));
		while (te.term().text().matches(pattern)) {
			System.out.println(te.term().text());
			termsWithPrefix.add(te.term());
			if (te.next() == false)
				break;
		}
		// Find documents that match "aaa h*".
		PhrasePrefixQuery ppq = new PhrasePrefixQuery();
		ppq.add(new Term("body", "aaa"));
		ppq.add((Term[]) termsWithPrefix.toArray(new Term[0]));
		System.out.println("");
		RunAllQueries.runQueryAndDisplayResults(indexStore, ppq);

		System.out.println("\n****** PhraseQuery Example ******");
		System.out.println("NOTE: 2 documents are found "
			+ "with 'aaa ddd' in order.");
		System.out.println("--------------"
			+ "------------------------");
		PhraseQuery pq = new  PhraseQuery();
		pq.add(new Term("body", "aaa"));
		pq.add(new Term("body", "ddd"));
		RunAllQueries.runQueryAndDisplayResults(indexStore, pq);
		System.out.println("");
		System.out.println("NOTE: ZERO documents are"
			+ " found with 'xxx ddd' in order.");
		System.out.println("-----------"
			+ "---------------------------");
		pq = new  PhraseQuery();
		pq.add(new Term("body", "xxx"));
		pq.add(new Term("body", "ddd"));
		RunAllQueries.runQueryAndDisplayResults(indexStore, pq);

		System.out.println("\n****** PrefixQuery Example ******");
		System.out.println("DOCUMENTS STARTING WITH D");
		System.out.println("-------------------------");
		query = new PrefixQuery(new Term("body", "d"));
		RunAllQueries.runQueryAndDisplayResults(indexStore, query);

		System.out.println("\n****** RangeQuery Example ******");
		System.out.println("DOCUMENTS FROM"
			+ " AAA to FFF, not inclusive");
		System.out.println("-------------"
			+ "----------------------------------");
		Term t1 = new Term("body", "aaa");
		Term t2 = new Term("body", "fff");
		query = new RangeQuery(t1, t2, false);
		RunAllQueries.runQueryAndDisplayResults(indexStore, query);
		System.out.println("");
		System.out.println("DOCUMENTS FROM AAA"
			+ " to FFF, inclusive");
		System.out.println("---------------------------------");
		query = new RangeQuery(t1, t2, true);
		RunAllQueries.runQueryAndDisplayResults(indexStore, query);

		System.out.println("\n***** TermQuery Example *****");
		System.out.println("DOCUMENTS THAT CONTAIN AAA");
		System.out.println("--------------------------");
		query = new TermQuery(new Term("body", "aaa"));
		RunAllQueries.runQueryAndDisplayResults(indexStore, query);

		System.out.println("\n***** WildcardQuery Example *****");
		System.out.println("DOCUMENTS STARTING WITH D");
		System.out.println("-------------------------------");
		query = new WildcardQuery(new Term("body", "d*"));
		RunAllQueries.runQueryAndDisplayResults(indexStore, query);
		System.out.println("");
		System.out.println("DOCUMENTS ENDING WITH E");
		System.out.println("--------------------------------");
		query = new WildcardQuery(new Term("body", "*e"));
		RunAllQueries.runQueryAndDisplayResults(indexStore, query);
		System.out.println("");
		System.out.println("DOCUMENTS THAT CONTAIN Z");
		System.out.println("--------------------------------");
		query = new WildcardQuery(new Term("body", "*z*"));
		RunAllQueries.runQueryAndDisplayResults(indexStore, query);

		System.out.println("");
		System.out.println("Done.");
	}

	public static void
	runQueryAndDisplayResults(Directory indexStore, Query q)
	throws IOException {
		IndexSearcher searcher = new IndexSearcher(indexStore);
		Hits hits = searcher.search(q);
		int _length = hits.length();
		System.out.println("HITS: " + _length);
		for (int i = 0; i < _length; i++) {
			Document doc = hits.doc(i);
			Field field = doc.getField("body");
			System.out.println("  value: " + field.stringValue());
		}
		searcher.close();
	}
}