lucene 4.7 (2)全文檢索之查詢
阿新 • • 發佈:2019-02-13
package org.apache.lucene.demo;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.StringReader;
import java.util.Date;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryFilter;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import thtf.ebuilder.website.search.DBIndex;

/**
 * Simple command-line based search demo.
 *
 * <p>Opens the index located by {@code DBIndex._$.getIndexFile()}, runs one hard-coded query
 * against the {@code INFO_CONTENT} field, and prints the first page of hits with the matching
 * terms of each hit's {@code info_title} wrapped in HTML highlight tags.
 */
public class SearchFiles {

  /** Field that is used for sorting and printed as the identifier of each hit. */
  private static final String ID_FIELD = "info_id";

  /** Stored field whose text is highlighted and printed for each hit. */
  private static final String TITLE_FIELD = "info_title";

  private SearchFiles() {}

  /**
   * Runs the demo: a sorted, filtered search limited to 100 hits (timed and counted on stdout),
   * followed by {@link #doPagingSearch(String, IndexSearcher, Query, int)} for the same query.
   *
   * @param args ignored
   * @throws Exception if the index cannot be opened or read, or the query cannot be parsed
   */
  public static void main(String[] args) throws Exception {
    String field = "INFO_CONTENT";
    String word = "舞蹈";
    int hitsPerPage = 10;

    IndexReader reader = DirectoryReader.open(FSDirectory.open(DBIndex._$.getIndexFile()));
    try {
      IndexSearcher searcher = new IndexSearcher(reader);
      QueryParser parser = new QueryParser(Version.LUCENE_47, field, DBIndex._$.analyzer);
      Query query = parser.parse(word);
      System.out.println("Searching for: " + query.toString(field));

      // Sort: by info_id interpreted as an int, with the reverse flag set.
      Sort sort = new Sort(new SortField[] {new SortField(ID_FIELD, SortField.Type.INT, true)});

      // Filter: built from the very same query, so it does not narrow the result set;
      // it only demonstrates how a filter is passed to the searcher.
      // NOTE(review): QueryFilter was deprecated in early Lucene releases in favour of
      // QueryWrapperFilter - confirm it resolves against the Lucene version on the classpath.
      BooleanQuery bqf = new BooleanQuery();
      bqf.add(query, BooleanClause.Occur.SHOULD);

      // Measure only the search call itself, not the console output that follows.
      long startMillis = System.currentTimeMillis();
      TopDocs tDocs = searcher.search(query, new QueryFilter(bqf), 100, sort);
      long elapsedMillis = System.currentTimeMillis() - startMillis;

      System.out.println("查詢到:" + tDocs.scoreDocs.length);
      System.out.println("Time: " + elapsedMillis + "ms");

      doPagingSearch(word, searcher, query, hitsPerPage);
    } finally {
      // Release the reader even when parsing or searching fails.
      reader.close();
    }
  }

  /**
   * Runs {@code query} and prints the first page of results.
   *
   * <p>Up to {@code 5 * hitsPerPage} hits are collected, but only the first {@code hitsPerPage}
   * of them are printed. For every printed hit that has a stored {@code info_title} value, the
   * title is run through the highlighter; when no query term occurs in the title (or the
   * highlighter fails), the plain title is printed instead. Hits without a stored
   * {@code info_title} are skipped.
   *
   * @param word the raw search term; kept for interface compatibility, not used for searching
   * @param searcher searcher used to execute the query and load the stored documents
   * @param query the parsed query; also drives the highlighting
   * @param hitsPerPage number of hits printed
   * @throws IOException if the index cannot be read or the title cannot be tokenized
   */
  public static void doPagingSearch(String word, IndexSearcher searcher, Query query,
      int hitsPerPage) throws IOException {
    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    int start = 0;
    int end = Math.min(hits.length, start + hitsPerPage);
    System.out.println(start + "-" + end);

    // The highlighter does not depend on the individual hit, so build it once.
    SimpleHTMLFormatter formatter =
        new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
    Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
    highlighter.setTextFragmenter(new SimpleFragmenter(400));

    for (int i = start; i < end; i++) {
      Document doc = searcher.doc(hits[i].doc);
      String title = doc.get(TITLE_FIELD);
      if (title == null) {
        continue;
      }

      // The first argument of tokenStream is the name of the field being analyzed,
      // not the search term.
      TokenStream tokenStream =
          DBIndex._$.analyzer.tokenStream(TITLE_FIELD, new StringReader(title));

      String highlighted = null;
      try {
        highlighted = highlighter.getBestFragment(tokenStream, title);
      } catch (InvalidTokenOffsetsException e) {
        System.err.println(
            "Could not highlight " + TITLE_FIELD + " of " + doc.get(ID_FIELD) + ": " + e);
      }

      // getBestFragment yields null when the title contains none of the query terms;
      // fall back to the plain title rather than printing "null".
      String shown = (highlighted != null) ? highlighted : title;
      System.out.println(doc.get(ID_FIELD) + "\t" + shown);
    }
  }
}