Lucene高亮顯示及中文分詞
阿新 • • 發佈:2019-01-22
直接demo,不廢話
建立一個Maven專案
在pom.xml貼上依賴
<dependencies>
  <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-core</artifactId> <version>5.3.1</version> </dependency>
  <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-queryparser</artifactId> <version>5.3.1</version> </dependency>
  <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-analyzers-common</artifactId> <version>5.3.1</version> </dependency>
  <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-analyzers-smartcn</artifactId> <version>5.3.1</version> </dependency>
  <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-highlighter</artifactId> <version>5.3.1</version> </dependency>
  <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> <version>4.12</version> </dependency>
</dependencies>
生成索引:
package com.java1234.lucene;

import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Builds a small demo index of three cities, analyzing the description text with
 * {@link SmartChineseAnalyzer}.
 */
public class Indexer {

    // Parallel arrays: element i of each array describes the same document.
    private final int[] ids = {1, 2, 3};
    private final String[] citys = {"青島", "南京", "上海"};
    private final String[] descs = {
        "青島是一個美麗的城市。",
        "南京是一個有文化的城市。南京是一個文化的城市南京,簡稱寧,是江蘇省會,地處中國東部地區,長江下游,瀕江近海。全市下轄11個區,總面積6597平方公里,2013年建成區面積752.83平方公里,常住人口818.78萬,其中城鎮人口659.1萬人。[1-4] “江南佳麗地,金陵帝王州”,南京擁有著6000多年文明史、近2600年建城史和近500年的建都史,是中國四大古都之一,有“六朝古都”、“十朝都會”之稱,是中華文明的重要發祥地,歷史上曾數次庇佑華夏之正朔,長期是中國南方的政治、經濟、文化中心,擁有厚重的文化底蘊和豐富的歷史遺存。[5-7] 南京是國家重要的科教中心,自古以來就是一座崇文重教的城市,有“天下文樞”、“東南第一學”的美譽。截至2013年,南京有高等院校75所,其中211高校8所,僅次於北京上海;國家重點實驗室25所、國家重點學科169個、兩院院士83人,均居中國第三。[8-10] 。",
        "上海是一個繁華的城市。"
    };

    /**
     * Creates an {@link IndexWriter} over the given directory.
     *
     * @param dir the directory the index is written to
     * @return a new writer; the caller is responsible for closing it
     * @throws Exception if the writer cannot be opened
     */
    private IndexWriter getWriter(Directory dir) throws Exception {
        // To compare against the standard analyzer, use: new StandardAnalyzer()
        SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        // Start from an empty index on every run; the default mode appends to an
        // existing index, so rerunning this demo would store every document again.
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        return new IndexWriter(dir, iwc);
    }

    /**
     * Writes the demo documents to an index stored under {@code indexDir}.
     *
     * @param indexDir file-system path of the index directory
     * @throws Exception if the directory cannot be opened or a document cannot be added
     */
    private void index(String indexDir) throws Exception {
        // try-with-resources closes the writer and then the directory even when
        // adding a document fails part-way through.
        try (Directory dir = FSDirectory.open(Paths.get(indexDir));
             IndexWriter writer = getWriter(dir)) {
            for (int i = 0; i < ids.length; i++) {
                Document doc = new Document();
                doc.add(new IntField("id", ids[i], Field.Store.YES));
                doc.add(new StringField("city", citys[i], Field.Store.YES));
                doc.add(new TextField("desc", descs[i], Field.Store.YES));
                writer.addDocument(doc);
            }
        }
    }

    public static void main(String[] args) throws Exception {
        new Indexer().index("D:\\lucene6");
    }
}
開始查詢
package com.java1234.lucene;

import java.io.StringReader;
import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Searches the "desc" field of the demo index and prints each hit together with
 * an HTML-highlighted fragment of its description.
 */
public class Searcher {

    /**
     * Runs the query against the index and prints the results to standard output.
     *
     * <p>For each of the top 10 hits this prints the stored "city" value, the stored
     * "desc" value, and the best highlighted fragment of "desc" when one exists.
     *
     * @param indexDir file-system path of the index directory
     * @param q query string, parsed by {@link QueryParser} with "desc" as the default field
     * @throws Exception if the index cannot be opened, the query cannot be parsed,
     *     or searching/highlighting fails
     */
    public static void search(String indexDir, String q) throws Exception {
        // try-with-resources releases the analyzer, reader and directory even when
        // parsing or searching throws.
        // To compare against the standard analyzer, use: new StandardAnalyzer()
        try (Directory dir = FSDirectory.open(Paths.get(indexDir));
             IndexReader reader = DirectoryReader.open(dir);
             SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer()) {
            IndexSearcher is = new IndexSearcher(reader);
            QueryParser parser = new QueryParser("desc", analyzer);
            Query query = parser.parse(q);

            long start = System.currentTimeMillis();
            TopDocs hits = is.search(query, 10);
            long end = System.currentTimeMillis();
            System.out.println("匹配 " + q + " ,總共花費" + (end - start) + "毫秒" + "查詢到" + hits.totalHits + "個記錄");

            // The same scorer drives both fragment selection and highlighting.
            QueryScorer scorer = new QueryScorer(query);
            Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
            SimpleHTMLFormatter simpleHTMLFormatter =
                new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
            Highlighter highlighter = new Highlighter(simpleHTMLFormatter, scorer);
            highlighter.setTextFragmenter(fragmenter);

            for (ScoreDoc scoreDoc : hits.scoreDocs) {
                Document doc = is.doc(scoreDoc.doc);
                String desc = doc.get("desc");
                System.out.println(doc.get("city"));
                System.out.println(desc);
                if (desc != null) {
                    TokenStream tokenStream = analyzer.tokenStream("desc", new StringReader(desc));
                    String fragment = highlighter.getBestFragment(tokenStream, desc);
                    // No fragment is returned when nothing in the text can be
                    // highlighted; skip it rather than print the word "null".
                    if (fragment != null) {
                        System.out.println(fragment);
                    }
                }
            }
        }
    }

    public static void main(String[] args) {
        String indexDir = "D:\\lucene6";
        String q = "南京文明";
        try {
            search(indexDir, q);
        } catch (Exception e) {
            // Demo entry point: report the failure on the console and exit normally.
            e.printStackTrace();
        }
    }
}
結果顯示:
放在網頁上展示效果更佳。OK,基本完事。