Lucene4.7.2 搜尋與高亮顯示
阿新 • • 發佈:2019-01-29
pom.xml
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>cn.et</groupId>
  <artifactId>LuceneScoreSearch</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <parent>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-parent</artifactId>
    <version>1.5.9.RELEASE</version>
  </parent>
  <dependencies>
    <dependency>
      <groupId>org.springframework.boot</groupId>
      <artifactId>spring-boot-starter-web</artifactId>
    </dependency>
    <dependency>
      <groupId>com.janeluo</groupId>
      <artifactId>ikanalyzer</artifactId>
      <version>2012_u6</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-highlighter</artifactId>
      <version>4.7.2</version>
    </dependency>
    <!-- JUnit Java語言的單元測試框架 -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.12</version>
      <scope>test</scope>
    </dependency>
  </dependencies>
  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <configuration>
          <source>1.7</source>
          <target>1.7</target>
          <encoding>UTF-8</encoding>
        </configuration>
      </plugin>
    </plugins>
  </build>
</project>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <groupId>cn.et</groupId> <artifactId>LuceneScoreSearch</artifactId> <version>0.0.1-SNAPSHOT</version> <parent> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-starter-parent</artifactId> <version>1.5.9.RELEASE</version> </parent> <dependencies> <dependency> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-starter-web</artifactId> </dependency> <dependency> <groupId>com.janeluo</groupId> <artifactId>ikanalyzer</artifactId> <version>2012_u6</version> </dependency> <dependency> <groupId> org.apache.lucene</groupId> <artifactId>lucene-highlighter</artifactId> <version>4.7.2</version> </dependency> <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> <version>4.10</version> <scope>test</scope> </dependency> </dependencies> <build> <plugins> <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-compiler-plugin</artifactId> <configuration> <source>1.7</source> <target>1.7</target> <encoding>UTF-8</encoding> </configuration> </plugin> </plugins> </build> </project>
package cn.et;

import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.search.highlight.TextFragment;
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Demo REST controller that builds a small Lucene index of person records and
 * exposes two searches over the {@code BRIEF} field: one that renders the hit
 * scores as an HTML snippet and one that returns highlighted fragments.
 *
 * <p>The index lives in a fixed directory on disk and the search term is
 * hard-coded; both endpoints take no request parameters.
 */
@RestController
public class LueneTesting {

    /** Filesystem location of the index, shared by the writer and both searches. */
    private static final String INDEX_PATH = "H:/Lucene/index";

    /** Name of the field that is searched and highlighted. */
    private static final String SEARCH_FIELD = "BRIEF";

    /** Maximum number of hits (and of highlight fragments) requested. */
    private static final int MAX_RESULTS = 10;

    /** IK analyzer used both for indexing and for parsing queries. */
    static Analyzer analyzer = new IKAnalyzer();

    /**
     * Builds the index: adds five sample person documents and commits them.
     *
     * <p>The sample descriptions are fictional. The writer is opened with the
     * default configuration, so documents are added to whatever the index
     * directory already contains.
     *
     * @throws Exception if the index directory cannot be opened or written
     */
    public static void write() throws Exception {
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, analyzer);
        // try-with-resources guarantees the writer (and with it the index write
        // lock) and the directory are released even when indexing fails.
        try (Directory directory = FSDirectory.open(new File(INDEX_PATH));
             IndexWriter iwriter = new IndexWriter(directory, config)) {
            iwriter.addDocument(person("20", "路橙",
                    "來自中國湖南永州,是一名初級Java開發工程師,中國網際網路技術部落格:http://blog.csdn.net/phone13144830339"));
            iwriter.addDocument(person("21", "謝飛",
                    "來自中國湖北武漢,是一名語文老師,中國教育網成員,2010年評選為中國10大優秀教師"));
            iwriter.addDocument(person("22", "鄧娟",
                    "來自中國四川綿陽,是一名幼兒園老師"));
            iwriter.addDocument(person("23", "曹焰斌",
                    "來自中國廣東廣州,是一名建築工人"));
            iwriter.addDocument(person("24", "SMISI",
                    "來自美國底特律,是一名外資企業經理"));
            iwriter.commit();
        }
    }

    /**
     * Creates one stored, analyzed document, comparable to a database row.
     *
     * @param age   value of the {@code AGE} field
     * @param name  value of the {@code NAME} field
     * @param brief value of the {@code BRIEF} field
     * @return a document holding the three fields in that order
     */
    private static Document person(String age, String name, String brief) {
        Document doc = new Document();
        doc.add(new Field("AGE", age, TextField.TYPE_STORED));
        doc.add(new Field("NAME", name, TextField.TYPE_STORED));
        doc.add(new Field("BRIEF", brief, TextField.TYPE_STORED));
        return doc;
    }

    /**
     * Searches the {@code BRIEF} field for a fixed term and renders every hit,
     * including its score, as an HTML snippet.
     *
     * @return the concatenated HTML for all hits, or an empty string if there are none
     * @throws Exception if the index cannot be read or the query cannot be parsed
     */
    @RequestMapping("/simpleSearchScore")
    public static String simpleSearch() throws Exception {
        String content = "中國";
        // The reader and directory hold open file handles; close them per request.
        try (Directory directory = FSDirectory.open(new File(INDEX_PATH));
             DirectoryReader ireader = DirectoryReader.open(directory)) {
            IndexSearcher isearcher = new IndexSearcher(ireader);
            // Field to search and the analyzer used to tokenize the query text.
            QueryParser parser = new QueryParser(Version.LUCENE_47, SEARCH_FIELD, analyzer);
            Query query = parser.parse(content);

            TopDocs docs = isearcher.search(query, MAX_RESULTS);
            StringBuilder result = new StringBuilder();
            for (ScoreDoc hit : docs.scoreDocs) {
                // Load the stored fields once per hit instead of once per field.
                Document stored = isearcher.doc(hit.doc);
                result.append("文件ID: ").append(hit.doc)
                      .append("<br/>BRIEF:").append(stored.get("BRIEF"))
                      .append("<br/>NAME:").append(stored.get("NAME"))
                      .append("<br/>AGE:").append(stored.get("AGE"))
                      .append("<br/>得分情況: ").append(hit.score)
                      .append("<hr border='5px' color='red'/>");
            }
            return result.toString();
        }
    }

    /**
     * Searches the {@code BRIEF} field for a fixed term and returns, for every
     * hit, its name, age and the best highlighted fragment of its description.
     *
     * <p>Each map has the keys {@code name} and {@code age}; {@code brief} is
     * present only when some fragment scored above zero.
     *
     * @return one map per hit, in the order the hits were returned by the search
     * @throws Exception if the index cannot be read, the query cannot be parsed
     *                   or highlighting fails
     */
    @RequestMapping("/highlighterSearch")
    public List<Map<String,String>> highlighterTesting() throws Exception {
        String content = "中國";
        // The reader and directory hold open file handles; close them per request.
        try (Directory directory = FSDirectory.open(new File(INDEX_PATH));
             DirectoryReader ireader = DirectoryReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(ireader);
            QueryParser parser = new QueryParser(Version.LUCENE_47, SEARCH_FIELD, analyzer);
            Query query = parser.parse(content);
            TopDocs hits = searcher.search(query, MAX_RESULTS);

            SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter("<font color=red>","</font>");
            Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
            // Limit how many characters of each document the highlighter analyzes.
            highlighter.setMaxDocCharsToAnalyze(20);

            List<Map<String,String>> list = new ArrayList<Map<String,String>>(hits.scoreDocs.length);
            for (ScoreDoc hit : hits.scoreDocs) {
                int id = hit.doc;
                Document doc = searcher.doc(id);
                Map<String,String> map = new HashMap<String,String>();
                map.put("name", doc.get("NAME"));

                String text = doc.get("BRIEF");
                TokenStream tokenStream = TokenSources.getAnyTokenStream(
                        searcher.getIndexReader(), id, SEARCH_FIELD, analyzer);
                TextFragment[] fragments =
                        highlighter.getBestTextFragments(tokenStream, text, false, MAX_RESULTS);
                TextFragment best = bestScoring(fragments);
                if (best != null) {
                    map.put("brief", best.toString());
                }

                map.put("age", doc.get("AGE"));
                list.add(map);
            }
            return list;
        }
    }

    /**
     * Picks the fragment with the highest positive score.
     *
     * <p>Selecting by score rather than by array position means the result
     * does not depend on the order in which the highlighter returns fragments.
     *
     * @param fragments candidate fragments; entries may be {@code null}
     * @return the highest-scoring fragment whose score is above zero (the first
     *         one on ties), or {@code null} if there is none
     */
    private static TextFragment bestScoring(TextFragment[] fragments) {
        TextFragment best = null;
        for (TextFragment candidate : fragments) {
            if (candidate == null || !(candidate.getScore() > 0)) {
                continue;
            }
            if (best == null || candidate.getScore() > best.getScore()) {
                best = candidate;
            }
        }
        return best;
    }
}
package cn.et;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
/**
 * Spring Boot entry point for the demo application.
 *
 * <p>Carries {@code @SpringBootApplication}, so it doubles as the application's
 * configuration source.
 */
@SpringBootApplication
public class SpringBootMain {

    /**
     * Starts the Spring application, using this class as its source.
     *
     * @param args command-line arguments, passed through to Spring Boot
     */
    public static void main(String[] args) {
        SpringApplication application = new SpringApplication(SpringBootMain.class);
        application.run(args);
    }
}