Lucene建立索引入門案例

阿新 • • 發佈：2019-01-25

最近在學習lucene，參考網上的資料寫了一個簡單搜尋demo；

專案jar包：

//索引關鍵類


package com.lucene.index;

import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

import com.lucene.vo.User;

/**
 *  * lucene 檢索記憶體索引 非常簡單的例子  *  * @author Administrator  *  
 */
public class searchIndex {
	private String[] ids = { "1", "2", "3", "4", "5", "6" };
	private String[] emails = { " 
[email protected]", "[email protected]", "[email protected]", "[email protected]", "[email protected]", "[email protected]" };
//	private String[] contents = { "welcome to visited the space,I like book", "hello boy, I like pingpeng ball", "my name is cc I like game", "I like football",
//			"I like football and I like basketball too", "I like movie and swim" };
	private String[] contents = { "建立一個記憶體目錄物件，所以這裡生成的索引會放在磁碟中，而不是在記憶體中", "建立索引寫入物件，該物件既可以把索引寫入到磁碟中也可以寫入到記憶體中", "分詞器，分詞器就是將檢索的關鍵字分割成一組組片語， 它是lucene檢索查詢的一大特色之一", "這個是分詞器拆分最大長度，因為各種不同型別的分詞器拆分的字元顆粒細化程度不一樣，所以需要設定一個最長的拆分長度",
			"文件物件，在lucene中建立的索引可以看成資料庫中的一張表，表中也可以有欄位,往裡面新增內容之後可以根據欄位去匹配查詢", "I like movie and swim" };
	private String[] names = { "zhangsan", "lisi", "john", "jetty", "mike", "jake" };
	// 建立一個記憶體目錄物件，所以這裡生成的索引會放在磁碟中，而不是在記憶體中。
	private Directory directory = null;
	//IK分詞器
	IKAnalyzer analyzer = null;
	public searchIndex() {
		try {
			directory = FSDirectory.open(new File("H:/lucene/index"));
			analyzer = new IKAnalyzer(true);
		} catch (IOException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		}
	}
	
	public void index() {
		/*
		 * 建立索引寫入物件，該物件既可以把索引寫入到磁碟中也可以寫入到記憶體中。
		 */
		IndexWriter writer;
		try {
			writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_36, analyzer));
			//建立之前先刪除
			writer.deleteAll();
			// 建立Document
			// 文件物件，在lucene中建立的索引可以看成資料庫中的一張表，表中也可以有欄位,往裡面新增內容之後可以根據欄位去匹配查詢
	
			Document doc =null;
			
			for(int i=0;i<ids.length;i++){
				doc = new Document();
				doc.add(new Field("id", ids[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
				doc.add(new Field("email", emails[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
				doc.add(new Field("content", contents[i], Field.Store.NO, Field.Index.ANALYZED));
				doc.add(new Field("name", names[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
				writer.addDocument(doc);
			}
			writer.close(); 
		} catch (CorruptIndexException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		} catch (LockObtainFailedException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		} catch (IOException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		}
	}

	public List<User> search(String keyword) {
		long startTime = System.currentTimeMillis();
		System.out.println("*****************檢索開始**********************");
		List<User> userList = new ArrayList<User>();
		IndexReader reader;
		try {
			reader = IndexReader.open(directory);
			
			// 建立IndexSearcher 檢索索引的物件，裡面要傳遞上面寫入的記憶體目錄物件directory
			IndexSearcher searcher = new IndexSearcher(reader);
			// 根據搜尋關鍵字 封裝一個term組合物件，然後封裝成Query查詢物件
		
			QueryParser queryParser = new QueryParser(Version.LUCENE_36, "content", analyzer);
			Query query = queryParser.parse(keyword);
			

			// 去索引目錄中查詢，返回的是TopDocs物件，裡面存放的就是上面放的document文件物件
			TopDocs rs = searcher.search(query, null, 10);
			long endTime = System.currentTimeMillis();
			System.out.println("總共花費" + (endTime - startTime) + "毫秒，檢索到" + rs.totalHits + "條記錄。");
			User user = null;
			for (int i = 0; i < rs.scoreDocs.length; i++) {
				// rs.scoreDocs[i].doc 是獲取索引中的標誌位id, 從0開始記錄
				Document firstHit = searcher.doc(rs.scoreDocs[i].doc);
				user = new User();
				user.setId(Long.parseLong(firstHit.get("id")));
				user.setName(firstHit.get("name"));
				user.setSex(firstHit.get("sex"));
				user.setDosomething(firstHit.get("dosometing"));
				user.setEmail(firstHit.get("email"));
				user.setContent(firstHit.get("content"));
				userList.add(user);

//				System.out.println("name:" + firstHit.get("name"));
//				System.out.println("sex:" + firstHit.get("sex"));
//				System.out.println("dosomething:" + firstHit.get("dosometing"));
			}
			reader.close();
		} catch (CorruptIndexException e1) {
			// TODO Auto-generated catch block
			e1.printStackTrace();
		} catch (IOException e1) {
			// TODO Auto-generated catch block
			e1.printStackTrace();
		} catch (ParseException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		}
		
		System.out.println("*****************檢索結束**********************");
		return userList;
	}
	
}

package com.lucene;

import java.io.IOException;
import java.util.List;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import com.lucene.index.searchIndex;
import com.lucene.vo.User;

/**
 * Servlet implementation class searchServlet
 */
public class searchServlet extends HttpServlet {
	private static final long serialVersionUID = 1L;

    /**
     * Default constructor. 
     */
    public searchServlet() {
        // TODO Auto-generated constructor stub
    }

	/**
	 * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)
	 */
	protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
		// TODO Auto-generated method stub
	}

	/**
	 * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response)
	 */
	protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
		request.setCharacterEncoding("UTF-8");
		String keyword = request.getParameter("keyword");
		if("".equals(keyword)){
			keyword="0";
		}
		searchIndex si = new searchIndex();
		si.index();
		List<User> userList = si.search(keyword);
		request.setAttribute("userList", userList);
		request.getRequestDispatcher("search.jsp").forward(request, response);
	}

}

package com.lucene.vo;

/**
 * Plain mutable bean carrying one search hit to the view layer.
 * Every property is null until its setter is called.
 */
public class User {

	/** Numeric identifier. */
	private Long id;

	/** Display name. */
	private String name;

	/** Sex. */
	private String sex;

	/** Free-text "dosomething" value. */
	private String dosomething;

	/** E-mail address. */
	private String email;

	/** Content text. */
	private String content;

	/** @return the identifier, or null if unset */
	public Long getId() { return this.id; }

	/** @param id the identifier to store */
	public void setId(Long id) { this.id = id; }

	/** @return the name, or null if unset */
	public String getName() { return this.name; }

	/** @param name the name to store */
	public void setName(String name) { this.name = name; }

	/** @return the sex, or null if unset */
	public String getSex() { return this.sex; }

	/** @param sex the sex to store */
	public void setSex(String sex) { this.sex = sex; }

	/** @return the "dosomething" value, or null if unset */
	public String getDosomething() { return this.dosomething; }

	/** @param dosomething the "dosomething" value to store */
	public void setDosomething(String dosomething) { this.dosomething = dosomething; }

	/** @return the e-mail address, or null if unset */
	public String getEmail() { return this.email; }

	/** @param email the e-mail address to store */
	public void setEmail(String email) { this.email = email; }

	/** @return the content text, or null if unset */
	public String getContent() { return this.content; }

	/** @param content the content text to store */
	public void setContent(String content) { this.content = content; }

}

<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%>
<%@taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%>
<%-- Search page: posts the keyword to searchServlet.do and, when the request
     carries a non-empty "userList" attribute, renders the hits as a table. --%>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<%-- Must agree with the page directive above; the page contains Chinese text. --%>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>lucene 全文檢索</title>
</head>
<body style="text-align: center;">
	<form action="searchServlet.do" method="post">
		<input type="text" name="keyword" /> <input type="submit" value="搜尋" />
	</form>
	<div style="height: 10px">
	</div>
	<%-- Result table is shown only when there is at least one hit. --%>
	<c:if test="${not empty userList}">
		<div>相關資訊：</div>
		<table border="1" align="center">
			<tr>
				<td>ID</td>
				<td>姓名</td>
				<td>性別</td>
				<td>郵箱</td>
				<td>愛好</td>
				<td>正文</td>
			</tr>
			<c:forEach items="${ userList}" var="user">
				<tr>
					<td>${user.id }</td>
					<td>${user.name }</td>
					<td>${user.sex }</td>
					<td>${user.email }</td>
					<td>${user.dosomething }</td>
					<td>${user.content }</td>
				</tr>
			</c:forEach>
		</table>
	</c:if>
</body>
</html>

程式碼測試：

Lucene建立索引入門案例

最近在學習lucene，參考網上的資料寫了一個簡單搜尋demo；專案jar包： //索引關鍵類 <pre name="code" class="java">package com.lucene.index; import java.io.File; i

ssm學習——Lucene建立索引

一：理論知識　　1.非結構化資料查詢方法　　　　1）順序掃描法　　　　　　太慢，效率不高。　　　　2）全文檢索法　　　　　　對需要查詢的文件建立索引，再對其進行搜尋。其實說白了就是為了使其結構化。　　2.索引建立和搜尋流程圖　　　　1）流程圖　　　　　　　　　　2）索引庫

Lucene建立索引的概念

查詢（Query）：對於全文搜尋，最終都是使用詞項指向一批document文件物件集合，利用對詞

Lucene建立索引流程

原始文件：網際網路上的網頁（爬蟲或蜘蛛）、資料庫中的資料、磁碟上的檔案建立文件物件（非結構化資料）文件物件中的屬性不叫屬性現在成為域。每個 Document 可以有多個 Field ，不同的 Document 可以有不同的 Field，同一個 Document 可以有相

html抽取文字資訊-java版（適合lucene建立索引）

import org.htmlparser.NodeFilter; import org.htmlparser.Parser; import org.htmlparser.beans.StringBean; import org.htmlparser.filters.Css

Lucene建立索引和索引的基本檢索

Author：百知教育 gaozhy 注：演示程式碼所使用jar包版本為 lucene-xxx-5.2.0.jar lucene索引操作建立索引程式碼 try {

Lucene 6.2.1入門教程（一）建立索引和基本搜尋索引

簡單說兩句，Lucene現在高版本的教程很少，網上基本是4.0以下的，目前最新版是6.2.1，所以我試著來寫寫這個版本的教程。至於那些概念什麼的，我就不多說了，大家可以參考以前的舊教程來了解Lucene的體系結構和基本原理。大致說一下Lucene就是通過建立索引這

基於lucene的案例開發：建立索引

從這篇部落格開始，不論是API介紹還是後面的案例開發，都是基於 lucene4.3.1 這個版本，Lucene4.3.1 下載請點選這裡， Lucene其他版本下載請點選這裡，Lucene4.3.1官方API文件請點選這裡。建立索引demo 在

Lucene整理--索引的建立

看lucene主頁（http://lucene.apache.org/）上目前lucene已經到4.9.0版本了，參考學習的書是依照2.1版本講解的，寫的程式碼範例是用的3.

Lucene介紹及簡單入門案例（集成ik分詞器）

介紹：Lucene是apache軟件基金會jakarta項目組的一個子項目，是一個開放源代碼的全文檢索引擎工具包，但它不是一個完整的全文檢索引擎，而是一個

Lucene筆記33-Lucene的擴充套件-使用Tika建立索引並進行搜尋

一、使用Tika建立索引之前建立索引的文件都是txt檔案，現在有了Tika，我們就可以將pdf，word，html等檔案，通過Tika提取出文字，之後建立索引，建立索引的寫法和之前大致相似。只需要將content域對應的值做一下處理，之前是FileReader來讀取，現在是使用Tika.p

mongodb建立索引及索引優化效率案例

查詢監控結果：監控結果儲存在一個特殊的蓋子集合system.profile裡，這個集合分配了128kb的空間，要確保監控分析資料不會消耗太多的系統性資源；蓋子集合維護了自然的插入順序，可以使用 $natural 操作符……

一步一步跟我學習lucene（6）---lucene索引優化之多執行緒建立索引

這兩天工作有點忙，部落格更新不及時，請大家見諒；前面瞭解到lucene在索引建立的時候一個IndexWriter獲取到一個讀寫鎖，這樣勢在lucene建立大資料量的索引的時候，執行效率低下的問題；磁碟空間大小，這個直接影響索引的建立，甚至會造成索引寫入提示完成，但是沒

基於lucene建立實時索引基礎jar包

最近的工作需要自己建立各種內部產品的索引，每次都要重複一樣的工作，不斷的將一個專案下的類檔案拷貝到另一個專案中，然後修改一些配置，這種事情真的很浪費時間，因此自己就總結了這個jar包lucene_4.3.1_fat.jar，這個jar包基於lucene

lucene全文搜尋之三：生成索引欄位，建立索引文件（給索引欄位加權）基於lucene5.5.3

前言：上一章中我們已經實現了索引器的建立，但是我們沒有索引文件，本章將會講解如何生成欄位、建立索引文件，給欄位加權以及儲存文件到索引器目錄一、建立索引文件一個索引目錄可以放多個索引文件，每個索引

lucene 7.2版本（之前用的4.0 的。。發現好多都變了。。）—— 建立索引（記憶體中）

package com.shang.lucene.index.create; import com.shang.lucene.index.abstracts.IndexAbstract; import com.shang.lucene.jdbc.DataBase; imp

lucene的開發環境配置，並實現lucene功能一：建立索引；及程式碼示例

1. 配置開發環境 1.1. Lucene下載 Lucene是開發全文檢索功能的工具包，從官方網站下載Lucene4.10.3，並解壓。版本：lucene4.10.3 Jdk要求：1.7以上 IDE：Eclipse 1.2. 使用的jar包 Lucene包： lu

lucene建立pdf檔案內容全文索引

第一次寫部落格，有很多不足，也許詞不達意，也許有其他問題，作為一個新手，第一次使用lucene，貼出程式碼的目的是為了交流。 idea maven專案，pom依賴： <dependency> <groupId>org.apache.pdfb

Lucene簡單實現建立索引以及查詢

package com.rdz.test; import java.io.File; import java.io.FileReader; import java.io.IOException; import org.apache.lucene.analysis.Ana

4.Lucene3.案例介紹，建立索引，查詢等操作驗證

案例： Article.java package cn.toto.lucene.quickstart; publicclassArticle { privateint

Lucene建立索引入門案例

相關推薦