lucene索引並搜尋mysql資料庫
首先介紹一個jdbc工具類,用於得到Connection物件:
[java] view plain copy print ?
- import java.sql.Connection;
- import java.sql.DriverManager;
- import java.sql.SQLException;
- /**
- * JdbcUtil.java
- * @version 1.0
- * @createTime JDBC獲取Connection工具類
- */
- public class JdbcUtil {
- private static Connection conn = null;
- private static final String URL = "jdbc:mysql://127.0.0.1/project?autoReconnect=true&characterEncoding=utf8";
- private static final String JDBC_DRIVER = "com.mysql.jdbc.Driver";
- private static final String USER_NAME = "root";
- private static final String PASSWORD = "";
- public static Connection getConnection() {
- try {
- Class.forName(JDBC_DRIVER);
- conn = DriverManager.getConnection(URL, USER_NAME, PASSWORD);
- } catch (ClassNotFoundException e) {
- e.printStackTrace();
- } catch (SQLException e) {
- e.printStackTrace();
- }
- return conn;
- }
- }
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
/**
 * JDBC helper that opens connections to the MySQL database used by the examples.
 *
 * @version 1.0
 */
public class JdbcUtil {
    private static final String URL = "jdbc:mysql://127.0.0.1/project?autoReconnect=true&characterEncoding=utf8";
    private static final String JDBC_DRIVER = "com.mysql.jdbc.Driver";
    private static final String USER_NAME = "root";
    private static final String PASSWORD = "";

    /**
     * Loads the JDBC driver and opens a new connection.
     *
     * @return a freshly opened connection, or {@code null} when the driver class
     *         cannot be loaded or the connection attempt fails (the failure is
     *         printed to standard error)
     */
    public static Connection getConnection() {
        // Deliberately a local variable: with a shared static field, a failed attempt
        // would hand back the connection from an earlier call, which the caller may
        // already have closed.
        Connection connection = null;
        try {
            Class.forName(JDBC_DRIVER);
            connection = DriverManager.getConnection(URL, USER_NAME, PASSWORD);
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return connection;
    }
}
然後就是本文的主要內容了,對資料庫資訊進行索引與對索引進行搜尋:
[java] view plain copy print ?
- import java.io.File;
- import java.sql.Connection;
- import java.sql.ResultSet;
- import java.sql.Statement;
- import java.util.ArrayList;
- import java.util.List;
- import org.apache.lucene.analysis.Analyzer;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.document.Field.TermVector;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.queryParser.QueryParser;
- import org.apache.lucene.search.*;
- import org.apache.lucene.store.Directory;
- import org.apache.lucene.store.FSDirectory;
- import org.apache.lucene.util.Version;
- import org.wltea.analyzer.lucene.IKAnalyzer;
- import org.wltea.analyzer.lucene.IKSimilarity;
- /**
- * SearchLogic.java
- * @version 1.0
- * @createTime Lucene資料庫檢索
- */
- public class SearchLogic {
- private static Connection conn = null;
- private static Statement stmt = null;
- private static ResultSet rs = null;
- private String searchDir = "E:\\Test\\Index";
- private static File indexFile = null;
- private static Searcher searcher = null;
- private static Analyzer analyzer = null;
- /** 索引頁面緩衝 */
- private int maxBufferedDocs = 500;
- /**
- * 獲取資料庫資料
- * @return ResultSet
- * @throws Exception
- */
- public List<SearchBean> getResult(String queryStr) throws Exception {
- List<SearchBean> result = null;
- conn = JdbcUtil.getConnection();
- if(conn == null) {
- throw new Exception("資料庫連線失敗!");
- }
- String sql = "select id, username, password, type from account";
- try {
- stmt = conn.createStatement();
- rs = stmt.executeQuery(sql);
- this.createIndex(rs); //給資料庫建立索引,此處執行一次,不要每次執行都建立索引,以後資料有更新可以後臺呼叫更新索引
- TopDocs topDocs = this.search(queryStr);
- ScoreDoc[] scoreDocs = topDocs.scoreDocs;
- result = this.addHits2List(scoreDocs);
- } catch(Exception e) {
- e.printStackTrace();
- throw new Exception("資料庫查詢sql出錯! sql : " + sql);
- } finally {
- if(rs != null) rs.close();
- if(stmt != null) stmt.close();
- if(conn != null) conn.close();
- }
- return result;
- }
- /**
- * 為資料庫檢索資料建立索引
- * @param rs
- * @throws Exception
- */
- private void createIndex(ResultSet rs) throws Exception {
- Directory directory = null;
- IndexWriter indexWriter = null;
- try {
- indexFile = new File(searchDir);
- if(!indexFile.exists()) {
- indexFile.mkdir();
- }
- directory = FSDirectory.open(indexFile);
- analyzer = new IKAnalyzer();
- indexWriter = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
- indexWriter.setMaxBufferedDocs(maxBufferedDocs);
- Document doc = null;
- while(rs.next()) {
- doc = new Document();
- Field id = new Field("id", String.valueOf(rs.getInt("id")), Field.Store.YES, Field.Index.NOT_ANALYZED, TermVector.NO);
- Field username = new Field("username", rs.getString("username") == null ? "" : rs.getString("username"), Field.Store.YES,Field.Index.ANALYZED, TermVector.NO);
- doc.add(id);
- doc.add(username);
- indexWriter.addDocument(doc);
- }
- indexWriter.optimize();
- indexWriter.close();
- } catch(Exception e) {
- e.printStackTrace();
- }
- }
- /**
- * 搜尋索引
- * @param queryStr
- * @return
- * @throws Exception
- */
- private TopDocs search(String queryStr) throws Exception {
- if(searcher == null) {
- indexFile = new File(searchDir);
- searcher = new IndexSearcher(FSDirectory.open(indexFile));
- }
- searcher.setSimilarity(new IKSimilarity());
- QueryParser parser = new QueryParser(Version.LUCENE_30,"username",new IKAnalyzer());
- Query query = parser.parse(queryStr);
- TopDocs topDocs = searcher.search(query, searcher.maxDoc());
- return topDocs;
- }
- /**
- * 返回結果並新增到List中
- * @param scoreDocs
- * @return
- * @throws Exception
- */
- private List<SearchBean> addHits2List(ScoreDoc[] scoreDocs ) throws Exception {
- List<SearchBean> listBean = new ArrayList<SearchBean>();
- SearchBean bean = null;
- for(int i=0 ; i<scoreDocs.length; i++) {
- int docId = scoreDocs[i].doc;
- Document doc = searcher.doc(docId);
- bean = new SearchBean();
- bean.setId(doc.get("id"));
- bean.setUsername(doc.get("username"));
- listBean.add(bean);
- }
- return listBean;
- }
- public static void main(String[] args) {
- SearchLogic logic = new SearchLogic();
- try {
- Long startTime = System.currentTimeMillis();
- List<SearchBean> result = logic.getResult("商家");
- int i = 0;
- for(SearchBean bean : result) {
- if(i == 10)
- break;
- System.out.println("bean.name " + bean.getClass().getName() + " : bean.id " + bean.getId()+ " : bean.username " + bean.getUsername());
- i++;
- }
- System.out.println("searchBean.result.size : " + result.size());
- Long endTime = System.currentTimeMillis();
- System.out.println("查詢所花費的時間為:" + (endTime-startTime)/1000);
- } catch (Exception e) {
- e.printStackTrace();
- System.out.println(e.getMessage());
- }
- }
- }
import java.io.File;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
import org.wltea.analyzer.lucene.IKSimilarity;
/**
* SearchLogic.java
* @version 1.0
* @createTime Lucene資料庫檢索
*/
public class SearchLogic {
private static Connection conn = null;
private static Statement stmt = null;
private static ResultSet rs = null;
private String searchDir = "E:\\Test\\Index";
private static File indexFile = null;
private static Searcher searcher = null;
private static Analyzer analyzer = null;
/** 索引頁面緩衝 */
private int maxBufferedDocs = 500;
/**
* 獲取資料庫資料
* @return ResultSet
* @throws Exception
*/
public List<SearchBean> getResult(String queryStr) throws Exception {
List<SearchBean> result = null;
conn = JdbcUtil.getConnection();
if(conn == null) {
throw new Exception("資料庫連線失敗!");
}
String sql = "select id, username, password, type from account";
try {
stmt = conn.createStatement();
rs = stmt.executeQuery(sql);
this.createIndex(rs); //給資料庫建立索引,此處執行一次,不要每次執行都建立索引,以後資料有更新可以後臺呼叫更新索引
TopDocs topDocs = this.search(queryStr);
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
result = this.addHits2List(scoreDocs);
} catch(Exception e) {
e.printStackTrace();
throw new Exception("資料庫查詢sql出錯! sql : " + sql);
} finally {
if(rs != null) rs.close();
if(stmt != null) stmt.close();
if(conn != null) conn.close();
}
return result;
}
/**
* 為資料庫檢索資料建立索引
* @param rs
* @throws Exception
*/
private void createIndex(ResultSet rs) throws Exception {
Directory directory = null;
IndexWriter indexWriter = null;
try {
indexFile = new File(searchDir);
if(!indexFile.exists()) {
indexFile.mkdir();
}
directory = FSDirectory.open(indexFile);
analyzer = new IKAnalyzer();
indexWriter = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
indexWriter.setMaxBufferedDocs(maxBufferedDocs);
Document doc = null;
while(rs.next()) {
doc = new Document();
Field id = new Field("id", String.valueOf(rs.getInt("id")), Field.Store.YES, Field.Index.NOT_ANALYZED, TermVector.NO);
Field username = new Field("username", rs.getString("username") == null ? "" : rs.getString("username"), Field.Store.YES,Field.Index.ANALYZED, TermVector.NO);
doc.add(id);
doc.add(username);
indexWriter.addDocument(doc);
}
indexWriter.optimize();
indexWriter.close();
} catch(Exception e) {
e.printStackTrace();
}
}
/**
* 搜尋索引
* @param queryStr
* @return
* @throws Exception
*/
private TopDocs search(String queryStr) throws Exception {
if(searcher == null) {
indexFile = new File(searchDir);
searcher = new IndexSearcher(FSDirectory.open(indexFile));
}
searcher.setSimilarity(new IKSimilarity());
QueryParser parser = new QueryParser(Version.LUCENE_30,"username",new IKAnalyzer());
Query query = parser.parse(queryStr);
TopDocs topDocs = searcher.search(query, searcher.maxDoc());
return topDocs;
}
/**
* 返回結果並新增到List中
* @param scoreDocs
* @return
* @throws Exception
*/
private List<SearchBean> addHits2List(ScoreDoc[] scoreDocs ) throws Exception {
List<SearchBean> listBean = new ArrayList<SearchBean>();
SearchBean bean = null;
for(int i=0 ; i<scoreDocs.length; i++) {
int docId = scoreDocs[i].doc;
Document doc = searcher.doc(docId);
bean = new SearchBean();
bean.setId(doc.get("id"));
bean.setUsername(doc.get("username"));
listBean.add(bean);
}
return listBean;
}
public static void main(String[] args) {
SearchLogic logic = new SearchLogic();
try {
Long startTime = System.currentTimeMillis();
List<SearchBean> result = logic.getResult("商家");
int i = 0;
for(SearchBean bean : result) {
if(i == 10)
break;
System.out.println("bean.name " + bean.getClass().getName() + " : bean.id " + bean.getId()+ " : bean.username " + bean.getUsername());
i++;
}
System.out.println("searchBean.result.size : " + result.size());
Long endTime = System.currentTimeMillis();
System.out.println("查詢所花費的時間為:" + (endTime-startTime)/1000);
} catch (Exception e) {
e.printStackTrace();
System.out.println(e.getMessage());
}
}
}
對了上面的類還用到了一個javabean類,如下:
[java] view plain copy print ?
- public class SearchBean {
- private String id;
- private String username;
- public String getId() {
- return id;
- }
- public void setId(String id) {
- this.id = id;
- }
- public String getUsername() {
- return username;
- }
- public void setUsername(String username) {
- this.username = username;
- }
- }
/**
 * Simple value holder for one search hit: the account id and its username.
 */
public class SearchBean {

    /** Account id, kept as text. */
    private String id;

    /** Account username. */
    private String username;

    /** @param id the account id to store */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the stored account id, or {@code null} if none was set */
    public String getId() {
        return this.id;
    }

    /** @param username the username to store */
    public void setUsername(String username) {
        this.username = username;
    }

    /** @return the stored username, or {@code null} if none was set */
    public String getUsername() {
        return this.username;
    }
}
這些程式碼大部分都是我在網上找到的doc文件中複製貼上而來,本著“拿來主義”,我對這些程式碼修改不大,經測試,這些程式碼能夠正常執行。
寫了幾篇部落格,對lucene的使用方式也越來越清楚,在這裡也很有必要總結一下:
使用lucene包括兩個步驟,分別是索引和搜尋。
•索引過程如下:
◦ 建立一個IndexWriter用來寫索引檔案,它有幾個引數,INDEX_DIR就是索引檔案所存放的位置,Analyzer便是用來對文件進行詞法分析和語言處理的。
◦ 建立一個Document代表我們要索引的文件。
◦ 將不同的Field加入到文件中。我們知道,一篇文件有多種資訊,如題目,作者,修改時間,內容等。不同型別的資訊用不同的Field來表示。
◦ IndexWriter呼叫函式addDocument將索引寫到索引資料夾中。
•搜尋過程如下:
◦ IndexReader將磁碟上的索引資訊讀入到記憶體,INDEX_DIR就是索引檔案存放的位置。
◦ 建立IndexSearcher準備進行搜尋。
◦ 建立Analyzer用來對查詢語句進行詞法分析和語言處理。
◦ 建立QueryParser用來對查詢語句進行語法分析。
◦ QueryParser呼叫parse進行語法分析,形成查詢語法樹,放到Query中。
◦ IndexSearcher呼叫search對查詢語法樹Query進行搜尋,得到結果TopScoreDocCollector。
對了,必須說一下,上面的例子還用到了一個新的jar包IKAnalyzer.jar包,它是一個開源的中文分詞器,如果不使用這個分詞器,那麼將無法解析中文,比如說我的第一篇關於Lucene的部落格就無法解析中文字串!