lucene入門使用
阿新 • • 發佈:2019-01-13
簡介:https://baike.baidu.com/item/Lucene/6753302?fr=aladdin
擴充套件停用詞和新詞:https://blog.csdn.net/u010357298/article/details/80776902
目錄結構:(跟著程式碼練習一遍,效果更好)。注意:摘要、排序和高亮在 Lucene 4 以後有一定改變。
直接上程式碼:
一:建立javabean
/**建立javabean*/ package lucene; import com.alibaba.fastjson.JSON; public class User { private String id; private String userName; private String sal; public User() { } public User(String id, String userName, String sal) { this.id = id; this.userName = userName; this.sal = sal; } public String getId() { return id; } public void setId(String id) { this.id = id; } public String getUserName() { return userName; } public void setUserName(String userName) { this.userName = userName; } public String getSal() { return sal; } public void setSal(String sal) { this.sal = sal; } // @Override // public String toString() { // return "User[id='" + id + "',userName='" + userName + "',sal='" + sal // + "']"; // } /** 序列化 */ @Override public String toString() { // TODO Auto-generated method stub return JSON.toJSONString(this); } }
二:建立索引庫並插入資料
/**建立索引庫並插入資料 */ package lucene; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.InputStreamReader; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; public class AddIndex { public void createIndexDB(String id, String userName, String sal) throws Exception { // 把資料填充到JavaBean物件中 User user = new User(id, userName, sal); // 建立Document物件【匯入的是Lucene包下的Document物件】 Document document = new Document(); // 將JavaBean物件所有的屬性值,均放到Document物件中去,屬性名可以和JavaBean相同或不同 /** * 向Document物件加入一個欄位 引數一:欄位的關鍵字 引數二:字元的值 引數三:是否要儲存到原始記錄表中 YES表示是 NO表示否 * 引數四:是否需要將儲存的資料拆分到詞彙表中 ANALYZED表示拆分 NOT_ANALYZED表示不拆分 * * */ document.add(new Field("id", user.getId(), Field.Store.YES, Field.Index.ANALYZED)); document.add(new Field("userName", user.getUserName(), Field.Store.YES, Field.Index.ANALYZED)); document.add(new Field("sal", user.getSal(), Field.Store.YES, Field.Index.ANALYZED)); // 建立IndexWriter物件 // 目錄指定為E:/createIndexDB Directory directory = FSDirectory.open(new File("E:/createIndexDB")); // 使用標準的分詞演算法對原始記錄表進行拆分 Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30); // LIMITED預設是1W個 IndexWriter.MaxFieldLength maxFieldLength = IndexWriter.MaxFieldLength.LIMITED; /** * IndexWriter將我們的document物件寫到硬碟中 * * 引數一:Directory d,寫到硬碟中的目錄路徑是什麼 引數二:Analyzer a, * 以何種演算法來對document中的原始記錄表資料進行拆分成詞彙表 引數三:MaxFieldLength mfl 最多將文字拆分出多少個詞彙 * * */ IndexWriter indexWriter = new IndexWriter(directory, analyzer, maxFieldLength); // 將Document物件通過IndexWriter物件寫入索引庫中 indexWriter.addDocument(document); // 關閉IndexWriter物件 indexWriter.close(); } public static void main(String[] args) { String id = "2"; String userName = "李四"; String sal = "運維工程師"; AddIndex testIndex = 
new AddIndex(); try { testIndex.createIndexDB(id, userName, sal); System.out.println("新增成功:" + id); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); System.out.println("createIndexDB error"); } /** 讀取檔案內容,存入索引庫 */ try { String classPath = System.getProperties().getProperty("user.dir"); String sep = System.getProperties().getProperty("file.separator"); String pathName = classPath + sep + "date" + sep + "testtitle.txt"; File file = new File(pathName); InputStreamReader reader = new InputStreamReader( new FileInputStream(file), "gbk"); BufferedReader br = new BufferedReader(reader); String stringLine; int count = 0; while ((stringLine = br.readLine()) != null) { count += 1; testIndex.createIndexDB(count + "", stringLine.substring(0, 3), stringLine); System.out.println(stringLine); } br.close(); reader.close(); } catch (Exception e) { // TODO: handle exception System.out.println("createndexDB error"); } } }
三:查詢
package lucene; import java.io.File; import java.net.URLDecoder; import java.net.URLEncoder; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; import org.junit.Test; public class FindIndex { @Test public void findIndexDB() throws Exception { /** * 引數一: IndexSearcher(Directory path)查詢以xxx目錄的索引庫 * * */ Directory directory = FSDirectory.open(new File("E:/createIndexDB")); // 建立IndexSearcher物件 IndexSearcher indexSearcher = new IndexSearcher(directory); // 建立QueryParser物件 /** * 引數一: Version matchVersion 版本號【和上面是一樣的】 引數二:String f,【要查詢的欄位】 * 引數三:Analyzer a【使用的拆詞演算法】 * */ Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30); QueryParser queryParser = new QueryParser(Version.LUCENE_30, "sal", analyzer); // 給出要查詢的關鍵字 String keyWords = "中國"; // 建立Query物件來封裝關鍵字 Query query = queryParser.parse(keyWords); // 用IndexSearcher物件去索引庫中查詢符合條件的前100條記錄,不足100條記錄的以實際為準 TopDocs topDocs = indexSearcher.search(query, 100); // 獲取符合條件的編號 for (int i = 0; i < topDocs.scoreDocs.length; i++) { ScoreDoc scoreDoc = topDocs.scoreDocs[i]; int no = scoreDoc.doc; // 用indexSearcher物件去索引庫中查詢編號對應的Document物件 Document document = indexSearcher.doc(no); // 將Document物件中的所有屬性取出,再封裝回JavaBean物件中去 String id = document.get("id"); String userName = document.get("userName"); String sal = document.get("sal"); User user = new User(id, userName, sal); System.out.println(user); } indexSearcher.close(); } }
四:刪除
package lucene;
import java.io.File;
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
/**
 * Example that removes documents from the index stored at
 * {@code E:/createIndexDB}.
 */
public class TestIndex {

    /**
     * Deletes every document in the index, additionally issues a term-based
     * delete for {@code userName = "李四"}, commits, and reports success.
     *
     * @throws IOException if the index cannot be opened or modified
     */
    @Test
    public void TestIndexDel() throws IOException {
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
        FSDirectory directory = FSDirectory.open(new File("E:/createIndexDB"));
        IndexWriter.MaxFieldLength maxFieldLength = IndexWriter.MaxFieldLength.LIMITED;
        IndexWriter indexWriter = new IndexWriter(directory, analyzer,
                maxFieldLength);
        try {
            indexWriter.deleteAll();
            // Shown to demonstrate deletion by term; after deleteAll() there
            // is nothing left for it to remove.
            // NOTE(review): the term text is compared with indexed terms as-is;
            // if userName was indexed through an analyzer the stored terms may
            // not equal "李四" — confirm before relying on this call alone.
            indexWriter.deleteDocuments(new Term("userName", "李四"));
            indexWriter.commit();
            // Report success only once the deletion has been committed.
            System.out.println("good--已刪除索引庫所有檔案");
        } finally {
            // Close the writer even if a delete or the commit fails.
            indexWriter.close();
        }
    }
}
五:優化
// Multi-field search: the query is parsed against both "userName" and "sal"
// (results come back sorted).
// Version.LUCENE_30 is passed directly because the LuceneUtils helper shown
// in this article exposes no getVersion() method.
QueryParser queryParser1 = new MultiFieldQueryParser(
        Version.LUCENE_30, new String[] { "userName", "sal" },
        LuceneUtils.getAnalyzer());
下面程式碼把lucene封裝成工具包(轉載)
package lucene;
import org.apache.commons.beanutils.BeanUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import java.io.File;
import java.lang.reflect.Field;
import java.lang.reflect.Method;
/**
 * Static helper for the Lucene examples: holds the shared {@link Directory},
 * {@link Analyzer} and max-field-length setting, and converts between
 * JavaBeans and Lucene {@link Document}s.
 *
 * <p>The class is not meant to be instantiated; all members are static.
 */
public class LuceneUtils {
    private static Directory directory;
    private static Analyzer analyzer;
    private static IndexWriter.MaxFieldLength maxFieldLength;

    private LuceneUtils() {
    }

    static {
        try {
            directory = FSDirectory.open(new File("E:/createIndexDB"));
            analyzer = new StandardAnalyzer(Version.LUCENE_30);
            maxFieldLength = IndexWriter.MaxFieldLength.LIMITED;
        } catch (Exception e) {
            // On failure the stack trace is printed and whichever fields were
            // not yet assigned stay null.
            e.printStackTrace();
        }
    }

    /** @return the shared index directory ({@code E:/createIndexDB}) */
    public static Directory getDirectory() {
        return directory;
    }

    /** @return the shared analyzer */
    public static Analyzer getAnalyzer() {
        return analyzer;
    }

    /** @return the shared max-field-length setting */
    public static IndexWriter.MaxFieldLength getMaxFieldLength() {
        return maxFieldLength;
    }

    /**
     * Converts a JavaBean into a Lucene document. Every declared instance
     * field is read through its {@code getXxx()} method and added as a stored,
     * analyzed field named after the property. Properties whose value is
     * {@code null} are left out of the document, and static or synthetic
     * fields are ignored.
     *
     * @param object the JavaBean to convert
     * @return the populated document, or {@code null} if the conversion failed
     *         (for example when a getter is missing); the stack trace is
     *         printed in that case
     */
    public static Document javaBean2Document(Object object) {
        try {
            Document document = new Document();
            Class<?> aClass = object.getClass();
            // getDeclaredFields() also returns private fields, unlike getFields().
            Field[] fields = aClass.getDeclaredFields();
            for (Field field : fields) {
                // Static and synthetic fields are not bean properties and
                // normally have no getter, so they are not indexed.
                if (field.isSynthetic()
                        || java.lang.reflect.Modifier.isStatic(field.getModifiers())) {
                    continue;
                }
                String name = field.getName();
                // Derive the getter name: "userName" -> "getUserName".
                String getterName = "get" + name.substring(0, 1).toUpperCase()
                        + name.substring(1);
                Method getter = aClass.getDeclaredMethod(getterName);
                Object value = getter.invoke(object);
                if (value == null) {
                    // A null property would otherwise raise a
                    // NullPointerException and abort the whole conversion.
                    continue;
                }
                document.add(new org.apache.lucene.document.Field(name,
                        value.toString(),
                        org.apache.lucene.document.Field.Store.YES,
                        org.apache.lucene.document.Field.Index.ANALYZED));
            }
            return document;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Converts a Lucene document back into a JavaBean. A new instance of
     * {@code aClass} is created and, for each declared field, the document
     * value stored under the field's name is applied with
     * {@code BeanUtils.setProperty}.
     *
     * @param document the document to read values from
     * @param aClass   the bean type to instantiate, supplied by the caller
     * @return the populated bean, or {@code null} if the conversion failed;
     *         the stack trace is printed in that case
     */
    public static Object Document2JavaBean(Document document, Class<?> aClass) {
        try {
            Object obj = aClass.newInstance();
            Field[] fields = aClass.getDeclaredFields();
            for (Field field : fields) {
                // Allow access to non-public fields.
                field.setAccessible(true);
                String name = field.getName();
                String value = document.get(name);
                BeanUtils.setProperty(obj, name, value);
            }
            return obj;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Smoke test: converts an empty {@link User} into a document.
     * NOTE(review): this class only has a private constructor, which a JUnit 4
     * runner may reject — confirm how this test is meant to be run.
     */
    @Test
    public void test() {
        User user = new User();
        LuceneUtils.javaBean2Document(user);
    }
}