Lucene API 的簡單使用(Java)
阿新 • • 發佈:2019-02-05
Lucene 是一個全文檢索引擎工具包,下面簡單介紹 Lucene 常用的 API。
1:對一個目錄下面的所有檔案建立索引:
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache .lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache .lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Builds a Lucene index over every file found under a directory tree. Each file
 * becomes one document holding its file name and its text content.
 *
 * Steps:
 * 1. create the analyzer
 * 2. create the IndexWriter (the core component of the indexing process)
 * 3. create a Document and its Fields for each file
 * 4. write the document
 * 5. release the resources
 *
 * @author 韓利鵬
 */
public class CreateLocal {
    // Local directory containing the files to be indexed.
    private static String file_dir = "D:/decstop/luceneTxt";
    // Directory in which the index is stored.
    private static String index_dir = "D:/decstop/index";

    /**
     * Indexes all files under {@code filedir}, descending into sub-directories.
     *
     * Exactly one IndexWriter is opened for the whole run and shared by the
     * recursive walk. Opening a second writer on the same index directory while
     * the first is still open fails on the directory's write lock, so the
     * recursion must not re-enter this method.
     *
     * @param filedir path of the root directory whose files are indexed
     * @throws IllegalArgumentException if a directory in the tree cannot be listed
     * @throws Exception if reading a file or writing the index fails
     */
    public static void index(String filedir) throws Exception {
        // IKAnalyzer is a Chinese analyzer and is the recommended choice when the
        // text contains Chinese. Lucene's own StandardAnalyzer handles Chinese
        // segmentation poorly and is not advised for Chinese text.
        Analyzer analyzer = new IKAnalyzer();
        Directory directory = FSDirectory.open(new File(index_dir));
        try {
            IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer);
            IndexWriter writer = new IndexWriter(directory, cfg);
            try {
                indexDirectory(writer, new File(filedir));
            } finally {
                // Always release the writer (and with it the index write lock).
                writer.close();
            }
        } finally {
            directory.close();
        }
    }

    /** Recursively adds one document per regular file under {@code dir} using the shared writer. */
    private static void indexDirectory(IndexWriter writer, File dir) throws Exception {
        File[] entries = dir.listFiles();
        if (entries == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            throw new IllegalArgumentException("Not a readable directory: " + dir);
        }
        for (File entry : entries) {
            if (entry.isDirectory()) {
                indexDirectory(writer, entry);
            } else {
                writer.addDocument(toDocument(entry));
            }
        }
    }

    /** Builds the document for one file: analyzed and stored "fileName" and "context" fields. */
    private static Document toDocument(File f) throws Exception {
        Document doc = new Document();
        // Store.YES keeps the original value in the index so it can be shown in search results.
        doc.add(new TextField("fileName", f.getName(), Store.YES));
        doc.add(new TextField("context", readText(f), Store.YES));
        return doc;
    }

    /** Reads the whole file as text (platform default charset, as FileReader does). */
    private static String readText(File f) throws Exception {
        StringBuilder text = new StringBuilder();
        BufferedReader reader = new BufferedReader(new FileReader(f));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                // readLine() drops the line terminator; put a separator back so the
                // last word of one line is not fused with the first word of the next.
                text.append(line).append('\n');
            }
        } finally {
            reader.close();
        }
        return text.toString();
    }

    public static void main(String[] args) throws Exception {
        index(file_dir);
    }
}
2:搜尋
package lucene;
import java.io.File;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Searches the local index and prints the matching documents.
 *
 * Steps:
 * 1. create the IndexSearcher, pointing it at the index location
 * 2. create the Query (the search condition)
 * 3. run the query
 * 4. iterate over the results
 * 5. release the resources
 *
 * @author 韓利鵬
 */
public class SearchIndexOnLocal {
    /**
     * Runs the query {@code text1 OR text2} against the "fileName" field of the
     * index at D:/decstop/index and prints the total hit count followed by the
     * stored "fileName" and "context" values of up to 10 hits.
     *
     * @throws Exception if the index cannot be opened or read, or the query cannot be parsed
     */
    public static void doSearch() throws Exception {
        Directory directory = FSDirectory.open(new File("D:/decstop/index"));
        try {
            IndexReader reader = DirectoryReader.open(directory);
            try {
                IndexSearcher search = new IndexSearcher(reader);
                // First argument: default field to search. Second argument: the analyzer,
                // which should be the same one that was used when the index was built.
                // NOTE(review): the indexing example in this article builds the index with
                // IKAnalyzer, while this query is analyzed with StandardAnalyzer - confirm
                // which one is intended and use it on both sides.
                QueryParser parser = new QueryParser("fileName", new StandardAnalyzer());
                // The argument of parse() is a query written in Lucene query syntax.
                Query query = parser.parse("text1 OR text2");
                // Second argument: maximum number of hits to return.
                TopDocs topDocs = search.search(query, 10);
                // Total number of documents matching the query.
                int count = topDocs.totalHits;
                System.out.println("查詢出來的記錄:" + count);
                for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                    // Load the stored fields of the hit through its document id.
                    Document doc = search.doc(scoreDoc.doc);
                    System.out.println("檔名字:" + doc.get("fileName"));
                    System.out.println("檔案的內容:" + doc.get("context"));
                }
            } finally {
                // Close the reader even when parsing or searching fails.
                reader.close();
            }
        } finally {
            directory.close();
        }
    }

    public static void main(String[] args) throws Exception {
        doSearch();
    }
}
3:刪除和修改索引
package lucene;
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Deleting and updating documents in the local index.
 */
public class UpdateIndexOnLocal {
    // Location of the index that is modified.
    private static final String INDEX_DIR = "D:/decstop/index";

    /** Opens an IndexWriter on {@code directory}, configured with a StandardAnalyzer. */
    private static IndexWriter newWriter(Directory directory) throws Exception {
        // NOTE(review): the indexing example in this article builds the index with
        // IKAnalyzer; confirm that StandardAnalyzer is really intended here.
        IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_4_10_3, new StandardAnalyzer());
        return new IndexWriter(directory, cfg);
    }

    /**
     * Deletes every document whose "fileName" field contains the term "text1".
     *
     * @throws Exception if the index cannot be opened or written
     */
    public static void deleteIndex() throws Exception {
        Directory directory = FSDirectory.open(new File(INDEX_DIR));
        try {
            IndexWriter writer = newWriter(directory);
            try {
                // writer.deleteAll() would wipe the entire index - do not use it in real work.
                // Term arguments: field name, then the value to delete by. Deleting is best
                // done on a value that is unique to the document.
                // NOTE(review): presumably the term must equal an indexed token exactly,
                // since no analyzer is applied to it here - verify against the index contents.
                writer.deleteDocuments(new Term("fileName", "text1"));
            } finally {
                // Release the writer even if the delete fails.
                writer.close();
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Replaces the document(s) whose "fileName" field contains the term "text1"
     * with a freshly built document.
     *
     * The replacement carries the same "fileName" value and stores its text in
     * "context", the field name the indexing and search examples in this
     * article use. Without the "fileName" field the new document could never be
     * matched by the same term again, and every run would add one more copy.
     *
     * @throws Exception if the index cannot be opened or written
     */
    public static void updateIndex() throws Exception {
        Directory directory = FSDirectory.open(new File(INDEX_DIR));
        try {
            IndexWriter writer = newWriter(directory);
            try {
                // Build the replacement document and its fields.
                Document doc = new Document();
                doc.add(new TextField("fileName", "text1", Store.YES));
                doc.add(new TextField("context", "abcdefghigk", Store.YES));
                // updateDocument is given the term selecting the old document(s) and
                // the document that replaces them.
                writer.updateDocument(new Term("fileName", "text1"), doc);
            } finally {
                // Release the writer even if the update fails.
                writer.close();
            }
        } finally {
            directory.close();
        }
    }

    public static void main(String[] args) throws Exception {
        //deleteIndex();
        updateIndex();
    }
}
Lucene的搜尋語法: