1. 程式人生 > >luceneAPI的簡單使用(java)

luceneAPI的簡單使用(java)

Lucene 是一個全文檢索引擎工具包,下面簡單介紹 Lucene 常用的 API。

1:對一個目錄下面的所有檔案建立索引:


import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache
.lucene.document.Field; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.store.Directory; import org.apache
.lucene.store.FSDirectory; import org.apache.lucene.util.Version; import org.wltea.analyzer.lucene.IKAnalyzer; /** * 對一個檔案裡面的內容建立索引,包括檔名和檔案的內容 * 1:建立分詞器。標準分詞器 * 2:建立IndexWriter IndexWriter是索引過程中的核心元件 * 3:建立document 建立Field * 4:寫入 * 5:關閉資源 * @author 韓利鵬 */ public class CreateLocal { private static String file_dir = "D:/decstop/luceneTxt"
;// 帶索引的本地檔案目錄 // 索引存放目錄 private static String index_dir = "D:/decstop/index"; public static void index(String filedir) throws Exception { //1:建立中文分詞器 //Analyzer analyzer = new StandardAnalyzer(); //lucene自帶的標準分詞器,對中文的分詞支援不好,做中文的分詞不建議使用 Analyzer analyzer = new IKAnalyzer(); //IKAnalyzer是一箇中文分詞器,有中文的時候建議使用這個 File indexfile = new File(index_dir); Directory directorty = FSDirectory.open(indexfile); IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer); //2:建立indexwriter IndexWriter writer = new IndexWriter(directorty, cfg); // 遍歷目錄 迴圈讀取檔案 逐個建立檔案索引 File file = new File(filedir); File[] files = file.listFiles(); for (File f : files) { if (f.isDirectory()) { index(f.getPath()); } else { Document doc = new Document(); // 檔名字 // Store:如果是yes 則說明儲存到文件中 //Field name = new StringField("fileName", f.getName(), Store.YES); Field name = new TextField("fileName", f.getName(), Store.YES); // 檔案內容 BufferedReader reader = new BufferedReader(new FileReader(f)); String temp = null; StringBuffer sb = new StringBuffer(); while ((temp = reader.readLine()) != null) { sb.append(temp); } reader.close(); String context = sb.toString(); // 把檔案內容讀到索引中去 這樣在搜尋索引時就能看到 Field body = new TextField("context", context, Store.YES); doc.add(name); doc.add(body); writer.addDocument(doc); } } writer.close(); } public static void main(String[] args) throws Exception { index(file_dir); } }

2:搜尋

package lucene;

import java.io.File;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Searches the local index and prints the matching documents.
 *
 * <p>Steps:
 * <ol>
 *   <li>create the {@link IndexSearcher} over the index directory;</li>
 *   <li>create the {@link Query} (the search condition);</li>
 *   <li>execute the query;</li>
 *   <li>iterate over the results;</li>
 *   <li>release the resources.</li>
 * </ol>
 *
 * @author 韓利鵬
 */
public class SearchIndexOnLocal {
    /**
     * Runs the query {@code text1 OR text2} against the {@code fileName} field
     * and prints the total hit count followed by the stored {@code fileName}
     * and {@code context} values of up to ten hits.
     *
     * @throws Exception if the index cannot be opened or read, or the query
     *         string cannot be parsed
     */
    public static void doSearch() throws Exception{
        File indexFile = new File("D:/decstop/index");

        // try-with-resources closes the reader and the directory even when
        // parsing or searching throws.
        try (Directory directory = FSDirectory.open(indexFile);
             IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher search = new IndexSearcher(reader);

            // First argument: default field to search. Second argument: the analyzer,
            // which should match the one used when the index was built.
            // NOTE(review): the indexing example in this article uses IKAnalyzer,
            // not StandardAnalyzer - confirm which one is intended here.
            QueryParser parser = new QueryParser("fileName", new StandardAnalyzer());
            // The argument is a query written in Lucene's query syntax.
            Query query = parser.parse("text1 OR text2");

            // The int argument limits how many top hits are returned.
            TopDocs topDocs = search.search(query, 10);
            // Total number of documents matching the query.
            int count = topDocs.totalHits;
            System.out.println("查詢出來的記錄:"+count);

            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                // Load the stored fields of the hit through its document id.
                Document doc = search.doc(scoreDoc.doc);
                System.out.println("檔名字:"+doc.get("fileName"));
                System.out.println("檔案的內容:"+doc.get("context"));
            }
        }
    }

    public static void main(String[] args) throws Exception {
        doSearch();
    }

}

3:刪除和修改索引

package lucene;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Deletes and updates documents in the local index.
 */
public class UpdateIndexOnLocal {
    /** Directory in which the index is stored. */
    private static final String INDEX_DIR = "D:/decstop/index";

    /** Creates the writer configuration shared by the delete and update operations. */
    private static IndexWriterConfig newWriterConfig() {
        // NOTE(review): the indexing example in this article uses IKAnalyzer;
        // confirm whether StandardAnalyzer is intended for updates.
        Analyzer analyzer = new StandardAnalyzer();
        return new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer);
    }

    /**
     * Deletes the documents whose {@code fileName} field contains the term
     * {@code text1}.
     *
     * @throws Exception if the index cannot be opened or written
     */
    public static void deleteIndex() throws Exception{
        // try-with-resources closes the writer, then the directory, even when
        // the delete fails.
        try (Directory directory = FSDirectory.open(new File(INDEX_DIR));
             IndexWriter writer = new IndexWriter(directory, newWriterConfig())) {
            // writer.deleteAll() would wipe the whole index - avoid it in real work.
            // Term arguments: field name, then the value to match. Deleting by a
            // value that is unique per document is the safest choice.
            writer.deleteDocuments(new Term("fileName", "text1"));
        }
    }

    /**
     * Replaces the documents whose {@code fileName} field contains the term
     * {@code text1} with a new document.
     *
     * @throws Exception if the index cannot be opened or written
     */
    public static void updateIndex() throws Exception{
        try (Directory directory = FSDirectory.open(new File(INDEX_DIR));
             IndexWriter writer = new IndexWriter(directory, newWriterConfig())) {
            // The body field is named "context" so that it matches the field
            // written by the indexing example and read by the search example.
            Document doc = new Document();
            doc.add(new TextField("context", "abcdefghigk",Store.YES));

            // updateDocument deletes the matching documents and adds doc in their
            // place, so any field not added to doc is absent from the result.
            writer.updateDocument(new Term("fileName","text1"), doc);
        }
    }

    public static void main(String[] args) throws Exception {
        //deleteIndex();
        updateIndex();
    }

}

Lucene的搜尋語法: