luceneAPI的簡單使用（java）

阿新 • • 發佈：2019-02-05

Lucene 是一個全文檢索引擎工具包，下面簡單介紹 Lucene 常用的 API。

1：對一個目錄下面的所有檔案建立索引：


import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache 
.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache 
.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Builds a Lucene index over every file below a directory, indexing both the
 * file name and the file content.
 * <ol>
 *   <li>Create the analyzer.</li>
 *   <li>Create the IndexWriter, the core component of the indexing process.</li>
 *   <li>Create one Document (with its Fields) per file.</li>
 *   <li>Write the documents to the index.</li>
 *   <li>Release the resources.</li>
 * </ol>
 * @author 韓利鵬
 */
public class CreateLocal {

    // Local directory containing the files to be indexed.
    private static String file_dir = "D:/decstop/luceneTxt";
    // Directory in which the index is stored.
    private static String index_dir = "D:/decstop/index";

    /**
     * Indexes every regular file found under {@code filedir}, descending into
     * sub-directories, and writes the result to {@code index_dir}.
     *
     * <p>A single IndexWriter is opened for the whole traversal. Opening a new
     * writer per sub-directory (as a naive recursive call of this method would)
     * fails because the outer writer still holds the index write lock.
     *
     * @param filedir path of the root directory to index
     * @throws IllegalArgumentException if {@code filedir} is not a directory
     * @throws Exception if reading a file or writing the index fails
     */
    public static void index(String filedir) throws Exception {
        File root = new File(filedir);
        if (!root.isDirectory()) {
            // Fail before the index is touched instead of hitting a
            // NullPointerException on listFiles() later.
            throw new IllegalArgumentException("Not a directory: " + filedir);
        }

        // IKAnalyzer is a Chinese analyzer. Lucene's own StandardAnalyzer
        // handles Chinese text poorly and is not recommended for it.
        Analyzer analyzer = new IKAnalyzer();
        IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer);

        // try-with-resources closes the writer (releasing the write lock) and
        // the directory even when indexing fails half-way.
        try (Directory directory = FSDirectory.open(new File(index_dir));
             IndexWriter writer = new IndexWriter(directory, cfg)) {
            indexDirectory(root, writer);
        }
    }

    /** Recursively adds one document per regular file below {@code dir} to {@code writer}. */
    private static void indexDirectory(File dir, IndexWriter writer) throws Exception {
        File[] entries = dir.listFiles();
        if (entries == null) {
            // listFiles() returns null when the directory cannot be read; skip it.
            return;
        }
        for (File entry : entries) {
            if (entry.isDirectory()) {
                indexDirectory(entry, writer);
            } else {
                writer.addDocument(toDocument(entry));
            }
        }
    }

    /** Builds the document for one file: its name ("fileName") and its text ("context"). */
    private static Document toDocument(File f) throws Exception {
        StringBuilder text = new StringBuilder();
        // NOTE: FileReader decodes with the platform default charset.
        try (BufferedReader reader = new BufferedReader(new FileReader(f))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // readLine() strips the line terminator; put a separator back so
                // the last word of one line is not fused with the first word of
                // the next one.
                text.append(line).append('\n');
            }
        }

        Document doc = new Document();
        // Store.YES keeps the original value in the index, so a search can
        // return it. TextField (unlike StringField) is run through the analyzer.
        doc.add(new TextField("fileName", f.getName(), Store.YES));
        doc.add(new TextField("context", text.toString(), Store.YES));
        return doc;
    }

    public static void main(String[] args) throws Exception {
        index(file_dir);
    }
}

2：搜尋

package lucene;

import java.io.File;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Searches the index.
 * <ol>
 *   <li>Create the IndexSearcher, pointing it at the index location.</li>
 *   <li>Create the Query (the search condition).</li>
 *   <li>Run the query.</li>
 *   <li>Iterate over the hits.</li>
 *   <li>Release the resources.</li>
 * </ol>
 * @author 韓利鵬
 */
public class SearchIndexOnLocal {
    /**
     * Runs the query {@code text1 OR text2} against the "fileName" field of the
     * index at {@code D:/decstop/index} and prints the total hit count followed
     * by the stored "fileName" and "context" values of up to 10 hits.
     *
     * @throws Exception if the index cannot be opened or read, or the query
     *         string cannot be parsed
     */
    public static void doSearch() throws Exception{
        File indexFile = new File("D:/decstop/index");

        // try-with-resources closes the reader and the directory even when
        // parsing or searching throws.
        try (Directory directory = FSDirectory.open(indexFile);
             IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher search = new IndexSearcher(reader);

            // First argument: default field to search. Second: the analyzer,
            // which should be the same one used at index time.
            // NOTE(review): CreateLocal indexes with IKAnalyzer, not
            // StandardAnalyzer - confirm which one is intended here.
            QueryParser parser = new QueryParser("fileName", new StandardAnalyzer());
            // The argument is a query written in Lucene's query syntax.
            Query query = parser.parse("text1 OR text2");

            // Second argument: maximum number of hits to return.
            TopDocs topDocs = search.search(query, 10);
            // Total number of documents matching the query.
            int count = topDocs.totalHits;
            System.out.println("查詢出來的記錄："+count);

            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                // Load the stored fields of the hit by its document id.
                Document doc = search.doc(scoreDoc.doc);
                System.out.println("檔名字："+doc.get("fileName"));
                System.out.println("檔案的內容："+doc.get("context"));
            }
        }
    }

    public static void main(String[] args) throws Exception {
        doSearch();
    }

}

3：刪除和修改索引

package lucene;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Deletes and updates documents in the index.
 */
public class UpdateIndexOnLocal {
    // Location of the index that is modified.
    private static final String INDEX_DIR = "D:/decstop/index";

    /** Creates the writer configuration shared by delete and update. */
    private static IndexWriterConfig newConfig() {
        // NOTE(review): CreateLocal builds the index with IKAnalyzer; documents
        // written here are analyzed with StandardAnalyzer - confirm this is intended.
        Analyzer analyzer = new StandardAnalyzer();
        return new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer);
    }

    /**
     * Deletes every document whose "fileName" field contains the term "text1".
     *
     * @throws Exception if the index cannot be opened or written
     */
    public static void deleteIndex() throws Exception{
        // try-with-resources closes the writer (releasing the write lock) and
        // the directory even when the delete fails.
        try (Directory directory = FSDirectory.open(new File(INDEX_DIR));
             IndexWriter writer = new IndexWriter(directory, newConfig())) {
            // writer.deleteAll() would wipe the whole index - avoid it in real work.
            // Term arguments: field name, value to match. Deleting by a value
            // that is unique per document is the safest choice.
            writer.deleteDocuments(new Term("fileName", "text1"));
        }
    }

    /**
     * Replaces every document whose "fileName" field contains the term "text1"
     * with a new document holding only a "context" field.
     *
     * <p>The field is named "context" to match the field written by the indexer
     * and read by the search example. The replacement document has no
     * "fileName" field, so it can no longer be found by file name afterwards.
     *
     * @throws Exception if the index cannot be opened or written
     */
    public static void updateIndex() throws Exception{
        try (Directory directory = FSDirectory.open(new File(INDEX_DIR));
             IndexWriter writer = new IndexWriter(directory, newConfig())) {
            // Build the replacement document.
            Document doc = new Document();
            doc.add(new TextField("context", "abcdefghigk",Store.YES));
            // updateDocument deletes the documents matching the term, then adds doc.
            writer.updateDocument(new Term("fileName","text1"), doc);
        }
    }

    public static void main(String[] args) throws Exception {
        //deleteIndex();
        updateIndex();
    }

}

Lucene的搜尋語法：

luceneAPI的簡單使用（java）

lucene是一個全文檢索引擎工具包，下面來簡單的介紹下Lucene常用的API介紹 1：對一個檔案下面的所有檔案進行索引建立： import java.io.BufferedReader; import java.io.File; import java

（java）selenium webdriver學習---實現簡單的翻頁，將頁面內容的標題和標題鏈接取出

prop imp current inter 並且常見問題 activity num div selenium webdriver學習---實現簡單的翻頁，將頁面內容的標題和標題鏈接取出；該情況適合能能循環page=1~n,並且每個網頁隨著循環可以打開的情況，註意一定

簡單易用的參數校驗北京PK10平臺出租和版本校驗方式（java）

else runtime boot java pri conf utils 短信 con 步驟是：配置數據校驗規則（多條）>>>>在控制器上添加校驗註解>>>>使用攔截器攔截校驗參數(獲取註解，初始化校驗規則（第一次），取出參

MQTT簡單demo（java）

停用隊列設置 msg 的區別多個指定 throwable ica 　　上次已經簡單的談了一些MQTT協議的一些知識，今天就來就上次的知識具體的Java實現。　　現在就來具體說說實現這一步吧。中間的時間也是有點久。　　MQTT消息的發送和訂閱都是依賴MQTT服務器

（Java）IDEA 建立一個簡單的 “Hello world”

早就聽說 IDEA 好用，今天晚上終於下載下來了，下載---安裝--破解一氣呵成~ 起飛的地址看下面，這哥們的部落格也很帥~ https://www.cnblogs.com/jajian/p/79890

簡單的折半查詢演算法（Java）

折半查詢的優點是能夠最大減少查詢的時間複雜度，但也對查詢的目標陣列或集合提出一定要求，即陣列或集合中的數字排序必須是有序的，否則不能用折半查詢，只能用遍歷。以下是簡單的折半查詢演算法： public class test { public static void main

簡單的模擬電話簿程式（java）

要求：編寫一個模擬電話簿程式，實現對聯絡人的增刪查提示： HashMap儲存聯絡人，Key為聯絡人姓名，Value為其電話號碼在console控制檯中模擬增刪查改

簡單氣泡排序演算法（Java）

PS:這是個人學習中的一些記錄，貼出來和大家一起交流，不妥之處，請多指教，共同進步！ public class Arithmetic { public static void main(String[] args) { int[] arr = { 12, 25, 22, 3

簡單密碼驗證（java）

2018/2/3 java 要求 1.使用者名稱最多25個字元，不能包含空格，單雙引號，問號等特殊符號 2.密碼要6到18位，只能包含字母數字，特殊符號注意判斷密碼強度： 1.只為純數字純字母為2星 2.為字母加數字，字母加符號，符號加數字為4星 3.三者都有為

簡單工廠（Java）

簡單工廠不是一個標準的設計模式，但是因其常用，簡單而又神奇，故把它放到設計模式中。一：介面的回顧1：Java中介面的概念在Java中介面是一種特殊的抽象類，根一般的抽象類相比，接口裡面的所有方法都是抽象方法，接口裡面的所有屬性都是常量。即接口裡面只有方法定義而沒有任何方法實現

簡單易用的引數校驗和版本校驗方式（java）

步驟是：配置資料校驗規則（多條）>>>>在控制器上新增校驗註解>>>>使用攔截器攔截校驗引數(獲取註解，初始化校驗規則（第一次），取出引數) 我的專案中使用的MVC框架為Jfinal框架，其他框架也適用只需要將攔截器部分修改即

安卓（java）計算器簡單實現

博主q q 656358805 歡迎線上交流！以下兩圖是計算器的結果展示：好了，那麼今天我們來講一下安卓計算器的簡單實現，對於廣大愁於

排序算法之高速排序（Java）

大於一個數大小 main div 移動 swap 交換 system //高速排序 public class Quick_Sort { // 排序的主要算法 private int Partition(int[] data, int start, int en

《劍指offer》面試題39 二叉樹的深度（java）

設計模式博客 rgs 歷史存在復制 pri 取值今天摘要：今天翻到了《劍指offer》面試題39，題目二中的解法二是在函數的參數列表中通過指針的方式進行傳值，而java是沒有指針的，所以函數要進行改造。然而我翻了下別人的java版本（我就想看看有什麽高大上的改造

selenium測試（Java）--元素操作（五）

ear 獲取驗證碼 wid 輸入 static pri 元素 sta email 元素的操作有 1. 清除文本 2. 模擬按鍵輸入 3. 單擊元素 4. 返回元素尺寸 5. 獲取文本 6. 獲取屬性值 7. 判斷是否可見 8. 提交下面通過操

算法筆記_219:泊松分酒（Java）

ava import 輸入 block -s 數學步驟 ner 命名目錄 1 問題描述 2 解決方案 1 問題描述泊松是法國數學家、物理學家和力學家。他一生致力科學事業，成果頗多。有許多著名的公式定理以他的名字命名，比如概率論中著名的泊松分布。有一次閑

算法筆記_220:猜算式（Java）

情況 name 描述 -s out 解決 string check 問題目錄 1 問題描述 2 解決方案 1 問題描述看下面的算式： □□ x □□ = □□ x □□□ 它表示：兩個兩位數相乘等於一個兩位數乘以一個三位數。如果沒有限定條件，這樣的例子很多

《編程導論（Java）·2.1.2 啊，我看到了多態》-什麽是多態(polymorphism)

img page ria 使用方法 ride idt void one 標題 1.不明覺厲很多人學習多態時，會認為。之所以不明覺厲，由於多態的定義：事物存在的多種表現形態；而後，有人將重載(overload)、改寫(override)、多態變量和泛型歸結於同一個

《劍指offer》面試題43 n個骰子的點數（java）

r+ nal ret 次循環分而治之源碼 ava 面試 ble 引言：寫這篇文章的初衷只是想做個筆記，因為這道題代碼量有點大，有點抽象，而書上並沒有詳細的註釋。為了加深印象和便於下次復習，做個記錄。原題：把n個骰子扔到地上，所有骰子朝上一面的點數之後為s. 輸入n，打

如何打印一棵樹（Java）

.get stat color util emp println style ldl 多叉樹有一棵多叉樹，將它打印出來。 import java.util.LinkedList; /** * 需求：按層打印一棵樹 * 說明：樹是保存在一個鏈表中 *

luceneAPI的簡單使用（java）

相關推薦