1. 程式人生 > >Lucene簡單總結

Lucene簡單總結

public 離開 api 分割 red 字符串 exception 獲取 let

Lucene

API

Document

Document:文檔對象,是一條原始數據

文檔編號 文檔內容
1 谷歌地圖之父跳槽FaceBook
2 谷歌地圖之父加盟FaceBook
3 谷歌地圖創始人拉斯離開谷歌加盟Facebook
4 谷歌地圖之父跳槽Facebook與Wave項目取消有關
5 谷歌地圖之父拉斯加盟社交網站Facebook

一條記錄就是一個 Document,Document 的每一個字段就是一個 Field。
技術分享圖片

Field

技術分享圖片

創建索引

// Directory on disk that holds the index; every helper below opens this same location.
private static final File INDEX_FILE = new File("E:\\DevelopTools\\indexDir");

/**
 * Adds one document to the index stored at {@code INDEX_FILE} and commits it.
 *
 * <p>The directory and the writer are managed by try-with-resources so that both are
 * closed even when adding or committing fails. Without that, a failed call would leave
 * the writer open (and, per the Lucene documentation, its write lock held), which makes
 * every later attempt to open a writer on the same index fail.
 *
 * @param doc the document to add to the index
 * @throws Exception if the index cannot be opened, written to, or committed
 */
public static void indexCreate(Document doc) throws Exception {
    // Analyzer (tokenizer) applied to analyzed fields.
    Analyzer analyzer = new IKAnalyzer();
    // Writer configuration: first argument is the Lucene version (Version.LATEST),
    // second argument is the analyzer.
    IndexWriterConfig conf = new IndexWriterConfig(Version.LATEST, analyzer);
    // FSDirectory stores the index on the file system (RAMDirectory would keep it in memory).
    // Resources are closed in reverse order: the writer first, then the directory.
    try (Directory dir = FSDirectory.open(INDEX_FILE);
         IndexWriter indexWriter = new IndexWriter(dir, conf)) {
        // Write the document into the index.
        indexWriter.addDocument(doc);
        // Make the change durable and visible to newly opened readers.
        indexWriter.commit();
    }
}
/** Builds a sample document with id, title and context fields and indexes it via {@code indexCreate}. */
@Test
public void createTest() throws Exception {
    // Field values of the sample record.
    final String title = "谷歌地圖之父跳槽FaceBook";
    final String context = "據國外媒體報道,曾先後負責谷歌地圖和Wave開發工作的拉斯·拉斯姆森(Lars Rasmussen)已經離開谷歌,並將加盟Facebook。";

    // One record is one Document; each column of the record is one Field.
    Document sample = new Document();
    sample.add(new LongField("id", 1L, Store.YES));
    sample.add(new TextField("title", title, Store.YES));
    sample.add(new TextField("context", context, Store.YES));

    indexCreate(sample);
}

查詢索引

/**
 * Runs {@code query} against the index at {@code INDEX_FILE} and prints the hits to
 * standard output: the total hit count, then for each returned hit its document number,
 * score, and the stored {@code id}, {@code title} and {@code context} fields.
 *
 * <p>The directory and the reader are managed by try-with-resources, so they are released
 * even when searching or loading a document throws.
 *
 * @param query the query to execute
 * @param n     maximum number of top-scoring hits to return; must not be {@code null}
 *              because it is unboxed when passed to the searcher
 * @throws IOException if the index cannot be opened or read
 */
public static void indexSearcher(Query query, Integer n) throws IOException {
    // Open the index directory and a reader on it; closed in reverse order on exit.
    try (Directory dir = FSDirectory.open(INDEX_FILE);
         IndexReader indexReader = DirectoryReader.open(dir)) {
        // Search facade over the reader.
        IndexSearcher searcher = new IndexSearcher(indexReader);
        // Execute the search; TopDocs carries the total hit count and the top n hits.
        TopDocs topDocs = searcher.search(query, n);
        // Total number of documents matching the query.
        System.out.println("一共命中:" + topDocs.totalHits + "條數據");
        // Each ScoreDoc holds a document number and its score.
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            int docID = scoreDoc.doc;
            float score = scoreDoc.score;
            System.out.println("文檔編號:" + docID);
            System.out.println("文檔得分:" + score);
            // Load the stored fields of the hit through the reader.
            Document document = indexReader.document(docID);
            System.out.println("id:" + document.get("id"));
            System.out.println("title:" + document.get("title"));
            System.out.println("context:" + document.get("context"));
        }
    }
}
/** Parses the keyword "谷歌" against the {@code title} field and prints the best matches. */
@Test
public void searchTest() throws Exception {
    // Upper bound on the number of top-scoring hits to print.
    final int maxHits = 10;

    // Single-field query parser: default field "title", analyzed with IKAnalyzer.
    QueryParser titleParser = new QueryParser("title", new IKAnalyzer());

    // Parse the keyword into a Query and run the search.
    indexSearcher(titleParser.parse("谷歌"), maxHits);

    /* Multi-field query parser alternative:
    MultiFieldQueryParser parser = new MultiFieldQueryParser(new String[]{"id","title"}, new IKAnalyzer());
    Query query = parser.parse("1");*/
}

各種其他查詢方式

// Term query: the search text must be a single, smallest-granularity token that cannot be split further.
Query query = new TermQuery(new Term("title", "谷歌"));

// Wildcard query: '?' matches one character, '*' matches multiple characters.
Query query = new WildcardQuery(new Term("title", "*歌*"));

// Fuzzy query. Arguments: 1 - the term (field plus keyword; the keyword may contain typos);
// 2 - the maximum allowed edit distance, which must not exceed 2 (range 0~2).
Query query = new FuzzyQuery(new Term("title", "facebool"), 1);

// Numeric range query, for non-String (numeric) field values.
// Arguments: 1 - field; 2 - minimum; 3 - maximum; 4 - include minimum; 5 - include maximum.
Query query = NumericRangeQuery.newLongRange("id", 2l, 4l, true, true);

// Combined (boolean) query. Intersection: Occur.MUST + Occur.MUST;
// union: Occur.SHOULD + Occur.SHOULD; exclusion: Occur.MUST_NOT.
BooleanQuery query = new BooleanQuery();
// Clause 1: id in the inclusive range [2, 4].
query.add(NumericRangeQuery.newLongRange("id", 2L, 4L, true, true), Occur.SHOULD);
// Clause 2: title matching the wildcard pattern.
query.add(new WildcardQuery(new Term("title", "*歌*")), Occur.SHOULD);

修改索引

/**
 * Replaces the documents matching {@code term} with {@code doc} in the index at
 * {@code INDEX_FILE} and commits the change.
 *
 * <p>An update is essentially delete-then-add: every document matching the term is
 * deleted and the new document is then added. Updates should therefore normally be keyed
 * on a field whose value is unique per document.
 *
 * <p>The directory and the writer are managed by try-with-resources so that both are
 * closed even when the update or the commit fails.
 *
 * @param term identifies the documents to be replaced
 * @param doc  the document to add in their place
 * @throws IOException if the index cannot be opened, written to, or committed
 */
public static void indexUpdate(Term term, Document doc) throws IOException {
    Analyzer analyzer = new IKAnalyzer();
    IndexWriterConfig conf = new IndexWriterConfig(Version.LATEST, analyzer);
    // Resources are closed in reverse order: the writer first, then the directory.
    try (Directory dir = FSDirectory.open(INDEX_FILE);
         IndexWriter indexWriter = new IndexWriter(dir, conf)) {
        indexWriter.updateDocument(term, doc);
        indexWriter.commit();
    }
}
/** Replaces the documents whose {@code title} contains the term "facebook" with a new sample document. */
@Test
public void updateTest() throws Exception {
    // Replacement document.
    Document replacement = new Document();
    replacement.add(new LongField("id", 1L, Store.YES));
    replacement.add(new TextField("title", "谷歌地圖之父跳槽FaceBook", Store.YES));
    replacement.add(new TextField("context", "河馬程序員加盟FaceBook", Store.YES));

    // Documents matching this term are deleted before the replacement is added.
    Term selector = new Term("title", "facebook");
    indexUpdate(selector, replacement);
}

刪除索引

/**
 * Deletes the documents matching {@code term} from the index at {@code INDEX_FILE}
 * and commits the change.
 *
 * <p>Note from the original author: deleting by id through a term requires the id field
 * to be of string type.
 *
 * <p>The directory and the writer are managed by try-with-resources so that both are
 * closed even when the delete or the commit fails.
 *
 * @param term identifies the documents to delete
 * @throws IOException if the index cannot be opened, written to, or committed
 */
public static void indexDelete(Term term) throws IOException {
    Analyzer analyzer = new IKAnalyzer();
    IndexWriterConfig conf = new IndexWriterConfig(Version.LATEST, analyzer);
    // Resources are closed in reverse order: the writer first, then the directory.
    try (Directory dir = FSDirectory.open(INDEX_FILE);
         IndexWriter indexWriter = new IndexWriter(dir, conf)) {
        indexWriter.deleteDocuments(term);
        indexWriter.commit();
    }
}

/**
 * Deletes every document from the index at {@code INDEX_FILE} and commits the change.
 *
 * <p>The directory and the writer are managed by try-with-resources so that both are
 * closed even when the delete or the commit fails.
 *
 * @throws IOException if the index cannot be opened, written to, or committed
 */
public static void indexDeleteAll() throws IOException {
    Analyzer analyzer = new IKAnalyzer();
    IndexWriterConfig conf = new IndexWriterConfig(Version.LATEST, analyzer);
    // Resources are closed in reverse order: the writer first, then the directory.
    try (Directory dir = FSDirectory.open(INDEX_FILE);
         IndexWriter indexWriter = new IndexWriter(dir, conf)) {
        indexWriter.deleteAll();
        indexWriter.commit();
    }
}
@Test
public void deleteTest() throws Exception {
    /*
     * Alternative: delete only the documents matching a term, e.g.
     * Term term = new Term("context", "facebook"); indexDelete(term);
     */
    // Remove every document from the index.
    indexDeleteAll();
}

Lucene簡單總結