Lucene4.7.2簡單例子二
阿新 • 發佈:2019-01-25
/*
* Copyright (C) 2015 ShenZhen tianlang Co.,Ltd All Rights Reserved.
* 未經本公司正式書面同意,其他任何個人、團體不得使用、複製、修改或釋出本軟體.
* 版權所有深圳天狼服務有限公司 www.tianlang.com.
*/
package com.tg.lucene.userdemo;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Lucene 4.7 usage demo: builds an index from a text file, searches it,
 * and demonstrates Chinese tokenization with the IK analyzer.
 *
 * @version 2016年11月25日上午10:24:12
 * @author wuliu
 */
public class Demo {
    // Location of the source text file to be indexed
    private static String filePath = "G:/lucene/a.txt";
    // Directory where the Lucene index is stored
    private static String indexDirectory = "G:/lucene/index2";
    // Lucene version constant shared by analyzers and the writer config
    private static final Version VERSION = Version.LUCENE_47;

    /**
     * Rebuilds the index from the source file: clears all existing documents,
     * then adds a single document with title/content/version/score fields.
     *
     * @throws Throwable if the index cannot be opened, the file cannot be
     *         read, or writing fails
     */
    public static void createIndex() throws Throwable {
        // try-with-resources closes the directory and writer even when
        // indexing throws (the original leaked both on any exception)
        try (Directory directory = FSDirectory.open(new File(indexDirectory))) {
            Analyzer analyzer = new StandardAnalyzer(VERSION);
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(VERSION, analyzer);
            try (IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig)) {
                indexWriter.deleteAll();
                Document document = new Document();
                // Read the raw text content of the source file
                String fileContent = fileToStr(filePath);
                // Title: StringField is indexed and stored but NOT tokenized
                Field field1 = new StringField("title", "lucene測試", Store.YES);
                // Content: TextField is indexed and tokenized
                // (with Store.NO, document.get("content") would return null)
                Field field2 = new TextField("content", fileContent, Store.YES);
                Field field3 = new DoubleField("version", 1.2, Store.YES);
                Field field4 = new IntField("score", 90, Store.YES);
                document.add(field1);
                document.add(field2);
                document.add(field3);
                document.add(field4);
                indexWriter.addDocument(document);
                indexWriter.commit();
            }
        }
    }

    /**
     * Reads the whole file into a single string. Line separators are dropped,
     * matching the original indexing behavior.
     *
     * @param path file path to read
     * @return file contents with line breaks removed
     * @throws Throwable if the file cannot be opened or read
     */
    private static String fileToStr(String path) throws Throwable {
        StringBuilder sb = new StringBuilder();
        // GBK decodes the Chinese text correctly (GB2312 also works);
        // try-with-resources closes the entire stream chain even if
        // readLine throws (the original leaked the streams on error)
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(path), "GBK"))) {
            String line;
            while ((line = br.readLine()) != null) {
                sb.append(line);
            }
        }
        return sb.toString();
    }

    /**
     * Searches the "content" field for the given keyword and prints the
     * stored fields of up to 100 matching documents.
     *
     * @param keyword query string parsed against the content field
     * @throws Throwable if the index cannot be read or the query is invalid
     */
    public static void search(String keyword) throws Throwable {
        // Close the reader when done (the original never closed it)
        try (IndexReader indexReader = DirectoryReader.open(FSDirectory.open(new File(indexDirectory)))) {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            QueryParser queryParser = new QueryParser(VERSION, "content", new StandardAnalyzer(VERSION));
            Query query = queryParser.parse(keyword);
            TopDocs topDocs = indexSearcher.search(query, 100);
            System.out.println("查詢結果總數:" + topDocs.totalHits);
            ScoreDoc[] scoreDoc = topDocs.scoreDocs;
            for (int i = 0; i < scoreDoc.length; i++) {
                // Resolve the hit's document id to its stored document
                int id = scoreDoc[i].doc;
                Document document = indexSearcher.doc(id);
                System.out.println("內容:" + document.get("content"));
                System.out.println("標題:" + document.get("title"));
                System.out.println("版本:" + document.get("version"));
                System.out.println("評分:" + document.get("score"));
            }
        }
    }

    /**
     * Demonstrates tokenization of a Chinese sentence with the IK analyzer,
     * printing each token on its own line.
     *
     * @throws Throwable if tokenization fails
     */
    public static void test() throws Throwable {
        String str = "我是中國人";
        // StandardAnalyzer splits every CJK character into its own token
        // Analyzer analyzer = new StandardAnalyzer(VERSION);
        // SimpleAnalyzer leaves the text essentially unchanged
        // Analyzer analyzer = new SimpleAnalyzer(VERSION);
        // CJKAnalyzer emits bigrams of adjacent characters
        // Analyzer analyzer = new CJKAnalyzer(VERSION);
        // IKAnalyzer performs dictionary-based (semantic) segmentation
        Analyzer analyzer = new IKAnalyzer(false);
        // TokenStream contract: reset() before use, end() then close() after.
        // The original never called end()/close(), leaking the stream.
        try (TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(str))) {
            // Attribute giving access to each token's term text
            CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                System.out.println(new String(charTermAttribute.buffer(), 0, charTermAttribute.length()) + " ");
            }
            tokenStream.end();
        }
    }

    public static void main(String[] args) throws Throwable {
        // createIndex();
        // search("xml");
        test();
    }
}
/*
 * Copyright (C) 2015 ShenZhen tianlang Co.,Ltd All Rights Reserved.
 * 未經本公司正式書面同意,其他任何個人、團體不得使用、複製、修改或釋出本軟體.
 * 版權所有深圳天狼服務有限公司 www.tianlang.com.
 */
package com.tg.lucene.userdemo;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Lucene 4.7 usage demo: builds an index from a text file, searches it,
 * and demonstrates Chinese tokenization with the IK analyzer.
 *
 * @version 2016年11月25日上午10:24:12
 * @author wuliu
 */
public class Demo {
    // Location of the source text file to be indexed
    private static String filePath = "G:/lucene/a.txt";
    // Directory where the Lucene index is stored
    private static String indexDirectory = "G:/lucene/index2";
    // Lucene version constant shared by analyzers and the writer config
    private static final Version VERSION = Version.LUCENE_47;

    /**
     * Rebuilds the index from the source file: clears all existing documents,
     * then adds a single document with title/content/version/score fields.
     *
     * @throws Throwable if the index cannot be opened, the file cannot be
     *         read, or writing fails
     */
    public static void createIndex() throws Throwable {
        // try-with-resources closes the directory and writer even when
        // indexing throws (the original leaked both on any exception)
        try (Directory directory = FSDirectory.open(new File(indexDirectory))) {
            Analyzer analyzer = new StandardAnalyzer(VERSION);
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(VERSION, analyzer);
            try (IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig)) {
                indexWriter.deleteAll();
                Document document = new Document();
                // Read the raw text content of the source file
                String fileContent = fileToStr(filePath);
                // Title: StringField is indexed and stored but NOT tokenized
                Field field1 = new StringField("title", "lucene測試", Store.YES);
                // Content: TextField is indexed and tokenized
                // (with Store.NO, document.get("content") would return null)
                Field field2 = new TextField("content", fileContent, Store.YES);
                Field field3 = new DoubleField("version", 1.2, Store.YES);
                Field field4 = new IntField("score", 90, Store.YES);
                document.add(field1);
                document.add(field2);
                document.add(field3);
                document.add(field4);
                indexWriter.addDocument(document);
                indexWriter.commit();
            }
        }
    }

    /**
     * Reads the whole file into a single string. Line separators are dropped,
     * matching the original indexing behavior.
     *
     * @param path file path to read
     * @return file contents with line breaks removed
     * @throws Throwable if the file cannot be opened or read
     */
    private static String fileToStr(String path) throws Throwable {
        StringBuilder sb = new StringBuilder();
        // GBK decodes the Chinese text correctly (GB2312 also works);
        // try-with-resources closes the entire stream chain even if
        // readLine throws (the original leaked the streams on error)
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(path), "GBK"))) {
            String line;
            while ((line = br.readLine()) != null) {
                sb.append(line);
            }
        }
        return sb.toString();
    }

    /**
     * Searches the "content" field for the given keyword and prints the
     * stored fields of up to 100 matching documents.
     *
     * @param keyword query string parsed against the content field
     * @throws Throwable if the index cannot be read or the query is invalid
     */
    public static void search(String keyword) throws Throwable {
        // Close the reader when done (the original never closed it)
        try (IndexReader indexReader = DirectoryReader.open(FSDirectory.open(new File(indexDirectory)))) {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            QueryParser queryParser = new QueryParser(VERSION, "content", new StandardAnalyzer(VERSION));
            Query query = queryParser.parse(keyword);
            TopDocs topDocs = indexSearcher.search(query, 100);
            System.out.println("查詢結果總數:" + topDocs.totalHits);
            ScoreDoc[] scoreDoc = topDocs.scoreDocs;
            for (int i = 0; i < scoreDoc.length; i++) {
                // Resolve the hit's document id to its stored document
                int id = scoreDoc[i].doc;
                Document document = indexSearcher.doc(id);
                System.out.println("內容:" + document.get("content"));
                System.out.println("標題:" + document.get("title"));
                System.out.println("版本:" + document.get("version"));
                System.out.println("評分:" + document.get("score"));
            }
        }
    }

    /**
     * Demonstrates tokenization of a Chinese sentence with the IK analyzer,
     * printing each token on its own line.
     *
     * @throws Throwable if tokenization fails
     */
    public static void test() throws Throwable {
        String str = "我是中國人";
        // StandardAnalyzer splits every CJK character into its own token
        // Analyzer analyzer = new StandardAnalyzer(VERSION);
        // SimpleAnalyzer leaves the text essentially unchanged
        // Analyzer analyzer = new SimpleAnalyzer(VERSION);
        // CJKAnalyzer emits bigrams of adjacent characters
        // Analyzer analyzer = new CJKAnalyzer(VERSION);
        // IKAnalyzer performs dictionary-based (semantic) segmentation
        Analyzer analyzer = new IKAnalyzer(false);
        // TokenStream contract: reset() before use, end() then close() after.
        // The original never called end()/close(), leaking the stream.
        try (TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(str))) {
            // Attribute giving access to each token's term text
            CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                System.out.println(new String(charTermAttribute.buffer(), 0, charTermAttribute.length()) + " ");
            }
            tokenStream.end();
        }
    }

    public static void main(String[] args) throws Throwable {
        // createIndex();
        // search("xml");
        test();
    }
}