Lucene4.7.2簡單例子二
阿新 • 發佈:2019-01-25
/*
* Copyright (C) 2015 ShenZhen tianlang Co.,Ltd All Rights Reserved.
* 未經本公司正式書面同意,其他任何個人、團體不得使用、複製、修改或釋出本軟體.
* 版權所有深圳天狼服務有限公司 www.tianlang.com.
*/
package com.tg.lucene.userdemo;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Lucene 4.7 usage demo: builds an index from a text file, searches it,
 * and demonstrates Chinese tokenization with the IK analyzer.
 *
 * @version 2016年11月25日上午10:24:12
 * @author wuliu
 */
public class Demo {
    // Location of the source text file to be indexed
    private static String filePath = "G:/lucene/a.txt";
    // Directory where the Lucene index is stored
    private static String indexDirectory = "G:/lucene/index2";
    // Lucene version constant shared by analyzers and the writer config
    private static final Version VERSION = Version.LUCENE_47;

    /**
     * Rebuilds the index from the source file: clears all existing documents,
     * then adds a single document with title/content/version/score fields.
     *
     * @throws Throwable if the index cannot be opened, the file cannot be
     *         read, or writing fails
     */
    public static void createIndex() throws Throwable {
        // try-with-resources closes the directory and writer even when
        // indexing throws (the original leaked both on any exception)
        try (Directory directory = FSDirectory.open(new File(indexDirectory))) {
            Analyzer analyzer = new StandardAnalyzer(VERSION);
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(VERSION, analyzer);
            try (IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig)) {
                indexWriter.deleteAll();
                Document document = new Document();
                // Read the raw text content of the source file
                String fileContent = fileToStr(filePath);
                // Title: StringField is indexed and stored but NOT tokenized
                Field field1 = new StringField("title", "lucene測試", Store.YES);
                // Content: TextField is indexed and tokenized
                // (with Store.NO, document.get("content") would return null)
                Field field2 = new TextField("content", fileContent, Store.YES);
                Field field3 = new DoubleField("version", 1.2, Store.YES);
                Field field4 = new IntField("score", 90, Store.YES);
                document.add(field1);
                document.add(field2);
                document.add(field3);
                document.add(field4);
                indexWriter.addDocument(document);
                indexWriter.commit();
            }
        }
    }

    /**
     * Reads the whole file into a single string. Line separators are dropped,
     * matching the original indexing behavior.
     *
     * @param path file path to read
     * @return file contents with line breaks removed
     * @throws Throwable if the file cannot be opened or read
     */
    private static String fileToStr(String path) throws Throwable {
        StringBuilder sb = new StringBuilder();
        // GBK decodes the Chinese text correctly (GB2312 also works);
        // try-with-resources closes the entire stream chain even if
        // readLine throws (the original leaked the streams on error)
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(path), "GBK"))) {
            String line;
            while ((line = br.readLine()) != null) {
                sb.append(line);
            }
        }
        return sb.toString();
    }

    /**
     * Searches the "content" field for the given keyword and prints the
     * stored fields of up to 100 matching documents.
     *
     * @param keyword query string parsed against the content field
     * @throws Throwable if the index cannot be read or the query is invalid
     */
    public static void search(String keyword) throws Throwable {
        // Close the reader when done (the original never closed it)
        try (IndexReader indexReader = DirectoryReader.open(FSDirectory.open(new File(indexDirectory)))) {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            QueryParser queryParser = new QueryParser(VERSION, "content", new StandardAnalyzer(VERSION));
            Query query = queryParser.parse(keyword);
            TopDocs topDocs = indexSearcher.search(query, 100);
            System.out.println("查詢結果總數:" + topDocs.totalHits);
            ScoreDoc[] scoreDoc = topDocs.scoreDocs;
            for (int i = 0; i < scoreDoc.length; i++) {
                // Resolve the hit's document id to its stored document
                int id = scoreDoc[i].doc;
                Document document = indexSearcher.doc(id);
                System.out.println("內容:" + document.get("content"));
                System.out.println("標題:" + document.get("title"));
                System.out.println("版本:" + document.get("version"));
                System.out.println("評分:" + document.get("score"));
            }
        }
    }

    /**
     * Demonstrates tokenization of a Chinese sentence with the IK analyzer,
     * printing each token on its own line.
     *
     * @throws Throwable if tokenization fails
     */
    public static void test() throws Throwable {
        String str = "我是中國人";
        // StandardAnalyzer splits every CJK character into its own token
        // Analyzer analyzer = new StandardAnalyzer(VERSION);
        // SimpleAnalyzer leaves the text essentially unchanged
        // Analyzer analyzer = new SimpleAnalyzer(VERSION);
        // CJKAnalyzer emits bigrams of adjacent characters
        // Analyzer analyzer = new CJKAnalyzer(VERSION);
        // IKAnalyzer performs dictionary-based (semantic) segmentation
        Analyzer analyzer = new IKAnalyzer(false);
        // TokenStream contract: reset() before use, end() then close() after.
        // The original never called end()/close(), leaking the stream.
        try (TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(str))) {
            // Attribute giving access to each token's term text
            CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                System.out.println(new String(charTermAttribute.buffer(), 0, charTermAttribute.length()) + " ");
            }
            tokenStream.end();
        }
    }

    public static void main(String[] args) throws Throwable {
        // createIndex();
        // search("xml");
        test();
    }
}
/*
 * Copyright (C) 2015 ShenZhen tianlang Co.,Ltd All Rights Reserved.
 * 未經本公司正式書面同意,其他任何個人、團體不得使用、複製、修改或釋出本軟體.
 * 版權所有深圳天狼服務有限公司 www.tianlang.com.
 */
package com.tg.lucene.userdemo;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Lucene 4.7 usage demo: builds an index from a text file, searches it,
 * and demonstrates Chinese tokenization with the IK analyzer.
 *
 * @version 2016年11月25日上午10:24:12
 * @author wuliu
 */
public class Demo {
    // Location of the source text file to be indexed
    private static String filePath = "G:/lucene/a.txt";
    // Directory where the Lucene index is stored
    private static String indexDirectory = "G:/lucene/index2";
    // Lucene version constant shared by analyzers and the writer config
    private static final Version VERSION = Version.LUCENE_47;

    /**
     * Rebuilds the index from the source file: clears all existing documents,
     * then adds a single document with title/content/version/score fields.
     *
     * @throws Throwable if the index cannot be opened, the file cannot be
     *         read, or writing fails
     */
    public static void createIndex() throws Throwable {
        // try-with-resources closes the directory and writer even when
        // indexing throws (the original leaked both on any exception)
        try (Directory directory = FSDirectory.open(new File(indexDirectory))) {
            Analyzer analyzer = new StandardAnalyzer(VERSION);
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(VERSION, analyzer);
            try (IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig)) {
                indexWriter.deleteAll();
                Document document = new Document();
                // Read the raw text content of the source file
                String fileContent = fileToStr(filePath);
                // Title: StringField is indexed and stored but NOT tokenized
                Field field1 = new StringField("title", "lucene測試", Store.YES);
                // Content: TextField is indexed and tokenized
                // (with Store.NO, document.get("content") would return null)
                Field field2 = new TextField("content", fileContent, Store.YES);
                Field field3 = new DoubleField("version", 1.2, Store.YES);
                Field field4 = new IntField("score", 90, Store.YES);
                document.add(field1);
                document.add(field2);
                document.add(field3);
                document.add(field4);
                indexWriter.addDocument(document);
                indexWriter.commit();
            }
        }
    }

    /**
     * Reads the whole file into a single string. Line separators are dropped,
     * matching the original indexing behavior.
     *
     * @param path file path to read
     * @return file contents with line breaks removed
     * @throws Throwable if the file cannot be opened or read
     */
    private static String fileToStr(String path) throws Throwable {
        StringBuilder sb = new StringBuilder();
        // GBK decodes the Chinese text correctly (GB2312 also works);
        // try-with-resources closes the entire stream chain even if
        // readLine throws (the original leaked the streams on error)
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(path), "GBK"))) {
            String line;
            while ((line = br.readLine()) != null) {
                sb.append(line);
            }
        }
        return sb.toString();
    }

    /**
     * Searches the "content" field for the given keyword and prints the
     * stored fields of up to 100 matching documents.
     *
     * @param keyword query string parsed against the content field
     * @throws Throwable if the index cannot be read or the query is invalid
     */
    public static void search(String keyword) throws Throwable {
        // Close the reader when done (the original never closed it)
        try (IndexReader indexReader = DirectoryReader.open(FSDirectory.open(new File(indexDirectory)))) {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            QueryParser queryParser = new QueryParser(VERSION, "content", new StandardAnalyzer(VERSION));
            Query query = queryParser.parse(keyword);
            TopDocs topDocs = indexSearcher.search(query, 100);
            System.out.println("查詢結果總數:" + topDocs.totalHits);
            ScoreDoc[] scoreDoc = topDocs.scoreDocs;
            for (int i = 0; i < scoreDoc.length; i++) {
                // Resolve the hit's document id to its stored document
                int id = scoreDoc[i].doc;
                Document document = indexSearcher.doc(id);
                System.out.println("內容:" + document.get("content"));
                System.out.println("標題:" + document.get("title"));
                System.out.println("版本:" + document.get("version"));
                System.out.println("評分:" + document.get("score"));
            }
        }
    }

    /**
     * Demonstrates tokenization of a Chinese sentence with the IK analyzer,
     * printing each token on its own line.
     *
     * @throws Throwable if tokenization fails
     */
    public static void test() throws Throwable {
        String str = "我是中國人";
        // StandardAnalyzer splits every CJK character into its own token
        // Analyzer analyzer = new StandardAnalyzer(VERSION);
        // SimpleAnalyzer leaves the text essentially unchanged
        // Analyzer analyzer = new SimpleAnalyzer(VERSION);
        // CJKAnalyzer emits bigrams of adjacent characters
        // Analyzer analyzer = new CJKAnalyzer(VERSION);
        // IKAnalyzer performs dictionary-based (semantic) segmentation
        Analyzer analyzer = new IKAnalyzer(false);
        // TokenStream contract: reset() before use, end() then close() after.
        // The original never called end()/close(), leaking the stream.
        try (TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(str))) {
            // Attribute giving access to each token's term text
            CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                System.out.println(new String(charTermAttribute.buffer(), 0, charTermAttribute.length()) + " ");
            }
            tokenStream.end();
        }
    }

    public static void main(String[] args) throws Throwable {
        // createIndex();
        // search("xml");
        test();
    }
}