Lucene簡單總結
阿新 • • 發佈:2018-06-13
public 離開 api 分割 red 字符串 exception 獲取 let
Lucene
API
Document
Document:文檔對象,是一條原始數據
文檔編號 | 文檔內容 |
---|---|
1 | 谷歌地圖之父跳槽FaceBook |
2 | 谷歌地圖之父加盟FaceBook |
3 | 谷歌地圖創始人拉斯離開谷歌加盟Facebook |
4 | 谷歌地圖之父跳槽Facebook與Wave項目取消有關 |
5 | 谷歌地圖之父拉斯加盟社交網站Facebook |
==一條記錄就是一個document,document的每一個字段就是一個Field==
Field
創建索引
private final static File INDEX_FILE = new File("E:\\DevelopTools\\indexDir"); public static void indexCreate(Document doc) throws Exception { // 創建目錄對象,指定索引庫的存放位置;FSDirectory文件系統;RAMDirectory內存 Directory dir = FSDirectory.open(INDEX_FILE); // 創建分詞器對象 Analyzer analyzer = new IKAnalyzer(); // 創建索引寫入器配置對象,第一個參數版本VerSion.LATEST,第二個參數分詞器 IndexWriterConfig conf = new IndexWriterConfig(Version.LATEST, analyzer); // 創建索引寫入器 IndexWriter indexWriter = new IndexWriter(dir, conf); // 向索引庫寫入文檔對象 indexWriter.addDocument(doc); // 提交 indexWriter.commit(); // 關閉 indexWriter.close(); }
@Test public void createTest() throws Exception { // Document doc = new Document(); doc.add(new LongField("id", 1, Store.YES)); doc.add(new TextField("title", "谷歌地圖之父跳槽FaceBook", Store.YES)); doc.add(new TextField("context", "據國外媒體報道,曾先後負責谷歌地圖和Wave開發工作的拉斯·拉斯姆森(Lars Rasmussen)已經離開谷歌,並將加盟Facebook。", Store.YES)); indexCreate(doc); }
查詢索引
public static void indexSearcher(Query query, Integer n) throws IOException { // 初始化索引庫對象 Directory dir = FSDirectory.open(INDEX_FILE); // 索引讀取工具 IndexReader indexReader = DirectoryReader.open(dir); // 索引搜索對象 IndexSearcher indexSeracher = new IndexSearcher(indexReader); // 執行搜索操作,返回值topDocs TopDocs topDocs = indexSeracher.search(query, n); // 匹配搜索條件的總記錄數 System.out.println("一共命中:" + topDocs.totalHits + "條數據"); // 獲得得分文檔數組對象,得分文檔對象包含得分和文檔編號 ScoreDoc[] scoreDocs = topDocs.scoreDocs; for (ScoreDoc scoreDoc : scoreDocs) { int docID = scoreDoc.doc; float score = scoreDoc.score; System.out.println("文檔編號:" + docID); System.out.println("文檔得分:" + score); // 獲取文檔對象,通過索引讀取工具 Document document = indexReader.document(docID); System.out.println("id:" + document.get("id")); System.out.println("title:" + document.get("title")); System.out.println("context:" + document.get("context")); } indexReader.close(); }
/**
 * Parses the keyword "谷歌" against the {@code title} field with a single-field
 * query parser and prints the 10 best-scoring hits.
 */
@Test
public void searchTest() throws Exception {
    // Single-field parser: the keyword is analyzed and matched against "title" only.
    // Multi-field alternative:
    //   MultiFieldQueryParser parser =
    //       new MultiFieldQueryParser(new String[]{"id", "title"}, new IKAnalyzer());
    //   Query query = parser.parse("1");
    Query titleQuery = new QueryParser("title", new IKAnalyzer()).parse("谷歌");

    // Search with the query and return the n highest-scoring records.
    indexSearcher(titleQuery, 10);
}
各種其他查詢方式
// Term query: the search value must be a single, indivisible term (smallest granularity).
Query query = new TermQuery(new Term("title", "谷歌"));
// Wildcard query: '?' matches exactly one character, '*' matches any number of characters.
Query query = new WildcardQuery(new Term("title", "*歌*"));
// Fuzzy query. Arguments: 1 - the term (field and keyword; the keyword may be misspelled); 2 - the maximum allowed edit distance, which must not exceed 2 (0~2).
Query query = new FuzzyQuery(new Term("title", "facebool"), 1);
// Numeric range query, for fields holding non-String (numeric) values. Arguments: 1 - field; 2 - minimum; 3 - maximum; 4 - include minimum; 5 - include maximum.
Query query = NumericRangeQuery.newLongRange("id", 2l, 4l, true, true);
// Boolean (combined) query. Intersection: Occur.MUST + Occur.MUST; union: Occur.SHOULD + Occur.SHOULD; negation: Occur.MUST_NOT.
BooleanQuery query = new BooleanQuery();
Query query1 = NumericRangeQuery.newLongRange("id", 2l, 4l, true, true);
Query query2 = new WildcardQuery(new Term("title", "*歌*"));
query.add(query1, Occur.SHOULD);
query.add(query2, Occur.SHOULD);
修改索引
/**
 * Replaces the documents matching {@code term} with {@code doc}.
 *
 * <p>An update is a delete followed by an add: every document matching the term
 * is removed and the new document is then written. For that reason updates are
 * normally keyed on a field whose value is unique per document.
 *
 * @param term the term selecting the documents to replace
 * @param doc  the replacement document
 * @throws IOException if the index cannot be opened or written
 */
public static void indexUpdate(Term term, Document doc) throws IOException {
    Analyzer analyzer = new IKAnalyzer();
    IndexWriterConfig conf = new IndexWriterConfig(Version.LATEST, analyzer);

    // try-with-resources closes the writer and then the directory even when
    // updateDocument/commit throws, so the writer is never left open on failure.
    try (Directory dir = FSDirectory.open(INDEX_FILE);
         IndexWriter indexWriter = new IndexWriter(dir, conf)) {
        indexWriter.updateDocument(term, doc);
        indexWriter.commit();
    }
}
/**
 * Replaces the documents whose {@code title} contains the term "facebook" with a
 * freshly built sample document, via {@code indexUpdate}.
 */
@Test
public void updateTest() throws Exception {
    Document replacement = new Document();
    replacement.add(new LongField("id", 1L, Store.YES));
    replacement.add(new TextField("title", "谷歌地圖之父跳槽FaceBook", Store.YES));
    replacement.add(new TextField("context", "河馬程序員加盟FaceBook", Store.YES));

    indexUpdate(new Term("title", "facebook"), replacement);
}
刪除索引
/**
 * Deletes every document matching {@code term} from the index at {@code INDEX_FILE}.
 *
 * <p>Note from the original author: when deleting by an id field, that field
 * must be of string type.
 *
 * @param term the term selecting the documents to delete
 * @throws IOException if the index cannot be opened or written
 */
public static void indexDelete(Term term) throws IOException {
    Analyzer analyzer = new IKAnalyzer();
    IndexWriterConfig conf = new IndexWriterConfig(Version.LATEST, analyzer);

    // try-with-resources closes the writer and then the directory even when
    // deleteDocuments/commit throws, so the writer is never left open on failure.
    try (Directory dir = FSDirectory.open(INDEX_FILE);
         IndexWriter indexWriter = new IndexWriter(dir, conf)) {
        indexWriter.deleteDocuments(term);
        indexWriter.commit();
    }
}
/**
 * Deletes every document from the index at {@code INDEX_FILE}.
 *
 * @throws IOException if the index cannot be opened or written
 */
public static void indexDeleteAll() throws IOException {
    Analyzer analyzer = new IKAnalyzer();
    IndexWriterConfig conf = new IndexWriterConfig(Version.LATEST, analyzer);

    // try-with-resources closes the writer and then the directory even when
    // deleteAll/commit throws, so the writer is never left open on failure.
    try (Directory dir = FSDirectory.open(INDEX_FILE);
         IndexWriter indexWriter = new IndexWriter(dir, conf)) {
        indexWriter.deleteAll();
        indexWriter.commit();
    }
}
/**
 * Clears the whole index by calling {@code indexDeleteAll()}.
 */
@Test
public void deleteTest() throws Exception {
    // To remove only the documents matching a single term, use instead:
    //   Term term = new Term("context", "facebook");
    //   indexDelete(term);
    indexDeleteAll();
}
Lucene簡單總結