Lucene筆記25-Lucene的使用-根據域進行評分設定
阿新 • • 發佈:2018-11-04
一、需求
根據檔名來設定評分規則,或者根據文件的修改時間,將最近一年的評分加倍,一年以外的評分降低,等等。
二、具體實現
這裡根據檔名來修改評分規則:檔名中包含“JRE”或“SYSTEM”的文件,評分乘以10,其餘文件的評分除以10。重點就是怎麼獲取到檔名:在customScore()方法中,有一個doc引數(文件編號),我們可以通過這個doc來定位文件,從而獲取檔名。另外還有一點就是Lucene有域快取,只要IndexReader沒有關閉,已載入的域資料就會一直保留在域快取中,我們可以利用這個特性,將檔名這個域的內容取出來。
package com.wsy;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.search.function.CustomScoreProvider;
import org.apache.lucene.search.function.CustomScoreQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Random;

/**
 * Demonstrates custom scoring driven by a stored field: hits whose
 * {@code fileName} contains "JRE" or "SYSTEM" have their score multiplied by
 * ten, every other hit has its score divided by ten.
 *
 * <p>The index lives in {@link #INDEX_DIR}; the files to index are read from
 * {@link #SOURCE_DIR}.
 */
public class MyScoreQuery {
    /** Location of the Lucene index on disk. */
    private static final String INDEX_DIR = "E:\\Lucene\\IndexLibrary";
    /** Folder whose files are indexed by {@link #index(boolean)}. */
    private static final String SOURCE_DIR = "E:\\Lucene\\SearchSource";

    private static final Directory directory = openDirectory();

    /**
     * Opens the index directory once for the whole class.
     *
     * <p>No IndexReader is opened here: a reader is a point-in-time snapshot,
     * so it must be opened after indexing has finished (see
     * {@link #searchByFileScoreQuery()}).
     */
    private static Directory openDirectory() {
        try {
            return FSDirectory.open(new File(INDEX_DIR));
        } catch (IOException e) {
            // Nothing in this class can work without the directory; fail loudly
            // instead of continuing with a null reference.
            throw new ExceptionInInitializerError(e);
        }
    }

    /**
     * Indexes every regular file found directly inside {@link #SOURCE_DIR}.
     *
     * @param update when {@code true}, all existing documents are deleted
     *               before the files are (re)indexed
     */
    public void index(boolean update) {
        // List the source files first so that an unreadable source folder does
        // not wipe the existing index.
        File[] files = new File(SOURCE_DIR).listFiles();
        if (files == null) {
            System.err.println("Cannot list files in " + SOURCE_DIR);
            return;
        }

        IndexWriter indexWriter = null;
        try {
            indexWriter = new IndexWriter(directory,
                    new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
            if (update) {
                indexWriter.deleteAll();
            }
            Random random = new Random();
            for (File file : files) {
                if (!file.isFile()) {
                    // Sub-directories cannot be opened with FileReader.
                    continue;
                }
                addFile(indexWriter, file, random.nextInt(100));
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (indexWriter != null) {
                try {
                    indexWriter.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /** Builds the document for one file and adds it to the index. */
    private void addFile(IndexWriter indexWriter, File file, int score) throws IOException {
        FileReader content = new FileReader(file);
        try {
            Document document = new Document();
            document.add(new Field("content", content));
            document.add(new Field("fileName", file.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            document.add(new Field("path", file.getAbsolutePath(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            document.add(new NumericField("date", Field.Store.YES, true).setLongValue(file.lastModified()));
            // Clamp instead of letting the int cast wrap around for files > 2 GB.
            document.add(new NumericField("size", Field.Store.YES, true)
                    .setIntValue((int) Math.min(file.length(), Integer.MAX_VALUE)));
            document.add(new NumericField("score", Field.Store.YES, true).setIntValue(score));
            indexWriter.addDocument(document);
        } finally {
            // Closing again is harmless if the analysis chain already closed it;
            // this guarantees no leaked handle when addDocument fails.
            content.close();
        }
    }

    /**
     * Searches the {@code content} field for "java" using the file-name based
     * custom score and prints doc id, score, file name, random score and size
     * of up to 100 hits.
     */
    public void searchByFileScoreQuery() {
        IndexReader reader = null;
        IndexSearcher indexSearcher = null;
        try {
            // Open the reader here so it reflects the index as it is now,
            // including documents written by a preceding index() call.
            reader = IndexReader.open(directory);
            indexSearcher = new IndexSearcher(reader);
            Query query = new TermQuery(new Term("content", "java"));
            // Wrap the original query in the custom-scoring query.
            FileNameScoreQuery fileNameScoreQuery = new FileNameScoreQuery(query);
            TopDocs topDocs = indexSearcher.search(fileNameScoreQuery, 100);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
                System.out.println(scoreDoc.doc + "-->" + scoreDoc.score + "-->" + document.get("fileName")
                        + "-->" + document.get("score") + "-->" + document.get("size"));
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (indexSearcher != null) {
                try {
                    indexSearcher.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /** Custom-score query that delegates scoring to {@link FileNameScoreProvider}. */
    private static class FileNameScoreQuery extends CustomScoreQuery {
        public FileNameScoreQuery(Query subQuery) {
            super(subQuery);
        }

        @Override
        protected CustomScoreProvider getCustomScoreProvider(IndexReader reader) throws IOException {
            // Use the reader handed in by Lucene: the doc ids later passed to
            // customScore() are relative to this reader, not to the top-level one.
            return new FileNameScoreProvider(reader);
        }
    }

    /**
     * Adjusts the sub-query score according to the document's file name, which
     * is looked up in the field cache.
     */
    private static class FileNameScoreProvider extends CustomScoreProvider {
        /** {@code fileName} values indexed by doc id of the reader given to the constructor. */
        private final String[] fileNames;

        /**
         * @param reader reader whose doc ids will be passed to {@link #customScore}
         * @throws IOException if the field cache cannot be populated
         */
        public FileNameScoreProvider(IndexReader reader) throws IOException {
            super(reader);
            // Field-cache lookup is fast but keeps the whole field in memory
            // for as long as the reader stays open.
            fileNames = FieldCache.DEFAULT.getStrings(reader, "fileName");
        }

        @Override
        public float customScore(int doc, float subQueryScore, float valSrcScore) throws IOException {
            String fileName = fileNames[doc];
            // Documents without a fileName value are treated as non-matching.
            if (fileName != null && (fileName.contains("JRE") || fileName.contains("SYSTEM"))) {
                return 10 * subQueryScore;
            }
            return subQueryScore / 10;
        }
    }

    /** Rebuilds the index from scratch and runs the demo search. */
    public static void main(String[] args) {
        MyScoreQuery myScoreQuery = new MyScoreQuery();
        myScoreQuery.index(true);
        myScoreQuery.searchByFileScoreQuery();
    }
}