1. 程式人生 > >lucene構建restful風格的簡單搜索引擎服務

lucene構建restful風格的簡單搜索引擎服務

arr -i analyzer ota true tope fig close null

來自於本人博客: lucene構建restful風格的簡單搜索引擎服務


本人的博客如今也要改成使用lucene進行全文檢索的功能,因此在這裏把代碼貼出來與大家分享

一,文件夾結構:

技術分享圖片

二,配置文件:

總共有四個配置文件:bonecp-config.xml,IKAnalyzer.cfg.xml,log4j.properties,system-config.xml

1.bonecp-config.xml是配置jdbc連接池用的,不用這個配置也行,bonecp包有默認配置

2.IKAnalyzer.cfg.xml是IKAnalyzer分詞要用的字典配置文件

這裏也能夠不用配置
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<properties>
    <comment>IK Analyzer 擴展配置</comment>
    <!--用戶能夠在這裏配置自己的擴展字典 -->
    <entry key="ext_dict">/data/lucene/dict/1_dict.txt;/data/lucene/dict/2_dict.txt;/data/lucene/dict/3_dict.txt;/data/lucene/dict/4_dict.txt;/data/lucene/dict/5_dict.txt;/data/lucene/dict/6_dict.txt;/data/lucene/dict/7_dict.txt;/data/lucene/dict/8_dict.txt;</entry>
    <!--用戶能夠在這裏配置自己的擴展停止詞字典
    <entry key="ext_stopwords">/data/lucene/dict/stopword.dic</entry>
    -->
</properties>

3.log4j.properties這個不用多說了

4.system-config.xml是一些系統的配置參數


<?xml version="1.0" encoding="UTF-8"?>
<configs>
        <mysql>
            <port>3306</port>
            <user>test</user>
            <password>test</password>
            <partitionCount>6</partitionCount>
            <maxWait>3600</maxWait>
            <driverClass>com.mysql.jdbc.Driver</driverClass>
            <idleMaxAge>1800</idleMaxAge>
            <idleConnectionTestPeriod>300</idleConnectionTestPeriod>
            <host>jdbc:mysql://localhost/blog?characterEncoding=UTF-8</host>
        </mysql>
        <search>
            <!--這裏的路徑能夠自己修改-->
            <indexPath>/data/lucene/index</indexPath>
            <recommendNetIndexPath>/data/lucene/index/recommendNet</recommendNetIndexPath>
            <searcNum>10</searcNum>
            <resultNum>10000</resultNum>
        </search>
</configs>

三,監聽器SystemStartupListener,實現了ServletContextListener

package com.blog.listener;

import java.io.File;
import java.net.URL;
import java.sql.SQLException;
import java.util.List;

import javax.servlet.ServletContextEvent;
import javax.servlet.ServletContextListener;

import org.apache.log4j.Logger;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;

import com.blog.db.DBFactory;
import com.blog.search.BlogSearch;
import com.blog.search.index.BlogIndex;

/**
 * Servlet context listener that bootstraps the search service.
 *
 * <p>On startup it parses {@code system-config.xml} from the classpath, initialises the
 * database pool through {@code DBFactory.init}, publishes the search settings as JVM system
 * properties ({@code searcNum}, {@code resultNum}, {@code indexFilePath},
 * {@code recommendNetIndexPath}), calls {@code BlogIndex.buildIndex} and finally
 * {@code BlogSearch.init}. On shutdown it calls {@code DBFactory.shutDown}.
 *
 * <p>Every failure during start-up is caught and logged; nothing is rethrown to the container.
 */
public class SystemStartupListener implements ServletContextListener {
    private static Logger log = Logger.getLogger(SystemStartupListener.class);

    /**
     * Shuts the database factory down when the web application stops.
     *
     * @param arg0 the context event (unused)
     */
    public void contextDestroyed(ServletContextEvent arg0) {
        DBFactory.shutDown();
    }

    /**
     * Reads the configuration and initialises the database pool and the search components.
     *
     * @param arg0 the context event (unused)
     */
    public void contextInitialized(ServletContextEvent arg0) {
        SAXReader reader = new SAXReader();
        try {
            URL url = this.getClass().getClassLoader().getResource("system-config.xml");
            if (url == null) {
                throw new RuntimeException("初始化失敗....找不到system-config.xml");
            }
            // Parse straight from the URL: URL.getFile() yields a URL-encoded path, so wrapping
            // it in java.io.File fails when the deployment path contains spaces or non-ASCII
            // characters.
            Document doc = reader.read(url);
            Element rootEle = doc.getRootElement();

            List<?> list = rootEle.elements("mysql");
            if (list.isEmpty()) {
                throw new RuntimeException("初始化失敗....");
            }
            Element mysqlEle = (Element) list.get(0);
            if (null != mysqlEle) {
                String host = mysqlEle.elementText("host");
                String user = mysqlEle.elementText("user");
                String password = mysqlEle.elementText("password");
                Integer partitionCount = Integer.parseInt(mysqlEle.elementText("partitionCount"));
                Integer maxWait = Integer.parseInt(mysqlEle.elementText("maxWait"));
                String driverClass = mysqlEle.elementText("driverClass");
                Integer idleMaxAge = Integer.parseInt(mysqlEle.elementText("idleMaxAge"));
                Integer idleConnectionTestPeriod = Integer.parseInt(mysqlEle.elementText("idleConnectionTestPeriod"));
                DBFactory.init(driverClass, host, user, password, partitionCount, maxWait, idleMaxAge, idleConnectionTestPeriod);
            }

            list = rootEle.elements("search");
            if (list.isEmpty()) {
                throw new RuntimeException("初始化失敗....");
            }
            Element searchEle = (Element) list.get(0);
            String indexPath = searchEle.elementText("indexPath");   // where the index files are stored
            String searcNum = searchEle.elementText("searcNum");     // number of results per search page
            String resultNum = searchEle.elementText("resultNum");
            String recommendNetIndexPath = searchEle.elementText("recommendNetIndexPath");
            // The properties must be set before BlogIndex, BlogSearch and Constant are first
            // loaded, because those classes read them in static initialisers.
            System.setProperty("searcNum", searcNum);
            System.setProperty("resultNum", resultNum);
            System.setProperty("indexFilePath", indexPath);
            System.setProperty("recommendNetIndexPath", recommendNetIndexPath);
            // NOTE(review): the index is built at recommendNetIndexPath while BlogSearch opens
            // indexFilePath - confirm this is the intended directory.
            BlogIndex.buildIndex(recommendNetIndexPath);

            log.info("初始化搜索.....");
            BlogSearch.init();
        } catch (DocumentException e) {
            log.error("解析配置文件出錯.....",e);
        } catch(Exception e) {
            log.error("出現未知錯誤....",e);
        }
    }
}

四。util包中的Constant常量類

package com.blog.util;

/**
 * Search-related settings read once from JVM system properties when this class is loaded.
 *
 * <p>Both properties must already be set at that point; a missing or non-numeric value makes
 * the static initialiser fail with a {@link NumberFormatException}.
 */
public class Constant {
    /** Value of the {@code searcNum} system property. */
    public static final Integer searcNum = Integer.valueOf(System.getProperty("searcNum"));

    /** Value of the {@code resultNum} system property. */
    public static final Integer resultNum = Integer.valueOf(System.getProperty("resultNum"));
}

util包中的DataToJson類:

package com.blog.util;

import java.util.List;

import com.google.gson.JsonArray;
import com.google.gson.JsonObject;

/**
 * Serialises search results into the JSON payload returned by the service.
 */
public class DataToJson {

    /**
     * Builds a JSON object of the form
     * {@code {"totalCount": n, "data": [{"id": ...}, ...]}}.
     *
     * @param ids        ids to list under {@code data}, in iteration order
     * @param totalCount value written to {@code totalCount}
     * @return the JSON text
     */
    public static String parseDataToJson(List<Long> ids, int totalCount) {
        JsonArray data = new JsonArray();
        // An empty list simply yields an empty array.
        for (Long id : ids) {
            JsonObject entry = new JsonObject();
            entry.addProperty("id", id);
            data.add(entry);
        }

        JsonObject root = new JsonObject();
        root.addProperty("totalCount", totalCount);
        root.add("data", data);
        return root.toString();
    }

}

五。entity包中的實體類:

Dashboard:

package com.blog.search.entity;

/**
 * Mutable bean carrying the three values of a blog post that are indexed:
 * its id, title and content.
 */
public class Dashboard {
    private Long id;
    private String title;
    private String content;

    /** @return the post id, or {@code null} if not set */
    public Long getId() {
        return this.id;
    }

    /** @param id the post id */
    public void setId(Long id) {
        this.id = id;
    }

    /** @return the post title, or {@code null} if not set */
    public String getTitle() {
        return this.title;
    }

    /** @param title the post title */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the post content, or {@code null} if not set */
    public String getContent() {
        return this.content;
    }

    /** @param content the post content */
    public void setContent(String content) {
        this.content = content;
    }
}

六,lucene相關的索引和檢索類:

index包中的BlogIndex:

package com.blog.search.index;

import java.io.File;
import java.io.IOException;

import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

import com.blog.search.entity.Dashboard;

/**
 * Maintains the Lucene index of blog posts: creating an empty index, and adding, updating
 * and deleting the document of a single post.
 *
 * <p>The index directory is read once from the {@code indexFilePath} system property when
 * this class is loaded. Each operation opens its own {@link IndexWriter} and always closes it
 * again, so the index write lock is released even when the operation fails.
 */
public class BlogIndex {
    private static final String indexFilePath = System.getProperty("indexFilePath");
    private static Logger log = Logger.getLogger(BlogIndex.class);

    public BlogIndex() {

    }

    /**
     * Creates an empty index in {@code path} when that path is an existing, empty directory.
     * Does nothing otherwise. Intended to be called at start-up when no index exists yet.
     *
     * @param path directory that should hold the index
     */
    public static void buildIndex(String path) {
        // listFiles() returns null when the path is not a directory or cannot be listed.
        File[] children = new File(path).listFiles();
        if (children == null || children.length != 0) {
            return;
        }
        IndexWriter writer = null;
        try {
            writer = openWriter(path, OpenMode.CREATE);
            writer.deleteAll();
        } catch (IOException e) {
            log.error("創建索引出錯.....", e);
        } finally {
            closeQuietly(writer);
        }
    }

    /**
     * Converts a post into a Lucene document: analysed {@code title} (stored), analysed
     * {@code content} (not stored) and the {@code id} as a stored, un-analysed string field.
     */
    @SuppressWarnings("deprecation")
    private Document getDocument(Dashboard dashboard) throws Exception {
        Document doc = new Document();
        doc.add(new Field("title", dashboard.getTitle(), Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("content", dashboard.getContent(),Field.Store.NO,Field.Index.ANALYZED));
        Field idField = new StringField("id",dashboard.getId().toString(), Field.Store.YES);
        doc.add(idField);
        return doc;
    }

    /**
     * Adds the post to the index and commits.
     *
     * @param dashboard the post to index
     * @throws Exception if the document cannot be built or the index cannot be opened or written
     */
    public void writeToIndex(Dashboard dashboard) throws Exception {
        Document doc = getDocument(dashboard);
        // Let an open failure propagate instead of continuing with a null writer.
        IndexWriter writer = openWriter(indexFilePath, null);
        boolean succeeded = false;
        try {
            writer.addDocument(doc);
            writer.commit();
            succeeded = true;
        } finally {
            closeAfter(writer, succeeded);
        }
    }

    /**
     * Deletes every document whose {@code id} term equals the given id and commits.
     * Failures are logged and not propagated.
     *
     * @param id id of the post to remove
     */
    public void deleteIndex(Long id) {
        IndexWriter writer = null;
        try {
            writer = openWriter(indexFilePath, null);
            writer.deleteDocuments(new Term("id",id.toString()));
            writer.commit();
        } catch(Exception e) {
            log.error("刪除索引出錯.....", e);
        } finally {
            closeQuietly(writer);
        }
    }

    /**
     * Replaces the indexed document of the post (matched by its {@code id} term) and commits.
     *
     * @param dashboard the new state of the post
     * @throws Exception if the document cannot be built or the index cannot be opened or written
     */
    public void updateIndex(Dashboard dashboard) throws Exception {
        Document doc = getDocument(dashboard);
        IndexWriter writer = openWriter(indexFilePath, null);
        boolean succeeded = false;
        try {
            writer.updateDocument(new Term("id", dashboard.getId().toString()), doc);
            writer.commit();
            succeeded = true;
        } finally {
            closeAfter(writer, succeeded);
        }
    }

    /** Opens a writer on {@code path} using IKAnalyzer; {@code mode} may be null for the default open mode. */
    private static IndexWriter openWriter(String path, OpenMode mode) throws IOException {
        Directory dir = FSDirectory.open(new File(path));
        Analyzer analyzer = new IKAnalyzer(true);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_43, analyzer);
        if (mode != null) {
            iwc.setOpenMode(mode);
        }
        return new IndexWriter(dir, iwc);
    }

    /** Closes the writer; a close failure only propagates when the operation itself succeeded. */
    private static void closeAfter(IndexWriter writer, boolean succeeded) throws IOException {
        if (succeeded) {
            writer.close();
        } else {
            // Keep the original exception visible to the caller.
            closeQuietly(writer);
        }
    }

    /** Closes the writer if it is non-null, logging instead of throwing on failure. */
    private static void closeQuietly(IndexWriter writer) {
        if (writer == null) {
            return;
        }
        try {
            writer.close();
        } catch (IOException e) {
            log.error("關閉IndexWriter出錯.....", e);
        }
    }
}

七,search包以下的BlogSearch類:

package com.blog.search;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.queryparser.classic.QueryParser.Operator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

import com.blog.util.Constant;
import com.blog.util.DataToJson;

/**
 * Runs keyword searches over the {@code title} and {@code content} fields of the blog index.
 *
 * <p>A single {@link IndexReader} is shared between requests to avoid reopening the index for
 * every search. The shared reader is refreshed under a lock when the index has changed. Each
 * search takes its own reference ({@code incRef}) and releases it when done ({@code decRef}),
 * so a reader that has been replaced is only really closed after the last search using it
 * has finished.
 */
public class BlogSearch {
    private static Logger log = Logger.getLogger(BlogSearch.class);

    private static final String indexFilePath = System.getProperty("indexFilePath");
    private static final String READER_KEY = "blogsearch";
    private static String[] field = {"title","content"};
    // Holds the shared reader; the map owns one reference to it.
    private static Map<String, IndexReader> readers = new ConcurrentHashMap<String, IndexReader>();
    private static final Object lock = new Object();

    /**
     * Opens the shared reader on the directory named by the {@code indexFilePath} system
     * property. A failure is logged; the reader is then opened lazily by the first search.
     */
    public static void init() {
        try {
            IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexFilePath)));
            synchronized (lock) {
                IndexReader previous = readers.put(READER_KEY, reader);
                if (previous != null) {
                    previous.decRef();
                }
            }
            log.info(readers.toString());
        } catch (IOException e) {
            log.error("初始化搜索器出錯.......",e);
        }

    }

    /**
     * Searches for the keyword, requiring all terms to match (AND).
     *
     * @param keyword query text in Lucene query-parser syntax
     * @return the top {@code Constant.resultNum} hits, or {@code null} if the search failed
     */
    public TopDocs search(String keyword) {
        IndexReader reader = null;
        try {
            Query query = buildQuery(keyword);
            reader = acquireReader();
            return new IndexSearcher(reader).search(query, Constant.resultNum);
        } catch(Exception e) {
            log.error("搜索關鍵字出錯......",e);
            return null;
        } finally {
            releaseReader(reader);
        }
    }

    /**
     * Searches for the keyword and returns one page of matching post ids as JSON
     * (see {@code DataToJson.parseDataToJson}).
     *
     * <p>Despite its name, {@code pageSize} is the 1-based page number; the number of ids per
     * page is {@code Constant.searcNum}. A page that starts past the last retrievable hit
     * yields all retrievable hits (existing behaviour); a page number below 1 yields no ids.
     * If the search fails, the error is logged and an empty result with
     * {@code totalCount} 0 is returned.
     *
     * @param keyword  query text in Lucene query-parser syntax
     * @param pageSize 1-based page number
     * @return JSON text with {@code totalCount} and the ids of the requested page
     */
    public String getResult(String keyword, int pageSize) {
        List<Long> ids = new ArrayList<Long>();
        int totalCount = 0;
        IndexReader reader = null;
        try {
            Query query = buildQuery(keyword);
            // Hold the reader for the whole request: stored fields are loaded after the search.
            reader = acquireReader();
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs td = searcher.search(query, Constant.resultNum);
            totalCount = td.totalHits;
            ScoreDoc[] hits = td.scoreDocs;
            if(hits.length == 0) {
                log.debug("no result data");
            } else {
                // Only hits.length documents were collected, which can be fewer than totalHits.
                int available = Math.min(totalCount, hits.length);
                int start = Constant.searcNum*(pageSize - 1);
                int end = Constant.searcNum*pageSize;
                if(start >= available) {
                    start = 0;
                    end = available;
                }
                if(end > available) {
                    end = available;
                }
                if(start < 0) {
                    start = 0;
                }
                for(int i = start; i < end; i++) {
                    try {
                        Document doc = searcher.doc(hits[i].doc);
                        ids.add(Long.parseLong(doc.get("id")));
                    } catch(Exception e) {
                        // Skip a hit that cannot be loaded; keep the rest of the page.
                        log.error("start=" +start + ", end=" + end + ", " + hits.length, e);
                    }
                }
            }
        } catch(Exception e) {
            log.error("搜索關鍵字出錯......",e);
        } finally {
            releaseReader(reader);
        }
        return DataToJson.parseDataToJson(ids, totalCount);
    }

    /** Parses the keyword into a query over the title and content fields with AND semantics. */
    private static Query buildQuery(String keyword) throws Exception {
        Analyzer analyzer = new IKAnalyzer(true);
        QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_43, field,analyzer);
        parser.setDefaultOperator(Operator.AND);
        return parser.parse(keyword);
    }

    /**
     * Returns the current shared reader with one extra reference taken for the caller,
     * refreshing it first if the index has changed. Must be paired with
     * {@link #releaseReader(IndexReader)}.
     */
    private static IndexReader acquireReader() throws IOException {
        synchronized(lock) {
            IndexReader current = readers.get(READER_KEY);
            if(current == null) {
                // init() failed or was never called: open on demand.
                current = DirectoryReader.open(FSDirectory.open(new File(indexFilePath)));
                readers.put(READER_KEY, current);
            } else {
                IndexReader refreshed = DirectoryReader.openIfChanged((DirectoryReader)current);
                if(refreshed != null) {  // null means the index has not changed
                    readers.put(READER_KEY, refreshed);
                    // Drop only the map's reference; in-flight searches keep theirs.
                    current.decRef();
                    current = refreshed;
                }
            }
            current.incRef();
            return current;
        }
    }

    /** Releases a reference taken by {@link #acquireReader()}; a null reader is ignored. */
    private static void releaseReader(IndexReader reader) {
        if(reader == null) {
            return;
        }
        try {
            reader.decRef();
        } catch(IOException e) {
            log.error("釋放IndexReader出錯.....", e);
        }
    }
}

八。service包下的BlogSearchService,這是jersey的入口,由這個類向外界提供api:

package com.blog.search.service;

import javax.ws.rs.FormParam;
import javax.ws.rs.GET;
import javax.ws.rs.POST;
import javax.ws.rs.Path;
import javax.ws.rs.Produces;
import javax.ws.rs.QueryParam;
import javax.ws.rs.core.MediaType;

import com.blog.search.BlogSearch;
import com.blog.search.entity.Dashboard;
import com.blog.search.index.BlogIndex;
import com.google.gson.JsonObject;

@Path("/blogSearch/")
/**
 * Jersey resource exposing the search engine under {@code /blogSearch/}: keyword query plus
 * add, delete and update of a post in the index.
 *
 * <p>The write endpoints answer with a JSON object holding a single {@code result} property:
 * 200 on success, 500 when an exception was caught. {@code buildByContent} writes the value as
 * a JSON string ({@code "200"}), the other two as a JSON number; this difference is kept
 * because clients may depend on it.
 */
@Path("/blogSearch/")
public class BlogSearchService {

    /**
     * Returns one page of post ids matching the keyword.
     *
     * @param keyword  query text
     * @param pageSize 1-based page number (passed through to {@code BlogSearch.getResult});
     *                 defaults to 1 when the query parameter is absent
     * @return JSON produced by {@code BlogSearch.getResult}
     */
    @GET
    @Path("/queryByKeyword")
    @Produces(MediaType.APPLICATION_JSON)
    public String queryIdsByKeyword(@QueryParam("keyword") String keyword, @QueryParam("pageSize") Integer pageSize) {
        // A missing parameter arrives as null; unboxing it would throw a NullPointerException.
        int page = (pageSize == null) ? 1 : pageSize.intValue();
        return new BlogSearch().getResult(keyword, page);
    }

    /**
     * Adds a post to the index.
     *
     * @param content post body
     * @param title   post title
     * @param id      post id
     * @return {@code {"result":"200"}} on success, {@code {"result":"500"}} on failure
     */
    @POST
    @Path("/buildByContent")
    @Produces(MediaType.APPLICATION_JSON)
    public String buildIndexByContent(@FormParam("content") String content,@FormParam("title")String title, @FormParam("id") Long id) {
        BlogIndex bi = new BlogIndex();
        Dashboard dashboard = new Dashboard();
        dashboard.setContent(content);
        dashboard.setTitle(title);
        dashboard.setId(id);
        JsonObject json = new JsonObject();
        try {
            bi.writeToIndex(dashboard);
            json.addProperty("result", "200");
        } catch (Exception e) {
            e.printStackTrace();
            json.addProperty("result", "500");
        }
        // Returned after the try block, not from a finally, so Errors are not swallowed.
        return json.toString();
    }

    /**
     * Removes a post from the index.
     *
     * <p>NOTE(review): if {@code BlogIndex.deleteIndex} handles its own failures, this
     * endpoint reports 200 even when the delete did not succeed - verify.
     *
     * @param id post id
     * @return {@code {"result":200}} on success, {@code {"result":500}} on failure
     */
    @POST
    @Path("/deleteById")
    @Produces(MediaType.APPLICATION_JSON)
    public String deleteIndexById(@FormParam("id") Long id) {
        BlogIndex bi = new BlogIndex();
        JsonObject json = new JsonObject();
        try {
            bi.deleteIndex(id);
            json.addProperty("result", 200);
        } catch(Exception e) {
            // No logger is in scope in this class; at least surface the failure.
            e.printStackTrace();
            json.addProperty("result", 500);
        }
        return json.toString();
    }

    /**
     * Replaces the indexed document of a post.
     *
     * @param id      post id
     * @param content new post body
     * @param title   new post title
     * @return {@code {"result":200}} on success, {@code {"result":500}} on failure
     */
    @POST
    @Path("/update")
    @Produces(MediaType.APPLICATION_JSON)
    public String updateIndex(@FormParam("id") Long id, @FormParam("content") String content, @FormParam("title") String title) {
        BlogIndex bi = new BlogIndex();
        JsonObject json = new JsonObject();
        try {
            Dashboard dashboard = new Dashboard();
            dashboard.setContent(content);
            dashboard.setTitle(title);
            dashboard.setId(id);
            bi.updateIndex(dashboard);
            json.addProperty("result", 200);
        } catch(Exception e) {
            // No logger is in scope in this class; at least surface the failure.
            e.printStackTrace();
            json.addProperty("result", 500);
        }
        return json.toString();
    }

}

九,web.xml的配置:

<?xml version="1.0" encoding="UTF-8"?>
<web-app version="2.5" xmlns="http://java.sun.com/xml/ns/javaee"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://java.sun.com/xml/ns/javaee http://java.sun.com/xml/ns/javaee/web-app_2_5.xsd">
    <display-name></display-name>
    <welcome-file-list>
        <welcome-file>index.jsp</welcome-file>
    </welcome-file-list>
    <servlet>
        <servlet-name>JerseyServlet</servlet-name>
        <servlet-class>com.sun.jersey.spi.container.servlet.ServletContainer</servlet-class>
        <init-param>
            <param-name>com.sun.jersey.config.property.packages</param-name>
            <!-- 系統啟動時掃描的包的路徑-->
            <param-value>com.blog.search.service</param-value>
        </init-param>
        <load-on-startup>1</load-on-startup>
    </servlet>
    <servlet-mapping>
        <servlet-name>JerseyServlet</servlet-name>
        <url-pattern>/search/*</url-pattern>
    </servlet-mapping>
    <listener>
        <listener-class>com.blog.listener.SystemStartupListener</listener-class>
    </listener>
</web-app>

十,程序依賴包:

技術分享圖片

slf4j-nop-1.7.5.jar

好了,完成之後,把tomcat配置好,如果你是用myeclipse自帶的tomcat發布的,則訪問http://localhost:port/項目名稱/search/blogSearch/buildByContent?後面就是參數傳遞,查詢也跟這個url類似

就這樣。我們創建了一個簡單的restful風格的簡單搜索引擎。裏面的配置大家依照自己的需求改改就好


lucene構建restful風格的簡單搜索引擎服務