hbase與solr的架構整合
阿新 • 發佈:2019-02-19
大資料架構-使用HBase和Solr將儲存與索引放在不同的機器上
摘要:HBase可以通過協處理器Coprocessor的方式向Solr發出請求,Solr對於接收到的資料可以做相關的同步:增、刪、改索引的操作,這樣就可以同時使用HBase儲存量大和Solr檢索效能高的優點了,更何況HBase和Solr都可以叢集。這對海量資料儲存、檢索提供了一種方式,將儲存與索引放在不同的機器上,是大資料架構的必需品。 關鍵詞:HBase, Solr, Coprocessor, 大資料, 架構 HBase和Solr可以通過協處理器Coprocessor的方式向Solr發出請求,Solr對於接收到的資料可以做相關的同步:增、刪、改索引的操作。將儲存與索引放在不同的機器上,這是大資料架構的必需品,但目前還有很多不懂得此道的同學,他們對於這種思想感到很新奇,不過,這絕對是好的方向,所以不懂的抓緊學習吧。
/*
*版權:王安琪
*描述:監視HBase,一有資料postPut就向Solr傳送,本類要作為觸發器新增到HBase
*修改時間:2014-05-27
*修改內容:新增
*/
package solrHbase.test;
import java.io.UnsupportedEncodingException;
import ***;
publicclass SorlIndexCoprocessorObserver extends BaseRegionObserver {
private static final Logger LOG = LoggerFactory |
publicstatic SolrInputDocument getInputDoc(Put put) { SolrInputDocument doc = new SolrInputDocument(); doc.addField("test_ID", Bytes.toString(put.getRow())); for (KeyValue c : put.getFamilyMap().get(Bytes.toBytes(columnFamily))) { String key = Bytes.toString(c.getKey()); String value = Bytes.toString(c.getValue()); if (value.isEmpty()) { continue; } String fieldName = key.substring(key.indexOf(columnFamily) + 3, key.indexOf("")).trim(); doc.addField(fieldName, value); } return doc; } |
/* *版權:王安琪 *描述:測試HBaseInsert,HBase插入效能 *修改時間:2014-05-27 *修改內容:新增 */ package solrHbase.test; import hbaseInput.HbaseInsert; import ***; publicclass TestHBaseMain { private static Configuration config; private static String tableName = "angelHbase"; private static HTable table = null; private static final String columnFamily = "wanganqi"; /** * @param args */ public static void main(String[] args) { config = HBaseConfiguration.create(); config.set("hbase.zookeeper.quorum", "192.103.101.104"); HbaseInsert.createTable(config, tableName, columnFamily); try { table = new HTable(config, Bytes.toBytes(tableName)); for (int k = 0; k < 1; k++) { Thread t = new Thread() { public void run() { for (int i = 0; i < 100000; i++) { HbaseInsert.inputData(table, PutCreater.createPuts(1000, columnFamily)); Calendar c = Calendar.getInstance(); String dateTime = c.get(Calendar.YEAR) + "-" + c.get(Calendar.MONTH) + "-" + c.get(Calendar.DATE) + "T" + c.get(Calendar.HOUR) + ":" + c.get(Calendar.MINUTE) + ":" + c.get(Calendar.SECOND) + ":" + c.get(Calendar.MILLISECOND) + "Z 寫入: " + i * 1000; System.out.println(dateTime); } } }; t.start(); } } catch (IOException e1) { e1.printStackTrace(); } } } |
/* *版權:王安琪 *描述:與HBase相關操作,建表與插入資料 *修改時間:2014-05-27 *修改內容:新增 */ package hbaseInput; import ***; import org.apache.hadoop.hbase.client.Put; publicclass HbaseInsert { public static void createTable(Configuration config, String tableName, String columnFamily) { HBaseAdmin hBaseAdmin; try { hBaseAdmin = new HBaseAdmin(config); if (hBaseAdmin.tableExists(tableName)) { return; } HTableDescriptor tableDescriptor = new HTableDescriptor(tableName); tableDescriptor.addFamily(new HColumnDescriptor(columnFamily)); hBaseAdmin.createTable(tableDescriptor); hBaseAdmin.close(); } catch (MasterNotRunningException e) { e.printStackTrace(); } catch (ZooKeeperConnectionException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } public static void inputData(HTable table, ArrayList<Put> puts) { try { table.put(puts); table.flushCommits(); puts.clear(); } catch (IOException e) { e.printStackTrace(); } } } |
publicstatic Put createPut(String columnFamily) { String ss = getSentence(); byte[] family = Bytes.toBytes(columnFamily); byte[] rowKey = Bytes.toBytes("" + Math.abs(r.nextLong())); Put put = new Put(rowKey); put.add(family, Bytes.toBytes("DeviceID"), Bytes.toBytes("" + Math.abs(r.nextInt()))); ****** put.add(family, Bytes.toBytes("Company_mmsegsm"), Bytes.toBytes("ss")); return put; } |
privatestaticvoid sendConcurrentUpdateSolrServer(final String url, final int count) throws SolrServerException, IOException { SolrServer solrServer = new ConcurrentUpdateSolrServer(url, 10000, 20); for (int i = 0; i < count; i++) { solrServer.add(getInputDoc(PutCreater.createPut(columnFamily))); } } |