HBase過濾器(一):比較過濾器API
過濾器(filter)
目錄
一:行過濾器(rowFilter)
解析:行過濾器基於rowkey來過濾資料。使用多種運算子返回符合條件的行鍵,同時過濾掉不符合條件的rowkey。
package compareFilter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;

import java.io.IOException;

/**
 * RowFilter examples: each test scans table "ns1:t1" (columns cf1:name and
 * cf1:age) with a different comparator applied to the row key and prints
 * the matching rows.
 */
public class rowfilter {

    private static final String TABLE_NAME = "ns1:t1";
    private static final byte[] FAMILY = Bytes.toBytes("cf1");
    private static final byte[] NAME = Bytes.toBytes("name");
    private static final byte[] AGE = Bytes.toBytes("age");

    /**
     * Selects rows whose key is less than or equal to "row3" (binary comparison)
     * and prints both the name and the age of every match.
     *
     * @throws IOException if the HBase connection or the scan fails
     */
    @Test
    public void rowfilter() throws IOException {
        Filter filter = new RowFilter(CompareFilter.CompareOp.LESS_OR_EQUAL,
                new BinaryComparator(Bytes.toBytes("row3")));
        scanAndPrint(filter, true);
    }

    /**
     * Selects rows whose key matches the regular expression ".3" and prints
     * the name of every match.
     *
     * @throws IOException if the HBase connection or the scan fails
     */
    @Test
    public void rowRegexfilter() throws IOException {
        Filter filter = new RowFilter(CompareFilter.CompareOp.EQUAL,
                new RegexStringComparator(".3"));
        scanAndPrint(filter, false);
    }

    /**
     * Selects rows whose key contains the substring "3" and prints the name
     * of every match.
     *
     * @throws IOException if the HBase connection or the scan fails
     */
    @Test
    public void rowSubStringfilter() throws IOException {
        Filter filter = new RowFilter(CompareFilter.CompareOp.EQUAL,
                new SubstringComparator("3"));
        scanAndPrint(filter, false);
    }

    /**
     * Runs one filtered scan over cf1:name and cf1:age and prints each result.
     *
     * @param filter   the filter to attach to the scan
     * @param printAge whether the cf1:age cell is decoded as an int and printed
     * @throws IOException if the HBase connection or the scan fails
     */
    private void scanAndPrint(Filter filter, boolean printAge) throws IOException {
        System.out.print("begin\n");

        Configuration configuration = HBaseConfiguration.create();

        Scan scan = new Scan();
        scan.addColumn(FAMILY, NAME);
        scan.addColumn(FAMILY, AGE);
        scan.setFilter(filter);

        // try-with-resources closes the scanner, the table and the connection
        // (in that order) even when the scan throws; the original never closed
        // the connection at all.
        try (Connection connection = ConnectionFactory.createConnection(configuration);
             Table table = connection.getTable(TableName.valueOf(TABLE_NAME));
             ResultScanner resultScanner = table.getScanner(scan)) {
            for (Result result : resultScanner) {
                System.out.println(result);
                String name = Bytes.toString(result.getValue(FAMILY, NAME));
                if (printAge) {
                    // A row may lack the age cell; Bytes.toInt would throw on null.
                    byte[] ageBytes = result.getValue(FAMILY, AGE);
                    Integer age = (ageBytes == null) ? null : Integer.valueOf(Bytes.toInt(ageBytes));
                    System.out.println(name);
                    System.out.println(age);
                } else {
                    System.out.println(name);
                }
            }
        }

        System.out.print("end\n");
    }
}
二:列族過濾器(FamilyFilter)
解析:列族過濾器與行過濾器相似,不過它是通過比較列族而不是比較rowkey來返回結果的。通過使用不同組合的運算子和比較器,使用者可以在列族一級篩選所需的資料。
package compareFilter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;

import java.io.IOException;

/**
 * FamilyFilter example: applies the same column-family filter to a full scan
 * and to a single-row Get on table "ns1:t1".
 */
public class familyfilter {

    /**
     * Keeps only cells whose column family compares as less than "cf2"
     * (binary comparison), prints every scanned row together with its
     * cf1:name value, then prints the result of a Get on "row1" using the
     * same filter.
     *
     * @throws IOException if the HBase connection, the scan or the get fails
     */
    @Test
    public void familyfilter() throws IOException {
        System.out.print("begin\n");

        Configuration configuration = HBaseConfiguration.create();

        Filter filter = new FamilyFilter(CompareFilter.CompareOp.LESS,
                new BinaryComparator(Bytes.toBytes("cf2")));

        Scan scan = new Scan();
        scan.setFilter(filter);

        // try-with-resources releases the connection and table even on failure;
        // the original never closed the connection.
        try (Connection connection = ConnectionFactory.createConnection(configuration);
             Table table = connection.getTable(TableName.valueOf("ns1:t1"))) {

            // The scanner is only needed for the loop, so it is closed before the Get.
            try (ResultScanner resultScanner = table.getScanner(scan)) {
                for (Result result : resultScanner) {
                    System.out.println(result);
                    String name = Bytes.toString(
                            result.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("name")));
                    System.out.println(name);
                }
            }

            Get get = new Get(Bytes.toBytes("row1"));
            get.setFilter(filter);
            Result result = table.get(get);
            System.out.println("result:" + result);
        }

        System.out.print("end\n");
    }
}
三:列名過濾器(QualifierFilter)
解析:列名過濾器的邏輯與前兩種過濾器類似,只是比較的物件換成了列名(qualifier),可以幫助使用者篩選特定的列。
package compareFilter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.QualifierFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;

import java.io.IOException;

/**
 * QualifierFilter example: applies the same column-qualifier filter to a full
 * scan and to a single-row Get on table "ns1:t1".
 */
public class qualifierfilter {

    /**
     * Keeps only cells whose qualifier compares as less than or equal to "age"
     * (binary comparison), prints every scanned row together with its cf1:name
     * value, then prints the result of a Get on "row1" using the same filter.
     *
     * @throws IOException if the HBase connection, the scan or the get fails
     */
    @Test
    public void familynamefilter() throws IOException {
        System.out.print("begin\n");

        Configuration configuration = HBaseConfiguration.create();

        Filter filter = new QualifierFilter(CompareFilter.CompareOp.LESS_OR_EQUAL,
                new BinaryComparator(Bytes.toBytes("age")));

        Scan scan = new Scan();
        scan.setFilter(filter);

        // try-with-resources releases the connection and table even on failure;
        // the original never closed the connection.
        try (Connection connection = ConnectionFactory.createConnection(configuration);
             Table table = connection.getTable(TableName.valueOf("ns1:t1"))) {

            // The scanner is only needed for the loop, so it is closed before the Get.
            try (ResultScanner resultScanner = table.getScanner(scan)) {
                for (Result result : resultScanner) {
                    System.out.println(result);
                    // NOTE(review): "name" sorts after "age", so this cell is presumably
                    // removed by the filter and the printed value is null - confirm
                    // whether reading cf1:age was intended here.
                    String name = Bytes.toString(
                            result.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("name")));
                    System.out.println(name);
                }
            }

            Get get = new Get(Bytes.toBytes("row1"));
            get.setFilter(filter);
            Result result = table.get(get);
            System.out.println("result:" + result);
        }

        System.out.print("end\n");
    }
}
四:值過濾器(ValueFilter)
解析:這個過濾器可以幫助使用者篩選某個特定值的單元格,與RegexStringComparator配合使用,可以使用功能強大的表示式來進行篩選,需要注意的是,在使用特定比較器的時候,只能與部分運算子配合使用。
package compareFilter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.SubstringComparator;
import org.apache.hadoop.hbase.filter.ValueFilter;
import org.junit.Test;
import java.io.IOException;
public class valuefilter {
private Configuration configuration = null;
private Connection connection = null;
/*
* 根據值查詢
*/
@Test
public void valueFilter() throws IOException {
//建立Hbase配置檔案
configuration = HBaseConfiguration.create();
//建立連線
connection = ConnectionFactory.createConnection(configuration);
Table table = connection.getTable(TableName.valueOf("ns1:t1"));
Scan scan = new Scan();
Filter filter = new ValueFilter(CompareFilter.CompareOp.EQUAL,new SubstringComparator(".4"));
//建立掃描返回類
ResultScanner resultScanner = table.getScanner(scan);
for (Result result:resultScanner){
for (KeyValue kv :result.raw())
{
System.out.println(kv);
System.out.println(kv.getValue());
}
}
resultScanner.close();
table.close();
}
}
五:參考列過濾器(DependentColumnFilter)
解析:DependentColumnFilter以指定的參考列為基準,依據該參考列的時間戳來過濾資料:只有時間戳與參考列相同的單元格才會被返回。
此過濾器提供了四種建構函式:
(1)DependentColumnFilter()
(2)DependentColumnFilter(byte[] family, byte[] qualifier)
(3)DependentColumnFilter(byte[] family, byte[] qualifier, boolean dropDependentColumn)
(4)DependentColumnFilter(byte[] family, byte[] qualifier, boolean dropDependentColumn, CompareOp valueCompareOp, WritableByteArrayComparable valueComparator)
相關引數:
boolean dropDependentColumn -- 決定參考列被返回還是丟棄,為true時表示參考列被丟棄,為false時表示被返回
CompareOp valueCompareOp -- 比較運算子
ByteArrayComparable valueComparator -- 比較器(HBase 0.96之前的舊版本中,此型別名為WritableByteArrayComparable)
package compareFilter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;
import java.io.IOException;
/**
 * DependentColumnFilter example: scans table "ns1:t1" using cf1:name as the
 * reference column and prints every returned row.
 *
 * NOTE(review): this class shares its name with the ValueFilter example in the
 * same package; the name is kept unchanged here, but the two cannot coexist in
 * one source tree - consider renaming.
 */
public class valuefilter {

    /**
     * Scans the table with a DependentColumnFilter on cf1:name and prints each
     * result.
     *
     * @throws IOException if the HBase connection or the scan fails
     */
    @Test
    public void valueFilter() throws IOException {
        Configuration configuration = HBaseConfiguration.create();

        // Third argument is dropDependentColumn: false keeps the reference
        // column (cf1:name) in the returned results.
        Filter filter = new DependentColumnFilter(Bytes.toBytes("cf1"), Bytes.toBytes("name"), false);

        Scan scan = new Scan();
        scan.setFilter(filter);

        // try-with-resources closes scanner, table and connection even on failure;
        // the original never closed the connection.
        try (Connection connection = ConnectionFactory.createConnection(configuration);
             Table table = connection.getTable(TableName.valueOf("ns1:t1"));
             ResultScanner resultScanner = table.getScanner(scan)) {
            for (Result result : resultScanner) {
                System.out.println(result);
            }
        }
    }
}