過濾器查詢
引言:過濾器的類型很多,但是可以分為兩大類——比較過濾器和專用過濾器。
過濾器的作用是在服務端判斷數據是否滿足條件,然後只將滿足條件的數據返回給客戶端。
hbase過濾器的比較運算符:
- LESS:<
- LESS_OR_EQUAL:<=
- EQUAL:=
- NOT_EQUAL:<>
- GREATER_OR_EQUAL:>=
- GREATER:>
- NO_OP:排除所有
Hbase過濾器的比較器(指定比較機制):
- BinaryComparator:按字節索引順序比較指定字節數組,採用 Bytes.compareTo(byte[])
- BinaryPrefixComparator:跟前面相同,但只比較左端(前綴)的數據是否相同
- NullComparator:判斷給定的值是否為空
- BitComparator:按位比較
- RegexStringComparator:提供一個正則的比較器,僅支持 EQUAL 和 NOT_EQUAL
- SubstringComparator:判斷提供的子串是否出現在 value 中
Hbase的過濾器分類
- 比較過濾器
1.1 行鍵過濾器RowFilter
// Row-key filter: keep rows whose key is less than or equal to "row-22"
// when compared byte by byte.
BinaryComparator upperBound = new BinaryComparator(Bytes.toBytes("row-22"));
Filter filter1 = new RowFilter(CompareOp.LESS_OR_EQUAL, upperBound);
scan.setFilter(filter1);
1.2 列族過濾器FamilyFilter
// Column-family filter: keep cells whose family name is strictly less than
// "colfam3" when compared byte by byte.
BinaryComparator familyBound = new BinaryComparator(Bytes.toBytes("colfam3"));
Filter filter1 = new FamilyFilter(CompareFilter.CompareOp.LESS, familyBound);
scan.setFilter(filter1);
1.3 列過濾器QualifierFilter
filter = new QualifierFilter(CompareFilter.CompareOp.LESS_OR_EQUAL, new BinaryComparator(Bytes.toBytes("col-2"))); scan.setFilter(filter1); |
1.4 值過濾器 ValueFilter
// Value filter: keep cells whose value contains the substring ".4".
// The scan receives the filter declared on the line above (the original
// passed an undefined "filter1").
Filter filter = new ValueFilter(CompareFilter.CompareOp.EQUAL,
        new SubstringComparator(".4"));
scan.setFilter(filter);
- 專用過濾器
2.1 單列值過濾器 SingleColumnValueFilter ----會返回滿足條件的整行
// Single-column-value filter: decides per row, based on colfam1:col-5, and
// returns the whole row when the value does NOT contain "val-5".
SingleColumnValueFilter filter = new SingleColumnValueFilter(
        Bytes.toBytes("colfam1"),
        Bytes.toBytes("col-5"),
        CompareFilter.CompareOp.NOT_EQUAL,
        new SubstringComparator("val-5"));
// Without this, rows that do not contain the tested column are returned too.
filter.setFilterIfMissing(true);
// Hand the filter declared above to the scan (the original passed an
// undefined "filter1").
scan.setFilter(filter);
2.2 SingleColumnValueExcludeFilter
與 SingleColumnValueFilter 的判斷條件相同,區別在於返回的結果中不包含用於比較的那一列。
2.3 前綴過濾器 PrefixFilter----針對行鍵
// Prefix filter: keep rows whose row key starts with "row1".
// The scan receives the filter declared here (the original passed an
// undefined "filter1").
Filter filter = new PrefixFilter(Bytes.toBytes("row1"));
scan.setFilter(filter);
2.4 列前綴過濾器 ColumnPrefixFilter
// Column-prefix filter: keep cells whose column qualifier starts with "qual2".
// The scan receives the filter declared here (the original passed an
// undefined "filter1").
Filter filter = new ColumnPrefixFilter(Bytes.toBytes("qual2"));
scan.setFilter(filter);
2.5 分頁過濾器 PageFilter
public static void main(String[] args) throws Exception { Configuration conf = HBaseConfiguration.create(); conf.set("hbase.zookeeper.quorum", "spark01:2181,spark02:2181,spark03:2181");
String tableName = "testfilter"; String cfName = "f1"; final byte[] POSTFIX = new byte[] { 0x00 }; HTable table = new HTable(conf, tableName); Filter filter = new PageFilter(3); byte[] lastRow = null; int totalRows = 0; while (true) { Scan scan = new Scan(); scan.setFilter(filter); if(lastRow != null){ //注意這里添加了POSTFIX操作,用來重置掃描邊界 byte[] startRow = Bytes.add(lastRow,POSTFIX); scan.setStartRow(startRow); } ResultScanner scanner = table.getScanner(scan); int localRows = 0; Result result; while((result = scanner.next()) != null){ System.out.println(localRows++ + ":" + result); totalRows ++; lastRow = result.getRow(); } scanner.close(); if(localRows == 0) break; } System.out.println("total rows:" + totalRows); } |
/** * 多種過濾條件的使用方法 * @throws Exception */ @Test public void testScan() throws Exception{ HTable table = new HTable(conf, "person_info".getBytes()); Scan scan = new Scan(Bytes.toBytes("person_rk_bj_zhang_000001"), Bytes.toBytes("person_rk_bj_zhang_000002"));
//前綴過濾器----針對行鍵 Filter filter = new PrefixFilter(Bytes.toBytes("rk"));
//行過濾器 ---針對行鍵 ByteArrayComparable rowComparator = new BinaryComparator(Bytes.toBytes("person_rk_bj_zhang_000001")); RowFilter rf = new RowFilter(CompareOp.LESS_OR_EQUAL, rowComparator);
/** * 假設rowkey格式為:創建日期_發布日期_ID_TITLE * 目標:查找 發布日期 為 2014-12-21 的數據 * sc.textFile("path").flatMap(line=>line.split("\t")).map(x=>(x,1)).reduceByKey(_+_).map((_(2),_(1))).sortByKey().map((_(2),_(1))).saveAsTextFile("") * * */ rf = new RowFilter(CompareOp.EQUAL , new SubstringComparator("_2014-12-21_"));
//單值過濾器1完整匹配字節數組 new SingleColumnValueFilter("base_info".getBytes(), "name".getBytes(), CompareOp.EQUAL, "zhangsan".getBytes()); //單值過濾器2 匹配正則表達式 ByteArrayComparable comparator = new RegexStringComparator("zhang."); new SingleColumnValueFilter("info".getBytes(), "NAME".getBytes(), CompareOp.EQUAL, comparator);
//單值過濾器3匹配是否包含子串,大小寫不敏感 comparator = new SubstringComparator("wu"); new SingleColumnValueFilter("info".getBytes(), "NAME".getBytes(), CompareOp.EQUAL, comparator);
//鍵值對元數據過濾-----family過濾----字節數組完整匹配 FamilyFilter ff = new FamilyFilter( CompareOp.EQUAL , new BinaryComparator(Bytes.toBytes("base_info")) //表中不存在inf列族,過濾結果為空 ); //鍵值對元數據過濾-----family過濾----字節數組前綴匹配 ff = new FamilyFilter( CompareOp.EQUAL , new BinaryPrefixComparator(Bytes.toBytes("inf")) //表中存在以inf打頭的列族info,過濾結果為該列族所有行 );
//鍵值對元數據過濾-----qualifier過濾----字節數組完整匹配
filter = new QualifierFilter( CompareOp.EQUAL , new BinaryComparator(Bytes.toBytes("na")) //表中不存在na列,過濾結果為空 ); filter = new QualifierFilter( CompareOp.EQUAL , new BinaryPrefixComparator(Bytes.toBytes("na")) //表中存在以na打頭的列name,過濾結果為所有行的該列數據 );
//基於列名(即Qualifier)前綴過濾數據的ColumnPrefixFilter filter = new ColumnPrefixFilter("na".getBytes());
//基於列名(即Qualifier)多個前綴過濾數據的MultipleColumnPrefixFilter byte[][] prefixes = new byte[][] {Bytes.toBytes("na"), Bytes.toBytes("me")}; filter = new MultipleColumnPrefixFilter(prefixes);
//為查詢設置過濾條件 scan.setFilter(filter);
scan.addFamily(Bytes.toBytes("base_info")); //一行 // Result result = table.get(get); //多行的數據 ResultScanner scanner = table.getScanner(scan); for(Result r : scanner){ /** for(KeyValue kv : r.list()){ String family = new String(kv.getFamily()); System.out.println(family); String qualifier = new String(kv.getQualifier()); System.out.println(qualifier); System.out.println(new String(kv.getValue())); } */ //直接從result中取到某個特定的value byte[] value = r.getValue(Bytes.toBytes("base_info"), Bytes.toBytes("name")); System.out.println(new String(value)); } table.close(); } |