HBase 过滤器 Java API
在IDEA中导入Maven依赖
<!-- Maven dependencies used by the HBase filter examples in this document. -->
<dependencies>
<!-- HBase client library, version 1.4.6. -->
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>1.4.6</version>
</dependency>
<!-- HBase server module, pinned to the same 1.4.6 version as the client. -->
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>1.4.6</version>
</dependency>
<!-- JUnit 4.8.2; the examples rely on its @Test, @Before and @After annotations. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.8.2</version>
</dependency>
</dependencies>
批量插入学生表stu数据
/**
 * Creates the {@code stu} table (single column family {@code info}) when it does not exist yet
 * and bulk-loads {@code data/students.txt} into it.
 *
 * <p>Every non-blank input line is split on {@code ","} into
 * {@code id,name,age,gender,clazz}. The id becomes the row key and the other four fields are
 * written as columns of the {@code info} family. Puts are sent in batches of 100, with a final
 * flush for the remainder.
 *
 * @throws IOException if the input file cannot be read or an HBase call fails
 */
@Test
public void PutStu() throws IOException {
    // Encode explicitly as UTF-8: the read path of these examples decodes cells with
    // Bytes.toString, so relying on the platform default charset here could store bytes
    // that cannot be decoded back (notably for the Chinese name/clazz values).
    final java.nio.charset.Charset utf8 = java.nio.charset.StandardCharsets.UTF_8;
    final byte[] family = "info".getBytes(utf8);
    final int batchSize = 100;
    final TableName stu = TableName.valueOf("stu");

    // Admin is only needed for the existence check / creation, so release it right away.
    try (Admin admin = conn.getAdmin()) {
        if (!admin.tableExists(stu)) {
            admin.createTable(new HTableDescriptor(stu).addFamily(new HColumnDescriptor("info")));
        }
    }

    // try-with-resources closes the table and the reader even when a put or a read throws.
    // NOTE(review): FileReader decodes with the platform default charset - confirm that it
    // matches the encoding of data/students.txt.
    try (Table stuTable = conn.getTable(stu);
         BufferedReader br = new BufferedReader(new FileReader("data/students.txt"))) {
        ArrayList<Put> puts = new ArrayList<>(batchSize);
        String line;
        while ((line = br.readLine()) != null) {
            if (line.trim().isEmpty()) {
                // A blank (e.g. trailing) line carries no record; skip it instead of failing
                // with ArrayIndexOutOfBoundsException below.
                continue;
            }
            String[] split = line.split(",");
            String id = split[0];
            String name = split[1];
            String age = split[2];
            String gender = split[3];
            String clazz = split[4];

            Put put = new Put(id.getBytes(utf8));
            put.addColumn(family, "name".getBytes(utf8), name.getBytes(utf8));
            put.addColumn(family, "age".getBytes(utf8), age.getBytes(utf8));
            put.addColumn(family, "gender".getBytes(utf8), gender.getBytes(utf8));
            put.addColumn(family, "clazz".getBytes(utf8), clazz.getBytes(utf8));
            puts.add(put);

            // Flush a full batch; the list size itself is the batch counter.
            if (puts.size() == batchSize) {
                stuTable.put(puts);
                puts.clear();
            }
        }
        // Write whatever is left over from the last, partially filled batch.
        if (!puts.isEmpty()) {
            stuTable.put(puts);
        }
    }
}
代码封装
定义全局变量
// Shared HBase connection; assigned in init() and closed in close().
Connection conn;
// Handle to the "stu" table; assigned in init() and scanned by getScannerWithFilter().
Table stu;
定义全局方法 执行扫描操作
/**
 * Starts a scan of the {@code stu} table with the given filter applied.
 *
 * @param filter filter to set on the scan
 * @return the scanner returned by the table for the filtered scan
 * @throws IOException if the table cannot open the scanner
 */
public ResultScanner getScannerWithFilter(Filter filter) throws IOException {
    return stu.getScanner(new Scan().setFilter(filter));
}
遍历扫描的数据 (所有行)
/**
 * Scans the {@code stu} table with the given filter and prints one line per row in the form
 * {@code rowkey,name,age,gender,clazz}.
 *
 * <p>A column that is absent from a returned row is printed as {@code null}.
 *
 * @param filter filter to apply to the scan
 * @throws IOException if the scan fails
 */
public void printScanner(Filter filter) throws IOException {
    final byte[] family = Bytes.toBytes("info");
    // ResultScanner is Closeable; close it once iteration ends (or fails) so the scanner's
    // resources are released instead of leaking on every call.
    try (ResultScanner scanner = getScannerWithFilter(filter)) {
        for (Result rs : scanner) {
            String rk = Bytes.toString(rs.getRow());
            String name = Bytes.toString(rs.getValue(family, Bytes.toBytes("name")));
            String age = Bytes.toString(rs.getValue(family, Bytes.toBytes("age")));
            String gender = Bytes.toString(rs.getValue(family, Bytes.toBytes("gender")));
            String clazz = Bytes.toString(rs.getValue(family, Bytes.toBytes("clazz")));
            System.out.println(rk + "," + name + "," + age + "," + gender + "," + clazz);
        }
    }
}
使用CellUtil进行打印
/**
 * Scans the {@code stu} table with the given filter and prints every returned cell as
 * {@code rowkey,value}, one cell per line.
 *
 * @param filter filter to apply to the scan
 * @throws IOException if the scan fails
 */
public void printScannerWithCellUtil(Filter filter) throws IOException {
    // ResultScanner is Closeable; close it once iteration ends (or fails) so the scanner's
    // resources are released instead of leaking on every call.
    try (ResultScanner scanner = getScannerWithFilter(filter)) {
        for (Result rs : scanner) {
            for (Cell cell : rs.listCells()) {
                String rowkey = Bytes.toString(CellUtil.cloneRow(cell));
                String value = Bytes.toString(CellUtil.cloneValue(cell));
                System.out.println(rowkey + "," + value);
            }
        }
    }
}
建立连接
/**
 * Runs before each test: connects to HBase through the configured ZooKeeper quorum and
 * obtains the handle of the {@code stu} table.
 *
 * @throws IOException if the connection or the table handle cannot be created
 */
@Before
public void init() throws IOException {
    final Configuration hbaseConf = HBaseConfiguration.create();
    hbaseConf.set("hbase.zookeeper.quorum", "master:2181,node1:2181,node2:2181");

    this.conn = ConnectionFactory.createConnection(hbaseConf);

    final TableName stuName = TableName.valueOf("stu");
    this.stu = this.conn.getTable(stuName);
}
关闭连接
/**
 * Runs after each test: closes the {@code stu} table handle and then the connection.
 *
 * @throws IOException if closing the table or the connection fails
 */
@After
public void close() throws IOException {
    try {
        // The table handle obtained in init() is Closeable too; release it first.
        if (stu != null) {
            stu.close();
        }
    } finally {
        // Null check: if init() failed before assigning conn, do not mask that failure
        // with a NullPointerException here.
        if (conn != null) {
            conn.close();
        }
    }
}
过滤器
过滤出Rowkey(id)中,包含8的学生信息
/**
 * Prints every student whose row key contains the substring {@code "8"}.
 *
 * @throws IOException if the scan fails
 */
@Test
public void RowFileterWithSubString() throws IOException {
    printScanner(new RowFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator("8")));
}
运行结果:
过滤出stu表列簇名为info下的所有列的数据
/**
 * Prints, as {@code rowkey,value}, every cell of the {@code stu} table whose column family is
 * exactly {@code info}.
 *
 * <p>This example builds the scan and opens the table by hand instead of using the shared
 * helpers.
 *
 * @throws IOException if the scan fails
 */
@Test
public void FamilyFilterWithCom() throws IOException {
    Scan scan = new Scan();
    FamilyFilter familyFilter = new FamilyFilter(CompareFilter.CompareOp.EQUAL,
            new BinaryComparator(Bytes.toBytes("info")));
    scan.setFilter(familyFilter);

    // Both the extra table handle and the scanner are Closeable; try-with-resources
    // releases them even if printing fails part-way through.
    try (Table test3 = conn.getTable(TableName.valueOf("stu"));
         ResultScanner sc = test3.getScanner(scan)) {
        for (Result rs : sc) {
            for (Cell cell : rs.listCells()) {
                String rowkey = Bytes.toString(CellUtil.cloneRow(cell));
                String value = Bytes.toString(CellUtil.cloneValue(cell));
                System.out.println(rowkey + "," + value);
            }
        }
    }
}
运行结果:
过滤出stu表中列名包含a的所有列的数据(使用正则表达式比较器)
@Test
public void QualifierFilterWithRegex() throws IOException {
QualifierFilter qualifierFilter = new QualifierFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator(".*a.*"));
printScannerWithCellUtil(qualifierFilter);
}
运行结果:
过滤出 数据中包含 文 的所有数据
@Test
public void ValueFilterWithSubString() throws IOException {
ValueFilter valueFilter = new ValueFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator("文"));
printScannerWithCellUtil(valueFilter);
}
运行结果:
过滤出 数据中包含 文 的所有数据(与上一个示例相同,重复展示)
@Test
public void ValueFilterWithSubString() throws IOException {
ValueFilter valueFilter = new ValueFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator("文"));
printScannerWithCellUtil(valueFilter);
}
运行结果:
过滤出班级是 文科班 的学生的所有信息
@Test
public void SingleColumnValueFilterWithBinaryPrefix() throws IOException {
SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter("info".getBytes()
, "clazz".getBytes()
, CompareFilter.CompareOp.EQUAL
, new BinaryPrefixComparator("文科".getBytes())
);
printScanner(singleColumnValueFilter);
}
运行结果:
过滤出班级是 文科班 的学生的所有信息,最终结果没有 clazz 列
@Test
public void SingleColumnValueExcludeFilterWithBinaryPrefix() throws IOException {
SingleColumnValueExcludeFilter singleColumnValueExcludeFilter = new SingleColumnValueExcludeFilter("info".getBytes()
, "clazz".getBytes()
, CompareFilter.CompareOp.EQUAL
, new BinaryPrefixComparator("文科".getBytes())
);
printScanner(singleColumnValueExcludeFilter);
}
运行结果:
过滤出年龄是 奇数 的学生的所有信息
@Test
public void SingleColumnValueFilterWithRegex() throws IOException {
SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter("info".getBytes()
, "age".getBytes()
, CompareFilter.CompareOp.EQUAL
, new RegexStringComparator("^[0-9]{0,1}[13579]$")
);
printScanner(singleColumnValueFilter);
}
运行结果:
查询rowkey以150010008为前缀的所有数据
@Test
public void PrefixFilter() throws IOException {
PrefixFilter prefixFilter = new PrefixFilter("150010008".getBytes());
printScanner(prefixFilter);
System.out.println("==================");
RowFilter rowFilter = new RowFilter(CompareFilter.CompareOp.EQUAL, new BinaryPrefixComparator("150010008".getBytes()));
printScanner(rowFilter);
}
运行结果:
多过滤器综合查询
过滤出 理科班 中的 女生 年龄为奇数 的所有信息
@Test
public void ComnineFilter() throws IOException {
SingleColumnValueFilter filter1 = new SingleColumnValueFilter("info".getBytes()
, "clazz".getBytes()
, CompareFilter.CompareOp.EQUAL
, new BinaryPrefixComparator("理科".getBytes())
);
SingleColumnValueFilter filter2 = new SingleColumnValueFilter("info".getBytes()
, "gender".getBytes()
, CompareFilter.CompareOp.EQUAL
, "女".getBytes()
);
SingleColumnValueFilter filter3 = new SingleColumnValueFilter("info".getBytes()
, "age".getBytes()
, CompareFilter.CompareOp.EQUAL
, new RegexStringComparator("^[0-9]{0,1}[13579]$")
);
FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ONE);
filterList.addFilter(filter1);
filterList.addFilter(filter2);
filterList.addFilter(filter3);
printScanner(filterList);
}
运行结果:
完整代码
package com.liangzai.hbase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
/**
 * JUnit examples of HBase scan filters, all run against the {@code stu} table whose student
 * attributes ({@code name}, {@code age}, {@code gender}, {@code clazz}) live in the
 * {@code info} column family.
 *
 * <p>{@link #init()} opens the connection and the table before each test and
 * {@link #close()} releases them afterwards. Text is encoded with {@code Bytes.toBytes}
 * (UTF-8) so that it round-trips with the {@code Bytes.toString} decoding used for printing.
 */
public class Demo04Filter {
    /** Column family that holds every student attribute. */
    private static final byte[] INFO = Bytes.toBytes("info");

    /** Matches an odd number of one or two digits. */
    private static final String ODD_AGE_REGEX = "^[0-9]{0,1}[13579]$";

    /** Shared HBase connection; assigned in {@link #init()}. */
    Connection conn;

    /** Handle to the {@code stu} table; assigned in {@link #init()}. */
    Table stu;

    /**
     * Starts a scan of the {@code stu} table with the given filter applied.
     *
     * <p>The caller is responsible for closing the returned scanner.
     *
     * @param filter filter to set on the scan
     * @return the scanner returned by the table for the filtered scan
     * @throws IOException if the table cannot open the scanner
     */
    public ResultScanner getScannerWithFilter(Filter filter) throws IOException {
        Scan scan = new Scan();
        scan.setFilter(filter);
        return stu.getScanner(scan);
    }

    /**
     * Scans the {@code stu} table with the given filter and prints one line per row in the
     * form {@code rowkey,name,age,gender,clazz}.
     *
     * <p>A column that is absent from a returned row is printed as {@code null}.
     *
     * @param filter filter to apply to the scan
     * @throws IOException if the scan fails
     */
    public void printScanner(Filter filter) throws IOException {
        // Close the scanner when done (or on failure) so its resources are released.
        try (ResultScanner scanner = getScannerWithFilter(filter)) {
            for (Result rs : scanner) {
                String rk = Bytes.toString(rs.getRow());
                String name = infoColumn(rs, "name");
                String age = infoColumn(rs, "age");
                String gender = infoColumn(rs, "gender");
                String clazz = infoColumn(rs, "clazz");
                System.out.println(rk + "," + name + "," + age + "," + gender + "," + clazz);
            }
        }
    }

    /**
     * Scans the {@code stu} table with the given filter and prints every returned cell as
     * {@code rowkey,value}, one cell per line.
     *
     * @param filter filter to apply to the scan
     * @throws IOException if the scan fails
     */
    public void printScannerWithCellUtil(Filter filter) throws IOException {
        // Close the scanner when done (or on failure) so its resources are released.
        try (ResultScanner scanner = getScannerWithFilter(filter)) {
            printCells(scanner);
        }
    }

    /**
     * Runs before each test: connects to HBase through the configured ZooKeeper quorum and
     * obtains the handle of the {@code stu} table.
     *
     * @throws IOException if the connection or the table handle cannot be created
     */
    @Before
    public void init() throws IOException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "master:2181,node1:2181,node2:2181");
        conn = ConnectionFactory.createConnection(conf);
        stu = conn.getTable(TableName.valueOf("stu"));
    }

    /**
     * Prints every student whose row key contains the substring {@code "8"}.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void RowFileterWithSubString() throws IOException {
        SubstringComparator comparator = new SubstringComparator("8");
        RowFilter rowFilter = new RowFilter(CompareFilter.CompareOp.EQUAL, comparator);
        printScanner(rowFilter);
    }

    /**
     * Prints, as {@code rowkey,value}, every cell whose column family is exactly
     * {@code info}. This example builds the scan and opens the table by hand instead of
     * going through {@link #getScannerWithFilter(Filter)}.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void FamilyFilterWithCom() throws IOException {
        Scan scan = new Scan();
        FamilyFilter familyFilter =
                new FamilyFilter(CompareFilter.CompareOp.EQUAL, new BinaryComparator(INFO));
        scan.setFilter(familyFilter);

        // The extra table handle and the scanner are both Closeable; release them even if
        // printing fails part-way through.
        try (Table test3 = conn.getTable(TableName.valueOf("stu"));
             ResultScanner sc = test3.getScanner(scan)) {
            printCells(sc);
        }
    }

    /**
     * Prints every cell whose column qualifier matches the regular expression
     * {@code .*a.*}.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void QualifierFilterWithRegex() throws IOException {
        QualifierFilter qualifierFilter = new QualifierFilter(
                CompareFilter.CompareOp.EQUAL, new RegexStringComparator(".*a.*"));
        printScannerWithCellUtil(qualifierFilter);
    }

    /**
     * Prints every cell whose value contains the substring {@code "文"}.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void ValueFilterWithSubString() throws IOException {
        ValueFilter valueFilter = new ValueFilter(
                CompareFilter.CompareOp.EQUAL, new SubstringComparator("文"));
        printScannerWithCellUtil(valueFilter);
    }

    /**
     * Prints all columns of every student whose {@code info:clazz} value starts with
     * {@code "文科"}.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void SingleColumnValueFilterWithBinaryPrefix() throws IOException {
        SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter(
                INFO,
                Bytes.toBytes("clazz"),
                CompareFilter.CompareOp.EQUAL,
                new BinaryPrefixComparator(Bytes.toBytes("文科")));
        // By default a row lacking the tested column is emitted; drop such rows.
        singleColumnValueFilter.setFilterIfMissing(true);
        printScanner(singleColumnValueFilter);
    }

    /**
     * Selects every student whose {@code info:clazz} value starts with {@code "文科"} using
     * a {@code SingleColumnValueExcludeFilter}, then prints the rows via
     * {@link #printScanner(Filter)}.
     *
     * <p>The tutorial's point is that the tested {@code clazz} column is left out of the
     * result.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void SingleColumnValueExcludeFilterWithBinaryPrefix() throws IOException {
        SingleColumnValueExcludeFilter singleColumnValueExcludeFilter =
                new SingleColumnValueExcludeFilter(
                        INFO,
                        Bytes.toBytes("clazz"),
                        CompareFilter.CompareOp.EQUAL,
                        new BinaryPrefixComparator(Bytes.toBytes("文科")));
        // By default a row lacking the tested column is emitted; drop such rows.
        singleColumnValueExcludeFilter.setFilterIfMissing(true);
        printScanner(singleColumnValueExcludeFilter);
    }

    /**
     * Prints all columns of every student whose {@code info:age} value is an odd number of
     * one or two digits.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void SingleColumnValueFilterWithRegex() throws IOException {
        SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter(
                INFO,
                Bytes.toBytes("age"),
                CompareFilter.CompareOp.EQUAL,
                new RegexStringComparator(ODD_AGE_REGEX));
        // By default a row lacking the tested column is emitted; drop such rows.
        singleColumnValueFilter.setFilterIfMissing(true);
        printScanner(singleColumnValueFilter);
    }

    /**
     * Prints the students whose row key starts with {@code 150010008}, first selected with a
     * {@code PrefixFilter} and then, after a separator line, with a {@code RowFilter} using
     * a {@code BinaryPrefixComparator}.
     *
     * @throws IOException if either scan fails
     */
    @Test
    public void PrefixFilter() throws IOException {
        PrefixFilter prefixFilter = new PrefixFilter(Bytes.toBytes("150010008"));
        printScanner(prefixFilter);
        System.out.println("==================");
        RowFilter rowFilter = new RowFilter(CompareFilter.CompareOp.EQUAL,
                new BinaryPrefixComparator(Bytes.toBytes("150010008")));
        printScanner(rowFilter);
    }

    /**
     * Prints all columns of every student who is in a class starting with {@code "理科"},
     * AND whose gender is {@code "女"}, AND whose age is an odd one- or two-digit number.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void ComnineFilter() throws IOException {
        SingleColumnValueFilter filter1 = new SingleColumnValueFilter(
                INFO,
                Bytes.toBytes("clazz"),
                CompareFilter.CompareOp.EQUAL,
                new BinaryPrefixComparator(Bytes.toBytes("理科")));
        SingleColumnValueFilter filter2 = new SingleColumnValueFilter(
                INFO,
                Bytes.toBytes("gender"),
                CompareFilter.CompareOp.EQUAL,
                Bytes.toBytes("女"));
        SingleColumnValueFilter filter3 = new SingleColumnValueFilter(
                INFO,
                Bytes.toBytes("age"),
                CompareFilter.CompareOp.EQUAL,
                new RegexStringComparator(ODD_AGE_REGEX));

        // All three conditions must hold at once, so the list has to AND its members.
        // MUST_PASS_ONE would OR them and return far more rows than intended.
        FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
        for (SingleColumnValueFilter condition
                : new SingleColumnValueFilter[] {filter1, filter2, filter3}) {
            // A row lacking the tested column must not count as satisfying the condition.
            condition.setFilterIfMissing(true);
            filterList.addFilter(condition);
        }
        printScanner(filterList);
    }

    /**
     * Runs after each test: closes the {@code stu} table handle and then the connection.
     *
     * @throws IOException if closing the table or the connection fails
     */
    @After
    public void close() throws IOException {
        try {
            if (stu != null) {
                stu.close();
            }
        } finally {
            // Null check: do not mask a failure in init() with a NullPointerException.
            if (conn != null) {
                conn.close();
            }
        }
    }

    /** Returns the {@code info:<qualifier>} value of the row as text, or null if absent. */
    private static String infoColumn(Result row, String qualifier) {
        return Bytes.toString(row.getValue(INFO, Bytes.toBytes(qualifier)));
    }

    /** Prints every cell of every row from the scanner as {@code rowkey,value}. */
    private static void printCells(ResultScanner scanner) {
        for (Result rs : scanner) {
            for (Cell cell : rs.listCells()) {
                String rowkey = Bytes.toString(CellUtil.cloneRow(cell));
                String value = Bytes.toString(CellUtil.cloneValue(cell));
                System.out.println(rowkey + "," + value);
            }
        }
    }
}
到底啦!关注靓仔学习更多的大数据知识!😊
|