Big Data Notes (14): HBase Filters and MapReduce

Posted by lingluo2017


1. HBase Filters
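The test class below walks through five common HBase filters: a single column value filter (SingleColumnValueFilter), a column name prefix filter (ColumnPrefixFilter), a multiple column prefix filter (MultipleColumnPrefixFilter), a row key filter (RowFilter), and a FilterList that combines several filters.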

package demo;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.ColumnPrefixFilter;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.FilterList.Operator;
import org.apache.hadoop.hbase.filter.MultipleColumnPrefixFilter;
import org.apache.hadoop.hbase.filter.RegexStringComparator;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;

public class TestHBaseFilter {

    /**
     * Column value filter: SingleColumnValueFilter
     */
    @Test
    public void testSingleColumnValueFilter() throws Exception{
        // query the employees whose salary equals 3000
        // select * from emp where sal=3000
        // configure the ZooKeeper quorum address
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "192.168.153.11");
        
        // get an HTable client for the emp table
        HTable client  = new HTable(conf, "emp");
        // define a single column value filter; note that by default rows
        // that do not contain the 'sal' column also pass this filter
        SingleColumnValueFilter filter = new SingleColumnValueFilter(Bytes.toBytes("empinfo"),//column family
                Bytes.toBytes("sal"), //salary column
                CompareOp.EQUAL,       // =
                Bytes.toBytes("3000"));//value to match (stored as a string here)
        
        // define a scanner
        Scan scan = new Scan();
        scan.setFilter(filter);
        
        // query the data through the filter
        ResultScanner rs = client.getScanner(scan);
        for (Result result : rs) {
            String name = Bytes.toString(result.getValue(Bytes.toBytes("empinfo"), Bytes.toBytes("ename")));
            System.out.println(name);
        }
        
        client.close();
    }
    
    /**
     * Column name prefix filter: ColumnPrefixFilter
     */
    @Test
    public void testColumnPrefixFilter() throws Exception{
        // column name prefix filter
        // select ename from emp
        // configure the ZooKeeper quorum address
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "192.168.153.11");
        
        // get an HTable client for the emp table
        HTable client  = new HTable(conf, "emp");
        
        // define a column name prefix filter
        ColumnPrefixFilter filter = new ColumnPrefixFilter(Bytes.toBytes("ename"));
        
        // define a scanner
        Scan scan = new Scan();
        scan.setFilter(filter);
        
        // query the data through the filter
        ResultScanner rs = client.getScanner(scan);
        for (Result result : rs) {
            String name = Bytes.toString(result.getValue(Bytes.toBytes("empinfo"), Bytes.toBytes("ename")));
            System.out.println(name);
        }
        
        client.close();
    }
    
    /**
     * Multiple column name prefix filter: MultipleColumnPrefixFilter
     */
    @Test
    public void testMultipleColumnPrefixFilter() throws Exception{

        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "192.168.153.11");
        
        HTable client  = new HTable(conf, "emp");
        // employee name and salary column prefixes
        byte[][] names = {Bytes.toBytes("ename"),Bytes.toBytes("sal")};
        
        MultipleColumnPrefixFilter filter = new MultipleColumnPrefixFilter(names);
    
        Scan scan = new Scan();
        scan.setFilter(filter);
        
        ResultScanner rs = client.getScanner(scan);
        for (Result result : rs) {
            String name = Bytes.toString(result.getValue(Bytes.toBytes("empinfo"), Bytes.toBytes("ename")));
            String sal = Bytes.toString(result.getValue(Bytes.toBytes("empinfo"), Bytes.toBytes("sal")));
            System.out.println(name+"\t"+sal);
        }
        
        client.close();
    }
    
    /**
     * Row key filter: RowFilter
     */
    @Test
    public void testRowFilter() throws Exception{
        
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "192.168.153.11");
        
        HTable client  = new HTable(conf, "emp");
        
        // define a row key filter (matches the row key against a regex)
        RowFilter filter = new RowFilter(
                CompareOp.EQUAL, //=
                new RegexStringComparator("7839"));
        
        // define a scanner
        Scan scan = new Scan();
        scan.setFilter(filter);
        
        // query the data through the filter
        ResultScanner rs = client.getScanner(scan);
        for (Result result : rs) {
            String name = Bytes.toString(result.getValue(Bytes.toBytes("empinfo"), Bytes.toBytes("ename")));
            String sal = Bytes.toString(result.getValue(Bytes.toBytes("empinfo"), Bytes.toBytes("sal")));
            System.out.println(name+"\t"+sal);
        }
        
        client.close();
    }
    
    /**
     * Combined filters: FilterList
     */
    @Test
    public void testFilter() throws Exception{

        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "192.168.153.11");
        
        HTable client  = new HTable(conf, "emp");
        
        // salary = 3000
        SingleColumnValueFilter filter1 = new SingleColumnValueFilter(Bytes.toBytes("empinfo"), 
                Bytes.toBytes("sal"), CompareOp.EQUAL, Bytes.toBytes("3000"));
        // column name prefix: ename
        ColumnPrefixFilter filter2 = new ColumnPrefixFilter(Bytes.toBytes("ename"));
        
        // MUST_PASS_ALL: a row must pass every filter (logical AND);
        // MUST_PASS_ONE would be a logical OR
        FilterList filterList = new FilterList(Operator.MUST_PASS_ALL);
        filterList.addFilter(filter1);
        filterList.addFilter(filter2);
        
        Scan scan = new Scan();
        scan.setFilter(filterList);
        
        ResultScanner rs = client.getScanner(scan);
        for (Result result : rs) {
            String name = Bytes.toString(result.getValue(Bytes.toBytes("empinfo"), Bytes.toBytes("ename")));
            String sal = Bytes.toString(result.getValue(Bytes.toBytes("empinfo"), Bytes.toBytes("sal")));
            System.out.println(name+"\t"+sal);
        }
        
        client.close();
    }
}
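Note: constructing an HTable directly from a Configuration, as above, belongs to the older client API; it is deprecated in HBase 1.x and was removed in 2.x. Below is a minimal sketch of the same prefix-filter scan rewritten against the newer Connection/Table API. The class name TestHBaseFilterNewApi is illustrative, and it assumes an HBase 1.x or later client on the classpath; the ZooKeeper address is the same one used above.

package demo;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.ColumnPrefixFilter;
import org.apache.hadoop.hbase.util.Bytes;

public class TestHBaseFilterNewApi {

    public static void main(String[] args) throws Exception {
        // HBaseConfiguration.create() also loads hbase-default.xml/hbase-site.xml
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.153.11");

        // ConnectionFactory/Table replace the direct HTable construction
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Table table = conn.getTable(TableName.valueOf("emp"))) {

            Scan scan = new Scan();
            scan.setFilter(new ColumnPrefixFilter(Bytes.toBytes("ename")));

            try (ResultScanner rs = table.getScanner(scan)) {
                for (Result result : rs) {
                    System.out.println(Bytes.toString(
                            result.getValue(Bytes.toBytes("empinfo"), Bytes.toBytes("ename"))));
                }
            }
        }
    }
}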

2. MapReduce on HBase

Create the table and insert some test data (a MapReduce sketch over this table follows the shell commands):

   create 'word','content'

   put 'word','1','content:info','I love Beijing'

   put 'word','2','content:info','I love China'

   put 'word','3','content:info','Beijing is the capital of China'
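
With the test data in place, a MapReduce job can read the 'word' table through a TableMapper and write the word counts back into HBase through a TableReducer. The following is a minimal sketch, not the original article's job: the output table 'stat' (with a 'content' column family, created beforehand with create 'stat','content'), the class names, and the job name are all illustrative; the TableMapReduceUtil calls are the standard HBase MapReduce integration API.

package demo;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

public class WordCountOnHBase {

    // Mapper: each input record is one row of 'word'; split content:info into words
    public static class WordMapper extends TableMapper<Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);

        @Override
        protected void map(ImmutableBytesWritable rowKey, Result value, Context context)
                throws IOException, InterruptedException {
            byte[] cell = value.getValue(Bytes.toBytes("content"), Bytes.toBytes("info"));
            if (cell == null) return; // skip rows without content:info
            for (String word : Bytes.toString(cell).split(" ")) {
                context.write(new Text(word), ONE);
            }
        }
    }

    // Reducer: sum the counts and write one row per word into the output table
    public static class WordReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable v : values) {
                sum += v.get();
            }
            // row key = the word; Put.addColumn assumes an HBase 1.x+ client
            Put put = new Put(Bytes.toBytes(key.toString()));
            put.addColumn(Bytes.toBytes("content"), Bytes.toBytes("count"),
                    Bytes.toBytes(String.valueOf(sum)));
            context.write(new ImmutableBytesWritable(Bytes.toBytes(key.toString())), put);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.153.11");

        Job job = Job.getInstance(conf, "word count on HBase");
        job.setJarByClass(WordCountOnHBase.class);

        // only read the content:info column
        Scan scan = new Scan();
        scan.addColumn(Bytes.toBytes("content"), Bytes.toBytes("info"));

        // read from 'word', write the counts into 'stat'
        TableMapReduceUtil.initTableMapperJob("word", scan, WordMapper.class,
                Text.class, IntWritable.class, job);
        TableMapReduceUtil.initTableReducerJob("stat", WordReducer.class, job);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

To run it, package the class and submit it with hadoop jar (with the HBase client jars on the classpath), then check the result with scan 'stat' in the HBase shell.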

 
