HBase WordCount

Posted by 求知cvip

This post walks through a WordCount example built on HBase: a MapReduce job that reads words out of one HBase table with a TableMapper and writes the aggregated counts back into another HBase table with a TableReducer.

package com.neworigin.HBaseMR;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
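// The next six imports support the initTB() sketch added below (HBase 1.x client API)
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;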
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

public class HbaseMRTest {
    static Configuration conf = null;
    static {
//        Three ways to set up the Configuration:
//        (1) put hbase-site.xml on the classpath (e.g. under src) and let create() pick it up
        conf = HBaseConfiguration.create();
//        (2) set the ZooKeeper quorum with the port inline
//        conf.set("hbase.zookeeper.quorum", "s100:2181,s101:2181,s102:2181");
//        (3) set the servers and the client port separately
        conf.set("hbase.zookeeper.quorum", "s100,s101,s102");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
    }
    
////    Table info
//    public static final String tablename="wordtest";//source table
//    public static final String colf="content";//column family
//    public static final String col="info";//column qualifier
//    public static final String tablename2="stat";//output table
//
//public static void initTB(){
//
//}
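// A possible body for the initTB() stub above: a minimal sketch, assuming the
// HBase 1.x client API (ConnectionFactory/Admin; on 0.9x use HBaseAdmin instead).
// It creates the source table "wordtest" and the output table "stat", each with
// the single column family "content" used by the mapper and reducer below.
public static void initTB() throws IOException {
    try (Connection connection = ConnectionFactory.createConnection(conf);
         Admin admin = connection.getAdmin()) {
        for (String name : new String[] {"wordtest", "stat"}) {
            TableName tn = TableName.valueOf(name);
            if (!admin.tableExists(tn)) {
                HTableDescriptor desc = new HTableDescriptor(tn);
                desc.addFamily(new HColumnDescriptor("content"));
                admin.createTable(desc);
            }
        }
    }
}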
// Mapper: the output types are Text (the word) and IntWritable (the count)
public static class HBmapper extends TableMapper<Text, IntWritable> {
    private static final IntWritable one = new IntWritable(1);
    private static final Text word = new Text();

//    Input types: key is the row key, value is the Result holding that row's cells
    @Override
    protected void map(ImmutableBytesWritable key, Result value,
            Mapper<ImmutableBytesWritable, Result, Text, IntWritable>.Context context)
            throws IOException, InterruptedException {
        for (Cell cell : value.rawCells()) {
            word.set(CellUtil.cloneValue(cell)); // read the cell value (a word)
            context.write(word, one);            // emit: word -> 1
        }
    }
}
public static class HBreducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values,
            Reducer<Text, IntWritable, ImmutableBytesWritable, Mutation>.Context context)
            throws IOException, InterruptedException {

        int sum = 0;
        // add up the occurrences of this word
        for (IntWritable value : values) {
            sum += value.get();
        }
        Put put = new Put(Bytes.toBytes(key.toString())); // the output row key is the word itself
        // addColumn on HBase 1.0+; older clients use put.add(family, qualifier, value)
        put.addColumn(Bytes.toBytes("content"), Bytes.toBytes("info"), Bytes.toBytes(String.valueOf(sum)));
        // writing to HBase requires the row key plus the Put
        context.write(new ImmutableBytesWritable(Bytes.toBytes(key.toString())), put);
    }

}
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Job job = Job.getInstance(conf, "HBaseMR"); // new Job(conf, ...) is deprecated
    job.setJarByClass(HbaseMRTest.class);
    Scan scan = new Scan();
    // read from table "wordtest"; HBmapper emits (word, 1) for every cell
    TableMapReduceUtil.initTableMapperJob("wordtest", scan, HBmapper.class, Text.class, IntWritable.class, job);
    // write the summed counts into table "stat" via HBreducer
    TableMapReduceUtil.initTableReducerJob("stat", HBreducer.class, job);

    boolean success = job.waitForCompletion(true);
    System.out.println(success ? "finished" : "failed");
}
}
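
Before running the job, the source table needs some rows to count. The following standalone snippet is a minimal, hypothetical seeding sketch, assuming the same HBase 1.x client API as above; the class name, row keys, and sample words are made up for illustration:

package com.neworigin.HBaseMR;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class SeedWordtest {
    public static void main(String[] args) throws IOException {
        // reuse the same ZooKeeper settings as the job above
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "s100,s101,s102");
        conf.set("hbase.zookeeper.property.clientPort", "2181");

        try (Connection connection = ConnectionFactory.createConnection(conf);
             Table table = connection.getTable(TableName.valueOf("wordtest"))) {
            String[] words = {"hello", "hbase", "hello"}; // sample data, made up
            for (int i = 0; i < words.length; i++) {
                Put put = new Put(Bytes.toBytes("row" + i));
                // the mapper reads every cell of the row, so any family/qualifier works;
                // "content"/"info" matches the table layout used in this post
                put.addColumn(Bytes.toBytes("content"), Bytes.toBytes("info"), Bytes.toBytes(words[i]));
                table.put(put);
            }
        }
    }
}

Once both tables exist and "wordtest" has rows, package the classes and submit the job like any other MapReduce job (e.g. with hadoop jar); each distinct word should then appear as a row key in "stat" with its count stored under content:info.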

 
