Generating HFiles with MapReduce and Bulk-Loading Them into HBase

Posted by zhangl


This post covers generating HFiles with a MapReduce job and importing them into HBase; hopefully it makes a useful reference.

Full code:

package com.tansun.di.hbase.put;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Generates HFiles for bulk-loading into the "student" table via MapReduce.
 * Created: 2016-06-01 17:16
 */
public class CreateHfileByMapReduce {
    

    public static class MyBulkMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, KeyValue>{

        @Override
        protected void map( LongWritable key, Text value, Context context )
            throws IOException, InterruptedException {
            // Expected input: tab-separated lines of rowkey, name, age, phone;
            // adjust the separator and field count to match your data.
            String[] split = value.toString().split("\t");
            if (split.length == 4){
                byte[] rowkey = Bytes.toBytes(split[0]);
                ImmutableBytesWritable imrowkey = new ImmutableBytesWritable( rowkey );
                context.write(imrowkey, new KeyValue(rowkey, Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(split[1])));
                context.write(imrowkey, new KeyValue(rowkey, Bytes.toBytes("info"), Bytes.toBytes("age"), Bytes.toBytes(split[2])));
                context.write(imrowkey, new KeyValue(rowkey, Bytes.toBytes("info"), Bytes.toBytes("phone"), Bytes.toBytes(split[3])));
            }
        }
    }
    
    @SuppressWarnings( "deprecation" )
    public static void main( String[] args ) {
        // Hardcoded for testing. In production, take these from the command line:
        // CreateHfileByMapReduce <table_name> <data_input_path> <hfile_output_path>
        String tableName = "student";
        String inputPath  = "hdfs://ts.node1.com:8022/datas/test1";
        String outputPath = "hdfs://ts.node1.com:8022/datas/output";
        Configuration conf = HBaseConfiguration.create();
        try {
           HTable hTable = new HTable(conf, tableName);
           Job job = Job.getInstance( conf, "CreateHfileByMapReduce");
           job.setJarByClass( CreateHfileByMapReduce.class );
           job.setMapperClass(MyBulkMapper.class);
           job.setInputFormatClass(TextInputFormat.class);
           // Sets the partitioner (TotalOrderPartitioner), the reducer
           // (KeyValueSortReducer), and one reduce task per region of the table.
           HFileOutputFormat.configureIncrementalLoad(job, hTable);
           FileInputFormat.addInputPath( job, new Path(inputPath) );
           FileOutputFormat.setOutputPath( job, new Path(outputPath) );

           System.exit( job.waitForCompletion(true) ? 0 : 1 );
        }
        catch ( Exception e ) {
            e.printStackTrace();
        }
    }
}
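
Note that the job above only writes HFiles under outputPath; to finish the import in the title, those files still have to be handed to the "student" table. Below is a minimal sketch using LoadIncrementalHFiles (the completebulkload tool) from the same HBase API generation as the driver above; the table name and output path reuse the hardcoded values, so adjust them to your environment:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;

public class BulkLoadStudentHFiles {
    @SuppressWarnings( "deprecation" )
    public static void main( String[] args ) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HTable hTable = new HTable(conf, "student");
        // Moves the HFiles produced by CreateHfileByMapReduce into the
        // regions of the target table (the HBase user needs read access).
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
        loader.doBulkLoad(new Path("hdfs://ts.node1.com:8022/datas/output"), hTable);
        hTable.close();
    }
}

The same step can also be run from the shell: hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles hdfs://ts.node1.com:8022/datas/output student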

Exception when running the job:

Application application_1463973945893_0006 failed 2 times due to AM Container for appattempt_1463973945893_0006_000002 exited with exitCode: 1
For more detailed output, check application tracking page:http://ts.node1.com:8088/proxy/application_1463973945893_0006/Then, click on links to logs of each attempt.
Diagnostics: Exception from container-launch.
Container id: container_1463973945893_0006_02_000001
Exit code: 1
Exception message: /yarn/nm/usercache/hdfs/appcache/application_1463973945893_0006/container_1463973945893_0006_02_000001/launch_container.sh: line 11: %PWD%;$HADOOP_CONF_DIR;/usr/hdp/current/hadoop-client/*;/usr/hdp/current/hadoop-client/lib/*;/usr/hdp/current/hadoop-hdfs-client/*;/usr/hdp/current/hadoop-hdfs-client/lib/*;/usr/hdp/current/hadoop-yarn-client/*;/usr/hdp/current/hadoop-yarn-client/lib/*;$PWD/mr-framework/hadoop/share/hadoop/mapreduce/*:$PWD/mr-framework/hadoop/share/hadoop/mapreduce/lib/*:$PWD/mr-framework/hadoop/share/hadoop/common/*:$PWD/mr-framework/hadoop/share/hadoop/common/lib/*:$PWD/mr-framework/hadoop/share/hadoop/yarn/*:$PWD/mr-framework/hadoop/share/hadoop/yarn/lib/*:$PWD/mr-framework/hadoop/share/hadoop/hdfs/*:$PWD/mr-framework/hadoop/share/hadoop/hdfs/lib/*:$PWD/mr-framework/hadoop/share/hadoop/tools/lib/*:/usr/hdp/${hdp.version}/hadoop/lib/hadoop-lzo-0.6.0.${hdp.version}.jar:/etc/hadoop/conf/secure;job.jar/job.jar;job.jar/classes/;job.jar/lib/*;%PWD%/*: bad substitution
/yarn/nm/usercache/hdfs/appcache/application_1463973945893_0006/container_1463973945893_0006_02_000001/launch_container.sh: line 125: %JAVA_HOME%/bin/java -Dlog4j.configuration=container-log4j.properties -Dyarn.app.container.log.dir=/var/log/hadoop-yarn/container/application_1463973945893_0006/container_1463973945893_0006_02_000001 -Dyarn.app.container.log.filesize=0 -Dhadoop.root.logger=INFO,CLA -Dhdp.version=${hdp.version} -Xmx204m -Dhdp.version=${hdp.version} org.apache.hadoop.mapreduce.v2.app.MRAppMaster 1>/var/log/hadoop-yarn/container/application_1463973945893_0006/container_1463973945893_0006_02_000001/stdout 2>/var/log/hadoop-yarn/container/application_1463973945893_0006/container_1463973945893_0006_02_000001/stderr : bad substitution
Stack trace: ExitCodeException exitCode=1: /yarn/nm/usercache/hdfs/appcache/application_1463973945893_0006/container_1463973945893_0006_02_000001/launch_container.sh: line 11: %PWD%;$HADOOP_CONF_DIR;/usr/hdp/current/hadoop-client/*;/usr/hdp/current/hadoop-client/lib/*;/usr/hdp/current/hadoop-hdfs-client/*;/usr/hdp/current/hadoop-hdfs-client/lib/*;/usr/hdp/current/hadoop-yarn-client/*;/usr/hdp/current/hadoop-yarn-client/lib/*;$PWD/mr-framework/hadoop/share/hadoop/mapreduce/*:$PWD/mr-framework/hadoop/share/hadoop/mapreduce/lib/*:$PWD/mr-framework/hadoop/share/hadoop/common/*:$PWD/mr-framework/hadoop/share/hadoop/common/lib/*:$PWD/mr-framework/hadoop/share/hadoop/yarn/*:$PWD/mr-framework/hadoop/share/hadoop/yarn/lib/*:$PWD/mr-framework/hadoop/share/hadoop/hdfs/*:$PWD/mr-framework/hadoop/share/hadoop/hdfs/lib/*:$PWD/mr-framework/hadoop/share/hadoop/tools/lib/*:/usr/hdp/${hdp.version}/hadoop/lib/hadoop-lzo-0.6.0.${hdp.version}.jar:/etc/hadoop/conf/secure;job.jar/job.jar;job.jar/classes/;job.jar/lib/*;%PWD%/*: bad substitution
/yarn/nm/usercache/hdfs/appcache/application_1463973945893_0006/container_1463973945893_0006_02_000001/launch_container.sh: line 125: %JAVA_HOME%/bin/java -Dlog4j.configuration=container-log4j.properties -Dyarn.app.container.log.dir=/var/log/hadoop-yarn/container/application_1463973945893_0006/container_1463973945893_0006_02_000001 -Dyarn.app.container.log.filesize=0 -Dhadoop.root.logger=INFO,CLA -Dhdp.version=${hdp.version} -Xmx204m -Dhdp.version=${hdp.version} org.apache.hadoop.mapreduce.v2.app.MRAppMaster 1>/var/log/hadoop-yarn/container/application_1463973945893_0006/container_1463973945893_0006_02_000001/stdout 2>/var/log/hadoop-yarn/container/application_1463973945893_0006/container_1463973945893_0006_02_000001/stderr : bad substitution
at org.apache.hadoop.util.Shell.runCommand(Shell.java:543)
at org.apache.hadoop.util.Shell.run(Shell.java:460)
at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:720)
at org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:211)
at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:302)
at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:82)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
Container exited with a non-zero exit code 1
Failing this attempt. Failing the application.
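
What this log is actually saying: the ${hdp.version} placeholders from mapreduce.application.classpath and the AM launch command were never resolved, so they reached launch_container.sh as literals, and bash rejects ${hdp.version} with "bad substitution" because dots are not valid in shell variable names. The unexpanded %PWD% and %JAVA_HOME% on the same lines suggest the job was submitted from a Windows client, whose environment-variable syntax leaked into the Linux launch script. A commonly reported workaround on HDP is to resolve both on the client before submitting; the version string below is a hypothetical placeholder, so replace it with the build you find under /usr/hdp on your cluster nodes:

Configuration conf = HBaseConfiguration.create();
// Resolve the ${hdp.version} placeholders at submission time; the value is a
// hypothetical example and must match the HDP build installed on the cluster.
conf.set("hdp.version", "2.2.4.2-2");
// Point at the concrete MR framework archive instead of the template path.
conf.set("mapreduce.application.framework.path",
         "/hdp/apps/2.2.4.2-2/mapreduce/mapreduce.tar.gz#mr-framework");
// When submitting from Windows to a Linux cluster, emit Unix-style env vars
// ($PWD) in the launch script instead of Windows-style ones (%PWD%).
conf.set("mapreduce.app-submission.cross-platform", "true");

Alternatively, hdp.version can be set once in mapred-site.xml on the submitting client.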

 
