Big Data Series: Using the API to Change Hadoop's Replication Factor and Block Size

Posted by 霓裳梦竹


This post shows how to use the HDFS Java API to read and write files and to set a custom replication factor and block size; hopefully it serves as a useful reference.

package com.slp.hdfs;

import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
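import org.apache.hadoop.fs.permission.FsPermission; // used by the permission sketch below (not in the original post)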
import org.junit.Test;

import java.io.IOException;

/**
 * @author sanglp
 * @create 2017-12-08 11:26
 * @desc HDFS test cases
 **/
public class TestHdfs {

    /**
     * Expected output:
     * i am a girl
     * i want to be a super man
     * but i cannot still now
     *
     * If s201 (referenced in core-site.xml under resources) is not mapped in the local hosts file, an UnknownHostException is thrown.
     * If the file being read does not exist, a FileNotFoundException ("File does not exist") is thrown.
     */
    @Test
    public void testSave(){
        /**
         * Relevant loading logic from Configuration's static initializer:
         * static {
         * deprecationContext = new AtomicReference(new Configuration.DeprecationContext((Configuration.DeprecationContext)null, defaultDeprecations));
         * ClassLoader cL = Thread.currentThread().getContextClassLoader();
         * if(cL == null) {
         * cL = Configuration.class.getClassLoader();
         * }
         *
         * if(cL.getResource("hadoop-site.xml") != null) {
         * LOG.warn("DEPRECATED: hadoop-site.xml found in the classpath. Usage of hadoop-site.xml is deprecated. Instead use core-site.xml, mapred-site.xml and hdfs-site.xml to override properties of core-default.xml, mapred-default.xml and hdfs-default.xml respectively");
         * }
         *
         * addDefaultResource("core-default.xml");
         * addDefaultResource("core-site.xml");
         * }
         */
        Configuration configuration = new Configuration(); // loads core-default.xml and core-site.xml from the classpath
        try{
            FileSystem fs = FileSystem.get(configuration);
            // If s201 is not resolvable locally: java.lang.IllegalArgumentException: java.net.UnknownHostException: s201
            // If the file does not exist: java.io.FileNotFoundException: File does not exist: /user/sanglp/hadoop/hello.txt
            Path path = new Path("hdfs://192.168.181.201/user/sanglp/hadoop/hello.txt");

            FSDataInputStream fis = fs.open(path);
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            IOUtils.copyBytes(fis,baos,1024);
            fis.close();
            System.out.print(new String(baos.toByteArray()));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
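
    /**
     * A sketch (not from the original post): if s201 cannot be mapped in the
     * local hosts file, the NameNode address from core-site.xml can instead be
     * overridden in code via fs.defaultFS. The RPC port 8020 is an assumption
     * here; use whatever port fs.defaultFS in core-site.xml actually specifies.
     */
    @Test
    public void testSaveWithExplicitNameNode() {
        Configuration configuration = new Configuration();
        configuration.set("fs.defaultFS", "hdfs://192.168.181.201:8020"); // overrides the value from core-site.xml
        try {
            FileSystem fs = FileSystem.get(configuration);
            // paths can now be given relative to the configured default file system
            FSDataInputStream fis = fs.open(new Path("/user/sanglp/hadoop/hello.txt"));
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            IOUtils.copyBytes(fis, baos, 1024);
            fis.close();
            System.out.print(new String(baos.toByteArray()));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }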


    /**
     * Permissions:
     * Writing as a different user fails with
     * org.apache.hadoop.security.AccessControlException: Permission denied: user=hadoop, access=WRITE, inode="/user/sanglp/hadoop":sanglp:supergroup:drwxr-xr-x
     * Fix by granting other users write access to the directory:
     * hdfs dfs -chmod o+w /user/sanglp/hadoop
     */
    @Test
    public void testWrite(){
        Configuration configuration = new Configuration();
        try {
            FileSystem fs = FileSystem.get(configuration);
            FSDataOutputStream fsDataOutputStream = fs.create(new Path("/user/sanglp/hadoop/a.txt"));
            fsDataOutputStream.write("how are you".getBytes());
            fsDataOutputStream.close(); // flush and close so the data is actually persisted
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
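
    /**
     * A sketch (not from the original post): the same chmod can also be done
     * through the API instead of the shell, assuming the client runs as a user
     * allowed to change permissions on the directory (the owner sanglp or a
     * superuser).
     */
    @Test
    public void testChmod() {
        Configuration configuration = new Configuration();
        try {
            FileSystem fs = FileSystem.get(configuration);
            // 0757 = rwxr-xrwx: grants "other" users write access, like hdfs dfs -chmod o+w
            fs.setPermission(new Path("/user/sanglp/hadoop"), new FsPermission((short) 0757));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }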

    /**
     * Custom replication factor and block size.
     * If the requested block size is below the configured minimum, create fails with:
     * org.apache.hadoop.ipc.RemoteException(java.io.IOException): Specified block size is less than configured minimum value (dfs.namenode.fs-limits.min-block-size): 5 < 1048576
     * To allow tiny blocks for testing, lower the minimum in hdfs-site.xml (and restart the NameNode):
     * <property>
     *   <name>dfs.namenode.fs-limits.min-block-size</name>
     *   <value>5</value>
     * </property>
     */
    @Test
    public void testWrite2(){
        Configuration configuration = new Configuration();
        try {
            FileSystem fs = FileSystem.get(configuration);
            // signature: public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize, short replication, long blockSize)
            FSDataOutputStream fsDataOutputStream = fs.create(new Path("/user/sanglp/hadoop/a.txt"), true, 1024, (short) 2, 5);
            fsDataOutputStream.write("how are you".getBytes());
            fsDataOutputStream.close(); // flush and close so the data is actually persisted
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
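
    /**
     * A sketch (not from the original post): the replication factor of a file
     * that already exists can also be changed after the fact. setReplication
     * only records the new target and returns whether the call was accepted;
     * the NameNode adds or removes block replicas asynchronously.
     */
    @Test
    public void testSetReplication() {
        Configuration configuration = new Configuration();
        try {
            FileSystem fs = FileSystem.get(configuration);
            // equivalent to: hdfs dfs -setrep 2 /user/sanglp/hadoop/a.txt
            boolean accepted = fs.setReplication(new Path("/user/sanglp/hadoop/a.txt"), (short) 2);
            System.out.println("replication change accepted: " + accepted);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }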
}