HDFS Java API Operations
Hi everyone, I'm 脚丫先生 (o^^o).
The previous post covered the basic concepts of HDFS, so this one puts the distributed file system into practice: first through the shell client, then through the Java API.
Let's get into today's topic.
I. HDFS Shell (Command-Line Client) Operations
- 1. Basic syntax
bin/hadoop fs <command> or bin/hdfs dfs <command> (the two forms are interchangeable when the target is HDFS)
- 2. Full command list
$ bin/hadoop fs
[-appendToFile <localsrc> ... <dst>]
[-cat [-ignoreCrc] <src> ...]
[-checksum <src> ...]
[-chgrp [-R] GROUP PATH...]
[-chmod [-R] <MODE[,MODE]... | OCTALMODE> PATH...]
[-chown [-R] [OWNER][:[GROUP]] PATH...]
[-copyFromLocal [-f] [-p] <localsrc> ... <dst>]
[-copyToLocal [-p] [-ignoreCrc] [-crc] <src> ... <localdst>]
[-count [-q] <path> ...]
[-cp [-f] [-p] <src> ... <dst>]
[-createSnapshot <snapshotDir> [<snapshotName>]]
[-deleteSnapshot <snapshotDir> <snapshotName>]
[-df [-h] [<path> ...]]
[-du [-s] [-h] <path> ...]
[-expunge]
[-get [-p] [-ignoreCrc] [-crc] <src> ... <localdst>]
[-getfacl [-R] <path>]
[-getmerge [-nl] <src> <localdst>]
[-help [cmd ...]]
[-ls [-d] [-h] [-R] [<path> ...]]
[-mkdir [-p] <path> ...]
[-moveFromLocal <localsrc> ... <dst>]
[-moveToLocal <src> <localdst>]
[-mv <src> ... <dst>]
[-put [-f] [-p] <localsrc> ... <dst>]
[-renameSnapshot <snapshotDir> <oldName> <newName>]
[-rm [-f] [-r|-R] [-skipTrash] <src> ...]
[-rmdir [--ignore-fail-on-non-empty] <dir> ...]
[-setfacl [-R] [{-b|-k} {-m|-x <acl_spec>} <path>]|[--set <acl_spec> <path>]]
[-setrep [-R] [-w] <rep> <path> ...]
[-stat [format] <path> ...]
[-tail [-f] <file>]
[-test -[defsz] <path>]
[-text [-ignoreCrc] <src> ...]
[-touchz <path> ...]
[-usage [cmd ...]]
- 3. Common commands in practice
(0) Start the Hadoop cluster
$ sbin/start-dfs.sh
$ sbin/start-yarn.sh
(1) -help: print usage information for a command
$ hadoop fs -help rm
(2) -ls: list directory contents
$ hadoop fs -ls /
(3) -mkdir: create a directory on HDFS
$ hadoop fs -mkdir -p /sanguo/shuguo
(4) -moveFromLocal: cut and paste a local file into HDFS
$ touch kongming.txt
$ hadoop fs -moveFromLocal ./kongming.txt /sanguo/shuguo
(5) -appendToFile: append a local file to the end of an existing HDFS file
$ touch liubei.txt
$ vi liubei.txt
and enter the line
san gu mao lu
$ hadoop fs -appendToFile liubei.txt /sanguo/shuguo/kongming.txt
(6) -cat: display a file's contents
$ hadoop fs -cat /sanguo/shuguo/kongming.txt
(7) -chgrp, -chmod, -chown: same usage as on a Linux file system; change a file's group, permissions, or owner
$ hadoop fs -chmod 777 /sanguo/shuguo/kongming.txt
$ hadoop fs -chown atguigu:atguigu /sanguo/shuguo/kongming.txt
(8) -copyFromLocal: copy a file from the local file system to an HDFS path
$ hadoop fs -copyFromLocal README.txt /
(9) -copyToLocal: copy a file from HDFS to the local file system
$ hadoop fs -copyToLocal /sanguo/shuguo/kongming.txt ./
(10) -cp: copy from one HDFS path to another
$ hadoop fs -cp /sanguo/shuguo/kongming.txt /zhuge.txt
(11) -mv: move a file between HDFS directories
$ hadoop fs -mv /zhuge.txt /sanguo/shuguo/
(12) -get: equivalent to copyToLocal; download a file from HDFS to the local file system
$ hadoop fs -get /sanguo/shuguo/kongming.txt ./
(13) -getmerge: merge and download multiple files; for example, when the HDFS directory /user/atguigu/test holds several files: log.1, log.2, log.3, …
$ hadoop fs -getmerge /user/atguigu/test/* ./zaiyiqi.txt
(14) -put: equivalent to copyFromLocal
$ hadoop fs -put ./zaiyiqi.txt /user/atguigu/test/
(15) -tail: display the end of a file
$ hadoop fs -tail /sanguo/shuguo/kongming.txt
(16) -rm: delete a file or directory
$ hadoop fs -rm /user/atguigu/test/jinlian2.txt
(17) -rmdir: delete an empty directory
$ hadoop fs -rmdir /test
(18) Delete a non-empty directory
$ hadoop fs -rm -r -f /test
(19) -du: report the size of a directory
$ hadoop fs -du -s -h /user/atguigu/test
2.7 K /user/atguigu/test
$ hadoop fs -du -h /user/atguigu/test
1.3 K /user/atguigu/test/README.txt
/user/atguigu/test/jinlian.txt
1.4 K /user/atguigu/test/zaiyiqi.txt
(20) -setrep: set the replication factor of a file in HDFS. The factor is a required argument; note that the value is only recorded in the NameNode metadata, and that many replicas can actually exist only if the cluster has at least that many DataNodes.
$ hadoop fs -setrep 10 /sanguo/shuguo/kongming.txt
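The same operation is exposed by the Java API covered in the next section. As a preview, a minimal sketch, assuming an already configured Configuration object named conf (the path and factor are illustrative):
// Hypothetical snippet: ask the NameNode to change one file's target replication factor.
FileSystem fs = FileSystem.get(conf);
boolean changed = fs.setReplication(new Path("/sanguo/shuguo/kongming.txt"), (short) 10);
fs.close();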
II. HDFS Java API Operations
Main class: it connects to the cluster's NameNode and creates a directory through the utility class shown afterwards.
package hdfs;
import org.apache.hadoop.conf.Configuration;
import java.io.IOException;
/**
 * Entry point for the HDFS Java API examples.
 * @date : 14:06 2020/11/29
 */
public class HdfsOperator {
public static void main(String[] args) throws IOException {
// 1. Build the client configuration
Configuration conf = new Configuration();
// Point the client at the cluster's NameNode
conf.set("fs.defaultFS", "hdfs://192.168.239.128:8020");
// Run as a cluster user; otherwise operations fail the permission checks
System.setProperty("HADOOP_USER_NAME","bigdata");
// 2. Create a directory through the utility class
HDFSUtil.mkdir(conf, "hdfs://192.168.239.128:8020/bigdata");
}
}
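Beyond mkdir, the same main method could exercise the rest of the utility class. A sketch under the same configuration (the file path and contents are made-up examples):
// Hypothetical calls against the utility class shown below.
String file = "hdfs://192.168.239.128:8020/bigdata/hello.txt";
HDFSUtil.createFile(conf, file, "hello hdfs");        // write a small file
System.out.println(HDFSUtil.readFile(conf, file));    // print its contents
System.out.println(HDFSUtil.exists(conf, file));      // true
HDFSUtil.deleteFile(conf, file);                      // non-recursive delete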
Utility class:
package hdfs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
/**
 * HDFS utility class.
 * @date : 13:57 2020/11/29
 */
public class HDFSUtil {
private HDFSUtil() {
}
/**
 * Create a file on HDFS and write the given bytes into it.
 *
 * @param conf Hadoop configuration
 * @param filePath HDFS path of the file to create
 * @param data bytes to write
 * @throws IOException
 */
public static void createFile(Configuration conf, String filePath,
byte[] data) throws IOException {
// Every Java API operation on HDFS starts from a FileSystem object
FileSystem fs = FileSystem.get(conf);
// FileSystem reads and writes streams via FSDataInputStream / FSDataOutputStream
FSDataOutputStream outputStream = fs.create(new Path(filePath));
outputStream.write(data);
// flush and close the byte output stream
outputStream.close();
fs.close();
}
/**
 * Create a file on HDFS and write the given string into it.
 *
 * @param conf Hadoop configuration
 * @param filePath HDFS path of the file to create
 * @param data string to write
 * @throws IOException
 */
public static void createFile(Configuration conf, String filePath,
String data) throws IOException {
// delegate to the byte[] overload
createFile(conf, filePath, data.getBytes());
}
/**
 * Upload a local file to HDFS.
 * @param conf Hadoop configuration
 * @param localPath local file path
 * @param remotePath HDFS destination path
 * @throws IllegalArgumentException
 * @throws IOException
 */
public static void copyFileFromLocal(Configuration conf,String localPath,String remotePath) throws IllegalArgumentException, IOException{
FileSystem fs = FileSystem.get(conf);
fs.copyFromLocalFile(new Path(localPath), new Path(remotePath));
}
/**
 * Delete a file or directory recursively.
 * @param conf Hadoop configuration
 * @param filePath HDFS path to delete
 * @return true if the deletion succeeded
 * @throws IllegalArgumentException
 * @throws IOException
 */
public static boolean deleteFileRecursive(Configuration conf, String filePath) throws IllegalArgumentException, IOException{
return deleteFile(conf,filePath,true);
}
/**
 * Delete a file without recursion.
 * @param conf Hadoop configuration
 * @param filePath HDFS path to delete
 * @return true if the deletion succeeded
 * @throws IllegalArgumentException
 * @throws IOException
 */
public static boolean deleteFile(Configuration conf, String filePath) throws IllegalArgumentException, IOException{
return deleteFile(conf,filePath,false);
}
/**
 * Delete a path, optionally recursing into directories.
 * @param conf Hadoop configuration
 * @param filePath HDFS path to delete
 * @param recursive whether to delete directory contents recursively
 * @return true if the deletion succeeded
 * @throws IllegalArgumentException
 * @throws IOException
 */
private static boolean deleteFile(Configuration conf, String filePath,boolean recursive) throws IllegalArgumentException, IOException{
FileSystem fs = FileSystem.get(conf);
return fs.delete(new Path(filePath),recursive);
}
/**
 * Create a directory, including any missing parents.
 *
 * @param conf Hadoop configuration
 * @param dirPath HDFS directory path
 * @return true if the directory was created
 * @throws IllegalArgumentException
 * @throws IOException
 */
public static boolean mkdir(Configuration conf, String dirPath)
throws IllegalArgumentException, IOException {
FileSystem fs = FileSystem.get(conf);
return fs.mkdirs(new Path(dirPath));
}
/**
 * Read a file's contents into a String.
 * @param conf Hadoop configuration
 * @param filePath HDFS path to read
 * @return the file contents
 * @throws IOException
 */
public static String readFile(Configuration conf, String filePath)
throws IOException {
String res = null;
FileSystem fs = null;
FSDataInputStream inputStream = null;
ByteArrayOutputStream outputStream = null;
try {
fs = FileSystem.get(conf);
// byte input stream from HDFS
inputStream = fs.open(new Path(filePath));
// in-memory output buffer sized to the bytes currently available
outputStream = new ByteArrayOutputStream(inputStream.available());
IOUtils.copyBytes(inputStream, outputStream, conf);
res = outputStream.toString();
} finally {
if (inputStream != null)
IOUtils.closeStream(inputStream);
if (outputStream != null)
IOUtils.closeStream(outputStream);
}
return res;
}
/**
 * Check whether a path exists on HDFS.
 *
 * @param conf Hadoop configuration
 * @param path path to check
 * @return true if the path exists
 * @throws IOException
 */
public static boolean exists(Configuration conf, String path)
throws IOException {
FileSystem fs = FileSystem.get(conf);
return fs.exists(new Path(path));
}
}
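The utility class covers create, upload, delete, mkdir, read, and existence checks, but has no counterparts for the -get and -ls shell commands demonstrated earlier. A sketch of what two such helpers could look like, following the same pattern (the method names are my own, and listDir additionally needs import org.apache.hadoop.fs.FileStatus):
/**
 * Download an HDFS file to the local file system (Java counterpart of -get).
 */
public static void copyFileToLocal(Configuration conf, String remotePath,
        String localPath) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    fs.copyToLocalFile(new Path(remotePath), new Path(localPath));
}
/**
 * Print the entries directly under a directory (Java counterpart of -ls).
 */
public static void listDir(Configuration conf, String dirPath) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    for (FileStatus status : fs.listStatus(new Path(dirPath))) {
        System.out.println(status.getPath() + "\t" + status.getLen() + " bytes");
    }
}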