HDFS API Programming
Posted by Bigrosemary
A first program with the HDFS API
# The project is built with Maven; the IDE is IntelliJ IDEA
# pom.xml needs the hadoop-client dependency, shown below (junit is included purely for unit testing; pick a 4.x release or newer so tests can be driven with annotations rather than hand-written test classes)
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.rose</groupId>
    <artifactId>hadoop-test</artifactId>
    <version>1.0-SNAPSHOT</version>

    <repositories>
        <repository>
            <id>cloudera</id>
            <url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
        </repository>
    </repositories>

    <dependencies>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.10</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.6.0-cdh5.9.2</version>
        </dependency>
    </dependencies>
</project>
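With this pom in place, `mvn compile` should pull hadoop-client 2.6.0-cdh5.9.2 from the Cloudera repository declared above; if resolution fails, first check that the repository URL is reachable from your machine.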
# Java code
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

public class hdfsApp {
    public static void main(String[] args) throws IOException, URISyntaxException, InterruptedException {
        // create the configuration object
        Configuration configuration = new Configuration();
        // get a handle on the file system, connecting as user "root"
        FileSystem fileSystem = FileSystem.get(new URI("hdfs://192.168.1.20:9000"), configuration, "root");
        // the path to create on HDFS
        Path path = new Path("/hdfs");
        // mkdirs returns a boolean
        boolean result = fileSystem.mkdirs(path);
        // the return value tells us whether the directory was created
        System.out.println(result);
    }
}
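As a quick sanity check without leaving Java, the same client can also list the root directory. This is a sketch of my own (not from the original post) that could be appended to main; FileStatus comes from org.apache.hadoop.fs.FileStatus:
        // list "/" to confirm the new directory shows up
        FileStatus[] statuses = fileSystem.listStatus(new Path("/"));
        for (FileStatus status : statuses) {
            System.out.println((status.isDirectory() ? "d" : "-") + "  " + status.getPath().getName());
        }
        // release the connection when done
        fileSystem.close();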
# Improving the code above (JUnit fixtures handle setup and teardown; the fragment is wrapped in a test class so it compiles, and the later @Test methods all live in this same class)
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Progressable;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.*;
import java.net.URI;
import java.net.URISyntaxException;

public class HdfsAppTest {
    public static final String HDFS_URI = "hdfs://192.168.1.20:9000";
    FileSystem fileSystem = null;
    Configuration configuration = null;

    @Before
    public void bef() throws URISyntaxException, IOException, InterruptedException {
        // assign the field (declaring a fresh local variable here would leave the field null)
        configuration = new Configuration();
        // build the client object for accessing HDFS
        fileSystem = FileSystem.get(new URI(HDFS_URI), configuration, "root");
        System.out.println("-------before-----------");
    }

    @Test
    public void mkdir() throws Exception {
        fileSystem.mkdirs(new Path("/rosemary"));
    }

    @After
    public void aft() throws IOException {
        fileSystem.close();  // release the client connection
        configuration = null;
        fileSystem = null;
        System.out.println("------release-------");
    }

    // the remaining @Test methods shown below go inside this class
}
# Check the result
[root@zabbix ~]# hadoop fs -ls /
19/01/15 04:41:20 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Found 5 items
drwxr-xr-x - root supergroup 0 2019-01-14 00:51 /app
drwxr-xr-x - root supergroup 0 2019-01-15 04:16 /hdfs
-rw-r--r-- 1 root supergroup 191753373 2019-01-14 00:47 /jdk-8u191-linux-x64.tar.gz
drwxr-xr-x - root supergroup 0 2019-01-15 04:16 /opt
drwxr-xr-x - root supergroup 0 2019-01-15 04:40 /rosemary
# Reading file contents from HDFS
@Test
public void read() throws IOException {
    FSDataInputStream fsDataInputStream = fileSystem.open(new Path("/hdfs/anaconda-ks.cfg"));
    // stream the file contents to stdout, 1024 bytes at a time
    IOUtils.copyBytes(fsDataInputStream, System.out, 1024);
    IOUtils.closeStream(fsDataInputStream);
}
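For text files it can be handier to wrap the stream in a reader and work line by line. A minimal sketch of that, my addition rather than part of the original post, assuming the same fileSystem fixture plus java.io and java.nio.charset.StandardCharsets imports:
@Test
public void readLines() throws IOException {
    FSDataInputStream in = fileSystem.open(new Path("/hdfs/anaconda-ks.cfg"));
    // the reader decodes the raw HDFS byte stream as UTF-8 text
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
        String line;
        while ((line = reader.readLine()) != null) {
            System.out.println(line);
        }
    }
}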
# Create a file and write to it
@Test
public void commit() throws IOException {
    FSDataOutputStream fsDataOutputStream = fileSystem.create(new Path("/hdfs/demo.txt"));
    fsDataOutputStream.writeUTF("HELLO WORLD");
    // writes are buffered on the client, so flush before closing
    fsDataOutputStream.flush();
    fsDataOutputStream.close();
}
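One thing to be aware of: writeUTF emits Java's modified UTF-8 with a two-byte length prefix, so `hadoop fs -cat /hdfs/demo.txt` will show two extra leading bytes. A sketch that writes plain UTF-8 bytes instead (the file name demo2.txt is just an example), again assuming a StandardCharsets import:
@Test
public void commitPlain() throws IOException {
    // try-with-resources flushes and closes the stream automatically
    try (FSDataOutputStream out = fileSystem.create(new Path("/hdfs/demo2.txt"))) {
        out.write("HELLO WORLD".getBytes(StandardCharsets.UTF_8));
    }
}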
# rename
@Test
public void rename() throws IOException {
    Path src = new Path("/hdfs/anaconda-ks.cfg");
    Path des = new Path("/hdfs/ana.cfg");
    fileSystem.rename(src, des);
}
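Note that rename returns false rather than throwing on most failures (a missing source path, for example), so it is worth checking the result. A small sketch of that check, my addition rather than part of the original post:
@Test
public void renameChecked() throws IOException {
    boolean renamed = fileSystem.rename(new Path("/hdfs/anaconda-ks.cfg"), new Path("/hdfs/ana.cfg"));
    // exists() confirms the target actually landed where we expect
    System.out.println("renamed: " + renamed + ", target exists: " + fileSystem.exists(new Path("/hdfs/ana.cfg")));
}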
# Upload a local file, with progress reporting
@Test
public void upload() throws IOException {
    InputStream in = new BufferedInputStream(new FileInputStream(new File("/Users/bigface/Desktop/hadoop-2.6.0-cdh5.9.2.tar.gz")));
    FSDataOutputStream out = fileSystem.create(new Path("/hdfs/hadoop.tgz"), new Progressable() {
        public void progress() {
            // called periodically as data is written to the cluster
            System.out.print("*");
        }
    });
    IOUtils.copyBytes(in, out, 4096);
}
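The three-argument copyBytes leaves both streams open when it returns. If they are not needed afterwards, the four-argument overload can close them once the copy finishes:
    // alternative: ask copyBytes to close both streams when the copy completes
    IOUtils.copyBytes(in, out, 4096, true);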