Hadoop File System Command Line Basics

Posted by sheepcore

This post covers the basics of the Hadoop file system command line, together with a small MapReduce driver example; hopefully it offers some reference value.


1. file system
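Before the MapReduce driver, a quick look at the file-system side. The sketch below is a minimal, illustrative example (the class name HdfsBasics and the local file words.txt are placeholders, not part of the original project) that uses the org.apache.hadoop.fs.FileSystem API to perform the operations the hdfs dfs shell also exposes (-mkdir, -put, -ls, -cat, -rm). It assumes the same NameNode address and /data/input directory used by the driver in the next section.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsBasics {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // same NameNode address as the MapReduce driver below
        conf.set("fs.defaultFS", "hdfs://172.28.4.191:9000");
        FileSystem fs = FileSystem.get(conf);

        Path dir = new Path("/data/input");
        // create the directory if it does not exist (like hdfs dfs -mkdir -p)
        if (!fs.exists(dir)) {
            fs.mkdirs(dir);
        }

        // upload a local file (like hdfs dfs -put); words.txt is a placeholder name
        fs.copyFromLocalFile(new Path("words.txt"), new Path(dir, "words.txt"));

        // list the directory (like hdfs dfs -ls)
        for (FileStatus status : fs.listStatus(dir)) {
            System.out.println(status.getPath() + "\t" + status.getLen() + " bytes");
        }

        // read a file back (like hdfs dfs -cat)
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(fs.open(new Path(dir, "words.txt"))))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        }

        // delete recursively (like hdfs dfs -rm -r); left commented out so the
        // input survives for the WordCount job below
        // fs.delete(dir, true);

        fs.close();
    }
}

Each call maps onto an hdfs dfs subcommand, so the same steps can equally be carried out from the shell.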

2. command line

The driver class below connects to the NameNode, checks the input directory on HDFS, and then configures and submits a WordCount MapReduce job.

/**
 * License to Sheep Core
 */

package src.com.sheepcore.wordcount.mapreduce;


import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.*;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;


/**
 * Configures the job and builds the runtime environment for the MapReduce program.
 */
public class MRRunJob {
    public static void main(String... args) throws IOException {
        Configuration conf = new Configuration();
        // the address of the NameNode
        conf.set("fs.defaultFS", "hdfs://172.28.4.191:9000");
        conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");

        // initialize the file system client
        FileSystem fs = null;
        try {
            fs = FileSystem.get(conf);
        } catch (IOException ex1) {
            ex1.printStackTrace();
        }
        System.out.println(fs.exists(new Path("/data/input")));

        Job job = null;
        try {
            // create a new job to run the MapReduce program
            job = Job.getInstance(conf, "My first mapreduce program!");
        } catch (IOException ex2) {
            ex2.printStackTrace();
        }

        // set the main class of the job jar
        job.setJarByClass(MRRunJob.class);
        // set the mapper class
        job.setMapperClass(WordCountMapper.class);
        // set the reducer class
        job.setReducerClass(WordCountReducer.class);
        // set the output key type (applies to both map and reduce output here)
        job.setOutputKeyClass(Text.class);
        // set the output value type (applies to both map and reduce output here)
        job.setOutputValueClass(IntWritable.class);

        // input path on HDFS
        Path inPath = new Path("/data/input");
        // output path on HDFS; note that it must not already exist
        Path outPath = new Path("/data/output");

        try {
            // add the input path to the job
            FileInputFormat.addInputPath(job, inPath);
            // delete the output path if it already exists, then set it on the job
            if (fs.exists(outPath)) {
                fs.delete(outPath, true);
            }
            FileOutputFormat.setOutputPath(job, outPath);
            // submit the job and wait for it to complete
            boolean finish = job.waitForCompletion(true);
            System.out.println("Job finished: " + finish);
        } catch (IOException ex3) {
            ex3.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
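The driver references WordCountMapper and WordCountReducer, which are not shown in this post. The sketch below is only an illustration of what a standard word-count mapper and reducer look like, matching the Text/IntWritable output types set above; the actual classes in the original project may differ.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

// emits (word, 1) for every token in each input line
class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        StringTokenizer tokens = new StringTokenizer(value.toString());
        while (tokens.hasMoreTokens()) {
            word.set(tokens.nextToken());
            context.write(word, ONE);
        }
    }
}

// sums the counts emitted for each word
class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    private final IntWritable result = new IntWritable();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        result.set(sum);
        context.write(key, result);
    }
}

After packaging the classes into a jar, the job can be launched from the command line with hadoop jar <your-jar> src.com.sheepcore.wordcount.mapreduce.MRRunJob, and the result can be read back with hdfs dfs -cat /data/output/part-r-00000.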
        

    

 
