Hadoop小项目练习 更新中

Posted AI数据

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Hadoop小项目练习 更新中相关的知识,希望对你有一定的参考价值。

 

一、Hadoop Java API

 静态类实现Mapper类

@Public
@Stable
public class Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT> {
    public Mapper() {
    }

    protected void setup(Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context context) throws IOException, InterruptedException {
    }

    protected void map(KEYIN key, VALUEIN value, Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context context) throws IOException, InterruptedException {
        context.write(key, value);
    }

    protected void cleanup(Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context context) throws IOException, InterruptedException {
    }

    public void run(Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context context) throws IOException, InterruptedException {
        this.setup(context);

        try {
            while(context.nextKeyValue()) {
                this.map(context.getCurrentKey(), context.getCurrentValue(), context);
            }
        } finally {
            this.cleanup(context);
        }

    }

    public abstract class Context implements MapContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> {
        public Context() {
        }
    }
}

Context的接口

@Public
@Evolving
public interface MapContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> extends TaskInputOutputContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> {
    InputSplit getInputSplit();
}

继承了

@Public
@Evolving
public interface TaskInputOutputContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> extends TaskAttemptContext {
    boolean nextKeyValue() throws IOException, InterruptedException;

    KEYIN getCurrentKey() throws IOException, InterruptedException;

    VALUEIN getCurrentValue() throws IOException, InterruptedException;

    void write(KEYOUT var1, VALUEOUT var2) throws IOException, InterruptedException;

    OutputCommitter getOutputCommitter();
}

继承了

@Public
@Evolving
public interface TaskAttemptContext extends JobContext, Progressable {
    TaskAttemptID getTaskAttemptID();

    void setStatus(String var1);

    String getStatus();

    float getProgress();

    Counter getCounter(Enum<?> var1);

    Counter getCounter(String var1, String var2);
}

继承了

Public
@Evolving
public interface JobContext extends MRJobConfig {
    Configuration getConfiguration();

    Credentials getCredentials();

    JobID getJobID();

    int getNumReduceTasks();

    Path getWorkingDirectory() throws IOException;

    Class<?> getOutputKeyClass();

    Class<?> getOutputValueClass();

    Class<?> getMapOutputKeyClass();

    Class<?> getMapOutputValueClass();

    String getJobName();

    Class<? extends InputFormat<?, ?>> getInputFormatClass() throws ClassNotFoundException;

    Class<? extends Mapper<?, ?, ?, ?>> getMapperClass() throws ClassNotFoundException;

    Class<? extends Reducer<?, ?, ?, ?>> getCombinerClass() throws ClassNotFoundException;

    Class<? extends Reducer<?, ?, ?, ?>> getReducerClass() throws ClassNotFoundException;

    Class<? extends OutputFormat<?, ?>> getOutputFormatClass() throws ClassNotFoundException;

    Class<? extends Partitioner<?, ?>> getPartitionerClass() throws ClassNotFoundException;

    RawComparator<?> getSortComparator();

    String getJar();

    RawComparator<?> getCombinerKeyGroupingComparator();

    RawComparator<?> getGroupingComparator();

    boolean getJobSetupCleanupNeeded();

    boolean getTaskCleanupNeeded();

    boolean getProfileEnabled();

    String getProfileParams();

    IntegerRanges getProfileTaskRange(boolean var1);

    String getUser();

    /** @deprecated */
    @Deprecated
    boolean getSymlink();

    Path[] getArchiveClassPaths();

    URI[] getCacheArchives() throws IOException;

    URI[] getCacheFiles() throws IOException;

    /** @deprecated */
    @Deprecated
    Path[] getLocalCacheArchives() throws IOException;

    /** @deprecated */
    @Deprecated
    Path[] getLocalCacheFiles() throws IOException;

    Path[] getFileClassPaths();

    String[] getArchiveTimestamps();

    String[] getFileTimestamps();

    int getMaxMapAttempts();

    int getMaxReduceAttempts();
}
JobContext
@Private
@Evolving
public interface MRJobConfig {
    String MAP_SORT_CLASS = "map.sort.class";
    String INPUT_FORMAT_CLASS_ATTR = "mapreduce.job.inputformat.class";
    String MAP_CLASS_ATTR = "mapreduce.job.map.class";
    String MAP_OUTPUT_COLLECTOR_CLASS_ATTR = "mapreduce.job.map.output.collector.class";
    String COMBINE_CLASS_ATTR = "mapreduce.job.combine.class";
    String REDUCE_CLASS_ATTR = "mapreduce.job.reduce.class";
    String OUTPUT_FORMAT_CLASS_ATTR = "mapreduce.job.outputformat.class";
    String PARTITIONER_CLASS_ATTR = "mapreduce.job.partitioner.class";
    String SETUP_CLEANUP_NEEDED = "mapreduce.job.committer.setup.cleanup.needed";
    String TASK_CLEANUP_NEEDED = "mapreduce.job.committer.task.cleanup.needed";
    String TASK_LOCAL_WRITE_LIMIT_BYTES = "mapreduce.task.local-fs.write-limit.bytes";
    long DEFAULT_TASK_LOCAL_WRITE_LIMIT_BYTES = -1L;
    String JAR = "mapreduce.job.jar";
    String ID = "mapreduce.job.id";
    String JOB_NAME = "mapreduce.job.name";
    String JAR_UNPACK_PATTERN = "mapreduce.job.jar.unpack.pattern";
    String USER_NAME = "mapreduce.job.user.name";
    String PRIORITY = "mapreduce.job.priority";
    String QUEUE_NAME = "mapreduce.job.queuename";
    String JOB_NODE_LABEL_EXP = "mapreduce.job.node-label-expression";
    String AM_NODE_LABEL_EXP = "mapreduce.job.am.node-label-expression";
    String MAP_NODE_LABEL_EXP = "mapreduce.map.node-label-expression";
    String REDUCE_NODE_LABEL_EXP = "mapreduce.reduce.node-label-expression";
    String AM_STRICT_LOCALITY = "mapreduce.job.am.strict-locality";
    String RESERVATION_ID = "mapreduce.job.reservation.id";
    String JOB_TAGS = "mapreduce.job.tags";
    String JVM_NUMTASKS_TORUN = "mapreduce.job.jvm.numtasks";
    String SPLIT_FILE = "mapreduce.job.splitfile";
    String SPLIT_METAINFO_MAXSIZE = "mapreduce.job.split.metainfo.maxsize";
    long DEFAULT_SPLIT_METAINFO_MAXSIZE = 10000000L;
    String NUM_MAPS = "mapreduce.job.maps";
    String MAX_TASK_FAILURES_PER_TRACKER = "mapreduce.job.maxtaskfailures.per.tracker";
    String COMPLETED_MAPS_FOR_REDUCE_SLOWSTART = "mapreduce.job.reduce.slowstart.completedmaps";
    String NUM_REDUCES = "mapreduce.job.reduces";
    String SKIP_RECORDS = "mapreduce.job.skiprecords";
    String SKIP_OUTDIR = "mapreduce.job.skip.outdir";
    /** @deprecated */
    @Deprecated
    String SPECULATIVE_SLOWNODE_THRESHOLD = "mapreduce.job.speculative.slownodethreshold";
    String SPECULATIVE_SLOWTASK_THRESHOLD = "mapreduce.job.speculative.slowtaskthreshold";
    /** @deprecated */
    @Deprecated
    String SPECULATIVECAP = "mapreduce.job.speculative.speculativecap";
    String SPECULATIVECAP_RUNNING_TASKS = "mapreduce.job.speculative.speculative-cap-running-tasks";
    double DEFAULT_SPECULATIVECAP_RUNNING_TASKS = 0.1D;
    String SPECULATIVECAP_TOTAL_TASKS = "mapreduce.job.speculative.speculative-cap-total-tasks";
    double DEFAULT_SPECULATIVECAP_TOTAL_TASKS = 0.01D;
    String SPECULATIVE_MINIMUM_ALLOWED_TASKS = "mapreduce.job.speculative.minimum-allowed-tasks";
    int DEFAULT_SPECULATIVE_MINIMUM_ALLOWED_TASKS = 10;
    String SPECULATIVE_RETRY_AFTER_NO_SPECULATE = "mapreduce.job.speculative.retry-after-no-speculate";
    long DEFAULT_SPECULATIVE_RETRY_AFTER_NO_SPECULATE = 1000L;
    String SPECULATIVE_RETRY_AFTER_SPECULATE = "mapreduce.job.speculative.retry-after-speculate";
    long DEFAULT_SPECULATIVE_RETRY_AFTER_SPECULATE = 15000L;
    String JOB_LOCAL_DIR = "mapreduce.job.local.dir";
    String OUTPUT_KEY_CLASS = "mapreduce.job.output.key.class";
    String OUTPUT_VALUE_CLASS = "mapreduce.job.output.value.class";
    String KEY_COMPARATOR = "mapreduce.job.output.key.comparator.class";
    String COMBINER_GROUP_COMPARATOR_CLASS = "mapreduce.job.combiner.group.comparator.class";
    String GROUP_COMPARATOR_CLASS = "mapreduce.job.output.group.comparator.class";
    String WORKING_DIR = "mapreduce.job.working.dir";
    String CLASSPATH_ARCHIVES = "mapreduce.job.classpath.archives";
    String CLASSPATH_FILES = "mapreduce.job.classpath.files";
    String CACHE_FILES = "mapreduce.job.cache.files";
    String CACHE_ARCHIVES = "mapreduce.job.cache.archives";
    String CACHE_FILES_SIZES = "mapreduce.job.cache.files.filesizes";
    String CACHE_ARCHIVES_SIZES = "mapreduce.job.cache.archives.filesizes";
    String CACHE_LOCALFILES = "mapreduce.job.cache.local.files";
    String CACHE_LOCALARCHIVES = "mapreduce.job.cache.local.archives";
    String CACHE_FILE_TIMESTAMPS = "mapreduce.job.cache.files.timestamps";
    String CACHE_ARCHIVES_TIMESTAMPS = "mapreduce.job.cache.archives.timestamps";
    String CACHE_FILE_VISIBILITIES = "mapreduce.job.cache.files.visibilities";
    String CACHE_ARCHIVES_VISIBILITIES = "mapreduce.job.cache.archives.visibilities";
    String JOBJAR_VISIBILITY = "mapreduce.job.jobjar.visibility";
    boolean JOBJAR_VISIBILITY_DEFAULT = false;
    String JOBJAR_SHARED_CACHE_UPLOAD_POLICY = "mapreduce.job.jobjar.sharedcache.uploadpolicy";
    boolean JOBJAR_SHARED_CACHE_UPLOAD_POLICY_DEFAULT = false;
    String CACHE_FILES_SHARED_CACHE_UPLOAD_POLICIES = "mapreduce.job.cache.files.sharedcache.uploadpolicies";
    String CACHE_ARCHIVES_SHARED_CACHE_UPLOAD_POLICIES = "mapreduce.job.cache.archives.sharedcache.uploadpolicies";
    String FILES_FOR_SHARED_CACHE = "mapreduce.job.cache.sharedcache.files";
    String FILES_FOR_CLASSPATH_AND_SHARED_CACHE = "mapreduce.job.cache.sharedcache.files.addtoclasspath";
    String ARCHIVES_FOR_SHARED_CACHE = "mapreduce.job.cache.sharedcache.archives";
    String SHARED_CACHE_MODE = "mapreduce.job.sharedcache.mode";
    String SHARED_CACHE_MODE_DEFAULT = "disabled";
    /** @deprecated */
    @Deprecated
    String CACHE_SYMLINK = "mapreduce.job.cache.symlink.create";
    String USER_LOG_RETAIN_HOURS = "mapreduce.job.userlog.retain.hours";
    String MAPREDUCE_JOB_USER_CLASSPATH_FIRST = "mapreduce.job.user.classpath.first";
    String MAPREDUCE_JOB_CLASSLOADER = "mapreduce.job.classloader";
    String MAPREDUCE_JOB_SHUFFLE_PROVIDER_SERVICES = "mapreduce.job.shuffle.provider.services";
    String MAPREDUCE_JOB_CLASSLOADER_SYSTEM_CLASSES = "mapreduce.job.classloader.system.classes";
    String MAPREDUCE_JVM_SYSTEM_PROPERTIES_TO_LOG = "mapreduce.jvm.system-properties-to-log";
    String DEFAULT_MAPREDUCE_JVM_SYSTEM_PROPERTIES_TO_LOG = "os.name,os.version,java.home,java.runtime.version,java.vendor,java.version,java.vm.name,java.class.path,java.io.tmpdir,user.dir,user.name";
    String IO_SORT_FACTOR = "mapreduce.task.io.sort.factor";
    int DEFAULT_IO_SORT_FACTOR = 10;
    String IO_SORT_MB = "mapreduce.task.io.sort.mb";
    int DEFAULT_IO_SORT_MB = 100;
    String INDEX_CACHE_MEMORY_LIMIT = "mapreduce.task.index.cache.limit.bytes";
    String PRESERVE_FAILED_TASK_FILES = "mapreduce.task.files.preserve.failedtasks";
    String PRESERVE_FILES_PATTERN = "mapreduce.task.files.preserve.filepattern";
    String TASK_DEBUGOUT_LINES = "mapreduce.task.debugout.lines";
    String RECORDS_BEFORE_PROGRESS = "mapreduce.task.merge.progress.records";
    String SKIP_START_ATTEMPTS = "mapreduce.task.skip.start.attempts";
    String TASK_ATTEMPT_ID = "mapreduce.task.attempt.id";
    String TASK_ISMAP = "mapreduce.task.ismap";
    boolean DEFAULT_TASK_ISMAP = true;
    String TASK_PARTITION = "mapreduce.task.partition";
    String TASK_PROFILE = "mapreduce.task.profile";
    String TASK_PROFILE_PARAMS = "mapreduce.task.profile.params";
    String DEFAULT_TASK_PROFILE_PARAMS = "-agentlib:hprof=cpu=samples,heap=sites,force=n,thread=y,verbose=n,file=%s";
    String NUM_MAP_PROFILES = "mapreduce.task.profile.maps";
    String NUM_REDUCE_PROFILES = "mapreduce.task.profile.reduces";
    String TASK_MAP_PROFILE_PARAMS = "mapreduce.task.profile.map.params";
    String TASK_REDUCE_PROFILE_PARAMS = "mapreduce.task.profile.reduce.params";
    String TASK_TIMEOUT = "mapreduce.task.timeout";
    long DEFAULT_TASK_TIMEOUT_MILLIS = 300000L;
    String TASK_PROGRESS_REPORT_INTERVAL = "mapreduce.task.progress-report.interval";
    String TASK_TIMEOUT_CHECK_INTERVAL_MS = "mapreduce.task.timeout.check-interval-ms";
    String TASK_EXIT_TIMEOUT = "mapreduce.task.exit.timeout";
    int TASK_EXIT_TIMEOUT_DEFAULT = 60000;
    String TASK_EXIT_TIMEOUT_CHECK_INTERVAL_MS = "mapreduce.task.exit.timeout.check-interval-ms";
    int TASK_EXIT_TIMEOUT_CHECK_INTERVAL_MS_DEFAULT = 20000;
    String TASK_ID = "mapreduce.task.id";
    String TASK_OUTPUT_DIR = "mapreduce.task.output.dir";
    String TASK_USERLOG_LIMIT = "mapreduce.task.userlog.limit.kb";
    String MAP_SORT_SPILL_PERCENT = "mapreduce.map.sort.spill.percent";
    String MAP_INPUT_FILE = "mapreduce.map.input.file";
    String MAP_INPUT_PATH = "mapreduce.map.input.length";
    String MAP_INPUT_START = "mapreduce.map.input.start";
    String MAP_MEMORY_MB = "mapreduce.map.memory.mb";
    int DEFAULT_MAP_MEMORY_MB = 1024;
    String MAP_CPU_VCORES = "mapreduce.map.cpu.vcores";
    int DEFAULT_MAP_CPU_VCORES = 1;
    String MAP_RESOURCE_TYPE_PREFIX = "mapreduce.map.resource.";
    String RESOURCE_TYPE_NAME_VCORE = "vcores";
    String RESOURCE_TYPE_NAME_MEMORY = "memory";
    String RESOURCE_TYPE_ALTERNATIVE_NAME_MEMORY = "memory-mb";
    String MAP_ENV = "mapreduce.map.env";
    String MAP_JAVA_OPTS = "mapreduce.map.java.opts";
    String MAP_MAX_ATTEMPTS = "mapreduce.map.maxattempts";
    String MAP_DEBUG_SCRIPT = "mapreduce.map.debug.script";
    String MAP_SPECULATIVE = "mapreduce.map.speculative";
    String MAP_FAILURES_MAX_PERCENT = "mapreduce.map.failures.maxpercent";
    String MAP_SKIP_INCR_PROC_COUNT = "mapreduce.map.skip.proc-count.auto-incr";
    String MAP_SKIP_MAX_RECORDS = "mapreduce.map.skip.maxrecords";
    String MAP_COMBINE_MIN_SPILLS = "mapreduce.map.combine.minspills";
    String MAP_OUTPUT_COMPRESS = "mapreduce.map.output.compress";
    String MAP_OUTPUT_COMPRESS_CODEC = "mapreduce.map.output.compress.codec";
    String MAP_OUTPUT_KEY_CLASS = "mapreduce.map.output.key.class";
    String MAP_OUTPUT_VALUE_CLASS = "mapreduce.map.output.value.class";
    String MAP_OUTPUT_KEY_FIELD_SEPARATOR = "mapreduce.map.output.key.field.separator";
    /** @deprecated */
    @Deprecated
    String MAP_OUTPUT_KEY_FIELD_SEPERATOR = "mapreduce.map.output.key.field.separator";
    String MAP_LOG_LEVEL = "mapreduce.map.log.level";
    String REDUCE_LOG_LEVEL = "mapreduce.reduce.log.level";
    String DEFAULT_LOG_LEVEL = "INFO";
    String REDUCE_MERGE_INMEM_THRESHOLD = "mapreduce.reduce.merge.inmem.threshold";
    String REDUCE_INPUT_BUFFER_PERCENT = "mapreduce.reduce.input.buffer.percent";
    String REDUCE_MARKRESET_BUFFER_PERCENT = "mapreduce.reduce.markreset.buffer.percent";
    String REDUCE_MARKRESET_BUFFER_SIZE = "mapreduce.reduce.markreset.buffer.size";
    String REDUCE_MEMORY_MB = "mapreduce.reduce.memory.mb";
    int DEFAULT_REDUCE_MEMORY_MB = 1024;
    String REDUCE_CPU_VCORES = "mapreduce.reduce.cpu.vcores";
    int DEFAULT_REDUCE_CPU_VCORES = 1;
    String REDUCE_RESOURCE_TYPE_PREFIX = "mapreduce.reduce.resource.";
    String REDUCE_MEMORY_TOTAL_BYTES = "mapreduce.reduce.memory.totalbytes";
    String SHUFFLE_INPUT_BUFFER_PERCENT = "mapreduce.reduce.shuffle.input.buffer.percent";
    float DEFAULT_SHUFFLE_INPUT_BUFFER_PERCENT = 0.7F;
    String SHUFFLE_MEMORY_LIMIT_PERCENT = "mapreduce.reduce.shuffle.memory.limit.percent";
    String SHUFFLE_MERGE_PERCENT = "mapreduce.reduce.shuffle.merge.percent";
    float DEFAULT_SHUFFLE_MERGE_PERCENT = 0.66F;
    String REDUCE_FAILURES_MAXPERCENT = "mapreduce.reduce.failures.maxpercent";
    String REDUCE_ENV = "mapreduce.reduce.env";
    String REDUCE_JAVA_OPTS = "mapreduce.reduce.java.opts";
    String MAPREDUCE_JOB_DIR = "mapreduce.job.dir";
    String REDUCE_MAX_ATTEMPTS = "mapreduce.reduce.maxattempts";
    String SHUFFLE_PARALLEL_COPIES = "mapreduce.reduce.shuffle.parallelcopies";
    String REDUCE_DEBUG_SCRIPT = "mapreduce.reduce.debug.script";
    String REDUCE_SPECULATIVE = "mapreduce.reduce.speculative";
    String SHUFFLE_CONNECT_TIMEOUT = "mapreduce.reduce.shuffle.connect.timeout";
    String SHUFFLE_READ_TIMEOUT = "mapreduce.reduce.shuffle.read.timeout";
    String SHUFFLE_FETCH_FAILURES = "mapreduce.reduce.shuffle.maxfetchfailures";
    String MAX_ALLOWED_FETCH_FAILURES_FRACTION = "mapreduce.reduce.shuffle.max-fetch-failures-fraction";
    float DEFAULT_MAX_ALLOWED_FETCH_FAILURES_FRACTION = 0.5F;
    String MAX_FETCH_FAILURES_NOTIFICATIONS = "mapreduce.reduce.shuffle.max-fetch-failures-notifications";
    int DEFAULT_MAX_FETCH_FAILURES_NOTIFICATIONS = 3;
    String SHUFFLE_FETCH_RETRY_INTERVAL_MS = "mapreduce.reduce.shuffle.fetch.retry.interval-ms";
    int DEFAULT_SHUFFLE_FETCH_RETRY_INTERVAL_MS = 1000;
    String SHUFFLE_FETCH_RETRY_TIMEOUT_MS = "mapreduce.reduce.shuffle.fetch.retry.timeout-ms";
    String SHUFFLE_FETCH_RETRY_ENABLED = "mapreduce.reduce.shuffle.fetch.retry.enabled";
    String SHUFFLE_NOTIFY_READERROR = "mapreduce.reduce.shuffle.notify.readerror";
    String MAX_SHUFFLE_FETCH_RETRY_DELAY = "mapreduce.reduce.shuffle.retry-delay.max.ms";
    long DEFAULT_MAX_SHUFFLE_FETCH_RETRY_DELAY = 60000L;
    String MAX_SHUFFLE_FETCH_HOST_FAILURES = "mapreduce.reduce.shuffle.max-host-failures";
    int DEFAULT_MAX_SHUFFLE_FETCH_HOST_FAILURES = 5;
    String REDUCE_SKIP_INCR_PROC_COUNT = "mapreduce.reduce.skip.proc-count.auto-incr";
    String REDUCE_SKIP_MAXGROUPS = "mapreduce.reduce.skip.maxgroups";
    String REDUCE_MEMTOMEM_THRESHOLD = "mapreduce.reduce.merge.memtomem.threshold";
    String REDUCE_MEMTOMEM_ENABLED = "mapreduce.reduce.merge.memtomem.enabled";
    String COMBINE_RECORDS_BEFORE_PROGRESS = "mapreduce.task.combine.progress.records";
    String JOB_NAMENODES = "mapreduce.job.hdfs-servers";
    String JOB_NAMENODES_TOKEN_RENEWAL_EXCLUDE = "mapreduce.job.hdfs-servers.token-renewal.exclude";
    String JOB_JOBTRACKER_ID = "mapreduce.job.kerberos.jtprinicipal";
    String JOB_CANCEL_DELEGATION_TOKEN = "mapreduce.job.complete.cancel.delegation.tokens";
    String JOB_ACL_VIEW_JOB = "mapreduce.job.acl-view-job";
    String DEFAULT_JOB_ACL_VIEW_JOB = " ";
    String JOB_ACL_MODIFY_JOB = "mapreduce.job.acl-modify-job";
    String DEFAULT_JOB_ACL_MODIFY_JOB = " ";
    String JOB_RUNNING_MAP_LIMIT = "mapreduce.job.running.map.limit";
    int DEFAULT_JOB_RUNNING_MAP_LIMIT = 0;
    String JOB_RUNNING_REDUCE_LIMIT = "mapreduce.job.running.reduce.limit";
    int DEFAULT_JOB_RUNNING_REDUCE_LIMIT = 0;
    String JOB_MAX_MAP = "mapreduce.job.max.map";
    int DEFAULT_JOB_MAX_MAP = -1;
    String MAPREDUCE_JOB_CREDENTIALS_BINARY = "mapreduce.job.credentials.binary";
    String JOB_TOKEN_TRACKING_IDS_ENABLED = "mapreduce.job.token.tracking.ids.enabled";
    boolean DEFAULT_JOB_TOKEN_TRACKING_IDS_ENABLED = false;
    String JOB_TOKEN_TRACKING_IDS = "mapreduce.job.token.tracking.ids";
    String JOB_SUBMITHOST = "mapreduce.job.submithostname";
    String JOB_SUBMITHOSTADDR = "mapreduce.job.submithostaddress";
    String COUNTERS_MAX_KEY = "mapreduce.job.counters.max";
    int COUNTERS_MAX_DEFAULT = 120;
    String COUNTER_GROUP_NAME_MAX_KEY = "mapreduce.job.counters.group.name.max";
    int COUNTER_GROUP_NAME_MAX_DEFAULT = 128;
    String COUNTER_NAME_MAX_KEY = "mapreduce.job.counters.counter.name.max";
    int COUNTER_NAME_MAX_DEFAULT = 64;
    String COUNTER_GROUPS_MAX_KEY = "mapreduce.job.counters.groups.max";
    int COUNTER_GROUPS_MAX_DEFAULT = 50;
    String JOB_UBERTASK_ENABLE = "mapreduce.job.ubertask.enable";
    String JOB_UBERTASK_MAXMAPS = "mapreduce.job.ubertask.maxmaps";
    String JOB_UBERTASK_MAXREDUCES = "mapreduce.job.ubertask.maxreduces";
    String JOB_UBERTASK_MAXBYTES = "mapreduce.job.ubertask.maxbytes";
    String MAPREDUCE_JOB_EMIT_TIMELINE_DATA = "mapreduce.job.emit-timeline-data";
    boolean DEFAULT_MAPREDUCE_JOB_EMIT_TIMELINE_DATA = false;
    String MR_PREFIX = "yarn.app.mapreduce.";
    String MR_AM_PREFIX = "yarn.app.mapreduce.am.";
    String MR_CLIENT_TO_AM_IPC_MAX_RETRIES = "yarn.app.mapreduce.client-am.ipc.max-retries";
    int DEFAULT_MR_CLIENT_TO_AM_IPC_MAX_RETRIES = 3;
    String MR_CLIENT_TO_AM_IPC_MAX_RETRIES_ON_TIMEOUTS = "yarn.app.mapreduce.client-am.ipc.max-retries-on-timeouts";
    int DEFAULT_MR_CLIENT_TO_AM_IPC_MAX_RETRIES_ON_TIMEOUTS = 3;
    String MR_CLIENT_MAX_RETRIES = "yarn.app.mapreduce.client.max-retries";
    int DEFAULT_MR_CLIENT_MAX_RETRIES = 3;
    String MR_CLIENT_JOB_MAX_RETRIES = "yarn.app.mapreduce.client.job.max-retries";
    int DEFAULT_MR_CLIENT_JOB_MAX_RETRIES = 3;
    String MR_CLIENT_JOB_RETRY_INTERVAL = "yarn.app.mapreduce.client.job.retry-interval";
    long DEFAULT_MR_CLIENT_JOB_RETRY_INTERVAL = 2000L;
    String MR_AM_STAGING_DIR = "yarn.app.mapreduce.am.staging-dir";
    String DEFAULT_MR_AM_STAGING_DIR = "/tmp/hadoop-yarn/staging";
    String MR_AM_VMEM_MB = "yarn.app.mapreduce.am.resource.mb";
    int DEFAULT_MR_AM_VMEM_MB = 1536;
    String MR_AM_CPU_VCORES = "yarn.app.mapreduce.am.resource.cpu-vcores";
    int DEFAULT_MR_AM_CPU_VCORES = 1;
    String MR_AM_RESOURCE_PREFIX = "yarn.app.mapreduce.am.resource.";
    String MR_AM_COMMAND_OPTS = "yarn.app.mapreduce.am.command-opts";
    String DEFAULT_MR_AM_COMMAND_OPTS = "-Xmx1024m";
    String MR_AM_ADMIN_COMMAND_OPTS = "yarn.app.mapreduce.am.admin-command-opts";
    String DEFAULT_MR_AM_ADMIN_COMMAND_OPTS = "";
    String MR_AM_LOG_LEVEL = "yarn.app.mapreduce.am.log.level";
    String DEFAULT_MR_AM_LOG_LEVEL = "INFO";
    String MR_AM_LOG_KB = "yarn.app.mapreduce.am.container.log.limit.kb";
    int DEFAULT_MR_AM_LOG_KB = 0;
    String MR_AM_LOG_BACKUPS = "yarn.app.mapreduce.am.container.log.backups";
    int DEFAULT_MR_AM_LOG_BACKUPS = 0;
    String MR_AM_NUM_PROGRESS_SPLITS = "yarn.app.mapreduce.am.num-progress-splits";
    int DEFAULT_MR_AM_NUM_PROGRESS_SPLITS = 12;
    String MR_AM_CONTAINERLAUNCHER_THREAD_COUNT_LIMIT = "yarn.app.mapreduce.am.containerlauncher.thread-count-limit";
    int DEFAULT_MR_AM_CONTAINERLAUNCHER_THREAD_COUNT_LIMIT = 500;
    String MR_AM_CONTAINERLAUNCHER_THREADPOOL_INITIAL_SIZE = "yarn.app.mapreduce.am.containerlauncher.threadpool-initial-size";
    int DEFAULT_MR_AM_CONTAINERLAUNCHER_THREADPOOL_INITIAL_SIZE = 10;
    String MR_AM_JOB_CLIENT_THREAD_COUNT = "yarn.app.mapreduce.am.job.client.thread-count";
    int DEFAULT_MR_AM_JOB_CLIENT_THREAD_COUNT = 1;
    String MR_AM_JOB_CLIENT_PORT_RANGE = "yarn.app.mapreduce.am.job.client.port-range";
    String MR_AM_WEBAPP_PORT_RANGE = "yarn.app.mapreduce.am.webapp.port-range";
    String MR_AM_JOB_NODE_BLACKLISTING_ENABLE = "yarn.app.mapreduce.am.job.node-blacklisting.enable";
    String MR_AM_IGNORE_BLACKLISTING_BLACKLISTED_NODE_PERECENT = "yarn.app.mapreduce.am.job.node-blacklisting.ignore-threshold-node-percent";
    int DEFAULT_MR_AM_IGNORE_BLACKLISTING_BLACKLISTED_NODE_PERCENT = 33;
    String MR_AM_JOB_RECOVERY_ENABLE = "yarn.app.mapreduce.am.job.recovery.enable";
    boolean MR_AM_JOB_RECOVERY_ENABLE_DEFAULT = true;
    String MR_AM_JOB_REDUCE_PREEMPTION_LIMIT = "yarn.app.mapreduce.am.job.reduce.preemption.limit";
    float DEFAULT_MR_AM_JOB_REDUCE_PREEMPTION_LIMIT = 0.5F;
    String MR_AM_PREEMPTION_POLICY = "yarn.app.mapreduce.am.preemption.policy";
    String JOB_AM_ACCESS_DISABLED = "mapreduce.job.am-access-disabled";
    boolean DEFAULT_JOB_AM_ACCESS_DISABLED = false;
    String MR_AM_JOB_REDUCE_RAMPUP_UP_LIMIT = "yarn.app.mapreduce.am.job.reduce.rampup.limit";
    float DEFAULT_MR_AM_JOB_REDUCE_RAMP_UP_LIMIT = 0.5F;
    String MR_AM_JOB_SPECULATOR = "yarn.app.mapreduce.am.job.speculator.class";
    String MR_AM_TASK_ESTIMATOR = "yarn.app.mapreduce.am.job.task.estimator.class";
    String MR_AM_TASK_ESTIMATOR_SMOOTH_LAMBDA_MS = "yarn.app.mapreduce.am.job.task.estimator.exponential.smooth.lambda-ms";
    long DEFAULT_MR_AM_TASK_ESTIMATOR_SMOOTH_LAMBDA_MS = 60000L;
    String MR_AM_TASK_ESTIMATOR_EXPONENTIAL_RATE_ENABLE = "yarn.app.mapreduce.am.job.task.estimator.exponential.smooth.rate";
    String MR_AM_TASK_LISTENER_THREAD_COUNT = "yarn.app.mapreduce.am.job.task.listener.thread-count";
    int DEFAULT_MR_AM_TASK_LISTENER_THREAD_COUNT = 30;
    String MR_AM_TO_RM_HEARTBEAT_INTERVAL_MS = "yarn.app.mapreduce.am.scheduler.heartbeat.interval-ms";
    int DEFAULT_MR_AM_TO_RM_HEARTBEAT_INTERVAL_MS = 1000;
    String MR_AM_TO_RM_WAIT_INTERVAL_MS = "yarn.app.mapreduce.am.scheduler.connection.wait.interval-ms";
    int DEFAULT_MR_AM_TO_RM_WAIT_INTERVAL_MS = 360000;
    String MR_AM_COMMITTER_CANCEL_TIMEOUT_MS = "yarn.app.mapreduce.am.job.committer.cancel-timeout";
    int DEFAULT_MR_AM_COMMITTER_CANCEL_TIMEOUT_MS = 60000;
    String MR_AM_COMMIT_WINDOW_MS = "yarn.app.mapreduce.am.job.committer.commit-window";
    int DEFAULT_MR_AM_COMMIT_WINDOW_MS = 10000;
    String MR_AM_CREATE_JH_INTERMEDIATE_BASE_DIR = "yarn.app.mapreduce.am.create-intermediate-jh-base-dir";
    String MR_AM_HISTORY_MAX_UNFLUSHED_COMPLETE_EVENTS = "yarn.app.mapreduce.am.history.max-unflushed-events";
    int DEFAULT_MR_AM_HISTORY_MAX_UNFLUSHED_COMPLETE_EVENTS = 200;
    String MR_AM_HISTORY_JOB_COMPLETE_UNFLUSHED_MULTIPLIER = "yarn.app.mapreduce.am.history.job-complete-unflushed-multiplier";
    int DEFAULT_MR_AM_HISTORY_JOB_COMPLETE_UNFLUSHED_MULTIPLIER = 30;
    String MR_AM_HISTORY_COMPLETE_EVENT_FLUSH_TIMEOUT_MS = "yarn.app.mapreduce.am.history.complete-event-flush-timeout";
    long DEFAULT_MR_AM_HISTORY_COMPLETE_EVENT_FLUSH_TIMEOUT_MS = 30000L;
    String MR_AM_HISTORY_USE_BATCHED_FLUSH_QUEUE_SIZE_THRESHOLD = "yarn.app.mapreduce.am.history.use-batched-flush.queue-size.threshold";
    int DEFAULT_MR_AM_HISTORY_USE_BATCHED_FLUSH_QUEUE_SIZE_THRESHOLD = 50;
    String MR_AM_HARD_KILL_TIMEOUT_MS = "yarn.app.mapreduce.am.hard-kill-timeout-ms";
    long DEFAULT_MR_AM_HARD_KILL_TIMEOUT_MS = 10000L;
    String MR_JOB_REDUCER_UNCONDITIONAL_PREEMPT_DELAY_SEC = "mapreduce.job.reducer.unconditional-preempt.delay.sec";
    int DEFAULT_MR_JOB_REDUCER_UNCONDITIONAL_PREEMPT_DELAY_SEC = 300;
    String MR_JOB_REDUCER_PREEMPT_DELAY_SEC = "mapreduce.job.reducer.preempt.delay.sec";
    int DEFAULT_MR_JOB_REDUCER_PREEMPT_DELAY_SEC = 0;
    String MR_AM_ENV = "yarn.app.mapreduce.am.env";
    String MR_AM_ADMIN_USER_ENV = "yarn.app.mapreduce.am.admin.user.env";
    String DEFAULT_MR_AM_ADMIN_USER_ENV = Shell.WINDOWS ? "" : "LD_LIBRARY_PATH=" + Apps.crossPlatformify("HADOOP_COMMON_HOME") + "/lib/native";
    String MR_AM_PROFILE = "yarn.app.mapreduce.am.profile";
    boolean DEFAULT_MR_AM_PROFILE = false;
    String MR_AM_PROFILE_PARAMS = "yarn.app.mapreduce.am.profile.params";
    String MAPRED_MAP_ADMIN_JAVA_OPTS = "mapreduce.admin.map.child.java.opts";
    String MAPRED_REDUCE_ADMIN_JAVA_OPTS = "mapreduce.admin.reduce.child.java.opts";
    String DEFAULT_MAPRED_ADMIN_JAVA_OPTS = "-Djava.net.preferIPv4Stack=true -Dhadoop.metrics.log.level=WARN ";
    String MAPRED_ADMIN_USER_SHELL = "mapreduce.admin.user.shell";
    String DEFAULT_SHELL = "/bin/bash";
    String MAPRED_ADMIN_USER_ENV = "mapreduce.admin.user.env";
    String DEFAULT_MAPRED_ADMIN_USER_ENV = Shell.WINDOWS ? "PATH=%PATH%;%HADOOP_COMMON_HOME%\\\\bin" : "LD_LIBRARY_PATH=" + Apps.crossPlatformify("HADOOP_COMMON_HOME") + "/lib/native";
    String WORKDIR = "work";
    String OUTPUT = "output";
    String HADOOP_WORK_DIR = "HADOOP_WORK_DIR";
    String STDOUT_LOGFILE_ENV = "STDOUT_LOGFILE_ENV";
    String STDERR_LOGFILE_ENV = "STDERR_LOGFILE_ENV";
    String JOB_SUBMIT_DIR = "jobSubmitDir";
    String JOB_CONF_FILE = "job.xml";
    String JOB_JAR = "job.jar";
    String JOB_SPLIT = "job.split";
    String JOB_SPLIT_METAINFO = "job.splitmetainfo";
    String APPLICATION_MASTER_CLASS = "org.apache.hadoop.mapreduce.v2.app.MRAppMaster";
    String MAPREDUCE_V2_CHILD_CLASS = "org.apache.hadoop.mapred.YarnChild";
    String APPLICATION_ATTEMPT_ID = "mapreduce.job.application.attempt.id";
    String MR_JOB_END_NOTIFICATION_URL = "mapreduce.job.end-notification.url";
    String MR_JOB_END_NOTIFICATION_PROXY = "mapreduce.job.end-notification.proxy";
    String MR_JOB_END_NOTIFICATION_TIMEOUT = "mapreduce.job.end-notification.timeout";
    String MR_JOB_END_RETRY_ATTEMPTS = "mapreduce.job.end-notification.retry.attempts";
    String MR_JOB_END_RETRY_INTERVAL = "mapreduce.job.end-notification.retry.interval";
    String MR_JOB_END_NOTIFICATION_MAX_ATTEMPTS = "mapreduce.job.end-notification.max.attempts";
    String MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL = "mapreduce.job.end-notification.max.retry.interval";
    int DEFAULT_MR_JOB_END_NOTIFICATION_TIMEOUT = 5000;
    String MR_AM_SECURITY_SERVICE_AUTHORIZATION_TASK_UMBILICAL = "security.job.task.protocol.acl";
    String MR_AM_SECURITY_SERVICE_AUTHORIZATION_CLIENT = "security.job.client.protocol.acl";
    String MAPREDUCE_APPLICATION_CLASSPATH = "mapreduce.application.classpath";
    String MAPREDUCE_JOB_LOG4J_PROPERTIES_FILE = "mapreduce.job.log4j-properties-file";
    String MAPREDUCE_APPLICATION_FRAMEWORK_PATH = "mapreduce.application.framework.path";
    @Public
    @Unstable
    String DEFAULT_MAPREDUCE_CROSS_PLATFORM_APPLICATION_CLASSPATH = Apps.crossPlatformify("HADOOP_MAPRED_HOME") + "/share/hadoop/mapreduce/*," + Apps.crossPlatformify("HADOOP_MAPRED_HOME") + "/share/hadoop/mapreduce/lib/*";
    String DEFAULT_MAPREDUCE_APPLICATION_CLASSPATH = Shell.WINDOWS ? "%HADOOP_MAPRED_HOME%\\\\share\\\\hadoop\\\\mapreduce\\\\*,%HADOOP_MAPRED_HOME%\\\\share\\\\hadoop\\\\mapreduce\\\\lib\\\\*" : "$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*,$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*";
    String WORKFLOW_ID = "mapreduce.workflow.id";
    String TASK_LOG_BACKUPS = "yarn.app.mapreduce.task.container.log.backups";
    int DEFAULT_TASK_LOG_BACKUPS = 0;
    String REDUCE_SEPARATE_SHUFFLE_LOG = "yarn.app.mapreduce.shuffle.log.separate";
    boolean DEFAULT_REDUCE_SEPARATE_SHUFFLE_LOG = true;
    String SHUFFLE_LOG_BACKUPS = "yarn.app.mapreduce.shuffle.log.backups";
    int DEFAULT_SHUFFLE_LOG_BACKUPS = 0;
    String SHUFFLE_LOG_KB = "yarn.app.mapreduce.shuffle.log.limit.kb";
    long DEFAULT_SHUFFLE_LOG_KB = 0L;
    String WORKFLOW_NAME = "mapreduce.workflow.name";
    String WORKFLOW_NODE_NAME = "mapreduce.workflow.node.name";
    String WORKFLOW_ADJACENCY_PREFIX_STRING = "mapreduce.workflow.adjacency.";
    String WORKFLOW_ADJACENCY_PREFIX_PATTERN = "^mapreduce\\\\.workflow\\\\.adjacency\\\\..+";
    String WORKFLOW_TAGS = "mapreduce.workflow.tags";
    String MR_AM_MAX_ATTEMPTS = "mapreduce.am.max-attempts";
    int DEFAULT_MR_AM_MAX_ATTEMPTS = 2;
    String MR_APPLICATION_TYPE = "MAPREDUCE";
    String TASK_PREEMPTION = "mapreduce.job.preemption";
    String HEAP_MEMORY_MB_RATIO = "mapreduce.job.heap.memory-mb.ratio";
    float DEFAULT_HEAP_MEMORY_MB_RATIO = 0.8F;
    String MR_ENCRYPTED_INTERMEDIATE_DATA = "mapreduce.job.encrypted-intermediate-data";
    boolean DEFAULT_MR_ENCRYPTED_INTERMEDIATE_DATA = false;
    String MR_ENCRYPTED_INTERMEDIATE_DATA_KEY_SIZE_BITS = "mapreduce.job.encrypted-intermediate-data-key-size-bits";
    int DEFAULT_MR_ENCRYPTED_INTERMEDIATE_DATA_KEY_SIZE_BITS = 128;
    String MR_ENCRYPTED_INTERMEDIATE_DATA_BUFFER_KB = "mapreduce.job.encrypted-intermediate-data.buffer.kb";
    int DEFAULT_MR_ENCRYPTED_INTERMEDIATE_DATA_BUFFER_KB = 128;
    String MAX_RESOURCES = "mapreduce.job.cache.limit.max-resources";
    int MAX_RESOURCES_DEFAULT = 0;
    String MAX_RESOURCES_MB = "mapreduce.job.cache.limit.max-resources-mb";
    long MAX_RESOURCES_MB_DEFAULT = 0L;
    String MAX_SINGLE_RESOURCE_MB = "mapreduce.job.cache.limit.max-single-resource-mb";
    long MAX_SINGLE_RESOURCE_MB_DEFAULT = 0L;
    String MR_NUM_OPPORTUNISTIC_MAPS_PERCENT = "mapreduce.job.num-opportunistic-maps-percent";
    int DEFAULT_MR_NUM_OPPORTUNISTIC_MAPS_PERCENT = 0;
    String MR_JOB_REDACTED_PROPERTIES = "mapreduce.job.redacted-properties";
    String MR_JOB_SEND_TOKEN_CONF = "mapreduce.job.send-token-conf";
    String FINISH_JOB_WHEN_REDUCERS_DONE = "mapreduce.job.finish-when-all-reducers-done";
    boolean DEFAULT_FINISH_JOB_WHEN_REDUCERS_DONE = true;
    String MR_AM_STAGING_DIR_ERASURECODING_ENABLED = "yarn.app.mapreduce.am.staging-dir.erasurecoding.enabled";
    boolean DEFAULT_MR_AM_STAGING_ERASURECODING_ENABLED = false;
}
MRJobConfig

http://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/apidocs/org/apache/hadoop/mapreduce/Mapper.Context.html?is-external=true

 

静态类实现Reducer类

@Checkpointable
@Public
@Stable
public class Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT> {
    public Reducer() {
    }

    protected void setup(Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context context) throws IOException, InterruptedException {
    }

    protected void reduce(KEYIN key, Iterable<VALUEIN> values, Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context context) throws IOException, InterruptedException {
        Iterator var4 = values.iterator();

        while(var4.hasNext()) {
            VALUEIN value = var4.next();
            context.write(key, value);
        }

    }

    protected void cleanup(Reducer<KEYIN, VALUEIN, KEYOUT, VAL

以上是关于Hadoop小项目练习 更新中的主要内容,如果未能解决你的问题,请参考以下文章

聊聊Java中代码优化的30个小技巧

聊聊Java中代码优化的30个小技巧

Xcode中代码注释编写小技巧

Java中代码优化的30个小技巧

最近项目中代码管理学习

最近项目中代码管理学习