Hadoop HA 配置文件以及自动化Shell脚本开关HA集群
Posted AdamShyly
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Hadoop HA 配置文件以及自动化Shell脚本开关HA集群相关的知识,希望对你有一定的参考价值。
目录
配置文件
workers
hadoop102
hadoop103
hadoop104
core-site.xml
<configuration>
<!-- core-site.xml: core settings for the HA cluster described in this article. -->
<!-- Default file system: the logical cluster name "mycluster" that groups the NameNodes -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://mycluster</value>
</property>
<!-- Directory where Hadoop stores the files it produces at runtime -->
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/module/hadoop-3.2.3/data</value>
</property>
<!-- ZooKeeper servers that zkfc connects to -->
<property>
<name>ha.zookeeper.quorum</name>
<value>hadoop102:2181,hadoop103:2181,hadoop104:2181</value>
</property>
<!-- Static user used for the HDFS web UI: hadoop -->
<property>
<name>hadoop.http.staticuser.user</name>
<!-- the hadoop user -->
<value>hadoop</value>
</property>
<!-- Hive integration: proxy-user settings for the hadoop user; hosts and groups are both set to * -->
<property>
<name>hadoop.proxyuser.hadoop.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hadoop.groups</name>
<value>*</value>
</property>
</configuration>
hdfs-site.xml
<configuration>
<!-- hdfs-site.xml: HDFS HA settings for the nameservice "mycluster" with three NameNodes. -->
<!-- The storage directories below reference hadoop.tmp.dir (set in core-site.xml).
     Hadoop only expands the ${name} form; a bare $name is taken literally. -->
<!-- NameNode data directory -->
<property>
<name>dfs.namenode.name.dir</name>
<value>file://${hadoop.tmp.dir}/name</value>
</property>
<!-- DataNode data directory -->
<property>
<name>dfs.datanode.data.dir</name>
<value>file://${hadoop.tmp.dir}/data</value>
</property>
<!-- JournalNode data directory -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>${hadoop.tmp.dir}/jn</value>
</property>
<!-- Logical name of the fully distributed cluster (nameservice) -->
<property>
<name>dfs.nameservices</name>
<value>mycluster</value>
</property>
<!-- IDs of the NameNodes that belong to mycluster -->
<property>
<name>dfs.ha.namenodes.mycluster</name>
<value>nn1,nn2,nn3</value>
</property>
<!-- RPC address of each NameNode -->
<property>
<name>dfs.namenode.rpc-address.mycluster.nn1</name>
<value>hadoop102:8020</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn2</name>
<value>hadoop103:8020</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn3</name>
<value>hadoop104:8020</value>
</property>
<!-- HTTP address of each NameNode -->
<property>
<name>dfs.namenode.http-address.mycluster.nn1</name>
<value>hadoop102:9870</value>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.nn2</name>
<value>hadoop103:9870</value>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.nn3</name>
<value>hadoop104:9870</value>
</property>
<!-- Where the NameNode metadata (shared edits) is stored on the JournalNodes -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://hadoop102:8485;hadoop103:8485;hadoop104:8485/mycluster</value>
</property>
<!-- Failover proxy provider: used by clients to determine which NameNode is Active -->
<property>
<name>dfs.client.failover.proxy.provider.mycluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- Fencing method, so that only one server responds to clients at any time -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
</property>
<!-- Private key used for the SSH login that the fencing method requires -->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/hadoop/.ssh/id_rsa</value>
</property>
<!-- Enable automatic NameNode failover -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
</configuration>
mapred-site.xml
<configuration>
<!-- mapred-site.xml: MapReduce framework and JobHistory server settings. -->
<!-- Run MapReduce programs on YARN -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!-- JobHistory server address -->
<property>
<name>mapreduce.jobhistory.address</name>
<value>hadoop102:10020</value>
</property>
<!-- JobHistory server web UI address -->
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>hadoop102:19888</value>
</property>
</configuration>
yarn-site.xml
<configuration>
<!-- yarn-site.xml: ResourceManager HA with three ResourceManagers (rm1, rm2, rm3), plus log aggregation. -->
<!-- Auxiliary service of the NodeManager: mapreduce_shuffle -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- Enable ResourceManager HA -->
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<!-- Identifier of the YARN cluster (this is a cluster id, not an address) -->
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>cluster-yarn1</value>
</property>
<!-- Logical IDs of the ResourceManagers -->
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2,rm3</value>
</property>
<!-- ========== rm1 settings ========== -->
<!-- Host name of rm1 -->
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>hadoop102</value>
</property>
<!-- Web UI address of rm1 -->
<property>
<name>yarn.resourcemanager.webapp.address.rm1</name>
<value>hadoop102:8088</value>
</property>
<!-- Internal communication address of rm1 -->
<property>
<name>yarn.resourcemanager.address.rm1</name>
<value>hadoop102:8032</value>
</property>
<!-- Address the ApplicationMaster uses to request resources from rm1 -->
<property>
<name>yarn.resourcemanager.scheduler.address.rm1</name>
<value>hadoop102:8030</value>
</property>
<!-- Address NodeManagers connect to on rm1 -->
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm1</name>
<value>hadoop102:8031</value>
</property>
<!-- ========== rm2 settings ========== -->
<!-- Host name of rm2 -->
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>hadoop103</value>
</property>
<!-- Web UI address of rm2 -->
<property>
<name>yarn.resourcemanager.webapp.address.rm2</name>
<value>hadoop103:8088</value>
</property>
<!-- Internal communication address of rm2 -->
<property>
<name>yarn.resourcemanager.address.rm2</name>
<value>hadoop103:8032</value>
</property>
<!-- Address the ApplicationMaster uses to request resources from rm2 -->
<property>
<name>yarn.resourcemanager.scheduler.address.rm2</name>
<value>hadoop103:8030</value>
</property>
<!-- Address NodeManagers connect to on rm2 -->
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm2</name>
<value>hadoop103:8031</value>
</property>
<!-- ========== rm3 settings ========== -->
<!-- Host name of rm3 -->
<property>
<name>yarn.resourcemanager.hostname.rm3</name>
<value>hadoop104</value>
</property>
<!-- Web UI address of rm3 -->
<property>
<name>yarn.resourcemanager.webapp.address.rm3</name>
<value>hadoop104:8088</value>
</property>
<!-- Internal communication address of rm3 -->
<property>
<name>yarn.resourcemanager.address.rm3</name>
<value>hadoop104:8032</value>
</property>
<!-- Address the ApplicationMaster uses to request resources from rm3 -->
<property>
<name>yarn.resourcemanager.scheduler.address.rm3</name>
<value>hadoop104:8030</value>
</property>
<!-- Address NodeManagers connect to on rm3 -->
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm3</name>
<value>hadoop104:8031</value>
</property>
<!-- Address of the ZooKeeper ensemble -->
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>hadoop102:2181,hadoop103:2181,hadoop104:2181</value>
</property>
<!-- Enable automatic recovery -->
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<!-- Store the ResourceManager state in the ZooKeeper ensemble -->
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<!-- Environment variables that are inherited (whitelist) -->
<property>
<name>yarn.nodemanager.env-whitelist</name>
<value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
</property>
<!-- Enable log aggregation -->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<!-- URL of the log server; host and port match mapreduce.jobhistory.webapp.address in mapred-site.xml -->
<property>
<name>yarn.log.server.url</name>
<value>http://hadoop102:19888/jobhistory/logs</value>
</property>
<!-- Keep aggregated logs for 7 days (604800 seconds) -->
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>604800</value>
</property>
</configuration>
自动化Shell脚本
以下是初始化、开关HA集群所用到的Shell脚本
[hadoop@hadoop102 bin]$ pwd
/home/hadoop/bin
[hadoop@hadoop102 bin]$ ls
format-ha hadoop-ha jpsall xcall xsync zk
[hadoop@hadoop102 bin]$
format-ha
初始化HA集群
#!/bin/bash
#
# format-ha: one-time initialisation of the HA cluster.
#
# WARNING: destructive. The Hadoop data and logs directories are removed on
# every node before the NameNode is re-formatted.
#
# Steps, in order:
#   1. delete data/ and logs/ on hadoop102, hadoop103 and hadoop104
#   2. start a JournalNode on every node (through the xcall helper)
#   3. format the NameNode on hadoop102 and start it
#   4. bootstrap the standby NameNodes on hadoop103 and hadoop104
#   5. stop HDFS, start ZooKeeper, run "hdfs zkfc -formatZK", stop ZooKeeper
#
# The helpers xcall and zk are invoked by bare name, so they must be on PATH.
# Any failing critical step aborts the script: continuing after, for example,
# a failed NameNode format would leave the cluster half-initialised.

readonly HADOOP_DIR=/opt/module/hadoop-3.2.3

# Print an error message on stderr and abort with a non-zero status.
die() {
  printf 'format-ha: %s\n' "$*" >&2
  exit 1
}

# Run a command; abort the whole script if it fails.
must() {
  "$@" || die "step failed: $*"
}

for HOST in hadoop102 hadoop103 hadoop104
do
  echo "========= delete data and logs in $HOST =========="
  must ssh "$HOST" "rm -rf $HADOOP_DIR/data $HADOOP_DIR/logs"
done

echo "------- create journalnode --------"
must xcall hdfs --daemon start journalnode

echo "------- format namenode -------"
must ssh hadoop102 "hdfs namenode -format"
# The freshly formatted NameNode has to be running so the others can copy from it.
must ssh hadoop102 "hdfs --daemon start namenode"
must ssh hadoop103 "hdfs namenode -bootstrapStandby"
must ssh hadoop104 "hdfs namenode -bootstrapStandby"

# Shutting HDFS down again is best effort: report a failure but keep going.
ssh hadoop102 "$HADOOP_DIR/sbin/stop-dfs.sh" \
  || echo "format-ha: warning: stop-dfs.sh reported a failure" >&2

# NOTE(review): formatZK runs right after "zk start"; if the ZooKeeper
# ensemble needs time to become ready this step may fail - confirm.
must zk start
must hdfs zkfc -formatZK
zk stop || echo "format-ha: warning: zk stop reported a failure" >&2
hadoop-ha
开关HA集群
#!/bin/bash
#
# hadoop-ha: start or stop the whole HA cluster.
#
# Usage: hadoop-ha start|stop
#
# start: ZooKeeper -> JournalNodes -> HDFS -> YARN -> JobHistory server
# stop:  JobHistory server -> YARN -> HDFS -> ZooKeeper
#
# Exits with status 1 (message on stderr) when the argument count is wrong or
# the argument is neither "start" nor "stop".

if [ $# -lt 1 ]
then
  echo "No Args Input ..." >&2
  exit 1
fi
if [ $# -gt 1 ]
then
  echo "Args Exceeded limit" >&2
  exit 1
fi

case "$1" in
"start")
  echo "============= 启动 hadoop 集群 =============="
  echo "------------- start ZooKeeper -----------"
  ssh hadoop102 "/home/hadoop/bin/zk start"
  echo "------------- start Journalnode ------------"
  ssh hadoop102 "/home/hadoop/bin/xcall hdfs --daemon start journalnode"
  echo "------------- 启动 HDFS --------------"
  ssh hadoop102 "/opt/module/hadoop-3.2.3/sbin/start-dfs.sh"
  echo "------------- 启动 yarn --------------"
  ssh hadoop103 "/opt/module/hadoop-3.2.3/sbin/start-yarn.sh"
  echo "------------- 启动 historyserver -------------"
  ssh hadoop102 "/opt/module/hadoop-3.2.3/bin/mapred --daemon start historyserver"
  ;;
"stop")
  # Services are stopped in the reverse order of start-up.
  echo "============= 关闭 hadoop 集群 =============="
  echo "------------- 关闭 historyserver -------------"
  ssh hadoop102 "/opt/module/hadoop-3.2.3/bin/mapred --daemon stop historyserver"
  echo "------------- 关闭 yarn --------------"
  ssh hadoop103 "/opt/module/hadoop-3.2.3/sbin/stop-yarn.sh"
  echo "------------- 关闭 HDFS --------------"
  ssh hadoop102 "/opt/module/hadoop-3.2.3/sbin/stop-dfs.sh"
  echo "------------- stop ZooKeeper -------------"
  ssh hadoop102 "/home/hadoop/bin/zk stop"
  ;;
*)
  # An unknown sub-command is an error, so it must not report success.
  echo "Input Args Error..." >&2
  exit 1
  ;;
esac
jpsall
查看所有节点服务器上正在运行的 Java 进程
#!/bin/bash
# jpsall: run jps over ssh on each cluster node and print the result under a
# banner line that names the node.
cluster_nodes=(hadoop102 hadoop103 hadoop104)

for node in "${cluster_nodes[@]}"; do
  printf '=========== %s ==========\n' "$node"
  ssh "$node" jps
done
xcall
在集群的每个节点上远程执行同一条 bash 指令
#!/bin/bash
#
# xcall: run the same command over ssh on every cluster node.
#
# Usage: xcall COMMAND [ARG...]
#
# All arguments are joined with single spaces into one command line, which is
# echoed and then executed on hadoop102, hadoop103 and hadoop104 in turn.
# Exits with status 1 when no command is given; otherwise the exit status is
# that of the ssh call to the last host.

if [ $# -lt 1 ]
then
  echo "not enough arguments" >&2
  # Without this exit, ssh would be invoked with an empty command on every host.
  exit 1
fi

# Build the command line once; it does not depend on the host. "$*" keeps the
# arguments away from local glob expansion and word splitting.
CMD="$*"

for HOST in hadoop102 hadoop103 hadoop104
do
  printf '========= %s ========\n' "$HOST"
  printf '%s\n' "$CMD"
  ssh "$HOST" "$CMD"
done
xsync
集群同步文件
#!/bin/bash
#
# xsync: copy files or directories to the same absolute path on every cluster
# node, using rsync.
#
# Usage: xsync PATH...
#
# For every existing PATH the physical parent directory is resolved, created
# on the target host with "mkdir -p", and the entry is sent with "rsync -av".
# A PATH that does not exist is reported and skipped.
# Exits with status 1 when no PATH is given.

if [ $# -lt 1 ]
then
  echo 'Not Enough Argument!' >&2
  exit 1
fi

for host in hadoop102 hadoop103 hadoop104
do
  echo "=============== $host ==============="
  for file in "$@"
  do
    if [ -e "$file" ]
    then
      # cd -P resolves symlinks, so pdir is the physical absolute directory.
      # "&&" keeps a failed cd from silently yielding the current directory.
      pdir=$(cd -P -- "$(dirname -- "$file")" && pwd) || {
        echo "cannot resolve directory of $file" >&2
        continue
      }
      fname=$(basename -- "$file")
      # %q quotes the directory for the remote shell (spaces, globs, ...).
      ssh "$host" "mkdir -p -- $(printf '%q' "$pdir")"
      # The destination is an absolute path on the target host, which is why
      # pdir has to be resolved first.
      rsync -av -- "$pdir/$fname" "$host:$pdir"
    else
      echo "$file dose not exists!"
    fi
  done
done
zk
启动、停止 ZooKeeper 集群,或查看其状态
#!/bin/bash
#
# zk: start, stop or query the ZooKeeper server on every cluster node.
#
# Usage: zk start|stop|status
#
# Runs zkServer.sh with the given sub-command on hadoop102, hadoop103 and
# hadoop104 over ssh. An unknown (or missing) sub-command prints a message on
# stderr and exits with status 1.

readonly ZK_SERVER=/opt/module/zookeeper-3.5.9/bin/zkServer.sh

action=${1:-}

# Pick the word shown in the per-host banner; reject anything else up front so
# the host loop below is written only once.
case "$action" in
"start")
  label="启动"
  ;;
"stop")
  label="停止"
  ;;
"status")
  label="状态"
  ;;
*)
  echo "Not exist the instruction" >&2
  exit 1
  ;;
esac

for host in hadoop102 hadoop103 hadoop104
do
  printf '%s\n' "------------ zookeeper $host $label ---------------"
  ssh "$host" "$ZK_SERVER $action"
done
测试自动化脚本
HA集群初始化
启动HA集群
关闭HA集群
以上是关于Hadoop HA 配置文件以及自动化Shell脚本开关HA集群的主要内容,如果未能解决你的问题,请参考以下文章