1. Install CentOS 7.3; the minimal (mini) ISO is sufficient
2. Disable SELinux
* Edit /etc/selinux/config
vi /etc/selinux/config
SELINUX=disabled
* Reboot the machine, then check the SELinux status
sestatus
# SELinux status: disabled
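If you prefer a non-interactive edit, a sed one-liner does the same thing (a sketch; it assumes the file still contains the default SELINUX=enforcing line):
sed -i 's/^SELINUX=enforcing$/SELINUX=disabled/' /etc/selinux/config
# Optionally stop enforcement immediately, without waiting for the reboot
setenforce 0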
3. Disable Firewalld
systemctl stop firewalld
systemctl disable firewalld
firewall-cmd --state
4. Stop the NetworkManager service
# Stop the service
service NetworkManager stop
# Disable the service so it does not start automatically on the next boot
chkconfig NetworkManager off
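On CentOS 7 the service and chkconfig commands are forwarded to systemd, so the native equivalents below achieve the same result; either form works:
systemctl stop NetworkManager
systemctl disable NetworkManager
# Verify; should print "disabled"
systemctl is-enabled NetworkManager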
* Install the EPEL repository
yum install -y epel-release
* Check the repo list
yum repolist
5. Update the hostname on each node (Master01 is shown below; use each machine's name from the table) so the nodes can reach each other by hostname, and keep the /etc/hosts file identical across the cluster
hostnamectl --static --transient set-hostname Master01
vi /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
10.0.0.14 Master01
10.0.0.13 Master02
10.0.0.3 Master03
10.0.0.12 SparkNode01
10.0.0.7 SparkNode02
10.0.0.10 SparkNode03
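After distributing the same /etc/hosts file to every machine, a quick loop like the following (a sketch using the hostnames above) confirms that every name resolves and answers:
for h in Master01 Master02 Master03 SparkNode01 SparkNode02 SparkNode03; do
  ping -c 1 -W 1 $h > /dev/null && echo "$h ok" || echo "$h FAIL"
done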
6. Install the JDK
mkdir -p /usr/local/java
cp /opt/jdk-8u121-linux-x64.tar.gz /usr/local/java/
cd /usr/local/java
tar -zxvf jdk-8u121-linux-x64.tar.gz
vi /etc/profile
Append:
JAVA_HOME=/usr/local/java/jdk1.8.0_121
CLASSPATH=$JAVA_HOME/lib/
PATH=$PATH:$JAVA_HOME/bin
export PATH JAVA_HOME CLASSPATH
source /etc/profile
java -version
7. Set up passwordless SSH login
1) On all nodes, delete everything in ~/.ssh and start fresh:
rm -rf ~/.ssh/*
mkdir -p ~/.ssh/
cd ~/.ssh/
2) Generate a key pair on every node
ssh-keygen -t rsa
chmod 700 ~/.ssh
chmod 600 ~/.ssh/id_rsa
3) On every node, append the public key to the authorized keys file
cat id_rsa.pub >> authorized_keys
Edit the authorized_keys file with vi so that every node's file contains the public keys of all the other nodes
vi ~/.ssh/authorized_keys
chmod 700 ~/.ssh
chmod 600 ~/.ssh/authorized_keys
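Instead of pasting keys around by hand in vi, ssh-copy-id can distribute the public key; the loop below is a sketch to run once on every node (it prompts for the root password of each target):
for h in Master01 Master02 Master03 SparkNode01 SparkNode02 SparkNode03; do
  # Appends ~/.ssh/id_rsa.pub to the target's authorized_keys
  ssh-copy-id root@$h
done
# Verify: this should print the remote hostname without asking for a password
ssh Master02 hostname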
8. Install ZooKeeper
cd /opt
wget http://mirrors.tuna.tsinghua.edu.cn/apache/zookeeper/zookeeper-3.4.10/zookeeper-3.4.10.tar.gz
tar -zvxf zookeeper-3.4.10.tar.gz
mv zookeeper-3.4.10 zookeeper
vi /etc/profile
Append:
#zookeeper
export ZOOKEEPER_HOME=/opt/zookeeper
export PATH=$PATH:$ZOOKEEPER_HOME/bin
source /etc/profile
cd /opt/zookeeper
mkdir data
mkdir log
cd conf
cp zoo_sample.cfg zoo.cfg
vi zoo.cfg
Modify:
# Data directory
dataDir=/opt/zookeeper/data
# Transaction log directory
dataLogDir=/opt/zookeeper/log
Append at the end:
# Ensemble member addresses
server.1=Master01:2888:3888
server.2=Master02:2888:3888
server.3=Master03:2888:3888
cd ..
On Master01, run:
echo 1 > data/myid
On Master02, run:
echo 2 > data/myid
On Master03, run:
echo 3 > data/myid
zkServer.sh start
zkServer.sh status
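To confirm the whole ensemble from one place, the four-letter srvr command reports each server's role; this sketch assumes nc (from the nmap-ncat package) is installed:
for h in Master01 Master02 Master03; do
  echo -n "$h: "
  # Expect one "leader" and two "follower" lines across the three nodes
  echo srvr | nc $h 2181 | grep Mode
done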
9. Install the Hadoop 2.7 cluster
cd /opt
wget http://mirror.bit.edu.cn/apache/hadoop/common/hadoop-2.7.5/hadoop-2.7.5.tar.gz
tar xzvf hadoop-2.7.5.tar.gz
mv hadoop-2.7.5 hadoop
cd hadoop/etc/hadoop/
vi hadoop-env.sh
Modify:
export JAVA_HOME=/usr/local/java/jdk1.8.0_121
export HADOOP_CONF_DIR=/opt/hadoop/etc/hadoop
vi core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://ns</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/hadoop/tmp</value>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>Master01:2181,Master02:2181,Master03:2181</value>
</property>
</configuration>
vi hdfs-site.xml
<configuration>
<!--The HDFS nameservice is ns; keep consistent with core-site.xml-->
<property>
<name>dfs.nameservices</name>
<value>ns</value>
</property>
<!--ns has two namenodes, nn1 and nn2-->
<property>
<name>dfs.ha.namenodes.ns</name>
<value>nn1,nn2</value>
</property>
<!--RPC address of nn1-->
<property>
<name>dfs.namenode.rpc-address.ns.nn1</name>
<value>Master01:9000</value>
</property>
<!--HTTP address of nn1-->
<property>
<name>dfs.namenode.http-address.ns.nn1</name>
<value>Master01:50070</value>
</property>
<!--RPC address of nn2-->
<property>
<name>dfs.namenode.rpc-address.ns.nn2</name>
<value>Master02:9000</value>
</property>
<!--HTTP address of nn2-->
<property>
<name>dfs.namenode.http-address.ns.nn2</name>
<value>Master02:50070</value>
</property>
<!--Where the namenode metadata (edits) is stored on the JournalNodes; the standby namenode reads the latest state from the JournalNode cluster, giving a hot standby-->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://SparkNode01:8485;SparkNode02:8485;SparkNode03:8485/ns</value>
</property>
<!--Where the JournalNodes store their data-->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/opt/hadoop/journal</value>
</property>
<!--Enable automatic failover when a namenode fails-->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<!--Failover proxy implementation-->
<property>
<name>dfs.client.failover.proxy.provider.ns</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!--Fencing method-->
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
</property>
<!--Location of the SSH private key used for fencing-->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/root/.ssh/id_rsa</value>
</property>
<!--Where the namenode stores its data; optional. If unset, the hadoop.tmp.dir path from core-site.xml is used-->
<property>
<name>dfs.namenode.name.dir</name>
<value>file:///opt/hadoop/tmp/namenode</value>
</property>
</configuration>
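Before copying the configuration to the other nodes, it can be sanity-checked locally; hdfs getconf reads the files just edited:
/opt/hadoop/bin/hdfs getconf -confKey dfs.nameservices
# expect: ns
/opt/hadoop/bin/hdfs getconf -namenodes
# expect: Master01 Master02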
cp mapred-site.xml.template mapred-site.xml
vi mapred-site.xml
Add inside <configuration>:
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
vi yarn-site.xml
<configuration>
<!-- Site specific YARN configuration properties -->
<!--Enable YARN HA-->
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<!--Logical IDs of the two resourcemanagers-->
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<!--Hosts for rm1 and rm2-->
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>Master01</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>Master03</value>
</property>
<!--Enable the YARN recovery mechanism-->
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<!--ZooKeeper quorum address-->
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>Master01:2181,Master02:2181,Master03:2181</value>
<description>For multiple zk services, separate them with comma</description>
</property>
<!--Cluster ID for YARN HA-->
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>yarn-ha</value>
</property>
<!--Hostname of the ResourceManager-->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>Master01</value>
</property>
<!--How NodeManagers serve intermediate (shuffle) data-->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.nodemanager.pmem-check-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
</configuration>
vi slaves
SparkNode01
SparkNode02
SparkNode03
Configure the Hadoop environment variables (optional)
JAVA_HOME=/usr/local/java/jdk1.8.0_121
HADOOP_HOME=/opt/hadoop
CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
PATH=$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
export JAVA_HOME PATH CLASSPATH HADOOP_HOME
Create the directories referenced by the configuration files to hold the corresponding data
mkdir -p /opt/hadoop/journal
mkdir -p /opt/hadoop/tmp
mkdir -p /opt/hadoop/tmp/namenode
mkdir -p /opt/hadoop/tmp/datanode
Use scp to copy the Hadoop installation directory to the other 5 machines.
For example, to the Master02 node:
scp -r hadoop Master02:/opt/
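The copy can be repeated for all remaining nodes with a loop; this sketch relies on the passwordless SSH from step 7 and also syncs /etc/profile and /etc/hosts so the environment matches everywhere:
for h in Master02 Master03 SparkNode01 SparkNode02 SparkNode03; do
  scp -r /opt/hadoop $h:/opt/
  scp /etc/profile /etc/hosts $h:/etc/
done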
Start the ZooKeeper cluster
In the bin directory of the ZooKeeper installation, run:
sh zkServer.sh start
Check the status with sh zkServer.sh status
Format ZooKeeper for HA
On the ZooKeeper leader node, run:
cd /opt/hadoop/bin
./hdfs zkfc -formatZK
This command creates the HA znode (the ns node) on the ZooKeeper cluster.
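The result can be checked with the ZooKeeper CLI; after a successful format, a hadoop-ha znode containing ns should exist:
/opt/zookeeper/bin/zkCli.sh -server Master01:2181 ls /hadoop-ha
# expected output: [ns]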
Enter the sbin directory of the Hadoop installation and run the following (this also brings up the JournalNodes, which must be running before the NameNode can be formatted):
cd /opt/hadoop/sbin
./start-dfs.sh
On Master01, format the NameNode:
cd /opt/hadoop/bin
./hdfs namenode -format
On the Master01 node, run:
hadoop-daemon.sh start namenode
Turn the NameNode on Master02 into a standby NameNode
On Master02, run:
hdfs namenode -bootstrapStandby
Start the NameNode on Master02
On Master02, run:
hadoop-daemon.sh start namenode
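With both NameNodes up, their HA roles can be queried; one should report active and the other standby:
/opt/hadoop/bin/hdfs haadmin -getServiceState nn1
/opt/hadoop/bin/hdfs haadmin -getServiceState nn2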
Start the primary ResourceManager on Master01
On Master01, run:
start-yarn.sh
After a successful start, the SparkNode01, SparkNode02, and SparkNode03 nodes should each be running a NodeManager process.
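A quick way to confirm this is to run jps over SSH on each worker (a sketch; the full jps path is used because non-interactive SSH shells may not source /etc/profile):
for h in SparkNode01 SparkNode02 SparkNode03; do
  echo "== $h =="
  ssh $h "/usr/local/java/jdk1.8.0_121/bin/jps | grep -E 'NodeManager|DataNode|JournalNode'"
done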
Start the standby ResourceManager on Master03
On Master03, run:
yarn-daemon.sh start resourcemanager
Test
Open http://ip:50070 to view the NameNode information; it should be in the active state.
Then stop the NameNode on Master01; you should see the standby NameNode become active.
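A sketch of that failover test, run from Master01:
# Stop the active NameNode on Master01
/opt/hadoop/sbin/hadoop-daemon.sh stop namenode
# Expect: active (automatic failover succeeded)
/opt/hadoop/bin/hdfs haadmin -getServiceState nn2
# Bring Master01 back; it rejoins as the standby
/opt/hadoop/sbin/hadoop-daemon.sh start namenode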
View the YARN web UI:
http://192.168.234.21:8088 (port 8088 on the Master01 node)
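The ResourceManager HA state can also be checked from the command line:
/opt/hadoop/bin/yarn rmadmin -getServiceState rm1
# expect: active
/opt/hadoop/bin/yarn rmadmin -getServiceState rm2
# expect: standby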