Hadoop完全分布式集群搭建
Posted y_zilong
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Hadoop完全分布式集群搭建相关的知识,希望对你有一定的参考价值。
Hadoop完全分布式集群搭建
0、准备工作
版本选型:
系统名称 | 版本 |
---|---|
centos | 7.9 |
java | 1.8.0_291 |
mysql | 8.0.23 |
Hadoop | 2.7.7 |
Hive | 2.3.7 |
硬件规划:
组件 | 10.0.7.1 | 10.0.7.2 | 10.0.7.3 |
---|---|---|---|
Java | Y | Y | Y |
MySql | Server+Client | N | N |
Hive | Client | N | N |
Hadoop-YARN | NodeManager | ResourceManager+NodeManager | NodeManager |
Hadoop-HDFS | NameNode+DataNode | SecondaryNameNode+DataNode | DataNode |
1、初始化环境(每个节点)
#关闭防火墙
systemctl disable --now firewalld.service
#关闭selinux
sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config && setenforce 0
#修改主机名
hostnamectl set-hostname hadoop01
hostnamectl set-hostname hadoop02
hostnamectl set-hostname hadoop03
#hosts解析
cat >> /etc/hosts <<EOF
10.0.7.1 hadoop01
10.0.7.2 hadoop02
10.0.7.3 hadoop03
EOF
#配置ssh免密登陆
[root@hadoop01 ~]$cat hosts.txt
10.0.7.1
10.0.7.2
10.0.7.3
[root@hadoop01 ~]$cat key.sh
#!/bin/bash
#
# key.sh - push this host's SSH public key to every host in an IP list file.
# Usage: bash key.sh <ip-list-file>
#
# NOTE: tracing is on, so the passwords below appear in the trace output.
set -x
# Candidate root passwords, tried in order. When hosts use different
# passwords, list them all, e.g. passwd=("abc123" "abc1234").
passwd=("redhat")
# Index of the last candidate password (array length minus one).
PWD_IDX=$((${#passwd[@]} - 1))
# Print a usage hint in red and terminate the script with exit status 1.
usage() {
  echo -e "\t\033[31m You entered an incorrent parameter \033[0m"
  echo -e "\t\033[31m Such as: $(basename "$0") iplist\033[0m"
  exit 1
}
# Install pssh and sshpass with the distribution's package manager if they
# are not installed yet.
# The distribution is taken from the NAME= line of /etc/os-release; only
# "Ubuntu" and "CentOS" are handled, anything else installs nothing.
# Returns: non-zero when an installation step fails.
ins_pssh_sshpass() {
  local os_version
  os_version=$(awk -F'[ ="]' '/^NAME/{print $3}' /etc/os-release)
  if [ "$os_version" = "Ubuntu" ]; then
    if dpkg -l | grep -q pssh; then
      echo "pssh is already installed"
    else
      apt -y install pssh || return 1
    fi
    # The Ubuntu package provides parallel-ssh; expose it as pssh, but only
    # when that name is not present yet so reruns do not fail on the link.
    [ -e /usr/bin/pssh ] || ln -s /usr/bin/parallel-ssh /usr/bin/pssh || return 1
    if dpkg -l | grep -q sshpass; then
      echo "sshpass is already installed"
    else
      apt -y install sshpass || return 1
    fi
  elif [ "$os_version" = "CentOS" ]; then
    if rpm -qa | grep -q pssh; then
      echo "pssh is already installed"
    else
      yum -y install pssh || return 1
    fi
    if rpm -qa | grep -q sshpass; then
      echo "sshpass is already installed"
    else
      yum -y install sshpass || return 1
    fi
  fi
}
# Work out which candidate password each host accepts.
# Globals:  INPUT_IP_FILE (read) - file with one IP per line
#           passwd (read)        - array of candidate root passwords
# Files:    writes ip.<n> with the hosts that accepted passwd[n];
#           not_check_ip holds the hosts that have not accepted any
#           password so far.
exec_ip() {
  local i
  sort -g "$INPUT_IP_FILE" > not_check_ip
  # Try every configured password, not a hard-coded count of two.
  for ((i = 0; i < ${#passwd[@]}; i++)); do
    # Nothing left to probe once every host has been matched.
    if [ ! -s not_check_ip ]; then
      continue
    fi
    # Run "whoami" on the remaining hosts and keep the last field of every
    # line that pssh reports as SUCCESS.
    sshpass -p"${passwd[$i]}" pssh -iAh not_check_ip -l root -p 100 \
      -O StrictHostKeyChecking=no -t 5 "whoami" \
      | grep SUCCESS \
      | awk '{print $NF}' | sort -g > "ip.$i"
    # Drop the hosts that just succeeded from the pending list.
    grep -vFf "ip.$i" not_check_ip | sort -g > not_check_ip.tmp
    mv not_check_ip.tmp not_check_ip
  done
}
# Generate an RSA key pair if none exists, then copy the public key to every
# host listed in the ip.<n> files, using passwd[n] for the hosts in ip.<n>.
# Globals:  passwd (read) - array of candidate root passwords
exec_key() {
  # Use the fixed path rather than the output of ls: when the key does not
  # exist yet, ls prints nothing and the path would stay empty even after
  # ssh-keygen has created the key.
  local sshkey="$HOME/.ssh/id_rsa.pub"
  local i ip arg
  local -a hosts
  if [ -f "$sshkey" ]; then
    echo "key exists"
  else
    ssh-keygen -t rsa -P "" -f "$HOME/.ssh/id_rsa"
  fi
  for ((i = 0; i < ${#passwd[@]}; i++)); do
    # Stop at the first password that has no result file.
    if [ ! -e "ip.$i" ]; then
      break
    fi
    if [ -s "ip.$i" ]; then
      echo "---------------------------------------------------"
      mapfile -t hosts < "ip.$i"
      for ip in "${hosts[@]}"; do
        arg="ssh-copy-id -i $sshkey -o StrictHostKeyChecking=no root@$ip"
        echo "sshpass -p${passwd[$i]} $arg"
        sshpass -p"${passwd[$i]}" ssh-copy-id -i "$sshkey" \
          -o StrictHostKeyChecking=no "root@$ip"
      done
    fi
  done
}
# Entry point: the first argument is the file that lists the target IPs.
if [ $# -lt 1 ]; then
  usage
fi
INPUT_IP_FILE=$1
# Stop with a failure status when the helper tools cannot be installed.
# A bare "exit" after the status test would report success.
ins_pssh_sshpass || exit 1
exec_ip
exec_key
# Remove the per-password host lists written by exec_ip.
rm -f ip.*
[root@hadoop01 ~]$bash key.sh hosts.txt
[root@hadoop01 ~]$scp hosts.txt key.sh 10.0.7.2:
[root@hadoop01 ~]$scp hosts.txt key.sh 10.0.7.3:
[root@hadoop02 ~]$bash key.sh hosts.txt
[root@hadoop03 ~]$bash key.sh hosts.txt
2、安装jdk,每个节点
[root@localhost ~]$ cat install_jdk.sh
#!/bin/bash
# Directory the installer is started from.
DIR="$(pwd)"
# Name of the JDK archive.
JDK_FILE='jdk-8u291-linux-x64.tar.gz'
# Directory the archive is unpacked into.
JDK_DIR='/usr/local'
# Print a message followed by a coloured status tag at a fixed column.
# Arguments:
#   $1 - status: "success" or "0" prints a green " OK ", "failure" or "1"
#        prints a red "FAILED", anything else prints a yellow "WARNING"
#   $2 - message text
# Outputs: one line on stdout, "<message>[<tag>]", with ANSI escapes.
color() {
  local -r res_col=60
  local status=$1 message=$2
  local tint label
  case "$status" in
    success | 0)
      tint=$'\033[1;32m'
      label=' OK '
      ;;
    failure | 1)
      # Red; the green code used for success must not be reused here.
      tint=$'\033[1;31m'
      label='FAILED'
      ;;
    *)
      tint=$'\033[1;33m'
      label='WARNING'
      ;;
  esac
  # ESC[<n>G moves the cursor to column n so the tags line up.
  printf '%s\033[%dG[' "$message" "$res_col"
  printf '%s%s\033[0m]\n' "$tint" "$label"
}
# Unpack the JDK archive, link it as $JDK_DIR/jdk and write the environment
# settings to /etc/profile.d/jdk.sh.
# Globals:  DIR (read)      - directory that holds the archive
#           JDK_FILE (read) - archive file name
#           JDK_DIR (read)  - installation parent directory
# Exits the script: status 0 when "java -version" works afterwards,
# status 1 on any failure or when $JDK_DIR/jdk already exists.
install_jdk() {
  if ! [ -f "$DIR/$JDK_FILE" ]; then
    color 1 "$JDK_FILE 文件不存在"
    exit 1
  elif [ -d "$JDK_DIR/jdk" ]; then
    color 1 "JDK 已经安装"
    exit 1
  else
    [ -d "$JDK_DIR" ] || mkdir -pv "$JDK_DIR"
  fi
  if ! tar xvf "$DIR/$JDK_FILE" -C "$JDK_DIR"; then
    color 1 "JDK 安装失败"
    exit 1
  fi
  # Version-independent path: link the unpacked jdk1.8.* directory as "jdk".
  cd "$JDK_DIR" && ln -s jdk1.8.* jdk
  # $JDK_DIR is expanded now; the escaped variables are written literally
  # and are expanded whenever the profile script is sourced.
  cat > /etc/profile.d/jdk.sh << EOF
export JAVA_HOME=$JDK_DIR/jdk
export JRE_HOME=\$JAVA_HOME/jre
export CLASSPATH=\$JAVA_HOME/lib/:\$JRE_HOME/lib/
export PATH=\$PATH:\$JAVA_HOME/bin
EOF
  source /etc/profile.d/jdk.sh
  if java -version; then
    color 0 "JDK 安装完成"
    exit 0
  else
    color 1 "JDK 安装失败"
    exit 1
  fi
}
install_jdk
[root@localhost ~]$ bash install_jdk.sh
[root@localhost ~]$ source /etc/profile.d/jdk.sh
[root@localhost ~]$ java -version
3、安装mysql,hadoop01节点
[root@localhost ~]$ cat install_mysql-8.0.23.sh
# Provides the "action" status helper used below.
. /etc/init.d/functions
# Directory the installer is started from; the MySQL archive must be here.
SRC_DIR=$(pwd)
#MYSQL='mysql-5.7.29-linux-glibc2.12-x86_64.tar.gz'
MYSQL='mysql-8.0.23-linux-glibc2.12-x86_64.tar.xz'
# Message prefix and suffix, used as $COLOR"text"$END: prints the text in
# bold red and resets the colour afterwards. A single backslash is required
# so that "echo -e" sees the \E escape.
COLOR='echo -e \E[01;31m'
END='\E[0m'
# Password set for the MySQL root account once installation has finished.
MYSQL_ROOT_PASSWORD=redhat
# Verify the preconditions for installing MySQL.
# Globals:  SRC_DIR, MYSQL, COLOR, END (read)
# Exits the script with status 1 when the caller is not root, the archive
# is missing from $SRC_DIR, or /usr/local/mysql already exists.
# Returns normally when the installation may proceed.
check() {
  if [ "$UID" -ne 0 ]; then
    action "当前用户不是root,安装失败" false
    exit 1
  fi
  cd "$SRC_DIR" || exit 1
  if [ ! -e "$MYSQL" ]; then
    $COLOR"缺少${MYSQL}文件"$END
    $COLOR"请将相关软件放在${SRC_DIR}目录下"$END
    exit 1
  elif [ -e /usr/local/mysql ]; then
    action "数据库已存在,安装失败" false
    exit 1
  else
    return
  fi
}
# Install MySQL from the binary archive in $SRC_DIR: unpack it to /usr/local,
# create the mysql user and /data/mysql, write /etc/my.cnf and a systemd
# unit, initialise the data directory, start the service and replace the
# generated temporary root password with $MYSQL_ROOT_PASSWORD.
# Globals:  SRC_DIR, MYSQL, MYSQL_ROOT_PASSWORD, COLOR, END (read)
#           MYSQL_DIR, MYSQL_OLDPASSWORD (written)
# Exits the script with status 1 when unpacking or starting the service fails.
install_mysql() {
  $COLOR"开始安装MySQL数据库..."$END
  yum -y -q install libaio numactl-libs &> /dev/null
  cd "$SRC_DIR" || exit 1
  tar xf "$MYSQL" -C /usr/local/ || exit 1
  # Archive name up to its last digit, i.e. without the .tar.* suffix; this
  # is the directory name inside the archive.
  MYSQL_DIR=$(echo "$MYSQL" | sed -nr 's/^(.*[0-9]).*/\1/p')
  ln -s "/usr/local/$MYSQL_DIR" /usr/local/mysql
  # Compatibility link for the mysql client; only created when the newer
  # library exists and the old name is free, so no dangling link is left.
  if [ -e /usr/lib64/libtinfo.so.6.1 ] && [ ! -e /usr/lib64/libtinfo.so.5 ]; then
    ln -s /usr/lib64/libtinfo.so.6.1 /usr/lib64/libtinfo.so.5
  fi
  mkdir -p /data/mysql
  # Create the service account only when it does not exist yet.
  if ! id mysql &> /dev/null; then
    useradd -s /sbin/nologin -r mysql
    action "创建mysql用户"
  fi
  chown mysql:mysql /data/mysql
  chown -R mysql:mysql /usr/local/mysql/
  echo 'PATH=/usr/local/mysql/bin/:$PATH' > /etc/profile.d/mysql.sh
  . /etc/profile.d/mysql.sh
  ln -s /usr/local/mysql/bin/* /usr/bin/
  cat > /etc/my.cnf << 'EOF'
[mysqld]
user=mysql
server-id=1
basedir=/usr/local/mysql
datadir=/data/mysql
socket=/data/mysql/mysql.sock
log-error=/data/mysql/mysql.log
pid-file=/data/mysql/mysql.pid
character-set-server=utf8mb4
[mysql]
socket=/data/mysql/mysql.sock
default-character-set=utf8mb4
#!includedir /etc/my.cnf.d
EOF
  cat > /etc/systemd/system/mysqld.service << 'EOF'
[Unit]
Description=MySQL Server
After=network.target
After=syslog.target
[Install]
WantedBy=multi-user.target
[Service]
User=mysql
Group=mysql
ExecStart=/usr/local/mysql/bin/mysqld --defaults-file=/etc/my.cnf
EOF
  mysqld --initialize --user=mysql --basedir=/usr/local/mysql --datadir=/data/mysql
  # Abort only when the service really failed to start.
  if ! systemctl enable --now mysqld; then
    $COLOR"数据库启动失败,退出!"$END
    exit 1
  fi
  sleep 5
  # --initialize logs a generated root password; take the last field of
  # that log line.
  MYSQL_OLDPASSWORD=$(awk '/A temporary password/{print $NF}' /data/mysql/mysql.log)
  # Quoted because the generated password may contain shell metacharacters.
  mysqladmin -uroot -p"$MYSQL_OLDPASSWORD" password "$MYSQL_ROOT_PASSWORD" &> /dev/null
  action "数据库安装完成"
}
check
install_mysql
[root@localhost ~]$bash install_mysql-8.0.23.sh
[root@localhost ~]$awk '/A temporary password/{print $NF}' /data/mysql/mysql.log
>cTyAB.*:2jW
[root@localhost ~]$mysql -uroot -p'>cTyAB.*:2jW'
#更改mysql数据库密码
alter user root@localhost identified by 'redhat';
#更改mysql root用户远程登陆
mysql -uroot -predhat
show databases;
use mysql;
select user,host from mysql.user;
update user set host='%' where user='root';
flush privileges;
select user,host from mysql.user;
4、安装Hadoop
#下载官网 https://hadoop.apache.org/release/2.7.7.html
[root@hadoop01 ~]$cd /opt/
[root@hadoop01 opt]$ls
hadoop-2.7.7.tar.gz
[root@hadoop01 opt]$tar -xvf hadoop-2.7.7.tar.gz
#配置hadoop的环境变量
# The heredoc delimiter is quoted so that $PATH and $HADOOP_HOME are written literally
# and are expanded when the profile script is sourced, not when it is created.
[root@hadoop01 ~]$cat > /etc/profile.d/hadoop.sh <<'EOF'
export HADOOP_HOME=/opt/hadoop-2.7.7 #该目录为解压安装目录
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
EOF
[root@hadoop01 ~]$source /etc/profile.d/hadoop.sh
[root@hadoop01 ~]$hadoop version
Hadoop 2.7.7
Subversion Unknown -r c1aad84bd27cd79c3d1a7dd58202a8c3ee1ed3ac
Compiled by stevel on 2018-07-18T22:47Z
Compiled with protoc 2.5.0
From source with checksum 792e15d20b12c74bd6f19a1fb886490
This command was run using /opt/hadoop-2.7.7/share/hadoop/common/hadoop-common-2.7.7.jar
[root@hadoop01 ~]$
4.1配置hadoop环境脚本文件中的JAVA_HOME参数
#进入hadoop安装目录下的/etc/hadoop目录
[root@hadoop01 ~]$cd /opt/hadoop-2.7.7/etc/hadoop/
#分别在hadoop-env.sh、mapred-env.sh、yarn-env.sh 文件中添加或修改如下参数:
[root@hadoop01 hadoop]$vim hadoop-env.sh
[root@hadoop01 hadoop]$vim mapred-env.sh
[root@hadoop01 hadoop]$vim yarn-env.sh
export JAVA_HOME="/usr/local/jdk" #路径为jdk的安装路径
4.2修改hadoop配置文件
1、core-site.xml
[root@hadoop01 hadoop]$vim core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://10.0.7.1:8020</value>
</property>
</configuration>
2、hdfs-site.xml
[root@hadoop01 hadoop]$vim hdfs-site.xml
<configuration>
<property>
<name>dfs.blocksize</name>
<value>268435456</value>
</property>
</configuration>
3、mapred-site.xml
[root@hadoop01 hadoop]$cp mapred-site.xml.template mapred-site.xml
[root@hadoop01 hadoop]$vim mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>10.0.7.3:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>10.0.7.3:19888</value>
</property>
</configuration>
4、yarn-site.xml
[root@hadoop01 hadoop]$vim yarn-site.xml
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.log-aggregation-enable</name>
<value>false</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>10.0.7.2</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
</configuration>
4.3在etc/hadoop/slaves 增加子节点,使用IP地址即可
[root@hadoop01 hadoop]$vim slaves
10.0.7.1
10.0.7.2
10.0.7.3
4.4依次分发配置文件到子节点
[root@hadoop01 hadoop]$rsync -av /opt/hadoop-2.7.7/etc/hadoop/ 10.0.7.2:/opt/hadoop-2.7.7/etc/hadoop/
[root@hadoop01 hadoop]$rsync -av /opt/hadoop-2.7.7/etc/hadoop/ 10.0.7.3:/opt/hadoop-2.7.7/etc/hadoop/
4.5格式化NameNode
[root@hadoop01 hadoop-2.7.7]$cd /opt/hadoop-2.7.7/
[root@hadoop01 hadoop-2.7.7]$./bin/hdfs namenode -format
4.6启动HDFS
[root@hadoop01 hadoop-2.7.7]$./sbin/start-dfs.sh
4.7启动YARN
[root@hadoop01 hadoop-2.7.7]$./sbin/start-yarn.sh
4.8验证hadoop启动成功
#主节点
[root@hadoop01 hadoop]$jps
22720 DataNode
23765 NodeManager
22620 NameNode
24236 Jps
22910 SecondaryNameNode
#从节点
[root@hadoop02 opt]$jps
23536 Jps
23153 NodeManager
23046 ResourceManager
21767 DataNode
[root@hadoop03 ~]$jps
22704 NodeManager
22843 Jps
21997 DataNode
以上是关于Hadoop完全分布式集群搭建的主要内容,如果未能解决你的问题,请参考以下文章