Hadoop完全分布式集群搭建

Posted y_zilong

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Hadoop完全分布式集群搭建相关的知识,希望对你有一定的参考价值。

Hadoop完全分布式集群搭建

0、准备工作

版本选型:

系统/软件 | 版本
centos | 7.9
java | 1.8.0_291
mysql | 8.0.23
Hadoop | 2.7.7
Hive | 2.3.7

硬件规划:

组件 | 10.0.7.1 | 10.0.7.2 | 10.0.7.3
Java | Y | Y | Y
MySql | Server+Client | N | N
Hive | Client | N | N
Hadoop-YARN | NodeManager | ResourceManager+NodeManager | NodeManager
Hadoop-HDFS | NameNode+DataNode | SecondaryNameNode+DataNode | DataNode

1、初始化环境(每个节点)

#关闭防火墙
systemctl disable --now firewalld.service

#关闭selinux
sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config  && setenforce 0

#修改主机名(每个节点只执行与自己对应的那一条)
hostnamectl set-hostname hadoop01   # 在 10.0.7.1 上执行
hostnamectl set-hostname hadoop02   # 在 10.0.7.2 上执行
hostnamectl set-hostname hadoop03   # 在 10.0.7.3 上执行

#hosts解析
cat >> /etc/hosts <<EOF
10.0.7.1 hadoop01
10.0.7.2 hadoop02
10.0.7.3 hadoop03
EOF
#配置ssh免密登陆
[root@hadoop01 ~]$cat hosts.txt 
10.0.7.1
10.0.7.2
10.0.7.3
[root@hadoop01 ~]$cat key.sh 
#!/bin/bash
#
# key.sh - distribute this host's SSH public key to every host listed in a file.
# Usage: bash key.sh <iplist>
#
# NOTE(review): tracing is on, so every command (including the passwords
# below) is printed while the script runs.
set -x

# Candidate root passwords, tried in order against the hosts.
passwd=("redhat")    # for different passwords: passwd=("abc123" "abc1234")

# Index of the last entry in passwd (array length minus one).
PWD_IDX=$(( ${#passwd[@]} - 1 ))

# Print a red usage hint on stdout and terminate the script with status 1.
usage() {
    echo -e "\t\033[31m You entered an incorrect parameter \033[0m"
    echo -e "\t\033[31m Such as: $(basename "$0") iplist\033[0m"
    exit 1
}


# Make sure the pssh and sshpass commands are available, installing them
# with the distribution's package manager when they are missing.
# The distribution is taken from the NAME= line of /etc/os-release; only
# "Ubuntu" and "CentOS" are handled, anything else is a no-op.
# Returns: 0 on success, 1 if an install or link step fails.
ins_pssh_sshpass() {
    local os_version
    # Split NAME="CentOS Linux" on space, '=' and '"': the third field is the distro.
    os_version=$(awk -F'[ ="]' '/^NAME/{print $3}' /etc/os-release)
    if [ "$os_version" = "Ubuntu" ]; then
        if command -v parallel-ssh >/dev/null 2>&1; then
            echo "pssh is already installed"
        else
            apt -y install pssh || return 1
        fi
        # The rest of the script calls "pssh"; create the alias only when it
        # is missing, so a second run does not fail on an existing link.
        command -v pssh >/dev/null 2>&1 \
            || ln -s /usr/bin/parallel-ssh /usr/bin/pssh \
            || return 1
        if command -v sshpass >/dev/null 2>&1; then
            echo "sshpass is already installed"
        else
            apt -y install sshpass || return 1
        fi
    elif [ "$os_version" = "CentOS" ]; then
        if command -v pssh >/dev/null 2>&1; then
            echo "pssh is already installed"
        else
            yum -y install pssh || return 1
        fi
        if command -v sshpass >/dev/null 2>&1; then
            echo "sshpass is already installed"
        else
            yum -y install sshpass || return 1
        fi
    fi
    return 0
}


# Work out which password opens which host.
# Globals: INPUT_IP_FILE (read), passwd (read), PWD_IDX (read)
# Files (current directory):
#   ip.<n>        hosts for which passwd[n] produced a SUCCESS line from pssh
#   not_check_ip  hosts that no password tried so far has opened
exec_ip() {
    local i
    sort -g "$INPUT_IP_FILE" > not_check_ip
    # Try every configured password, not just the first two.
    for ((i = 0; i <= PWD_IDX; i++)); do
        # Every host already matched: nothing left to probe.
        if [ ! -s not_check_ip ]; then
            continue
        fi
        # Keep the last field of each SUCCESS line reported by pssh.
        sshpass -p"${passwd[$i]}" pssh -iAh not_check_ip -l root -p 100 -O StrictHostKeyChecking=no -t 5 "whoami" \
            | grep SUCCESS \
            | awk '{print $NF}' | sort -g > "ip.$i"
        # -x: whole-line match, so 10.0.7.1 does not also remove 10.0.7.10.
        grep -vxFf "ip.$i" not_check_ip | sort -g > not_check_ip.tmp
        mv not_check_ip.tmp not_check_ip
    done
}


# Copy this user's RSA public key to every host recorded in the ip.<n> files,
# authenticating with the password at the same index in passwd.
# Globals: passwd (read), PWD_IDX (read), HOME (read)
# Returns: 1 if a missing key pair cannot be generated.
exec_key() {
    # Fixed path: the key may not exist yet, so it must not be derived from ls.
    local sshkey="$HOME/.ssh/id_rsa.pub"
    local i ip
    if [ -f "$sshkey" ]; then
        echo "key exists"
    else
        ssh-keygen -t rsa -P "" -f "${sshkey%.pub}" || return 1
    fi
    for ((i = 0; i <= PWD_IDX; i++)); do
        # The first missing ip.<n> file ends the loop.
        if [ ! -e "ip.$i" ]; then
            break
        fi
        if [ -s "ip.$i" ]; then
            echo "---------------------------------------------------"
            # Read the host list on fd 3 so the ssh client cannot swallow the
            # remaining lines from stdin.
            while IFS= read -r -u 3 ip; do
                [ -n "$ip" ] || continue
                # Log the command with the password masked.
                echo "sshpass -p*** ssh-copy-id -i $sshkey -o StrictHostKeyChecking=no root@$ip"
                sshpass -p"${passwd[$i]}" ssh-copy-id -i "$sshkey" -o StrictHostKeyChecking=no "root@$ip"
            done 3< "ip.$i"
        fi
    done
}

# Entry point: key.sh <iplist>
# Requires one argument, the file listing the target hosts.
if [ $# -lt 1 ]; then
    usage
fi
INPUT_IP_FILE=$1
# Abort with a failing status when the helper tools could not be installed;
# a bare "exit" here would report success.
if ! ins_pssh_sshpass; then
    exit 1
fi
exec_ip
exec_key
# Remove the per-password host lists written by exec_ip.
rm -f ip.*

[root@hadoop01 ~]$bash key.sh hosts.txt 
[root@hadoop01 ~]$scp hosts.txt key.sh 10.0.7.2:
[root@hadoop01 ~]$scp hosts.txt key.sh 10.0.7.3:
[root@hadoop02 ~]$bash key.sh hosts.txt
[root@hadoop03 ~]$bash key.sh hosts.txt

 2、安装jdk,每个节点

[root@localhost ~]$ cat install_jdk.sh 
#!/bin/bash
# Directory the script is started from; install_jdk looks for the tarball here.
DIR=$(pwd)
# File name of the JDK tarball.
JDK_FILE='jdk-8u291-linux-x64.tar.gz'
# Directory the tarball is unpacked into.
JDK_DIR='/usr/local'

# Print a message followed by a coloured status tag placed at column 60.
# Arguments:
#   $1 - status: "success" or 0 -> green " OK ", "failure" or 1 -> red "FAILED",
#        anything else -> yellow "WARNING"
#   $2 - message text
# Outputs: one line on stdout: <message><ESC>[60G[<colour><label><reset>]
color() {
    local -r status="$1" message="$2"
    local -r res_col=60
    local tint label
    case "$status" in
        success | 0)
            tint='1;32'
            label=' OK '
            ;;
        failure | 1)
            # Red; the failure tag must not share the success colour.
            tint='1;31'
            label='FAILED'
            ;;
        *)
            tint='1;33'
            label='WARNING'
            ;;
    esac
    # ESC[<n>G moves the cursor to column n; ESC[0m resets the colour.
    printf '%s\033[%dG[\033[%sm%s\033[0m]\n' "$message" "$res_col" "$tint" "$label"
}


# Unpack the JDK tarball into JDK_DIR, link it as $JDK_DIR/jdk and publish
# JAVA_HOME/JRE_HOME/CLASSPATH/PATH through /etc/profile.d/jdk.sh.
# Globals: DIR, JDK_FILE, JDK_DIR (read)
# Exits the script: status 1 when the tarball is missing or a step fails,
# status 0 when $JDK_DIR/jdk already exists.
install_jdk() {
    if ! [ -f "$DIR/$JDK_FILE" ]; then
        color 1 "$JDK_FILE 文件不存在"
        exit 1
    elif [ -d "$JDK_DIR/jdk" ]; then
        color 1 "JDK 已经安装"
        exit 0
    else
        [ -d "$JDK_DIR" ] || mkdir -pv "$JDK_DIR"
    fi

    if ! tar xvf "$DIR/$JDK_FILE" -C "$JDK_DIR"; then
        color 1 "JDK 安装失败"
        exit 1
    fi
    # The glob is intentionally unquoted: it resolves to the unpacked jdk1.8.* directory.
    if ! { cd "$JDK_DIR" && ln -s jdk1.8.* jdk; }; then
        color 1 "JDK 安装失败"
        exit 1
    fi

    # $JDK_DIR is expanded now; the escaped \$ variables stay literal in the
    # generated file and are resolved whenever the profile script is sourced.
    cat > /etc/profile.d/jdk.sh << EOF
export JAVA_HOME=$JDK_DIR/jdk
export JRE_HOME=\$JAVA_HOME/jre
export CLASSPATH=\$JAVA_HOME/lib/:\$JRE_HOME/lib/
export PATH=\$PATH:\$JAVA_HOME/bin
EOF
    source /etc/profile.d/jdk.sh

    if java -version; then
        color 0 "JDK 安装完成"
    else
        color 1 "JDK 安装失败"
        exit 1
    fi
}

install_jdk

[root@localhost ~]$ bash install_jdk.sh
[root@localhost ~]$ source /etc/profile.d/jdk.sh
[root@localhost ~]$ java -version

3、安装mysql,hadoop01节点

[root@localhost ~]$ cat install_mysql-8.0.23.sh

# Init-script helper library; presumably the source of the `action` command
# used by the functions below - confirm on the target system.
. /etc/init.d/functions

# Directory the script is started from; the MySQL tarball is expected here.
SRC_DIR=$(pwd)
#MYSQL='mysql-5.7.29-linux-glibc2.12-x86_64.tar.gz'
MYSQL='mysql-8.0.23-linux-glibc2.12-x86_64.tar.xz'
# Red-message helpers, used as: $COLOR"text"$END
# A single backslash is required so that echo -e sees the \E escape.
COLOR='echo -e \E[01;31m'
END='\E[0m'
# Password given to the MySQL root account after initialisation.
MYSQL_ROOT_PASSWORD=redhat
 
# Pre-flight checks before installing MySQL.
# Globals: SRC_DIR, MYSQL, COLOR, END (read)
# Exits the script with status 1 when the caller is not root, the tarball
# $SRC_DIR/$MYSQL is missing, or /usr/local/mysql already exists.
# Returns: 0 when installation may proceed.
check() {
    if [ "$UID" -ne 0 ]; then
        action "当前用户不是root,安装失败" false
        exit 1
    fi

    cd "$SRC_DIR" || exit 1
    if [ ! -e "$MYSQL" ]; then
        # Braces keep the variable name from running into the following text.
        $COLOR"缺少${MYSQL}文件"$END
        $COLOR"请将相关软件放在${SRC_DIR}目录下"$END
        exit 1
    elif [ -e /usr/local/mysql ]; then
        action "数据库已存在,安装失败" false
        exit 1
    else
        # Explicit 0: a bare return would pass on the failed -e test above.
        return 0
    fi
}
 
 
# Install MySQL from the binary tarball $SRC_DIR/$MYSQL:
# unpack to /usr/local, link it as /usr/local/mysql, create the mysql user and
# /data/mysql, write /etc/my.cnf and a systemd unit, initialise the data
# directory, start the service and replace the temporary root password with
# $MYSQL_ROOT_PASSWORD.
# Globals: SRC_DIR, MYSQL, MYSQL_ROOT_PASSWORD, COLOR, END (read)
# Exits the script with status 1 when unpacking, initialisation or service
# start fails.
install_mysql() {
    local mysql_dir old_password

    $COLOR"开始安装MySQL数据库..."$END
    yum -y -q install libaio numactl-libs &> /dev/null
    cd "$SRC_DIR" || exit 1
    tar xf "$MYSQL" -C /usr/local/ || exit 1
    # Strip everything after the last digit: the archive name without its
    # .tar.xz suffix.
    mysql_dir=$(echo "$MYSQL" | sed -nr 's/^(.*[0-9]).*/\1/p')
    ln -s "/usr/local/$mysql_dir" /usr/local/mysql
    # Provide libtinfo.so.5 only when it is absent and the 6.1 library exists.
    if [ ! -e /usr/lib64/libtinfo.so.5 ] && [ -e /usr/lib64/libtinfo.so.6.1 ]; then
        ln -s /usr/lib64/libtinfo.so.6.1 /usr/lib64/libtinfo.so.5
    fi
    mkdir -p /data/mysql
    id mysql &> /dev/null || useradd -s /sbin/nologin -r mysql
    action "创建mysql用户"
    chown mysql:mysql /data/mysql
    chown -R mysql:mysql /usr/local/mysql/
    echo 'PATH=/usr/local/mysql/bin/:$PATH' > /etc/profile.d/mysql.sh
    . /etc/profile.d/mysql.sh
    ln -s /usr/local/mysql/bin/* /usr/bin/

    cat > /etc/my.cnf << 'EOF'
[mysqld]
user=mysql
server-id=1
basedir=/usr/local/mysql
datadir=/data/mysql
socket=/data/mysql/mysql.sock

log-error=/data/mysql/mysql.log
pid-file=/data/mysql/mysql.pid
character-set-server=utf8mb4

[mysql]
socket=/data/mysql/mysql.sock
default-character-set=utf8mb4

#!includedir /etc/my.cnf.d

EOF

    cat > /etc/systemd/system/mysqld.service << 'EOF'
[Unit]
Description=MySQL Server
After=network.target
After=syslog.target
[Install]
WantedBy=multi-user.target
[Service]
User=mysql
Group=mysql
ExecStart=/usr/local/mysql/bin/mysqld --defaults-file=/etc/my.cnf

EOF

    mysqld --initialize --user=mysql --basedir=/usr/local/mysql --datadir=/data/mysql || exit 1

    # Stop only when the service really failed to start.
    if ! systemctl enable --now mysqld; then
        $COLOR"数据库启动失败,退出!"$END
        exit 1
    fi

    sleep 5

    # mysqld --initialize writes the temporary root password to the error log.
    old_password=$(awk '/A temporary password/{print $NF}' /data/mysql/mysql.log)
    # Quoted: the generated password may contain glob or shell metacharacters.
    mysqladmin -uroot -p"$old_password" password "$MYSQL_ROOT_PASSWORD" &> /dev/null \
        || $COLOR"root密码修改失败,请使用临时密码手动修改"$END
    action "数据库安装完成"
}

check

install_mysql

[root@localhost ~]$bash install_mysql-8.0.23.sh

[root@localhost ~]$awk '/A temporary password/{print $NF}' /data/mysql/mysql.log
>cTyAB.*:2jW
[root@localhost ~]$mysql -uroot -p'>cTyAB.*:2jW'
#更改mysql数据库密码
alter user root@localhost identified by 'redhat';
#更改mysql root用户远程登陆
mysql  -uroot -predhat
show databases;
use mysql;
select user,host from mysql.user;
update user set host='%' where user='root';
flush privileges;
select user,host from mysql.user;

4、安装Hadoop

#下载官网 https://hadoop.apache.org/release/2.7.7.html
[root@hadoop01 ~]$cd /opt/
[root@hadoop01 opt]$ls
hadoop-2.7.7.tar.gz
[root@hadoop01 opt]$tar -xvf hadoop-2.7.7.tar.gz 

#配置hadoop的环境变量
[root@hadoop01 ~]$cat > /etc/profile.d/hadoop.sh <<'EOF'
export HADOOP_HOME=/opt/hadoop-2.7.7  #该目录为解压安装目录
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
EOF
[root@hadoop01 ~]$source /etc/profile.d/hadoop.sh

[root@hadoop01 ~]$hadoop version
Hadoop 2.7.7
Subversion Unknown -r c1aad84bd27cd79c3d1a7dd58202a8c3ee1ed3ac
Compiled by stevel on 2018-07-18T22:47Z
Compiled with protoc 2.5.0
From source with checksum 792e15d20b12c74bd6f19a1fb886490
This command was run using /opt/hadoop-2.7.7/share/hadoop/common/hadoop-common-2.7.7.jar
[root@hadoop01 ~]$

4.1配置hadoop环境脚本文件中的JAVA_HOME参数

#进入hadoop安装目录下的/etc/hadoop目录
[root@hadoop01 ~]$cd /opt/hadoop-2.7.7/etc/hadoop/

#分别在hadoop-env.sh、mapred-env.sh、yarn-env.sh 文件中添加或修改如下参数:
[root@hadoop01 hadoop]$vim hadoop-env.sh 
[root@hadoop01 hadoop]$vim mapred-env.sh 
[root@hadoop01 hadoop]$vim yarn-env.sh 

export JAVA_HOME="/usr/local/jdk"   #路径为jdk的安装路径

4.2修改hadoop配置文件

1、core-site.xml 
[root@hadoop01 hadoop]$vim core-site.xml 

<configuration>

<!-- Default filesystem URI: HDFS on 10.0.7.1, port 8020
     (10.0.7.1 is the node planned as NameNode in this guide). -->
<property>
    <name>fs.defaultFS</name>
    <value>hdfs://10.0.7.1:8020</value>
</property>

</configuration>

2、hdfs-site.xml 
[root@hadoop01 hadoop]$vim hdfs-site.xml 

<configuration>

<!-- HDFS block size in bytes: 268435456 = 256 * 1024 * 1024 (256 MiB). -->
<!-- NOTE(review): the planning table puts the SecondaryNameNode on 10.0.7.2,
     but no property here places it and the jps output in this guide shows it
     on hadoop01 - confirm which is intended. -->
<property>
    <name>dfs.blocksize</name>
    <value>268435456</value>
</property>

</configuration>

3、mapred-site.xml
[root@hadoop01 hadoop]$cp mapred-site.xml.template mapred-site.xml
[root@hadoop01 hadoop]$vim mapred-site.xml

<configuration>
<!-- Run MapReduce jobs on YARN. -->
<property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
</property>
<!-- JobHistory server address: 10.0.7.3, port 10020. -->
<!-- NOTE(review): no step in this guide starts a JobHistory server on
     10.0.7.3 - confirm it is started separately. -->
<property>
    <name>mapreduce.jobhistory.address</name>
    <value>10.0.7.3:10020</value>
</property>
<!-- JobHistory web UI address: 10.0.7.3, port 19888. -->
<property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>10.0.7.3:19888</value>
</property>

</configuration>

4、yarn-site.xml
[root@hadoop01 hadoop]$vim yarn-site.xml

<configuration>

<!-- Site specific YARN configuration properties -->
<!-- Log aggregation is switched off. -->
<property>
    <name>yarn.log-aggregation-enable</name>
    <value>false</value>
</property>
<!-- ResourceManager host: 10.0.7.2, matching the planning table in this guide. -->
<property>
    <name>yarn.resourcemanager.hostname</name>
    <value>10.0.7.2</value>
</property>
<!-- Auxiliary service run by each NodeManager: mapreduce_shuffle. -->
<property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
</property>

</configuration>

4.3在etc/hadoop/slaves 增加子节点,使用IP地址即可

[root@hadoop01 hadoop]$vim slaves 
10.0.7.1
10.0.7.2
10.0.7.3

4.4依次分发配置文件到子节点

[root@hadoop01 hadoop]$rsync -av /opt/hadoop-2.7.7/etc/hadoop/ 10.0.7.2:/opt/hadoop-2.7.7/etc/hadoop/
[root@hadoop01 hadoop]$rsync -av /opt/hadoop-2.7.7/etc/hadoop/ 10.0.7.3:/opt/hadoop-2.7.7/etc/hadoop/

4.5格式化NameNode

[root@hadoop01 hadoop-2.7.7]$cd /opt/hadoop-2.7.7/
[root@hadoop01 hadoop-2.7.7]$./bin/hdfs namenode -format

4.6启动HDFS

[root@hadoop01 hadoop-2.7.7]$./sbin/start-dfs.sh 

4.7启动YARN(在ResourceManager所在节点hadoop02上执行,start-yarn.sh会在执行它的本机启动ResourceManager)

[root@hadoop02 hadoop-2.7.7]$./sbin/start-yarn.sh

4.8验证hadoop启动成功

#主节点
[root@hadoop01 hadoop]$jps
22720 DataNode
23765 NodeManager
22620 NameNode
24236 Jps
22910 SecondaryNameNode

#从节点
[root@hadoop02 opt]$jps
23536 Jps
23153 NodeManager
23046 ResourceManager
21767 DataNode

[root@hadoop03 ~]$jps
22704 NodeManager
22843 Jps
21997 DataNode

以上是关于Hadoop完全分布式集群搭建的主要内容,如果未能解决你的问题,请参考以下文章

Hadoop完全分布式集群搭建

集群搭建Hadoop搭建HDFS(完全分布式)

hadoop-3.3.3完全分布式集群搭建

Hadoop完全分布式环境搭建

完全分布式Hadoop集群搭建

Hadoop完全分布式集群搭建