Automated setup script for a two-node Hadoop cluster & Hive

Posted by y_zilong


Environment initialization script

JDK installation script

Two-node Hadoop installation script

Hive installation script

cat install_jdk-hadoop-hive.sh
#!/bin/bash
DIR=/usr/local/src
JDK_FILE="jdk-8u291-linux-x64.tar.gz"
JDK_DIR="/usr/local"

# Hadoop node IP addresses
HADOOPnode1=10.0.7.1
HADOOPnode2=10.0.7.2

# For the multi-node Hadoop install, run this script on hadoop2 first, then on hadoop1

HADOOP_FILE='hadoop-2.7.7.tar.gz'
HADOOP_DIR='/usr/local'

HIVE_FILE='apache-hive-2.3.9-bin.tar.gz'
HIVE_DIR='/usr/local'
mysqlip=10.0.7.1   # MySQL server that hosts the Hive metastore
HIVEmima=hive      # password for the 'hive' MySQL user

# Before installing Hive, prepare the MySQL database first; when re-initializing, the hive database must be emptied
#create user 'hive' identified by 'hive';
#grant all privileges on *.* to 'hive'@'%' with grant option;
#create database hive;
#alter database hive character set latin1;
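# A minimal non-interactive way to apply the statements above (assuming a root MySQL
# account on the metastore host; adjust the credentials to your environment):
# mysql -uroot -p -e "create user 'hive' identified by 'hive'; grant all privileges on *.* to 'hive'@'%' with grant option; create database hive; alter database hive character set latin1;"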


color () {
    RES_COL=60
    MOVE_TO_COL="echo -en \\033[${RES_COL}G"
    SETCOLOR_SUCCESS="echo -en \\033[1;32m"
    SETCOLOR_FAILURE="echo -en \\033[1;31m"
    SETCOLOR_WARNING="echo -en \\033[1;33m"
    SETCOLOR_NORMAL="echo -en \\E[0m"
    echo -n "$2" && $MOVE_TO_COL
    echo -n "["
    if [ $1 = "success" -o $1 = "0" ] ;then
	    $SETCOLOR_SUCCESS
	    echo -n $" OK "
    elif [ $1 = "failure" -o $1 = "1" ] ; then
	    $SETCOLOR_FAILURE
	    echo -n $"FAILED"
    else
	    $SETCOLOR_WARNING
	    echo -n $"WARNING"
    fi
    $SETCOLOR_NORMAL
    echo -n "]"
    echo
}

initialize () {
sed -i '/SELINUX=enforcing/c SELINUX=disabled' /etc/selinux/config && setenforce 0
systemctl disable --now firewalld
}

install_jdk () {
if ! [ -f "$DIR/$JDK_FILE" ] ;then
	color 1 "$JDK_FILE not found"
	exit;
elif [ -d $JDK_DIR/jdk ] ;then
	color 1 "JDK already installed"
        exit;
else  
	[ -d "$JDK_DIR" ] || mkdir -pv $JDK_DIR
fi

tar xvf $DIR/$JDK_FILE -C $JDK_DIR
cd $JDK_DIR && ln -s jdk1.8.* jdk

cat > /etc/profile.d/jdk.sh << EOF
export JAVA_HOME=$JDK_DIR/jdk
export JRE_HOME=\$JAVA_HOME/jre
export CLASSPATH=\$JAVA_HOME/lib/:\$JRE_HOME/lib/
export PATH=\$PATH:\$JAVA_HOME/bin
EOF
source /etc/profile.d/jdk.sh
java -version && color 0 "JDK installed successfully" || { color 1 "JDK installation failed"; exit; }

sleep 3

source /etc/profile.d/jdk.sh
yum install -y rsync
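# Assumption not handled by this script: the rsync step and start-dfs.sh/start-yarn.sh below
# rely on passwordless SSH for root between hadoop1 and hadoop2. If it is not set up yet,
# something like the following (run on hadoop1) is usually sufficient:
# ssh-keygen -t rsa
# ssh-copy-id root@hadoop1
# ssh-copy-id root@hadoop2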

echo $HADOOPnode1 hadoop1 >> /etc/hosts
echo $HADOOPnode2 hadoop2 >> /etc/hosts
}


install_hadoop2 () {
hostnamectl set-hostname hadoop2

if ! [ -f "$DIR/$HADOOP_FILE" ] ;then
        color 1 "$HADOOP_FILE not found"
        wget -P $DIR https://archive.apache.org/dist/hadoop/common/hadoop-2.7.7/hadoop-2.7.7.tar.gz
        exit;
elif [ -d $HADOOP_DIR/hadoop ] ;then
        color 1 "Hadoop already installed"
        exit;
else
        [ -d "$HADOOP_DIR" ] || mkdir -pv $HADOOP_DIR
fi

tar -xvf $DIR/$HADOOP_FILE -C $HADOOP_DIR

cd $HADOOP_DIR && ln -sv hadoop* hadoop

cat > /etc/profile.d/hadoop.sh << EOF
export HADOOP_HOME=$HADOOP_DIR/hadoop
export PATH=\$PATH:\$HADOOP_HOME/bin
export PATH=\$PATH:\$HADOOP_HOME/sbin
export HADOOP_CONF_DIR=\$HADOOP_HOME/etc/hadoop
EOF

source /etc/profile.d/hadoop.sh
hadoop version && color 0 "Hadoop2 installed successfully" || { color 1 "Hadoop installation failed"; exit; }

sleep 3
source /etc/profile.d/hadoop.sh
}


install_hadoop1 () {

hostnamectl set-hostname hadoop1

if ! [ -f "$DIR/$HADOOP_FILE" ] ;then
	color 1 "$HADOOP_FILE not found"
	wget -P $DIR https://archive.apache.org/dist/hadoop/common/hadoop-2.7.7/hadoop-2.7.7.tar.gz
	exit;
elif [ -d $HADOOP_DIR/hadoop ] ;then
	color 1 "Hadoop already installed"
        exit;
else  
	[ -d "$HADOOP_DIR" ] || mkdir -pv $HADOOP_DIR
fi

tar -xvf $DIR/$HADOOP_FILE -C $HADOOP_DIR

cd $HADOOP_DIR && ln -sv hadoop* hadoop

cat > /etc/profile.d/hadoop.sh << EOF
export HADOOP_HOME=$HADOOP_DIR/hadoop
export PATH=\$PATH:\$HADOOP_HOME/bin
export PATH=\$PATH:\$HADOOP_HOME/sbin
export HADOOP_CONF_DIR=\$HADOOP_HOME/etc/hadoop
EOF

source /etc/profile.d/hadoop.sh

hadoop version && color 0 "Hadoop1 installed successfully" || { color 1 "Hadoop installation failed"; exit; }

sleep 3
source /etc/profile.d/hadoop.sh
# Set the JAVA_HOME parameter in the Hadoop environment scripts
mkdir $HADOOP_DIR/hadoop/tmp
cd $HADOOP_DIR/hadoop/etc/hadoop/

sed -i 's|export JAVA_HOME=${JAVA_HOME}|export JAVA_HOME="/usr/local/jdk"|' $HADOOP_DIR/hadoop/etc/hadoop/hadoop-env.sh

echo export JAVA_HOME="/usr/local/jdk" >>  $HADOOP_DIR/hadoop/etc/hadoop/mapred-env.sh
echo export JAVA_HOME="/usr/local/jdk" >>  $HADOOP_DIR/hadoop/etc/hadoop/yarn-env.sh

# Edit the Hadoop configuration files

#core-site.xml
cat > $HADOOP_DIR/hadoop/etc/hadoop/core-site.xml << EOF
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>
<!-- Default file system; Hadoop supports file, HDFS, GFS, Aliyun/Amazon cloud storage, etc. -->
<property>
    <name>fs.defaultFS</name>
    <value>hdfs://$HADOOPnode1:8020</value>
</property>
<property>
    <!-- Directory for temporary files; the tmp directory is created under the Hadoop install dir earlier in this script -->
    <name>hadoop.tmp.dir</name>
    <value>$HADOOP_DIR/hadoop/tmp</value>
</property>

<property>
    <name>hadoop.proxyuser.root.hosts</name>
    <value>*</value>
</property>
<property>
    <name>hadoop.proxyuser.root.groups</name>
    <value>*</value>
</property>

</configuration>
EOF
# hdfs-site.xml: HDFS module settings

cat > $HADOOP_DIR/hadoop/etc/hadoop/hdfs-site.xml << EOF
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- Host where the SecondaryNameNode (SNN) process runs -->
<property>
  <name>dfs.namenode.secondary.http-address</name>
  <value>$HADOOPnode2:9868</value>
</property>

<!-- dfs.webhdfs.enabled must be set to true, otherwise WebHDFS commands that list file or directory status (LISTSTATUS, LISTFILESTATUS, etc.) cannot be used, because that information is held by the NameNode. -->
<property>
  <name>dfs.webhdfs.enabled</name>
  <value>true</value>
</property>

</configuration>
EOF
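# With dfs.webhdfs.enabled=true, a quick sanity check once HDFS is running could be
# (50070 is the default NameNode web port in Hadoop 2.x):
# curl "http://hadoop1:50070/webhdfs/v1/?op=LISTSTATUS"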
# mapred-site.xml: MapReduce module settings
cat > $HADOOP_DIR/hadoop/etc/hadoop/mapred-site.xml << EOF
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
</property>
<!-- MapReduce JobHistory server address -->
<property>
    <name>mapreduce.jobhistory.address</name>
    <value>$HADOOPnode2:10020</value>
</property>
<!-- MapReduce JobHistory web UI address -->
<property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>$HADOOPnode2:19888</value>
</property>
</configuration>
EOF

#yarn-site.xml
cat > $HADOOP_DIR/hadoop/etc/hadoop/yarn-site.xml << EOF
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- Host where the YARN ResourceManager (cluster master role) runs -->
<property>
    <name>yarn.resourcemanager.hostname</name>
    <value>$HADOOPnode1</value>
</property>

<property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
</property>

<property>
    <name>yarn.log-aggregation-enable</name>
    <value>false</value>
</property>
</configuration>
EOF
# Add the worker nodes to etc/hadoop/slaves; IP addresses are sufficient
cd $HADOOP_DIR/hadoop/etc/hadoop/
cat > $HADOOP_DIR/hadoop/etc/hadoop/slaves << EOF
$HADOOPnode1
$HADOOPnode2
EOF

# Distribute the configuration files to the other node

rsync -av $HADOOP_DIR/hadoop/etc/hadoop/ $HADOOPnode2:$HADOOP_DIR/hadoop/etc/hadoop/

sleep 5
# Format the NameNode
cd $HADOOP_DIR/hadoop/
./bin/hdfs namenode -format

# Start HDFS
$HADOOP_DIR/hadoop/sbin/start-dfs.sh
# Start YARN
$HADOOP_DIR/hadoop/sbin/start-yarn.sh

jps 
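# With the configuration above, jps on hadoop1 should normally show NameNode, DataNode,
# ResourceManager and NodeManager, and jps on hadoop2 should show DataNode, NodeManager
# and SecondaryNameNode.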
sleep 3
}


install_Hive () {
if ! [ -f "$DIR/$HIVE_FILE" ] ;then
        color 1 "$HIVE_FILE not found"
        wget -P $DIR https://mirror.tuna.tsinghua.edu.cn/apache/hive/hive-2.3.9/apache-hive-2.3.9-bin.tar.gz --no-check-certificate
        exit;
elif [ -d $HIVE_DIR/hive ] ;then
        color 1 "Hive already installed"
        exit;
else
        [ -d "$HIVE_DIR" ] || mkdir -pv $HIVE_DIR
fi
tar -xvf $DIR/$HIVE_FILE -C $HIVE_DIR
cd $HIVE_DIR && ln -sv apache-hive* hive

# Download the MySQL JDBC driver and put the jar under Hive's lib directory
cd $HIVE_DIR/hive/lib/
wget https://mirror.tuna.tsinghua.edu.cn/mysql/downloads/Connector-J/mysql-connector-java-8.0.26.tar.gz --no-check-certificate

tar -xvf mysql-connector-java-8.0.26.tar.gz
mv mysql-connector-java-8.0.26/mysql-connector-java-8.0.26.jar .
rm -rf mysql-connector-java-8.0.26 mysql-connector-java-8.0.26.tar.gz
# Replace Hive's bundled guava jar with the version shipped with Hadoop to avoid a version conflict
cd $HIVE_DIR/hive/lib/
mv guava*.jar guava.jar-bak
cd $HADOOP_DIR/hadoop/share/hadoop/hdfs/lib/

ln -sv $HADOOP_DIR/hadoop/share/hadoop/hdfs/lib/guava-*.jar $HIVE_DIR/hive/lib/

cat > /etc/profile.d/hive.sh << EOF
export HIVE_HOME=$HIVE_DIR/hive
export PATH=\$PATH:\$HIVE_HOME/bin
EOF
source /etc/profile.d/hive.sh

sleep 1

source /etc/profile.d/hive.sh

color 0 "Hive installed successfully"
# Take HDFS out of safe mode
hdfs dfsadmin -safemode leave
# Create the Hive warehouse directories on HDFS
hadoop fs -mkdir -p $HIVE_DIR/hive/warehouse
hadoop fs -mkdir -p $HIVE_DIR/hive/tmp
# Grant permissions
hadoop fs -chmod 777 $HIVE_DIR/hive/warehouse
hadoop fs -chmod 777 $HIVE_DIR/hive/tmp
# Verify that both directories were created
hadoop fs -ls $HIVE_DIR/hive/

# Configure the Hive environment
cd $HIVE_DIR/hive/conf/
cp hive-log4j2.properties.template hive-log4j2.properties
sed -i "s|property.hive.log.dir = \${sys:java.io.tmpdir}/\${sys:user.name}|property.hive.log.dir = $HIVE_DIR/hive/log|" hive-log4j2.properties

cp hive-exec-log4j2.properties.template hive-exec-log4j2.properties
sed -i "s|property.hive.log.dir = \${sys:java.io.tmpdir}/\${sys:user.name}|property.hive.log.dir = $HIVE_DIR/hive/exelog|" hive-exec-log4j2.properties

# Write the hive-site.xml configuration
cd $HIVE_DIR/hive/conf
cat > $HIVE_DIR/hive/conf/hive-site.xml << EOF
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
    <!-- Directory where Hive stores execution plans for the different stages of MR jobs, as well as intermediate output -->
    <property>
        <name>hive.exec.scratchdir</name>
        <value>$HIVE_DIR/hive/tmp</value>
    </property>
    <!-- Permissions (write access) for the scratch directory above -->
    <property>
        <name>hive.scratch.dir.permission</name>
        <value>733</value>
    </property>
    <!-- Warehouse location; the default is /user/hive/warehouse on HDFS -->
    <property>
        <name>hive.metastore.warehouse.dir</name>
        <value>$HIVE_DIR/hive/warehouse</value>
    </property>
    <!-- JDBC URL of the Hive metastore database -->
    <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value>jdbc:mysql://$mysqlip:3306/hive?createDatabaseIfNotExist=true</value>
    </property>
    <!-- JDBC driver class -->
    <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.cj.jdbc.Driver</value>
    </property>
    <!-- Metastore database user name -->
    <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value>hive</value>
    </property>
    <!-- Metastore database user password -->
    <property>
        <name>javax.jdo.option.ConnectionPassword</name>
        <value>$HIVEmima</value>
    </property>
    <!-- Show column headers for query results in the CLI -->
    <property>
        <name>hive.cli.print.header</name>
        <value>true</value>
    </property>
    <!-- Show the current database name in the CLI prompt -->
    <property>
        <name>hive.cli.print.current.db</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.server2.enable.doAs</name>
        <value>false</value>
    </property>
    <property>
        <name>hive.metastore.sasl.enabled</name>
        <value>false</value>
    </property>
    <property>
        <name>hive.server2.authentication</name>
        <value>NONE</value>
    </property>
</configuration>

EOF
# Before starting, make sure Hadoop is already running
# Initialize the metastore schema
$HIVE_DIR/hive/bin/schematool -initSchema -dbType mysql
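# Optionally, the resulting schema can be checked afterwards with:
# $HIVE_DIR/hive/bin/schematool -dbType mysql -info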

# Start hiveserver2 in the background
mkdir $HIVE_DIR/hive/log
nohup $HIVE_DIR/hive/bin/hiveserver2 >> $HIVE_DIR/hive/log/nohup.hs2.log 2>&1 &

# Start the metastore service in the background
nohup hive --service metastore >> $HIVE_DIR/hive/log/metastore.log 2>&1 &

# Check that both Hive processes are running
ps -ef |grep hive
}



stop_hadoop () {
$HADOOP_DIR/hadoop/sbin/stop-dfs.sh
$HADOOP_DIR/hadoop/sbin/stop-yarn.sh
}

start_hadoop () {
$HADOOP_DIR/hadoop/sbin/start-dfs.sh
$HADOOP_DIR/hadoop/sbin/start-yarn.sh
}

start_hive () {

nohup $HIVE_DIR/hive/bin/hiveserver2 >> $HIVE_DIR/hive/log/nohup.hs2.log 2>&1 &

nohup hive --service metastore >> $HIVE_DIR/hive/log/metastore.log 2>&1 &
}


# On hadoop2, uncomment the calls below and run the script
#initialize
#install_jdk
#install_hadoop2

# On hadoop1, uncomment the calls below and run the script
#initialize
#install_jdk
#install_hadoop1
#install_Hive

# Start/stop helpers
#hadoop
#stop_hadoop
#start_hadoop
#hive
#start_hive
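
Typical usage, assuming the installation tarballs are already under /usr/local/src: copy the script to both machines, uncomment the function calls for the node you are on (hadoop2 first, then hadoop1) and run bash install_jdk-hadoop-hive.sh. After install_hadoop1 completes, the NameNode web UI is normally reachable at http://hadoop1:50070 and the YARN ResourceManager UI at http://hadoop1:8088. Once hiveserver2 is up, you can connect with beeline, for example:

beeline -u jdbc:hive2://hadoop1:10000 -n root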
