Hadoop Two-Node Cluster & Hive Setup Automation Script
Posted by y_zilong
tags:
Environment initialization script
JDK installation script
Two-node Hadoop installation script
Hive installation script
cat install_jdk-hadoop-hive.sh
#!/bin/bash
DIR=/usr/local/src
JDK_FILE="jdk-8u291-linux-x64.tar.gz"
JDK_DIR="/usr/local"
#Hadoop node addresses
HADOOPnode1=10.0.7.1
HADOOPnode2=10.0.7.2
#For the multi-node Hadoop install, run this on hadoop2 first, then on hadoop1
HADOOP_FILE='hadoop-2.7.7.tar.gz'
HADOOP_DIR='/usr/local'
HIVE_FILE='apache-hive-2.3.9-bin.tar.gz'
HIVE_DIR='/usr/local'
mysqlip=10.0.7.1
HIVEmima=hive
#Before installing Hive, prepare the MySQL database first; when re-initializing, the hive database must be emptied
#create user 'hive' identified by 'hive';
#grant all privileges on *.* to 'hive'@'%' with grant option;
#create database hive;
#alter database hive character set latin1;
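#A sketch of applying the statements above in one shot (assumes root access to MySQL on $mysqlip; adjust credentials as needed):
#mysql -h 10.0.7.1 -uroot -p -e "create user 'hive' identified by 'hive'; grant all privileges on *.* to 'hive'@'%' with grant option; create database hive; alter database hive character set latin1;"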
color () {
RES_COL=60
MOVE_TO_COL="echo -en \033[${RES_COL}G"
SETCOLOR_SUCCESS="echo -en \033[1;32m"
SETCOLOR_FAILURE="echo -en \033[1;31m"
SETCOLOR_WARNING="echo -en \033[1;33m"
SETCOLOR_NORMAL="echo -en \033[0m"
echo -n "$2" && $MOVE_TO_COL
echo -n "["
if [ $1 = "success" -o $1 = "0" ] ;then
$SETCOLOR_SUCCESS
echo -n $" OK "
elif [ $1 = "failure" -o $1 = "1" ] ; then
$SETCOLOR_FAILURE
echo -n $"FAILED"
else
$SETCOLOR_WARNING
echo -n $"WARNING"
fi
$SETCOLOR_NORMAL
echo -n "]"
echo
}

initialize () {
sed -i '/SELINUX=enforcing/c SELINUX=disabled' /etc/selinux/config && setenforce 0
systemctl disable --now firewalld
}

install_jdk () {
if ! [ -f "$DIR/$JDK_FILE" ] ;then
color 1 "$JDK_FILE not found"
exit;
elif [ -d $JDK_DIR/jdk ] ;then
color 1 "JDK 已经安装"
exit;
else
[ -d "$JDK_DIR" ] || mkdir -pv $JDK_DIR
fi
tar xvf $DIR/$JDK_FILE -C $JDK_DIR
cd $JDK_DIR && ln -s jdk1.8.* jdk
cat > /etc/profile.d/jdk.sh << EOF
export JAVA_HOME=$JDK_DIR/jdk
export JRE_HOME=\$JAVA_HOME/jre
export CLASSPATH=\$JAVA_HOME/lib/:\$JRE_HOME/lib/
export PATH=\$PATH:\$JAVA_HOME/bin
EOF
source /etc/profile.d/jdk.sh
java -version && color 0 "JDK installation completed" || { color 1 "JDK installation failed"; exit 1; }
sleep 3
source /etc/profile.d/jdk.sh
yum install -y rsync
echo $HADOOPnode1 hadoop1 >> /etc/hosts
echo $HADOOPnode2 hadoop2 >> /etc/hosts
}

install_hadoop2 () {
hostnamectl set-hostname hadoop2
if ! [ -f "$DIR/$HADOOP_FILE" ] ;then
color 1 "$HADOOP_FILE not found; downloading it now, re-run the script afterwards"
wget https://archive.apache.org/dist/hadoop/common/hadoop-2.7.7/hadoop-2.7.7.tar.gz
exit;
elif [ -d $HADOOP_DIR/hadoop ] ;then
color 1 "Hadoop 已经安装"
exit;
else
[ -d "$HADOOP_DIR" ] || mkdir -pv $HADOOP_DIR
fi
tar -xvf $DIR/$HADOOP_FILE -C $HADOOP_DIR
cd $HADOOP_DIR && ln -sv hadoop* hadoop
cat > /etc/profile.d/hadoop.sh << EOF
export HADOOP_HOME=$HADOOP_DIR/hadoop
export PATH=\$PATH:\$HADOOP_HOME/bin
export PATH=\$PATH:\$HADOOP_HOME/sbin
export HADOOP_CONF_DIR=\$HADOOP_HOME/etc/hadoop
EOF
source /etc/profile.d/hadoop.sh
hadoop version && color 0 "Hadoop (node hadoop2) installation completed" || { color 1 "Hadoop installation failed"; exit 1; }
sleep 3
source /etc/profile.d/hadoop.sh
}

install_hadoop1 () {
hostnamectl set-hostname hadoop1
if ! [ -f "$DIR/$HADOOP_FILE" ] ;then
color 1 "$HADOOP_FILE not found; downloading it now, re-run the script afterwards"
wget https://archive.apache.org/dist/hadoop/common/hadoop-2.7.7/hadoop-2.7.7.tar.gz
exit;
elif [ -d $HADOOP_DIR/hadoop ] ;then
color 1 "Hadoop 已经安装"
exit;
else
[ -d "$HADOOP_DIR" ] || mkdir -pv $HADOOP_DIR
fi
tar -xvf $DIR/$HADOOP_FILE -C $HADOOP_DIR
cd $HADOOP_DIR && ln -sv hadoop* hadoop
cat > /etc/profile.d/hadoop.sh << EOF
export HADOOP_HOME=$HADOOP_DIR/hadoop
export PATH=\$PATH:\$HADOOP_HOME/bin
export PATH=\$PATH:\$HADOOP_HOME/sbin
export HADOOP_CONF_DIR=\$HADOOP_HOME/etc/hadoop
EOF
source /etc/profile.d/hadoop.sh
hadoop version && color 0 "Hadoop (node hadoop1) installation completed" || { color 1 "Hadoop installation failed"; exit 1; }
sleep 3
source /etc/profile.d/hadoop.sh
#Set JAVA_HOME in the Hadoop environment scripts
mkdir $HADOOP_DIR/hadoop/tmp
cd $HADOOP_DIR/hadoop/etc/hadoop/
sed -i 's#^export JAVA_HOME=.*#export JAVA_HOME=/usr/local/jdk#' $HADOOP_DIR/hadoop/etc/hadoop/hadoop-env.sh
echo 'export JAVA_HOME=/usr/local/jdk' >> $HADOOP_DIR/hadoop/etc/hadoop/mapred-env.sh
echo 'export JAVA_HOME=/usr/local/jdk' >> $HADOOP_DIR/hadoop/etc/hadoop/yarn-env.sh
#Modify the Hadoop configuration files
#core-site.xml
cat > $HADOOP_DIR/hadoop/etc/hadoop/core-site.xml << EOF
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- Default filesystem; Hadoop supports file, HDFS, GFS, Alibaba/Amazon cloud storage, etc. -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://$HADOOPnode1:8020</value>
</property>
<property>
<!-- Directory for temporary files; uses the tmp directory created under $HADOOP_DIR/hadoop above -->
<name>hadoop.tmp.dir</name>
<value>$HADOOP_DIR/hadoop/tmp</value>
</property>
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>
</configuration>
EOF
#hdfs-site.xml  HDFS filesystem settings
cat > $HADOOP_DIR/hadoop/etc/hadoop/hdfs-site.xml << EOF
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- Host and port where the SecondaryNameNode (SNN) runs -->
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>$HADOOPnode2:9868</value>
</property>
<!-- dfs.webhdfs.enabled must be set to true, otherwise WebHDFS operations such as LISTSTATUS and LISTFILESTATUS, which list file/directory status, cannot be used, because that information is kept by the NameNode. -->
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
</configuration>
EOF
#mapred-site.xml  MapReduce settings
cat > $HADOOP_DIR/hadoop/etc/hadoop/mapred-site.xml << EOF
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!-- MapReduce JobHistory server address -->
<property>
<name>mapreduce.jobhistory.address</name>
<value>$HADOOPnode2:10020</value>
</property>
<!-- MapReduce JobHistory web UI address -->
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>$HADOOPnode2:19888</value>
</property>
</configuration>
EOF
#yarn-site.xml
cat > $HADOOP_DIR/hadoop/etc/hadoop/yarn-site.xml << EOF
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- Host where the YARN ResourceManager (cluster master) runs -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>$HADOOPnode1</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>false</value>
</property>
</configuration>
EOF
#Add the worker nodes to etc/hadoop/slaves; IP addresses are sufficient
cd $HADOOP_DIR/hadoop/etc/hadoop/
cat > $HADOOP_DIR/hadoop/etc/hadoop/slaves << EOF
$HADOOPnode1
$HADOOPnode2
EOF
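#Note: the rsync below and start-dfs.sh/start-yarn.sh rely on passwordless SSH from hadoop1 to both nodes.
#A sketch of setting that up if it is not already in place (assumes root on both hosts):
#ssh-keygen -t rsa -N '' -f ~/.ssh/id_rsa
#ssh-copy-id root@hadoop1 && ssh-copy-id root@hadoop2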
#Distribute the configuration files to the other node
rsync -av $HADOOP_DIR/hadoop/etc/hadoop/ $HADOOPnode2:$HADOOP_DIR/hadoop/etc/hadoop/
sleep 5
#Format the NameNode
cd $HADOOP_DIR/hadoop/
./bin/hdfs namenode -format
#Start HDFS
$HADOOP_DIR/hadoop/sbin/start-dfs.sh
#Start YARN
$HADOOP_DIR/hadoop/sbin/start-yarn.sh
jps
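#Roughly expected from jps with this layout (an assumption based on the configuration above, not script output):
#hadoop1: NameNode, DataNode, ResourceManager, NodeManager; hadoop2: DataNode, SecondaryNameNode, NodeManager (run jps there separately)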
sleep 3
}

install_Hive () {
if ! [ -f "$DIR/$HIVE_FILE" ] ;then
color 1 "$HIVE_FILE not found; downloading it now, re-run the script afterwards"
wget https://mirror.tuna.tsinghua.edu.cn/apache/hive/hive-2.3.9/apache-hive-2.3.9-bin.tar.gz --no-check-certificate
exit;
elif [ -d $HIVE_DIR/hive ] ;then
color 1 "Hive 已经安装"
exit;
else
[ -d "$HIVE_DIR" ] || mkdir -pv $HIVE_DIR
fi
tar -xvf $DIR/$HIVE_FILE -C $HIVE_DIR
cd $HIVE_DIR && ln -sv apache-hive* hive
#Download the MySQL JDBC driver and place the jar in Hive's lib directory
cd $HIVE_DIR/hive/lib/
wget https://mirror.tuna.tsinghua.edu.cn/mysql/downloads/Connector-J/mysql-connector-java-8.0.26.tar.gz --no-check-certificate
tar -xvf mysql-connector-java-8.0.26.tar.gz
mv mysql-connector-java-8.0.26/mysql-connector-java-8.0.26.jar .
rm -rf mysql-connector-java-8.0.26 mysql-connector-java-8.0.26.tar.gz
#Replace Hive's bundled guava with the version shipped with Hadoop
cd $HIVE_DIR/hive/lib/
mv guava*.jar guava.jar-bak
cd $HADOOP_DIR/hadoop/share/hadoop/hdfs/lib/
ln -sv $HADOOP_DIR/hadoop/share/hadoop/hdfs/lib/guava-*.jar $HIVE_DIR/hive/lib/
cat > /etc/profile.d/hive.sh << EOF
export HIVE_HOME=$HIVE_DIR/hive
export PATH=\$PATH:\$HIVE_HOME/bin
EOF
source /etc/profile.d/hive.sh
sleep 1
source /etc/profile.d/hive.sh
color 0 "Hive 安装完成"
#关闭hadoop安全模式
hdfs dfsadmin -safemode leave
#Create the Hive warehouse directories on HDFS
hadoop fs -mkdir -p $HIVE_DIR/hive/warehouse
hadoop fs -mkdir -p $HIVE_DIR/hive/tmp
#Set permissions
hadoop fs -chmod 777 $HIVE_DIR/hive/warehouse
hadoop fs -chmod 777 $HIVE_DIR/hive/tmp
#Check that both directories were created
hadoop fs -ls $HIVE_DIR/hive/
#Configure the Hive environment
cd $HIVE_DIR/hive/conf/
cp hive-log4j2.properties.template hive-log4j2.properties
sed -i "s#^property.hive.log.dir = .*#property.hive.log.dir = $HIVE_DIR/hive/log#" hive-log4j2.properties
cp hive-exec-log4j2.properties.template hive-exec-log4j2.properties
sed -i "s#^property.hive.log.dir = .*#property.hive.log.dir = $HIVE_DIR/hive/exelog#" hive-exec-log4j2.properties
#Configure hive-site.xml
cd $HIVE_DIR/hive/conf
cat > $HIVE_DIR/hive/conf/hive-site.xml << EOF
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- Directory where Hive stores execution plans for the different MR job stages, as well as intermediate output -->
<property>
<name>hive.exec.scratchdir</name>
<value>$HIVE_DIR/hive/tmp</value>
</property>
<!-- Write permission for the scratch directory above -->
<property>
<name>hive.scratch.dir.permission</name>
<value>733</value>
</property>
<!-- Warehouse location on HDFS (the default is /user/hive/warehouse) -->
<property>
<name>hive.metastore.warehouse.dir</name>
<value>$HIVE_DIR/hive/warehouse</value>
</property>
<!-- JDBC URL of the Hive metastore database -->
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://$mysqlip:3306/hive?createDatabaseIfNotExist=true</value>
</property>
<!-- JDBC driver class -->
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.cj.jdbc.Driver</value>
</property>
<!-- Metastore database user -->
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>hive</value>
</property>
<!-- Metastore database password -->
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>$HIVEmima</value>
</property>
<!-- Print column headers in the CLI -->
<property>
<name>hive.cli.print.header</name>
<value>true</value>
</property>
<!-- Show the current database name in the CLI -->
<property>
<name>hive.cli.print.current.db</name>
<value>true</value>
</property>
<property>
<name>hive.server2.enable.doAs</name>
<value>false</value>
</property>
<property>
<name>hive.metastore.sasl.enabled</name>
<value>false</value>
</property>
<property>
<name>hive.server2.authentication</name>
<value>NONE</value>
</property>
</configuration>
EOF
#Before starting Hive, check that Hadoop is already running
#Initialize the metastore schema
$HIVE_DIR/hive/bin/schematool -initSchema -dbType mysql
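#Optionally verify the schema afterwards (a sketch; uses the same MySQL settings as hive-site.xml):
#$HIVE_DIR/hive/bin/schematool -dbType mysql -info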
#Start hiveserver2 in the background
mkdir $HIVE_DIR/hive/log
nohup $HIVE_DIR/hive/bin/hiveserver2 >> $HIVE_DIR/hive/log/nohup.hs2.log 2>&1 &
#Start the metastore in the background
nohup hive --service metastore >> $HIVE_DIR/hive/log/metastore.log 2>&1 &
#Check the two Hive processes
ps -ef |grep hive
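#A quick connectivity check once hiveserver2 is up (a sketch; assumes the default port 10000 and the root OS user):
#beeline -u jdbc:hive2://localhost:10000 -n root -e 'show databases;'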
}

stop_hadoop () {
$HADOOP_DIR/hadoop/sbin/stop-dfs.sh
$HADOOP_DIR/hadoop/sbin/stop-yarn.sh
}

start_hadoop () {
$HADOOP_DIR/hadoop/sbin/start-dfs.sh
$HADOOP_DIR/hadoop/sbin/start-yarn.sh
}

start_hive () {
nohup $HIVE_DIR/hive/bin/hiveserver2 >> $HIVE_DIR/hive/log/nohup.hs2.log 2>&1 &
nohup hive --service metastore >> $HIVE_DIR/hive/log/metastore.log 2>&1 &
}
#To run on hadoop2, uncomment the lines below
#initialize
#install_jdk
#install_hadoop2
#To run on hadoop1, uncomment the lines below
#initialize
#install_jdk
#install_hadoop1
#install_Hive
#Start/stop commands
#hadoop
#stop_hadoop
#start_hadoop
#hive
#start_hive
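As the trailing comments indicate, the script is meant to be run on each node with the relevant function calls uncommented. A minimal sketch of the workflow (assuming the script and the installation tarballs sit in /usr/local/src on both hosts):

#on hadoop2: uncomment initialize, install_jdk and install_hadoop2, then
bash install_jdk-hadoop-hive.sh
#on hadoop1: uncomment initialize, install_jdk, install_hadoop1 and install_Hive, then
bash install_jdk-hadoop-hive.sh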