Big Data Platform Setup: Hadoop-3.1.3 + Hive-3.1.2 + HBase-2.2.3 + Zookeeper-3.5.7 + Kafka_2.11-2.4.0 + Spark-2.4.5
Posted by 大数据小码农
1. Framework Selection
hadoop-3.1.3
hive-3.1.2
zookeeper-3.5.7
hbase-2.2.3
kafka_2.11-2.4.0
spark-2.4.5-bin-hadoop2.7
2. Pre-installation Preparation
1. Disable the firewall
2. Install JDK
3. Install Scala
4. Configure passwordless SSH
5. Configure IP-to-hostname mappings
6. Install MySQL
(A command sketch for these steps follows this list.)
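A minimal sketch of the preparation commands, assuming CentOS 7 (systemd/firewalld) and the hadoop101/102/103 hostnames used throughout this article; the IP addresses are placeholders for your own network:
# 1. Disable the firewall on every node
systemctl stop firewalld && systemctl disable firewalld
# 4. Passwordless SSH: generate a key and copy it to every node (repeat on each host)
ssh-keygen -t rsa
ssh-copy-id hadoop101
ssh-copy-id hadoop102
ssh-copy-id hadoop103
# 5. IP-to-hostname mappings in /etc/hosts (replace the example IPs with your own)
# 192.168.1.101 hadoop101
# 192.168.1.102 hadoop102
# 192.168.1.103 hadoop103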
3. Installation
3.1 Hadoop Installation
1.hadoop-env.sh
export JAVA_HOME=/opt/module/jdk1.8.0_121
2.hdfs-site.xml
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<!-- Secondary NameNode host and port -->
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>hadoop102:50090</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/opt/module/hadoop-3.1.3/data/dfs/nn</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/opt/module/hadoop-3.1.3/data/dfs/dn</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
3.yarn-env.sh
export JAVA_HOME=/opt/module/jdk1.8.0_121
4.yarn-site.xml
<!-- How reducers fetch data -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- ResourceManager host -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>hadoop103</value>
</property>
<!-- Enable log aggregation -->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<!-- Retain aggregated logs for 3 days -->
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>259200</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.vmem-pmem-ratio</name>
<value>5</value>
</property>
5.mapred-env.sh
export JAVA_HOME=/opt/module/jdk1.8.0_121
6.mapred-site.xml
<!-- Run MapReduce on YARN -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!-- JobHistory server -->
<property>
<name>mapreduce.jobhistory.address</name>
<value>hadoop103:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>hadoop103:19888</value>
</property>
<!-- Hadoop 3.x requires the MapReduce runtime environment to be set explicitly -->
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
<property>
<name>mapreduce.map.env</name>
<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
<property>
<name>mapreduce.reduce.env</name>
<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
<property>
<name>mapreduce.map.memory.mb</name>
<value>1024</value>
</property>
7. workers (the file was named slaves in Hadoop 2.x; Hadoop 3.x uses etc/hadoop/workers)
hadoop101
hadoop102
hadoop103
8. Configure environment variables in /etc/profile
#Java
export JAVA_HOME=/opt/module/jdk1.8.0_121
export PATH=$PATH:$JAVA_HOME/bin
#Scala
export SCALA_HOME=/opt/module/scala-2.11.12
export PATH=$PATH:$SCALA_HOME/bin
#Hadoop
export HADOOP_HOME=/opt/module/hadoop-3.1.3
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
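A minimal sketch for bringing the cluster up after Hadoop and /etc/profile have been distributed to all nodes. It assumes the NameNode runs on hadoop101 and, per the configs above, the ResourceManager and JobHistory server run on hadoop103; format the NameNode only once:
source /etc/profile
# Format the NameNode on hadoop101 (first time only)
hdfs namenode -format
# Start HDFS on hadoop101 and YARN on hadoop103 (the ResourceManager node)
start-dfs.sh
start-yarn.sh
# Start the JobHistory server on hadoop103
mapred --daemon start historyserver
# Verify the daemons on each node
jps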
3.2 Hive Installation
1.hive-env.sh
HADOOP_HOME=/opt/module/hadoop-3.1.3
export HIVE_CONF_DIR=/opt/module/hive-3.1.2/conf
export HIVE_AUX_JARS_PATH=/opt/module/hive-3.1.2/auxlib
2.hive-site.xml
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://hadoop101:3306/metastore?createDatabaseIfNotExist=true</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>123456</value>
</property>
<property>
<name>hive.cli.print.header</name>
<value>true</value>
</property>
<property>
<name>hive.cli.print.current.db</name>
<value>true</value>
</property>
<property>
<name>hive.metastore.uris</name>
<value>thrift://hadoop101:9083</value>
</property>
<property>
<name>hive.server2.webui.host</name>
<value>hadoop101</value>
</property>
<property>
<name>hive.server2.webui.port</name>
<value>10002</value>
</property>
<!-- Hive 3.x enables ACID tables by default; Spark cannot read ACID Hive tables, so disable ACID -->
<property>
<name>hive.strict.managed.tables</name>
<value>false</value>
</property>
<property>
<name>hive.create.as.insert.only</name>
<value>false</value>
</property>
<property>
<name>metastore.create.as.acid</name>
<value>false</value>
</property>
<!-- Disable schema version verification -->
<property>
<name>hive.metastore.schema.verification</name>
<value>false</value>
</property>
3. Create the HIVE_AUX_JARS_PATH directory
mkdir -p /opt/module/hive-3.1.2/auxlib
4. Copy mysql-connector-java-5.1.27-bin.jar to /opt/module/hive-3.1.2/lib
cp /opt/software/mysql-libs/mysql-connector-java-5.1.27/mysql-connector-java-5.1.27-bin.jar /opt/module/hive-3.1.2/lib
5. Configure environment variables
#HIVE_HOME
export HIVE_HOME=/opt/module/hive-3.1.2
export PATH=$PATH:$HIVE_HOME/bin
6. On first use, initialize the metastore schema
schematool -dbType mysql -initSchema
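Once the schema is initialized, a minimal sketch to start the metastore and HiveServer2 on hadoop101 (needed because hive.metastore.uris and the HiveServer2 Web UI are configured above) and run a quick smoke test; redirecting output to /dev/null here is just to keep the example self-contained:
# Start the metastore and HiveServer2 in the background on hadoop101
nohup hive --service metastore 1>/dev/null 2>&1 &
nohup hive --service hiveserver2 1>/dev/null 2>&1 &
# Smoke test
hive -e "show databases;"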
3.3 Zookeeper Installation
1.zoo.cfg
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/opt/module/zookeeper-3.5.7/zkData
clientPort=2181
# three nodes
server.1=hadoop101:2888:3888
server.2=hadoop102:2888:3888
server.3=hadoop103:2888:3888
2. Create the Zookeeper data directory
mkdir -p /opt/module/zookeeper-3.5.7/zkData
3. Create the myid file under /opt/module/zookeeper-3.5.7/zkData to identify the current host
echo "1" > /opt/module/zookeeper-3.5.7/zkData/myid
4. Configure environment variables in /etc/profile
#Zookeeper
export ZOOKEEPER_HOME=/opt/module/zookeeper-3.5.7
export PATH=$PATH:$ZOOKEEPER_HOME/bin
5. Distribute Zookeeper to the other nodes; note that the myid on each Zookeeper node must be unique (see the sketch below)
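A minimal sketch for setting the per-node myid and starting the ensemble; the myid values follow the server.N entries in zoo.cfg above:
# hadoop101 already has myid 1; set the others on their respective hosts
echo "2" > /opt/module/zookeeper-3.5.7/zkData/myid   # run on hadoop102
echo "3" > /opt/module/zookeeper-3.5.7/zkData/myid   # run on hadoop103
# Start Zookeeper on every node, then check leader/follower status
zkServer.sh start
zkServer.sh status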
3.4 HBase Installation
1.hbase-env.sh
export JAVA_HOME=/opt/module/jdk1.8.0_121
# Whether to use HBase's bundled Zookeeper; set to false to use the Zookeeper configured above
export HBASE_MANAGES_ZK=false
2.hbase-site.xml
<!-- Directory where HBase stores data on HDFS -->
<property>
<name>hbase.rootdir</name>
<value>hdfs://hadoop101:9000/hbase</value>
</property>
<!-- Run in fully distributed mode -->
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<property>
<name>hbase.tmp.dir</name>
<value>/opt/module/hbase-2.2.3/tmp</value>
</property>
<!-- Zookeeper quorum -->
<property>
<name>hbase.zookeeper.quorum</name>
<value>hadoop101,hadoop102,hadoop103</value>
</property>
<property>
<name>hbase.zookeeper.property.clientPort</name>
<value>2181</value>
</property>
<!-- Zookeeper dataDir -->
<property>
<name>hbase.zookeeper.property.dataDir</name>
<value>/opt/module/zookeeper-3.5.7/zkData</value>
</property>
<property>
<name>zookeeper.znode.parent</name>
<value>/hbase</value>
</property>
<property>
<name>hbase.unsafe.stream.capability.enforce</name>
<value>false</value>
</property>
3.regionservers
hadoop101
hadoop102
hadoop103
4. Configure environment variables in /etc/profile
#HBase
export HBASE_HOME=/opt/module/hbase-2.2.3
export PATH=$PATH:$HBASE_HOME/bin
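A minimal sketch to start HBase (Zookeeper and HDFS must already be running) and verify it from the shell; the Web UI port 16010 is the HBase 2.x default for the master:
# Start the HMaster and RegionServers from hadoop101
start-hbase.sh
# Verify via the Web UI at http://hadoop101:16010, or from the shell:
hbase shell
# hbase> status
# hbase> create 'test', 'cf'
# hbase> list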
3.5 Kafka Installation
1.server.properties
broker.id=0
log.dirs=/opt/module/kafka_2.11-2.4.0/logs
zookeeper.connect=hadoop101:2181,hadoop102:2181,hadoop103:2181/kafka
2. Distribute Kafka to the other Kafka nodes; note that broker.id in server.properties must be globally unique
3. Configure environment variables in /etc/profile
#KAFKA_HOME
export KAFKA_HOME=/opt/module/kafka_2.11-2.4.0
export PATH=$PATH:$KAFKA_HOME/bin
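A minimal sketch to start the brokers and run a produce/consume smoke test. Zookeeper must already be running, and the broker port 9092 is an assumption (the default listener, not set explicitly in the server.properties shown above):
# Start the broker on every Kafka node
kafka-server-start.sh -daemon $KAFKA_HOME/config/server.properties
# Create a test topic and list topics
kafka-topics.sh --create --bootstrap-server hadoop101:9092 --replication-factor 2 --partitions 3 --topic test
kafka-topics.sh --list --bootstrap-server hadoop101:9092
# Produce and consume (run in two terminals)
kafka-console-producer.sh --broker-list hadoop101:9092 --topic test
kafka-console-consumer.sh --bootstrap-server hadoop101:9092 --topic test --from-beginning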
3.6 Spark Installation
1.spark-env.sh
export JAVA_HOME=/opt/module/jdk1.8.0_121
export SCALA_HOME=/opt/module/scala-2.11.12
export SPARK_MASTER_IP=hadoop101
export HADOOP_CONF_DIR=/opt/module/hadoop-3.1.3/etc/hadoop
# spark.history.retainedApplications: number of completed application UIs kept in memory (set to 1 below)
export SPARK_HISTORY_OPTS="-Dspark.history.retainedApplications=1 -Dspark.history.fs.logDirectory=hdfs://hadoop101:9000/spark/log/"
2. spark-defaults.conf
spark.eventLog.enabled true
spark.eventLog.dir hdfs://hadoop101:9000/spark/log/
spark.yarn.historyServer.address hadoop102:18080
3.slaves
hadoop101
hadoop102
hadoop103
4. Create symlinks to core-site.xml, hdfs-site.xml, and hive-site.xml under /opt/module/spark-2.4.5-bin-hadoop2.7/conf
ln -s /opt/module/hadoop-3.1.3/etc/hadoop/core-site.xml
ln -s /opt/module/hadoop-3.1.3/etc/hadoop/hdfs-site.xml
ln -s /opt/module/hive-3.1.2/conf/hive-site.xml
5. Copy mysql-connector-java-5.1.27-bin.jar to /opt/module/spark-2.4.5-bin-hadoop2.7/jars
cp /opt/software/mysql-libs/mysql-connector-java-5.1.27/mysql-connector-java-5.1.27-bin.jar /opt/module/spark-2.4.5-bin-hadoop2.7/jars
6. Configure environment variables in /etc/profile
#Spark
export SPARK_HOME=/opt/module/spark-2.4.5-bin-hadoop2.7
export PATH=$PATH:$SPARK_HOME/bin
# Resolves the Spark warning: unable to load native-hadoop library for your platform... using builtin-java classes where applicable
export LD_LIBRARY_PATH=$HADOOP_HOME/lib/native/:$LD_LIBRARY_PATH
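A minimal sketch to prepare the event-log directory, start the standalone master/workers and history server, and run a SparkPi smoke test on YARN; the example jar name matches the spark-2.4.5-bin-hadoop2.7 (Scala 2.11) distribution used here:
# The event log directory from spark-defaults.conf must exist on HDFS first
hdfs dfs -mkdir -p /spark/log
# Start the standalone cluster from hadoop101, and the history server on the node configured above
$SPARK_HOME/sbin/start-all.sh
$SPARK_HOME/sbin/start-history-server.sh
# Smoke test: run SparkPi on YARN
spark-submit --class org.apache.spark.examples.SparkPi \
  --master yarn --deploy-mode client \
  $SPARK_HOME/examples/jars/spark-examples_2.11-2.4.5.jar 10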
4. Summary
All components in this setup have been tested and work as expected.