Perform the following on spark1
1. Install Hadoop
$ cd /usr/local
$ tar -zxvf hadoop-2.4.1.tar.gz
$ mv hadoop-2.4.1 hadoop
$ vi ~/.bashrc
export HADOOP_HOME=/usr/local/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
$ source ~/.bashrc
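A quick sanity check that the new environment is in effect (optional):
$ echo $HADOOP_HOME
$ hadoop version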
2. Configure Hadoop
$ cd /usr/local/hadoop/etc/hadoop
$ vi core-site.xml
<property>
    <name>fs.default.name</name>
    <value>hdfs://spark1:9000</value>
</property>
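Note: fs.default.name is the old Hadoop 1.x key. It still works on 2.4.1 (with a deprecation warning); the current equivalent is:
<property>
    <name>fs.defaultFS</name>
    <value>hdfs://spark1:9000</value>
</property>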
$ mkdir /usr/local/data/
$ vi hdfs-site.xml
<property>
    <name>dfs.name.dir</name>
    <value>/usr/local/data/namenode</value>
</property>
<property>
    <name>dfs.data.dir</name>
    <value>/usr/local/data/datanode</value>
</property>
<property>
    <name>dfs.tmp.dir</name>
    <value>/usr/local/data/tmp</value>
</property>
<property>
    <name>dfs.replication</name>
    <value>3</value>
</property>
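Note: dfs.name.dir and dfs.data.dir are likewise old 1.x key names that 2.4.1 still accepts; the current names are dfs.namenode.name.dir and dfs.datanode.data.dir. dfs.replication is set to 3 to match the three DataNodes in this cluster.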
# Hadoop only reads mapred-site.xml, so rename the template first
$ mv mapred-site.xml.template mapred-site.xml
$ vi mapred-site.xml
<property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
</property>
$ vi yarn-site.xml
<property>
    <name>yarn.resourcemanager.hostname</name>
    <value>spark1</value>
</property>
<property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
</property>
$ vi slaves
spark1
spark2
spark3
$ scp -r hadoop root@spark2:/usr/local/
$ scp -r hadoop root@spark3:/usr/local/
$ scp ~/.bashrc root@spark2:~/
$ scp ~/.bashrc root@spark3:~/
On spark2 and spark3, create the data directory under /usr/local/ and source .bashrc
$ mkdir /usr/local/data
$ source ~/.bashrc
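start-dfs.sh and start-yarn.sh below launch the daemons on every node over SSH, so spark1 needs passwordless SSH to itself, spark2, and spark3. If that has not been set up in an earlier step, a minimal sketch (assuming the root account is used throughout, as in the scp commands above):
# On spark1: generate a key pair and copy the public key to every node
$ ssh-keygen -t rsa
$ ssh-copy-id root@spark1
$ ssh-copy-id root@spark2
$ ssh-copy-id root@spark3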
Run the following on spark1
# Format the NameNode
$ hdfs namenode -format
# Start the HDFS cluster
$ start-dfs.sh
# Check that the daemons are running
$ jps
spark1
1424 SecondaryNameNode
1324 DataNode
1218 NameNode
1554 Jps
spark2
1619 Jps
1555 DataNode
spark3
1473 DataNode
1537 Jps
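An optional smoke test to confirm HDFS is writable (the /test path is just an example; any small file works):
$ hdfs dfsadmin -report
$ hdfs dfs -mkdir /test
$ hdfs dfs -put ~/.bashrc /test/
$ hdfs dfs -ls /test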
# Open http://spark1:50070/ in a browser
# Start the YARN cluster
$ start-yarn.sh
$ jps
spark1
ResourceManager
NodeManager
spark2
NodeManager
spark3
NodeManager
# Open http://spark1:8088/ in a browser
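To verify YARN end to end, the bundled example job can be submitted from spark1 (the jar path assumes the standard 2.4.1 layout under /usr/local/hadoop):
$ hadoop jar /usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.4.1.jar pi 2 10
# The job should appear as FINISHED / SUCCEEDED on http://spark1:8088/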