Environment preparation
Install Java
yum install java-1.8.0-openjdk java-1.8.0-openjdk-devel
# Add this line to /etc/profile on every machine
export JAVA_HOME=/usr/lib/jvm/java-openjdk
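A quick check that the JDK is usable (a minimal sketch; the exact java-openjdk symlink path can differ between CentOS minor versions):
source /etc/profile
echo $JAVA_HOME      # expect /usr/lib/jvm/java-openjdk
java -version        # expect openjdk version "1.8.0_..."
javac -version       # confirms the -devel package is installed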
Three servers (since this is a learning environment, the three servers are not on the same internal network), configured as follows:
Public IP 119.29.186.83, private IP 10.104.157.113
Public IP 119.29.250.47, private IP 10.104.9.181
Public IP 119.29.251.99, private IP 10.104.196.48
Package downloads:
Download location: http://archive.cloudera.com/cdh5/cdh/5/
Files to download:
1.hadoop-2.6.0-cdh5.8.3.tar (the Hadoop package)
2.hive-1.1.0-cdh5.8.3.tar
3.zookeeper-3.4.5-cdh5.8.3.tar
4.scala-2.11.8.tar
5.kafka_2.11-0.10.2.0.tar
6.slf4j-1.7.9.zip
2. Edit the hosts file and configure passwordless login
On the machine 119.29.186.83:
vim /etc/hosts
10.104.157.113 master
119.29.250.47 slave
119.29.251.99 slave1
Then push it to the other nodes with scp:
scp /etc/hosts root@slave:/etc
scp /etc/hosts root@slave1:/etc
Since my servers are not on the same internal network (no change is needed when they are), log in to each slave machine and adjust the public/private IPs in its hosts file so they match that host.
The file on slave1 looks like this:
127.0.0.1 localhost localhost.localdomain VM_157_113_centos
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
119.29.186.83 master
10.104.9.181 slave1
119.29.251.99 slave
Passwordless login:
ssh-copy-id root@slave
ssh-copy-id root@slave1
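ssh-copy-id needs an existing key pair on master; if there is none yet, generate one first and then verify the login (a minimal sketch, assuming the default ~/.ssh/id_rsa location):
ssh-keygen -t rsa -N "" -f ~/.ssh/id_rsa    # run before the ssh-copy-id commands above if no key exists yet
ssh root@slave hostname                      # should return the hostname without a password prompt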
Configure the firewall (firewalld stays running; the required port range is opened on every node instead):
systemctl start firewalld
firewall-cmd --permanent --zone=public --add-port=1-63000/tcp
firewall-cmd --permanent --zone=public --add-port=1-63000/udp
firewall-cmd --reload
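Optionally confirm the rules took effect on each node (a small sketch using firewalld's own query commands):
firewall-cmd --state          # expect: running
firewall-cmd --list-ports     # expect: 1-63000/tcp 1-63000/udp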
Hadoop installation
Extract the archive locally (editing the config files on my own machine is easier than on the server):
- tar -zxvf hadoop-2.6.0-cdh5.8.3.tar.gz
- Rename the extracted directory to hadoop
- Enter the configuration directory: cd hadoop/etc/hadoop
- Edit core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <!-- default file system -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://master:9000</value>
  </property>
</configuration>
- Edit hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <!-- number of replicas -->
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>
  <property>
    <name>dfs.name.dir</name>
    <value>/usr/local/data/namenode</value>
  </property>
  <property>
    <name>dfs.data.dir</name>
    <value>/usr/local/data/datanode</value>
  </property>
  <property>
    <name>dfs.tmp.dir</name>
    <value>/usr/local/data/tmp</value>
  </property>
</configuration>
- Edit mapred-site.xml (see the note below if only the template file exists)
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>
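If only mapred-site.xml.template is present in this directory (common in Hadoop 2.x tarballs), copy it first (a minimal sketch):
cd hadoop/etc/hadoop
cp mapred-site.xml.template mapred-site.xml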
- Edit yarn-site.xml
<?xml version="1.0"?>
<configuration>
  <!-- the YARN resourcemanager host -->
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>master</value>
  </property>
  <!-- shuffle service -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
</configuration>
- Edit the slaves file (one hostname per line)
slave
slave1
- Compress the hadoop directory into hadoop.zip
- Distribute the archive to the servers and unzip it:
scp hadoop.zip root@master:/usr/local
scp hadoop.zip root@slave:/usr/local
scp hadoop.zip root@slave1:/usr/local
- Log in to each machine and unzip (master shown here; repeat on slave and slave1):
ssh root@master
cd /usr/local
unzip hadoop.zip
rm -rf hadoop.zip
- Create the /usr/local/data directory on every node:
ssh root@master mkdir /usr/local/data
ssh root@slave mkdir /usr/local/data
ssh root@slave1 mkdir /usr/local/data
- Edit the environment variables
ssh root@master
vim ~/.bashrc
export JAVA_HOME=/usr/lib/jvm/java-openjdk
export HADOOP_HOME=/usr/local/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_HOME/bin
source ~/.bashrc
scp ~/.bashrc root@slave:~/
scp ~/.bashrc root@slave1:~/
ssh root@slave source ~/.bashrc
ssh root@slave1 source ~/.bashrc
- Format the namenode
ssh root@master hdfs namenode -format
- Start the HDFS and YARN clusters
start-dfs.sh
start-yarn.sh
- Visit master:50070 (HDFS) and master:8088 (YARN) to check the web UIs.
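To confirm the daemons actually came up, jps is a quick check (a sketch; the exact process list depends on which roles run on which node):
ssh root@master jps     # expect NameNode, SecondaryNameNode, ResourceManager
ssh root@slave jps      # expect DataNode, NodeManager
ssh root@slave1 jps     # expect DataNode, NodeManager
hdfs dfsadmin -report   # should report 2 live datanodes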
Hive setup
- Upload the tar file
scp hive-1.1.0-cdh5.8.3.tar root@master:/usr/local
ssh root@master
cd /usr/local
tar -xvf hive-1.1.0-cdh5.8.3.tar
rm -rf hive-1.1.0-cdh5.8.3.tar
mv hive-1.1.0-cdh5.8.3 hive
- Edit ~/.bashrc
export JAVA_HOME=/usr/lib/jvm/java-openjdk
export HADOOP_HOME=/usr/local/hadoop
export HIVE_HOME=/usr/local/hive
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_HOME/bin:$HIVE_HOME/bin
- To install MySQL, see my earlier blog post: http://blog.csdn.net/qq_30259339/article/details/50466494
- Create the metastore database (the account Hive connects as also needs access; see the sketch below)
mysql> create database if not exists hive_metadata;
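The hive-site.xml shown later connects as admin/admin, so that account needs access to hive_metadata. A sketch for MySQL 5.x (the user name, password, and host mask are assumptions here; adjust them to your environment):
mysql> grant all privileges on hive_metadata.* to 'admin'@'%' identified by 'admin';
mysql> flush privileges;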
- Copy the MySQL connector JAR into Hive's lib directory
scp mysql-connector-java-5.1.6.jar root@master:/usr/local/hive/lib
- Edit hive-site.xml
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>hive.metastore.schema.verification</name>
    <value>false</value>
    <description>
      Enforce metastore schema version consistency.
      True: Verify that version information stored in the metastore is compatible with the one from the Hive jars. Also disable automatic schema migration attempts. Users are required to manually migrate the schema after a Hive upgrade, which ensures proper metastore schema migration. (Default)
      False: Warn if the version information stored in the metastore doesn't match the one from the Hive jars.
    </description>
  </property>
  <!-- MySQL server address -->
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://slave1:3306/hive_metadata?createDatabaseIfNotExist=true</value>
    <description>jdbc</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
    <description>driver class</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>admin</value>
    <description>jdbc</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>admin</value>
    <description>jdbc</description>
  </property>
</configuration>
scp hive-site.xml root@master:/usr/local/hive/conf
- Rename hive-env.sh.template to hive-env.sh, and edit hive-config.sh
mv hive-env.sh.template hive-env.sh
vim /usr/local/hive/bin/hive-config.sh
export HIVE_CONF_DIR=$HIVE_CONF_DIR
export HIVE_AUX_JARS_PATH=$HIVE_AUX_JARS_PATH
export HIVE_HOME=/usr/local/hive
export HADOOP_HOME=/usr/local/hadoop
export JAVA_HOME=/usr/lib/jvm/java-openjdk
Typing hive on the command line now drops you straight into the Hive shell.
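A quick smoke test that the metastore connection works (test_tbl is just a hypothetical table name):
hive> create table test_tbl (id int, name string);
hive> show tables;
hive> drop table test_tbl;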
ZooKeeper cluster setup
- Extract zookeeper-3.4.5-cdh5.8.3.tar locally
- Rename the extracted directory to zk
- Enter the conf directory and rename the sample config: mv zoo_sample.cfg zoo.cfg
tickTime=2000
initLimit=10
syncLimit=5
# change dataDir
dataDir=/usr/local/zk/data
clientPort=2181
# new entries below
server.0=master:2888:3888
server.1=slave:2888:3888
server.2=slave1:2888:3888
- Compress the zk directory and upload it to the servers
scp zk.zip root@master:/usr/local
scp zk.zip root@slave:/usr/local
scp zk.zip root@slave1:/usr/local
Then unzip it on each server.
- Create the data directory and the myid file on each server
ssh root@master
cd /usr/local/zk
mkdir data
vim data/myid    # file content: 0
ssh root@slave
cd /usr/local/zk
mkdir data
vim data/myid    # file content: 1
ssh root@slave1
cd /usr/local/zk
mkdir data
vim data/myid    # file content: 2
- Configure the environment variables
vim ~/.bashrc
export JAVA_HOME=/usr/lib/jvm/java-openjdk
export HADOOP_HOME=/usr/local/hadoop
export HIVE_HOME=/usr/local/hive
export ZOOKEEPER_HOME=/usr/local/zk
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_HOME/bin:$HIVE_HOME/bin:$ZOOKEEPER_HOME/bin
source ~/.bashrc
scp ~/.bashrc root@slave:~/
scp ~/.bashrc root@slave1:~/
- Start the ZooKeeper cluster
# run on each of the three machines
zkServer.sh start
# check the ZooKeeper status
zkServer.sh status
# jps should show a QuorumPeerMain process on each node
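One node should report Mode: leader and the other two Mode: follower. An optional connectivity check with the bundled CLI (a sketch; any node's client port works):
zkCli.sh -server master:2181
ls /       # should list at least [zookeeper]
quit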
Kafka cluster setup
- Scala installation
Download scala-2.11.8.tgz from http://www.scala-lang.org/download/2.11.8.html
scp scala-2.11.8.tgz root@master:/usr/local
scp scala-2.11.8.tgz root@slave:/usr/local
scp scala-2.11.8.tgz root@slave1:/usr/local
# Run the following on each server; master is shown here as an example
ssh root@master
tar -zxvf scala-2.11.8.tgz
mv scala-2.11.8 scala
Edit the environment variables:
export JAVA_HOME=/usr/lib/jvm/java-openjdk
export HADOOP_HOME=/usr/local/hadoop
export HIVE_HOME=/usr/local/hive
export ZOOKEEPER_HOME=/usr/local/zk
export SCALA_HOME=/usr/local/scala
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_HOME/bin:$HIVE_HOME/bin:$ZOOKEEPER_HOME/bin:$SCALA_HOME/bin
Distribute the file:
scp ~/.bashrc root@slave:~/
scp ~/.bashrc root@slave1:~/
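After sourcing the distributed ~/.bashrc on each node, a quick check (a minimal sketch):
source ~/.bashrc
scala -version     # expect: Scala code runner version 2.11.8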
- Kafka installation
First extract kafka_2.11-0.10.2.0.tar locally:
tar -xvf kafka_2.11-0.10.2.0.tar
mv kafka_2.11-0.10.2.0 kafka
Configure Kafka by editing the server.properties file in the config directory:
# point zookeeper.connect at our ZooKeeper ensemble
zookeeper.connect=master:2181,slave:2181,slave1:2181
# each broker in the cluster needs a unique broker.id; this is master's config, slave can use broker.id=1
broker.id=0
Install slf4j:
unzip slf4j-1.7.9.zip
Copy slf4j-nop-1.7.9.jar into Kafka's libs directory, then compress the kafka directory and upload it to each server:
scp kafka.zip root@master:/usr/local
scp kafka.zip root@slave:/usr/local
scp kafka.zip root@slave1:/usr/local
# unzip on each server
unzip kafka.zip
Edit broker.id in config/server.properties on the other two nodes:
# The id of the broker. This must be set to a unique integer for each broker.
# on slave:
broker.id=1
# on slave1:
broker.id=2
Fix the Kafka "Unrecognized VM option 'UseCompressedOops'" problem:
cd kafka
vim bin/kafka-run-class.sh
# remove -XX:+UseCompressedOops
Start the brokers: on each of the three machines, cd into the kafka directory and run
nohup bin/kafka-server-start.sh config/server.properties &
cat nohup.out to check whether the broker started successfully.
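Once all three brokers are up, an end-to-end check with the bundled scripts (a sketch; the topic name test and the default broker port 9092 are assumptions of this write-up):
bin/kafka-topics.sh --create --zookeeper master:2181,slave:2181,slave1:2181 --replication-factor 3 --partitions 1 --topic test
bin/kafka-topics.sh --describe --zookeeper master:2181 --topic test
bin/kafka-console-producer.sh --broker-list master:9092,slave:9092,slave1:9092 --topic test
# in another terminal
bin/kafka-console-consumer.sh --bootstrap-server master:9092 --topic test --from-beginning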