hadoop集群搭建教程


1. 相關軟件准備:

VMware-workstation-full-15.0.4-12990004.exe  

CentOS-7-x86_64-DVD-1810.iso

jdk-8u231-linux-x64.tar.gz  賬號:郵箱,密碼首字母大寫

hadoop-3.2.1.tar.gz

apache-zookeeper-3.5.6-bin.tar.gz

apache-hive-3.1.2-bin.tar.gz    其他

pyspark-2.4.4.tar.gz

xshell+xftp(建議從官網申請免費的家庭/學校版授權,不要使用破解版)

2. VMware虛擬機及linux系統安裝及網絡環境配置,參見之前的博客

3. hadoop安裝及配置教程

Hadoop3.2.1版本的環境搭建

(1)查看、打開、關閉防火牆

# CentOS 7 no longer ships the /etc/sysconfig/iptables file by default,
# so install the iptables-services package first.
cd /etc/sysconfig
ls -l
yum install iptables-services
systemctl enable iptables

# Disable the iptables service so the cluster ports stay reachable:
  systemctl disable iptables.service

(2)權限不足 :  sudo chmod -R 777 /home/hadoop/apps/hadoop-3.2.1/bin/yarn

 

# Configure JAVA_HOME / HADOOP_HOME system-wide (or per-user via ~/.bashrc).
vim /etc/profile    # alternatively: vim ~/.bashrc
export JAVA_HOME="/opt/modules/jdk1.8.0_271"
export PATH=$JAVA_HOME/bin:$PATH
# Must match the version actually installed in this tutorial (hadoop-3.2.1.tar.gz);
# the original pointed at hadoop-2.10.1, which does not exist here.
export HADOOP_HOME="/opt/modules/hadoop-3.2.1"
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
source /etc/profile

# -p creates the missing parent directory (hdfs/) and is a no-op if it exists;
# plain 'mkdir hdfs/data' would fail because hdfs/ is not created yet.
mkdir -p "$HADOOP_HOME/tmp"
mkdir -p "$HADOOP_HOME/hdfs/data"
mkdir -p "$HADOOP_HOME/hdfs/name"

#etc/hadoop/core-site.xml:
<configuration>
    <!-- NameNode RPC endpoint / default filesystem URI. -->
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://localhost:9000</value>
    </property>
    <!-- Hadoop does NOT expand shell variables inside XML values: the original
         $HADOOP_HOME would have been taken literally. Use the absolute path. -->
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/opt/modules/hadoop-3.2.1/tmp</value>
    </property>
</configuration>


#etc/hadoop/hdfs-site.xml:
<configuration>
    <!-- Single-node setup: one replica per block. -->
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
    <!-- Hadoop does NOT expand shell variables inside XML values, so the
         original $HADOOP_HOME would have been taken literally; use absolute
         paths. Also use the Hadoop 3 property names — dfs.name.dir and
         dfs.data.dir are the deprecated Hadoop 1 aliases. -->
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>/opt/modules/hadoop-3.2.1/hdfs/name</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>/opt/modules/hadoop-3.2.1/hdfs/data</value>
    </property>
</configuration>


#etc/hadoop/yarn-site.xml:
<configuration>
    <!-- Auxiliary shuffle service required for running MapReduce jobs on YARN. -->
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
</configuration>


#vim $HADOOP_HOME/etc/hadoop/hadoop-env.sh -- append the lines below
export JAVA_HOME=/opt/modules/jdk1.8.0_271  # set explicitly inside hadoop-env.sh
# Allow the HDFS/YARN daemons to be started as root.
# NOTE(review): running Hadoop daemons as root is acceptable for a throwaway
# test VM but discouraged anywhere else — confirm this fits your environment.
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root


# Start-up: format the NameNode on the FIRST start only
# (re-formatting wipes the HDFS metadata).
bin/hdfs namenode -format
# Disable the firewall so the cluster ports are reachable
# (see https://blog.csdn.net/u011170921/article/details/80437937)
#./sbin/stop-all.sh
#service iptables stop
#chkconfig iptables off
systemctl disable iptables.service
service iptables status

# Stop NetworkManager so it does not interfere with the static network config
service  NetworkManager stop
chkconfig NetworkManager off

#./sbin/start-all.sh
sbin/start-dfs.sh
sbin/start-yarn.sh

# Check whether port 9000 is open and remotely reachable. It is normally opened
# when the cluster starts; if bound to localhost it only accepts local connections.
netstat -tlpn   #netstat -ap | grep 9000
# Test remote reachability with: telnet <ip> <port>
telnet 192.168.100.10 9000
# If port 9000 is not listening, see:
# https://www.cnblogs.com/woofwoof/p/10261751.html


# Reference guides:
# http://dblab.xmu.edu.cn/blog/2440-2/
# http://dblab.xmu.edu.cn/blog/install-mysql/
# Install the Hive metastore database — MySQL is used as the backing store
-----------------------------------------------------
# Before installing a recent MySQL, remove the bundled mariadb-libs package
#rpm -qa|grep mariadb  #mariadb-libs-5.5.60-1.el7_5.x86_64
#rpm -e --nodeps mariadb-libs-5.5.60-1.el7_5.x86_64
# (the MySQL install replaces that service automatically)
#sudo tar -zxvf ./apache-hive-3.1.2-bin.tar.gz -C /usr/local   # extract into /usr/local
#cd /usr/local/
#sudo mv apache-hive-3.1.2-bin hive       # rename the directory to 'hive'
#sudo chown -R dblab:dblab hive            # fix ownership
wget https://mirrors.tuna.tsinghua.edu.cn/mysql/yum/mysql80-community-el7/mysql80-community-release-el7-3.noarch.rpm
rpm -ivh mysql80-community-release-el7-3.noarch.rpm
yum update mysql80-community-release-el7-3.noarch.rpm
yum install -y mysql-server
# Start the service
service mysqld start
netstat -tap | grep mysql  # a mysql entry in LISTEN state means startup succeeded
systemctl status mysqld.service
mysqladmin --version
grep 'temporary password' /var/log/mysqld.log    # show the generated root password (example output: &aF%C#+4NMo/)
# Change the root password — temporarily bypass authentication via skip-grant-tables:
vim /etc/my.cnf
[mysqld]
skip-grant-tables
basedir=/var/lib/mysql
datadir=/var/lib/mysql/data
socket=/var/lib/mysql/mysql.sock
character-set-server=utf8
log-error=/var/log/mysqld.log
pid-file=/var/run/mysqld/mysqld.pid
# Restart
systemctl restart mysqld   
show databases;
use mysql;
select user,host from user;
update user set host="%" where user="root";
#update user set authentication_string='' where user='root';   # if the direct update fails, blank the stored hash first
# NOTE(review): in MySQL 8 authentication_string holds a password HASH; writing a
# plaintext value here produces an unusable credential. The commented ALTER USER
# form below is the reliable way to set the password.
update mysql.user set authentication_string='l123456' where user='root';
#ALTER USER 'root'@'localhost' IDENTIFIED WITH mysql_native_password BY 'l123456';
flush privileges;
quit;
# Comment out skip-grant-tables again:
vim /etc/my.cnf
#skip-grant-tables
#systemctl restart mysqld
service mysqld restart

# Forgot the root password? Restart mysqld with authentication disabled.
# (The original used '--annotation' suffixes, which the shell would pass to the
# commands as arguments — e.g. grep would treat '--查看狀態' as an option.)
ps -ef | grep -i mysql    # check whether mysqld is running
service mysqld stop       # stop the server
vim /etc/my.cnf           # add 'skip-grant-tables' under [mysqld]
service mysqld start

mysql -u root -p


# Create the hive metastore database and the hive users
select host, user, plugin from mysql.user;
CREATE DATABASE hive;
USE hive;
CREATE USER 'hive'@'localhost' IDENTIFIED BY 'hive';
CREATE USER 'hive'@'%' IDENTIFIED BY 'hive';
CREATE USER 'root'@'%' IDENTIFIED BY 'iUpoint@123';
# MySQL 8 no longer accepts IDENTIFIED BY inside GRANT:
#GRANT ALL ON hive.* TO 'hive'@'localhost' IDENTIFIED BY 'hive';  # errors on 8.0.19
#GRANT ALL privileges ON hive.* TO 'hive'@'%' IDENTIFIED BY 'hive';
# NOTE(review): ALL on *.* is broader than the metastore needs; ALL ON hive.* suffices.
GRANT ALL ON *.* TO 'hive'@'localhost';
GRANT ALL ON *.* TO 'hive'@'%';
FLUSH PRIVILEGES;


# Install Hive — use the SAME version that is downloaded (3.1.2); the original
# mixed the 3.1.2 download with 2.3.7 extraction/paths.
# -----------------------------------------------------
wget https://mirrors.tuna.tsinghua.edu.cn/apache/hive/hive-3.1.2/apache-hive-3.1.2-bin.tar.gz
tar -xzvf apache-hive-3.1.2-bin.tar.gz
#ln -s /opt/modules/apache-hive-3.1.2-bin hive  # a symlink also works
mv apache-hive-3.1.2-bin hive

vim /etc/profile
export HIVE_HOME="/opt/modules/hive"
# Must be PATH (upper case); the original 'export path=' defined an unrelated
# variable, so the hive binaries were never on the search path.
export PATH=$HIVE_HOME/bin:$PATH
source /etc/profile

cd hive/conf
cp hive-default.xml.template hive-site.xml
# In hive-site.xml replace the relative ${system:java.io.tmpdir}/${system:user.name}
# with an absolute path such as /home/<user>/hive/tmp/hive.
# (Dropped the original 'mv hive-default.xml hive-default.xml.template' — no
# hive-default.xml exists at this point, only the .template shipped with Hive.)

# MySQL JDBC driver
wget https://mirrors.tuna.tsinghua.edu.cn/mysql/downloads/Connector-J/mysql-connector-java-8.0.22.tar.gz
tar -zxvf mysql-connector-java-8.0.22.tar.gz
# Copy into $HIVE_HOME/lib — the tree was renamed to /opt/modules/hive above.
cp mysql-connector-java-8.0.22/mysql-connector-java-8.0.22.jar /opt/modules/hive/lib
#rmdir mysql-connector-java-8.0.22   # rmdir only removes EMPTY directories
rm -rf mysql-connector-java-8.0.22/  # remove the extracted tree recursively

# hive-site.xml configuration
# Reference: https://www.jianshu.com/p/02ec73752e1c
#grep -n 'datanucleus.autoStartMechanism' conf/hive-site.xml
#grep -n '<name>.*dir</name>' conf/hive-site.xml
#grep -n '<name>.*Connection.*</name>' conf/hive-site.xml
#vim +529 conf/hive-site.xml   # opens at line 529; in vim, ':<n>' jumps to line n
# Add the following properties (they resolve the ${system:*} placeholders used
# elsewhere in hive-site.xml):
  <property>
    <name>system:java.io.tmpdir</name>
    <value>/opt/modules/hive/tmp/</value>
  </property>
  <property>
    <name>system:user.name</name>
    <value>hive</value>
  </property>
# Modify the following existing properties:
<property>
    <name>javax.jdo.option.ConnectionURL</name>
	<!-- optional extras: createDatabaseIfNotExist=true;characterEncoding=UTF-8; -->
    <value>jdbc:mysql://localhost:3306/hive?useSSL=false</value>
    <description>
      JDBC connect string for a JDBC metastore.
      To use SSL to encrypt/authenticate the connection, provide database-specific SSL flag in the connection URL.
      For example, jdbc:postgresql://myhost/db?ssl=true for postgres database.
    </description>
</property>
 
 <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <!-- <value>com.mysql.jdbc.Driver</value> -->
	<value>com.mysql.cj.jdbc.Driver</value>
    <description>Driver class name for a JDBC metastore</description>
  </property>
 
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>hive</value>
    <description>Username to use against metastore database</description>
  </property>
 
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>hive</value>
    <description>password to use against metastore database</description>
  </property>
 
  <property>
    <name>hive.metastore.warehouse.dir</name>
    <value>/user/hive/warehouse</value>
    <description>location of default database for the warehouse</description>
  </property>

<property>
   <name>datanucleus.autoStartMechanism</name>
   <value>SchemaTable</value>
</property>

<!-- when true, the metastore schema version is required to match the Hive jars -->
<property>
   <name>hive.metastore.schema.verification</name>
   <value>false</value>
   <description>  
    Enforce metastore schema version consistency.  
    True: Verify that version information stored in metastore matches with one from Hive jars.  Also disable automatic schema migration attempt. Users are required to manully migrate schema after Hive upgrade which ensures proper metastore schema migration. (Default)  
    False: Warn if the version information stored in metastore doesn't match with one from in Hive jars.  
    </description>  
</property>
<property>
    <name>datanucleus.schema.autoCreateAll</name>
    <value>true</value>
 </property>



# Configure hive-env.sh
cd $HIVE_HOME/conf
cp hive-env.sh.template hive-env.sh
vim hive-env.sh

# Adjust the paths below to your own installation.
# Hadoop installation directory — must match the version installed in this
# tutorial (3.2.1); the original pointed at a non-existent hadoop-3.1.4.
HADOOP_HOME=/opt/modules/hadoop-3.2.1

# Hive configuration directory
export HIVE_CONF_DIR=/opt/modules/hive/conf

# Hive auxiliary jars directory
export HIVE_AUX_JARS_PATH=/opt/modules/hive/lib



# Error: "Could not create ServerSocket on address 0.0.0.0/0.0.0.0:9083"
# — a leftover RunJar (metastore) process still holds the port.
# Use jps to find the RunJar PID, then kill it:
jps
kill -9 進程號
# NOTE(review): prefer a plain 'kill <pid>' (SIGTERM) first; -9 skips cleanup.

# The guava jar version under $HIVE_HOME/lib must match the one in
# $HADOOP_HOME/share/hadoop/common/lib before schema initialisation.
schematool -initSchema -dbType mysql
hive --service metastore &

# Hadoop smoke test: run the bundled wordcount example.
# Create the data directory FIRST (the original ran ls/chmod on /opt/data
# before the mkdir that creates it).
mkdir -p /opt/data
cd /opt/data
# Create a small input file non-interactively (the original used touch + vim,
# and then referenced a different path/name than the file it created).
cat > /opt/data/test.txt <<'EOF'
Hello World
Hello Hadoop
EOF

# Inspect / open up permissions; without -R only the directory itself changes.
ls -l /opt/data
chmod -R 777 /opt/data

cd $HADOOP_HOME/share/hadoop/mapreduce
# Jar version must match the installed Hadoop (3.2.1, not 2.10.1), and the
# input path must match the file created above.
# NOTE(review): with fs.defaultFS=hdfs://localhost:9000 these are HDFS paths —
# the input may first need 'hdfs dfs -put'; confirm for your setup.
hadoop jar hadoop-mapreduce-examples-3.2.1.jar wordcount /opt/data/test.txt /opt/data/output

cd /opt/data/output
cat part-r-00000
# Expected output:
#   Hadoop  1
#   Hello   2
#   World   1

  

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM