大數據環境安裝部署步驟


 

系統環境搭建:

 
         

# How to fix networking on a virtual machine cloned from a template
# 1. Remove the stale MAC address line (the HWADDR entry, e.g. the one below) from the NIC config
vi /etc/sysconfig/network-scripts/ifcfg-eth0
#HWADDR="00:0C:29:64:6A:F9"
# 2. Delete the udev rule file that binds the NIC name to the old MAC address
rm -rf /etc/udev/rules.d/70-persistent-net.rules
# 3. Reboot so the NIC is re-detected with the new MAC
reboot


系統環境初始配置:
1.配置IP
    vi /etc/sysconfig/network-scripts/ifcfg-eth0
    DEVICE="eth0"
    BOOTPROTO="none"
    HWADDR="00:0C:29:54:E0:EF"
    IPV6INIT="yes"
    NM_CONTROLLED="yes"
    ONBOOT="yes"
    TYPE="Ethernet"
    IPADDR=192.168.0.18
    NETMASK=255.255.255.0
    GATEWAY=192.168.0.1
    UUID="3a66fd1d-d033-411b-908d-7386421a281b"
2.配置hosts
    vi /etc/hosts
3.命名節點名稱
    vi /etc/sysconfig/network
4.禁用防火牆
    service iptables stop
    chkconfig iptables off
5.配置root用戶ssh免密登錄
    su - root
    cd ~
    #對每個節點分別產生公鑰和私鑰:
    cd ~/.ssh
    ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa
    #將公鑰文件復制保存為authorized_keys
    cat id_dsa.pub >> authorized_keys
6.修改vim
    cd ~
    vi .bashrc
    #添加如下內容:
    alias vi='vim'
    #執行命令
    source .bashrc

環境變量配置【預先配置好】

6.先把所有環境變量配好
    cd /usr/local/
    # WARNING: this wipes EVERYTHING under /usr/local — only run on a freshly
    # installed machine before any software has been placed there
    rm -rf *
    vi /etc/profile
# Append the following to /etc/profile, then run `source /etc/profile`:
# Java environment variables
export JAVA_HOME=/usr/local/jdk
export JAVA_BIN=$JAVA_HOME/bin
export JAVA_LIB=$JAVA_HOME/lib
export CLASSPATH=.:$JAVA_LIB/tools.jar:$JAVA_LIB/dt.jar

# Python environment variables
export PYTHON_HOME=/usr/local/python2.7

# Hadoop environment variables.
# NOTE: the original file exported HADOOP_COMMON_LIB_NATIVE_DIR and HADOOP_OPTS
# twice with identical values; the duplicates have been removed.
export HADOOP_HOME=/usr/local/hadoop
# Directory of the native (64-bit) libraries, and a JVM flag so Hadoop can load them
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib"

# Hive environment variables
export HIVE_HOME=/usr/local/hive

# ZooKeeper environment variables
export ZOOKEEPER_HOME=/usr/local/zookeeper

# HBase environment variables
export HBASE_HOME=/usr/local/hbase

# Old PATH kept for reference; superseded by the line below, which also adds Python
#export PATH=.:$HBASE_HOME/bin:$ZOOKEEPER_HOME/bin:$HIVE_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_BIN:$PATH
export PATH=.:$HBASE_HOME/bin:$ZOOKEEPER_HOME/bin:$HIVE_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_BIN:$PYTHON_HOME/bin:$PATH


# Set the system time zone
TZ='Asia/Shanghai'
export TZ

    source /etc/profile
    

Python安裝升級:

cd /software
    cp Python-2.7.13.tgz /usr/local/
    cp get-pip.py /usr/local/
    cd /usr/local/
    tar -zxvf Python-2.7.13.tgz
    cd /usr/local/Python-2.7.13
    mkdir /usr/local/python2.7
    ./configure --prefix=/usr/local/python2.7
    make && make install
    mv /usr/bin/python /usr/bin/python_old
    ln -s /usr/local/python2.7/bin/python2.7 /usr/bin/python
    #驗證
    python -V
    cd /usr/bin/
    vi yum 
    #修改第一行為:
    #!/usr/bin/python_old
    
    cd /software
    cp pip-9.0.1.tar.gz /usr/local
    cd /usr/local 
    tar -zxvf pip-9.0.1.tar.gz
yum install gcc libffi-devel python-devel openssl-devel -y
yum install -y openssl-devel ncurses-devel sqlite-devel zlib-devel bzip2-devel python-devel
cd /usr/local
wget http://pypi.python.org/packages/source/s/setuptools/setuptools-2.0.tar.gz --no-check-certificate
tar zxvf setuptools-2.0.tar.gz
cd setuptools-2.0
python setup.py build
python setup.py install
python get-pip.py
#常用包說明:
# numpy:數據處理; matplotlib:數據可視化; pandas:數據分析;
# pyserial:串口通信(注:利用pip安裝的pyserial是版本3的,而在WinXP上只能使用pyserial2版本);
# cx_freeze:將py打包成exe文件;
pip install pyquery
pip install beautifulsoup
pip install numpy
pip install matplotlib
pip install pandas
pip install cx_freeze
pip install pyserial
pip install nltk
pip install mlpy
pip install Pygame
pip install sh
pip install peewee
pip install Gooey
pip install pillow
pip install xlrd
pip install lxml
pip install configparser
pip install uuid
pip install msgpack-python
pip install psutil
pip install MySQL-python
pip install pymongo
pip install cx_Oracle
pip install arrow
pip install when.py
pip install PIL
pip install virtualenv

jdk安裝

#-------------------------jdk安裝-----------------------------
1.將jdk安裝包jdk-8u51-linux-x64.gz上傳至/usr/local/目錄下
    cd /software
    cp jdk-8u51-linux-x64.gz /usr/local
    cd /usr/local
2.解壓jdk安裝包
    tar  -zxvf  jdk-8u51-linux-x64.gz
    #重命名安裝包
    mv jdk1.8.0_51 jdk
3.配置環境變量[前面已經配置好]
    vi /etc/profile
    export JAVA_HOME=/usr/local/jdk
    export JAVA_BIN=$JAVA_HOME/bin
    export JAVA_LIB=$JAVA_HOME/lib
    export CLASSPATH=.:$JAVA_LIB/tools.jar:$JAVA_LIB/dt.jar
    #最后將所有的路徑加到
    export PATH=.:$JAVA_BIN:$PATH
    #使環境變量生效
    source /etc/profile
4.驗證jdk安裝是否成功
    java -version
    

mysql安裝

#-----------------------安裝mysql------------------------
1.上傳mysql安裝包到/usr/local目錄下
    cd /software
    cp MySQL-server-5.6.22-1.el6.x86_64.rpm MySQL-client-5.6.22-1.el6.x86_64.rpm /usr/local/
    cd /usr/local
2.卸載依賴包
    #查找安裝了的mysql
    rpm -qa | grep mysql
    #如果有,則執行命令卸載
    rpm -e  mysql-libs-5.1.71-1.el6.x86_64  --nodeps
2.安裝mysql
    rpm -ivh MySQL-client-5.6.22-1.el6.x86_64.rpm  --nodeps
    rpm -ivh MySQL-server-5.6.22-1.el6.x86_64.rpm  --nodeps
3.啟動mysql服務
    service  mysql start
4.查看root賬號密碼並登陸
    cat /root/.mysql_secret   #CQslM7ZtrjTbwiFv
    #登錄mysql
    mysql -uroot -p密碼
    #設置密碼
    mysql> SET PASSWORD = PASSWORD('root');
    #測試新密碼登錄
    mysql -uroot -proot 
5.設置允許遠程登錄
    GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' IDENTIFIED BY 'root' WITH GRANT OPTION;
    GRANT ALL PRIVILEGES ON *.* TO 'root'@'hadoop' IDENTIFIED BY 'root' WITH GRANT OPTION;
    flush privileges;
    exit;
6.設置開機自動啟動
    chkconfig mysql on

安裝hadoop

#------------------------hadoop安裝--------------------------
1.上傳hadoop安裝包到/usr/local目錄下
    cd /software
    cp hadoop-2.6.0.tar.gz hadoop-native-64-2.6.0.tar /usr/local
    cd /usr/local
2.解壓hadoop安裝包
    tar -xzvf  hadoop-2.6.0.tar.gz
    #重命名hadoop
    mv hadoop-2.6.0 hadoop 
3.設置hadoop環境變量
    vi /etc/profile
    export HADOOP_HOME=/usr/local/hadoop
    #修改:
    export PATH=.:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_BIN:$PATH 
4.配置hadoop的參數
#4.1 修改hadoop-env.sh文件
    #添加java_home的環境變量
    cd /usr/local/hadoop/etc/hadoop
    vi hadoop-env.sh
    JAVA_HOME=/usr/local/jdk
#4.2 配置core-site.xml
    cd /usr/local/hadoop/etc/hadoop
    vi core-site.xml
<configuration>
    <property>
        <name>fs.default.name</name>
        <value>hdfs://hadoop:9000</value>
        <description>change your own hostname</description>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/usr/local/hadoop/tmp</value>
    </property>  
</configuration>
  
#4.3 配置hdfs-site.xml  
  <configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
    <property>
        <name>dfs.permissions</name>
        <value>false</value>
    </property>
  </configuration>
  
#4.4.配置mapred-site.xml
  <configuration>
    <property>
        <name>mapred.job.tracker</name>
        <value>hadoop:9001</value>
        <description>change your own hostname</description>
    </property>
</configuration>

9.64位系統錯誤問題處理
##安裝Hadoop啟動之后總有警告:
##Unable to load native-hadoop library for your platform... using builtin-Javaclasses where applicable
##這是因為在64位的linux系統中運行hadoop不兼容。
##這時候將准備好的64位的lib包解壓到已經安裝好的hadoop的lib目錄下
#注意:是lib目錄,而不是lib下的native目錄
##執行如下命令:
#tar -x hadoop-native-64-2.4.0.tar -C  hadoop/lib/
    cd /usr/local
    cp hadoop-native-64-2.6.0.tar hadoop/lib
    cd /usr/local/hadoop/lib
    tar -xvf  hadoop-native-64-2.6.0.tar
#然后在環境變量中添加如下內容:
    vi /etc/profile
    export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
    export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib"
#最后使環境變量生效
    source  /etc/profile

5.對hadoop進行格式化
    hadoop namenode -format
6.啟動hadoop
    start-all.sh
7.驗證hadoop是否安裝成功:
 輸入命令:jps 
 #發現有五個java進程:
    DataNode
    NameNode
    SecondaryNameNode
    JobTracker
    TaskTracker
 #通過瀏覽器查看:
    HDFS:
        hadoop:50070 
    MapReduce:
        hadoop:50030 
 
8.修改windows下的文件,即可在本地電腦查看:
    C:\Windows\System32\drivers\etc\hosts

10.錯誤處理辦法
    如果在windows中頁面不能成功,有可能
    NameNode進程啟動沒有成功?
    1.沒有格式化
    2.配置文件
    3.hostname沒有與ip綁定
    4.SSH的免密碼登錄沒有配置成功
    #多次格式化也是錯誤的
    方法:刪除/usr/local/hadoop/tmp文件夾,重新格式化

11.執行hdfs dfs 命令出現如下警告
  WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable

   修改etc/hadoop/log4j.properties文件,在末尾添加:

    log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR

 

 

 

hive安裝

1.上傳hive安裝包到/usr/local目錄下
    cd /usr/local
2.解壓hive安裝包
    tar -zxvf hive-0.9.0.tar.gz
    mv hive-0.9.0 hive
3.配置hive環境變量
    vi /etc/profile
    export HIVE_HOME=/usr/local/hive
    export PATH=.:$HIVE_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_BIN:$PATH 
    source /etc/profile
4.上傳驅動到/usr/local目錄下並添加驅動到hive的lib目錄下
    cd /usr/local
    cp mysql-connector-java-5.1.39-bin.jar  /usr/local/hive/lib/
5.hive安裝參數配置
#修改hive-env.sh文件,添加hadoop的環境變量
    cd /usr/local/hive/conf
    cp hive-env.sh.template  hive-env.sh
    vi hive-env.sh
    export HADOOP_HOME=/usr/local/hadoop
#修改hive-log4j.properties文件
    cd /usr/local/hive/conf
    cp hive-log4j.properties.template hive-log4j.properties
    vi hive-log4j.properties
#log4j.appender.EventCounter=org.apache.hadoop.metrics.jvm.EventCounter
    log4j.appender.EventCounter=org.apache.hadoop.log.metrics.EventCounter

#修改hive-site.xml文件
    cd /usr/local/hive/conf
    cp hive-default.xml.template hive-site.xml
    vi hive-site.xml
#添加如下內容:
<property>
  <name>javax.jdo.option.ConnectionURL</name>
  <value>jdbc:mysql://hadoop:3306/hive?createDatabaseIfNotExist=true&amp;characterEncoding=UTF-8</value>
  <description>JDBC connect string for a JDBC metastore</description>
</property>
<property>
  <name>javax.jdo.option.ConnectionDriverName</name>
  <value>com.mysql.jdbc.Driver</value>
  <description>Driver class name for a JDBC metastore</description>
</property>
<property>
  <name>javax.jdo.option.ConnectionUserName</name>
  <value>root</value>
  <description>username to use against metastore database</description>
</property>
<property>
  <name>javax.jdo.option.ConnectionPassword</name>
  <value>root</value>
  <description>password to use against metastore database</description>
</property>
6.驗證hive安裝是否成功
 hive
 show databases;
 mysql -uroot -proot 
 show databases;
 use hive;
 show tables;
7.上傳hive-builtins-0.9.0.jar包到hdfs的/usr/local/hive/lib/目錄下
 cd /usr/local/hive/lib
 hdfs dfs -mkdir -p  /usr/local/hive/lib
 hdfs dfs -put hive-builtins-0.9.0.jar /usr/local/hive/lib

schematool -dbType mysql -initSchema

DB2安裝

#--------------------------DB2安裝-----------------------------------#
1.配置實例用戶
    mkdir -p /db2home
    groupadd -g 600 edwgadm
    groupadd -g 601 edwgfenc
    groupadd -g 602 edwgdas
    useradd -u 600 -g 600 -d /home/edwinst edwinst
    useradd -u 601 -g 601 -d /db2home/edwfenc edwfenc
    useradd -u 602 -g 602 -d /db2home/edwdas edwdas
    passwd  edwinst
    passwd  edwfenc
    passwd  edwdas
2.創建目錄
    mkdir -p /db2home
    mkdir -p /edwpath
    mkdir -p /edwpath/edwinst/NODE0000
    mkdir -p /edwp0
    mkdir -p /edwpath/edwinst/NODE0001
    mkdir -p /edwp1
    mkdir -p /edwpath/edwinst/NODE0002
    mkdir -p /edwp2
    mkdir -p /edwpath/edwinst/NODE0003
    mkdir -p /edwp3
    
    chown -R edwinst:edwgadm /edwpath
    chown -R edwinst:edwgadm /edwpath/edwinst/NODE0000
    chown -R edwinst:edwgadm /edwp0
    chown -R edwinst:edwgadm /edwpath/edwinst/NODE0001
    chown -R edwinst:edwgadm /edwp1
    chown -R edwinst:edwgadm /edwpath/edwinst/NODE0002
    chown -R edwinst:edwgadm /edwp2
    chown -R edwinst:edwgadm /edwpath/edwinst/NODE0003
    chown -R edwinst:edwgadm /edwp3
3.開啟相關服務
    #yum install rpcbind nfs-utils
    #yum install xinetd
    service nfs restart
    service sshd restart
    #service portmap restart
    service rpcbind restart
    service xinetd restart
    chkconfig --level 2345 nfs on
    chkconfig --level 2345 nfslock on
    chkconfig --level 2345 sshd on
    #chkconfig --level 2345 portmap on
    chkconfig --level 2345 rpcbind on
    chkconfig --level 2345 xinetd on
創建實例:
    cd /opt/ibm/db2/V9.7/instance
     ./db2icrt -s ese -u edwfenc edwinst
1.關閉內存地址隨機化機制
    vi /etc/sysctl.conf
    增加 kernel.randomize_va_space=0
    sysctl -p
2.上傳db2安裝包並解壓安裝
    cd /software
    cp v9.7fp9_linuxx64_server.tar.gz /usr/local
    cd /usr/local
    tar -xvf v9.7fp9_linuxx64_server.tar.gz
    cd ./server
    ./db2_install #開始安裝db2,選擇ESE企業版安裝
    #安裝ksh
    cd /software 
    rpm -ivh ksh-20120801-33.el6.x86_64.rpm
    su - edwinst 
    cd ~
    #對每個節點分別產生公鑰和私鑰:
    cd  ~/.ssh
    ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa
    #將公鑰文件復制保存為authorized_keys
    cat id_dsa.pub >> authorized_keys
4.修改配置文件,添加端口號
    vi /etc/services
    db2c_edwinst    60000/tcp
    DB2_edwinst     60001/tcp
    DB2_edwinst_1   60002/tcp
    DB2_edwinst_2   60003/tcp
    DB2_edwinst_3   60004/tcp
    DB2_edwinst_4   60005/tcp
    DB2_edwinst_END 60006/tcp
5.新建實例
    cd /opt/ibm/db2/V9.7/instance
    ./db2icrt -s ese -u edwfenc edwinst
    su - edwinst
    vi /home/edwinst/sqllib/db2nodes.cfg(更新為以下內容)
    0 hadoop 0
    1 hadoop 1
    2 hadoop 2
    3 hadoop 3
6.設置管理參數:
    db2set DB2RSHCMD=/usr/bin/ssh
    db2set DB2CODEPAGE=1386
    db2set DB2_EXTENDED_OPTIMIZATION=ON
    db2set DB2_ANTIJOIN=YES
    db2set DB2COMM=TCPIP
    db2set DB2_PARALLEL_IO=*
    
    db2 update dbm cfg using SVCENAME db2c_edwinst
    db2start
7.創建EDW數據庫
    db2 "CREATE DATABASE EDW AUTOMATIC STORAGE NO ON /edwpath USING CODESET GBK TERRITORY CN RESTRICTIVE"

    db2 connect to edw

    db2 "CREATE DATABASE PARTITION GROUP PDPG ON DBPARTITIONNUMS (0 to 3)"

    db2 "CREATE DATABASE PARTITION GROUP SDPG ON DBPARTITIONNUMS (0)"

    db2 "ALTER BUFFERPOOL IBMDEFAULTBP SIZE 20"

    db2 "CREATE BUFFERPOOL BP32K ALL DBPARTITIONNUMS SIZE 20 PAGESIZE 32K"

    db2 update db cfg using LOGFILSIZ  131072 LOGPRIMARY 30 LOGSECOND 5
    db2 update dbm cfg using FEDERATED YES

    db2 force application all
    db2stop
    db2start
    

9.檢查並建立 BUFFERPOOL
  #db2 ALTER BUFFERPOOL IBMDEFAULTBP SIZE 250;
  db2 ALTER BUFFERPOOL IBMDEFAULTBP SIZE 20;
  #db2 CREATE BUFFERPOOL BP32K SIZE 16384 PAGESIZE 32768;
  #db2 CREATE BUFFERPOOL BP32K SIZE 50 PAGESIZE 32768;
  #db2 CONNECT RESET;
  db2 -x "select BPNAME,NPAGES,PAGESIZE from syscat.bufferpools with ur"
  
10.設置當前會話模式
    db2 set schema dainst
11.db2執行sql腳本
 在命令行中執行建表語句
 db2 -svtf crt_dwmm_etl_table.ddl

調度平台安裝【另外一台機器】

#--------------------------調度平台安裝【另外一台機器】-----------------------------------#

# 安裝ncompress包
   yum -y install ncompress
# 上傳版本程序包 etldata.tar.gz
# apache-tomcat-7.0.73.tar.gz job-schd-engine-0.1Silver.tar.gz JobSchd.war 至當前目錄
    tar xf apache-tomcat-7.0.73.tar.gz -C ./etldata/script

    tar xf job-schd-engine-0.1Silver.tar.gz -C ./etldata/script

    cp JobSchd.war ./etldata/script/apache-tomcat-7.0.73/webapps
   
    cd /etl/etldata/script/apache-tomcat-7.0.73/webapps
    unzip JobSchd.war -d JobSchd
    cd /etl/etldata/script/apache-tomcat-7.0.73/webapps/JobSchd/WEB-INF/classes
    vi jdbc.properties
    
   /etl/etldata/script/apache-tomcat-7.0.73/webapps
 
    cd /etl/etldata/script/apache-tomcat-7.0.73/bin
    ./startup.sh
    瀏覽器訪問:http://192.168.0.18:8080/JobSchd/logon.jsp
    用戶:admin
    密碼:12345678
    
創建聯邦:
    db2 catalog tcpip node EDW remote hadoop server 60000
    db2 catalog db EDW as EDW at node EDW
    db2 terminate
    db2 connect to EDW user edwinst using edwinst
    db2 create wrapper drda
    db2 "create server EDW1SVR type DB2/UDB version 9.7 wrapper "drda" authorization \"edwinst\" password \"edwinst\" options(NODE 'EDW',DBNAME 'EDW')"
    db2 "create user mapping for \"dainst\" server EDW1SVR options(remote_authid 'edwinst',remote_password 'edwinst')"

 導入新增數據:
 import from '/home/isinst/tmp/1.csv' of del insert into XMETA.SRC_STU_INFO;
 
 導入替換數據:
 import from '/home/isinst/tmp/1.csv' of del replace into XMETA.SRC_STU_INFO;
 
DB2建表語句【例子】:
create table XMETA.SRC_STU_INFO (
    SRC_SYS_ID         INTEGER  NOT NULL    ,
    NAME             VARCHAR(20)  ,
    SCORE             INTEGER     ,
    SEX           VARCHAR(10)    NOT NULL   ); 
    
ALTER TABLE EXT_${tab_name} SET LOCATION 'hdfs://nameservice1/etldata/input/${tab_name}/${today}'

 

Hbase集群安裝

上傳壓縮包,解壓,重命名

配置/usr/local/hbase/conf/hbase-env.sh

如果集群模式就設置為:true

 

配置hbase-site.xml

<configuration>
  <!-- HDFS location where HBase stores its data; host/port must match the NameNode -->
  <property>
           <name>hbase.rootdir</name>
           <value>hdfs://master:9000/hbase</value>
   </property>
   <!-- true = fully-distributed cluster mode (false would be standalone) -->
   <property>
           <name>hbase.cluster.distributed</name>
           <value>true</value>
   </property>
   <!-- ZooKeeper quorum hosts used by the HBase cluster -->
   <property>
           <name>hbase.zookeeper.quorum</name>
           <value>master,slave1,slave2</value>
   </property>
  <!-- HDFS replication factor for HBase files (1 is only suitable for test clusters) -->
  <property>
           <name>dfs.replication</name>
           <value>1</value>
   </property>
   <!-- Port for the HBase Master web UI -->
   <property>  
   <name>hbase.master.info.port</name>  
   <value>60010</value>  
   </property>  
</configuration>

配置/usr/local/hbase/conf/regionservers

 

Zookeeper集群安裝

上傳壓縮包,解壓,重命名

修改conf目錄下,重命名為: zoo.cfg

修改dataDir目錄,然后創建該目錄,並在該目錄創建文件myid, 值分別為0, 1, 2

 

末尾添加:

 

Spark安裝

編輯 spark-env.sh

export SPARK_DIST_CLASSPATH=$(/usr/local/hadoop/bin/hadoop classpath)

 hive2端口監聽設置

<property>
  <name>hive.server2.thrift.port</name>
  <value>10000</value>
</property>
<property>
  <name>hive.server2.thrift.bind.host</name>
  <value>192.168.48.130</value>
</property>

<property>
   <name>hive.server2.webui.host</name>
   <value>192.168.122.140</value>
   <description>The host address the HiveServer2 WebUI will listen on</description>
</property>
<property>
   <name>hive.server2.webui.port</name>
   <value>10002</value>
   <description>The port the HiveServer2 WebUI will listen on. This can be set to 0 or a negative integer to disable the web UI</description>
</property>

 

啟動服務:

       1):啟動metastore

       bin/hive --service metastore &

       默認端口為9083

       2):啟動hiveserver2

       bin/hive --service hiveserver2 &

       3):測試

       Web UI:http://192.168.48.130:10002/

 

flume例子

# Define a memory channel called ch1 on agent1  
agent1.channels.ch1.type = memory  
agent1.channels.ch1.capacity = 100000  
agent1.channels.ch1.transactionCapacity = 100000  
agent1.channels.ch1.keep-alive = 30  
   
# Define an Avro source called avro-source1 on agent1 and tell it  
# to bind to 0.0.0.0:41414. Connect it to channel ch1.  
#agent1.sources.avro-source1.channels = ch1  
#agent1.sources.avro-source1.type = avro  
#agent1.sources.avro-source1.bind = 0.0.0.0  
#agent1.sources.avro-source1.port = 41414  
#agent1.sources.avro-source1.threads = 5  
   
#define source monitor a file  
agent1.sources.avro-source1.type = exec  
agent1.sources.avro-source1.shell = /bin/bash -c  
agent1.sources.avro-source1.command = tail -n +0 -F /home/workspace/id.txt  
agent1.sources.avro-source1.channels = ch1  
agent1.sources.avro-source1.threads = 5
   
# Define a logger sink that simply logs all events it receives  
# and connect it to the other end of the same channel.  
agent1.sinks.log-sink1.channel = ch1  
agent1.sinks.log-sink1.type = hdfs  
agent1.sinks.log-sink1.hdfs.path = hdfs://192.168.88.134:9000/flumeTest
agent1.sinks.log-sink1.hdfs.writeFormat = Text  
agent1.sinks.log-sink1.hdfs.fileType = DataStream  
agent1.sinks.log-sink1.hdfs.rollInterval = 0  
agent1.sinks.log-sink1.hdfs.rollSize = 1000000  
agent1.sinks.log-sink1.hdfs.rollCount = 0  
agent1.sinks.log-sink1.hdfs.batchSize = 1000  
agent1.sinks.log-sink1.hdfs.txnEventMax = 1000  
agent1.sinks.log-sink1.hdfs.callTimeout = 60000  
agent1.sinks.log-sink1.hdfs.appendTimeout = 60000  
   
# Finally, now that we've defined all of our components, tell  
# agent1 which ones we want to activate.  
agent1.channels = ch1  
agent1.sources = avro-source1  
agent1.sinks = log-sink1  

flume-ng agent --conf conf --conf-file flume.conf --name agent1 -Dflume.root.logger=INFO,console

 

jupyter notebook安裝使用

#1.安裝相關需要的工具包
pip install jupyter
pip install numpy
pip install matplotlib
pip install scipy
pip install scikit-learn
pip install seaborn

#2指定ip和端口打開[默認端口:8888]
jupyter notebook --no-browser --port 8888 --ip=192.168.0.16
#瀏覽器打開指定的url
#例如:http://192.168.0.16:8888/?token=ab8f641d12a9a2a90aa42cfdb36198db4d23895de8abc2b0

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM