使用DataX將數據從MySQL數據庫同步到Oracle數據庫


最近有需求將數據從mysql同步到oracle,之前有使用kettle將表從oracle同步到mysql,這里使用的插件依然是阿里的dataX

詳細見:GitHub地址:https://github.com/alibaba/DataX

這里也是根據阿里雲的一篇帖子來進行驗證

https://yq.aliyun.com/articles/715393?spm=a2c4e.11155472.0.0.3aef4dc1IWo28F

1 環境需要

    JDK(1.8以上,推薦1.8)
    Python(推薦Python2.6.X)
    Apache Maven 3.x (Compile DataX)

2 #服務器配置

[root@yhq ~]# yum search java|grep jdk
[root@yhq ~]# yum install java-1.8.0-openjdk.x86_64 -y
[root@yhq ~]# java -version
openjdk version "1.8.0_242"
OpenJDK Runtime Environment (build 1.8.0_242-b07)
OpenJDK 64-Bit Server VM (build 25.242-b07, mixed mode)
[root@yhq ~]# python --version
Python 2.6.6
下載地址:https://maven.apache.org/download.cgi
[root@yhq soft]# ls apache-maven-3.6.3-bin.tar.gz 
apache-maven-3.6.3-bin.tar.gz
[root@yhq soft]# tar -zxvf apache-maven-3.6.3-bin.tar.gz 
[root@yhq soft]#  mkdir /opt/maven
[root@yhq soft]# mv apache-maven-3.6.3/* /opt/maven/
[root@yhq soft]# ln -s /opt/maven/b
bin/  boot/ 
[root@yhq soft]# ln -s /opt/maven/bin/mvn /usr/bin/mvn
[root@yhq soft]# vim /etc/profile.d/maven.sh
export M2_HOME=/opt/maven
export PATH=${M2_HOME}/bin:${PATH}
[root@yhq soft]# mvn -v
Apache Maven 3.6.3 (cecedd343002696d0abb50b32b541b8a6ba2883f)
Maven home: /opt/maven
Java version: 1.8.0_242, vendor: Oracle Corporation, runtime: /usr/lib/jvm/java-1.8.0-openjdk-1.8.0.242.b07-1.el6_10.x86_64/jre
Default locale: en_US, platform encoding: UTF-8
OS name: "linux", version: "2.6.32-754.el6.x86_64", arch: "amd64", family: "unix"
#安裝datax
[root@yhq soft]# tar -zxvf datax.tar.gz
[root@yhq soft]# ls datax
bin  conf  job  lib  plugin  script  tmp
#自檢測試腳本
[root@yhq soft]# python datax/bin/datax.py datax/job/job.json 

DataX (DATAX-OPENSOURCE-3.0), From Alibaba !
Copyright (C) 2010-2017, Alibaba Group. All Rights Reserved.


2020-02-09 23:27:35.717 [main] INFO  VMInfo - VMInfo# operatingSystem class => sun.management.OperatingSystemImpl
2020-02-09 23:27:35.763 [main] INFO  Engine - the machine info  => 

    osInfo:    Oracle Corporation 1.8 25.242-b07
    jvmInfo:    Linux amd64 2.6.32-754.el6.x86_64
    cpu num:    2
2020-02-09 23:27:46.974 [job-0] INFO  JobContainer - PerfTrace not enable!
2020-02-09 23:27:46.975 [job-0] INFO  StandAloneJobContainerCommunicator - Total 100000 records, 2600000 bytes | Speed 253.91KB/s, 10000 records/s | Error 0 records, 0 bytes |  All Task WaitWriterTime 0.020s |  All Task WaitReaderTime 0.121s | Percentage 100.00%
2020-02-09 23:27:46.976 [job-0] INFO  JobContainer - 
任務啟動時刻                    : 2020-02-09 23:27:35
任務結束時刻                    : 2020-02-09 23:27:46
任務總計耗時                    :                 10s
任務平均流量                    :          253.91KB/s
記錄寫入速度                    :          10000rec/s
讀出記錄總數                    :              100000
讀寫失敗總數                    :                   0

#測試使用

mysql 表
 -- Source table on the MySQL side of the sync.
 CREATE TABLE `datax_test` (
     `id`       int(11)     NOT NULL PRIMARY KEY,
     `name`     varchar(20) DEFAULT NULL,
     `birthday` date        DEFAULT NULL,
     `memo`     varchar(32) DEFAULT NULL
 ) ENGINE = InnoDB DEFAULT CHARSET = utf8;
#插入約10w數據(使用文末的存儲過程insrt_t,實際插入99999行)

#在oracle里面可以先創建好表結構,或者用kettle同步表

-- Target table on the Oracle side; mirrors the MySQL datax_test definition.
-- id carries the same NOT NULL / primary-key rule as the source table, so
-- loading the same rows twice raises a unique-constraint error instead of
-- silently duplicating them.
create table datax_test
(
    id       number       not null,
    name     varchar2(20),
    birthday date,
    memo     varchar2(32),
    constraint datax_test_pk primary key (id)
);

#創建配置文件
#可以通過命令查看配置模板: python datax.py -r {YOUR_READER} -w {YOUR_WRITER}

#  python datax.py -r streamreader -w streamwriter
[root@yhq job]# vim job1.json
{
    "job": {
        "setting": {
            "speed": {
                "channel": 5
            }
        },
        "content": [
            {
                "reader": {
                    "name": "mysqlreader",
                    "parameter": {
                        "username": "system",
                        "password": "****",
                        "column": ["id", "name", "birthday", "memo"],
                        "splitPk": "id",
                        "connection": [
                            {
                                "table": ["datax_test"],
                                "jdbcUrl": ["jdbc:mysql://ip:3306/vision"]
                            }
                        ]
                    }
                },
                "writer": {
                    "name": "oraclewriter",
                    "parameter": {
                        "username": "vision",
                        "password": "***",
                        "column": ["id", "name", "birthday", "memo"],
                        "connection": [
                            {
                                "jdbcUrl": "jdbc:oracle:thin:@ip:1521:orcl",
                                "table": ["datax_test"]
                            }
                        ]
                    }
                }
            }
        ]
    }
}

#啟動數據同步

[root@yhq job]# python /data/soft/datax/bin/datax.py job1.json 

DataX (DATAX-OPENSOURCE-3.0), From Alibaba !
Copyright (C) 2010-2017, Alibaba Group. All Rights Reserved.


2020-02-10 00:12:03.796 [main] INFO  VMInfo - VMInfo# operatingSystem class => sun.management.OperatingSystemImpl
2020-02-10 00:12:03.807 [main] INFO  Engine - the machine info  => 

    osInfo:    Oracle Corporation 1.8 25.242-b07
    jvmInfo:    Linux amd64 2.6.32-754.el6.x86_64
    cpu num:    2

    totalPhysicalMemory:    -0.00G
    freePhysicalMemory:    -0.00G
    maxFileDescriptorCount:    -1
    currentOpenFileDescriptorCount:    -1

    GC Names    [PS MarkSweep, PS Scavenge]

    MEMORY_NAME                    | allocation_size                | init_size                      
    PS Eden Space                  | 256.00MB                       | 256.00MB                       
    Code Cache                     | 240.00MB                       | 2.44MB                         
    Compressed Class Space         | 1,024.00MB                     | 0.00MB                         
    PS Survivor Space              | 42.50MB                        | 42.50MB                        
    PS Old Gen                     | 683.00MB                       | 683.00MB                       
    Metaspace                      | -0.00MB                        | 0.00MB                         


2020-02-10 00:12:03.869 [main] INFO  Engine - 
2020-02-10 00:13:48.061 [job-0] INFO  JobContainer - PerfTrace not enable!
2020-02-10 00:13:48.061 [job-0] INFO  StandAloneJobContainerCommunicator - Total 99999 records, 1888875 bytes | Speed 21.96KB/s, 1190 records/s | Error 0 records, 0 bytes |  All Task WaitWriterTime 42.323s |  All Task WaitReaderTime 29.424s | Percentage 100.00%
2020-02-10 00:13:48.069 [job-0] INFO  JobContainer - 
任務啟動時刻                    : 2020-02-10 00:12:05
任務結束時刻                    : 2020-02-10 00:13:48
任務總計耗時                    :                102s
任務平均流量                    :           21.96KB/s
記錄寫入速度                    :           1190rec/s
讀出記錄總數                    :               99999
讀寫失敗總數                    :                   0

#在oracle查看數據
-- Verify the sync on the Oracle side; the count should equal the 99999 records
-- reported by DataX. The note sits on its own line because SQL*Plus does not
-- accept a comment after the statement terminator.
select count(*) from datax_test;
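#剩下的增量同步,在crontab中配置定時任務就行。注意:上面的job1.json沒有過濾條件,每次執行都是全量同步;要做到真正的增量,需要在mysqlreader中加上where條件(例如按時間字段過濾),或在oraclewriter中用preSql先清理目標表。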
$ crontab -e
#會進入已有crontab文件編輯界面,繼續增加定時任務即可,本示例增加以下內容,並保存
0,10,20,30,40,50 * * * *  python /data/soft/datax/bin/datax.py /data/soft/datax/job/job1.json  >>/tmp/log.`date +\%Y\%m\%d\%H\%M\%S`  2>&1

測試環境

#測試環境
==mysql db 5.7.27 centos 6.10
create database vision;
==oracle db 11.2.0.4  centos 6.10
-- Show the number and path of every existing datafile
-- (presumably to choose the location used for the new file below).
SELECT file#, name
FROM v$datafile;

-- Tablespace for the vision schema: starts at 100 MB and auto-extends
-- in 500 MB steps.
CREATE TABLESPACE vision
    LOGGING
    DATAFILE '/u01/app/oracle/oradata/test/vision01.dbf' SIZE 100M
    AUTOEXTEND ON NEXT 500M;

-- Account the DataX writer connects as (**** is a masked password placeholder).
CREATE USER vision IDENTIFIED BY **** DEFAULT TABLESPACE vision;
  grant connect,resource,dba to vision;
==mysql 造數存儲過程(向datax_test插入測試數據)
delimiter //
-- insrt_t: fills datax_test with test rows for the DataX sync.
-- Inserts ids 1 through 99999 (the loop stops before 100000), each with
-- name 'yhq', the current time as birthday and memo '123'.
-- Rows are committed in batches of 5000; the trailing COMMIT makes sure the
-- final partial batch is persisted even when autocommit is disabled.
create procedure insrt_t ()
begin
    declare i int default 1;

    -- Open an explicit transaction so the batched commits take effect even
    -- under the default autocommit=1 (otherwise every INSERT commits alone).
    start transaction;

    while i < 100000 do
        insert into datax_test (id, name, birthday, memo)
        values (i, 'yhq', now(), '123');

        -- Flush the batch after every 5000th row and start the next one.
        if i % 5000 = 0 then
            commit;
            start transaction;
        end if;

        set i = i + 1;
    end while;

    -- Persist the rows inserted since the last batch boundary.
    commit;
end
//
delimiter ;

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM