For various reasons, we may not always stick with the same Redis instance, so sooner or later we need to switch.
Switching itself should be nothing more than changing a connection string; the only real issue is how to migrate the existing data smoothly to the new instance, so that the cutover is seamless.
Ways to migrate a Redis database that you will commonly find listed online:
1. redis-dump;
2. RDB backup and restore based on a slave (replica);
3. a hand-written Python/shell script that copies the full data set (a minimal sketch follows this list);
4. others...
On the whole they all work, but each can run into its own problems, which I will not go into here.
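For reference, a minimal sketch of option 3 using redis-cli and the MIGRATE command; the host names, port and passwords are placeholders, MIGRATE with the AUTH option needs Redis 4.0.7+, and this approach is too slow for large or busy instances:

# copy every key from the source to the target, keeping the source copy (COPY) and overwriting existing keys (REPLACE)
redis-cli -h src-host -a src-pass --scan | while read key; do
    redis-cli -h src-host -a src-pass migrate tgt-host 6379 "$key" 0 5000 copy replace auth tgt-pass
done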
Today we will look at another tool for migrating a Redis database: redis-shake.
redis-shake is an open-source tool developed by Alibaba Cloud, and it works well for Redis data migration.
The official documentation is somewhat buried: https://tech.antfin.com/docs/2/117311
GitHub project: https://github.com/alibaba/RedisShake
Honestly, once you have read the official docs it is mostly a matter of following the recipe, so there is not much to add; I will only cover what is actually needed.
Steps:
1. Download the release package, which can be found on the GitHub releases page:
wget https://github.com/alibaba/RedisShake/releases/download/release-v1.6.6-20190609/redis-shake.tar.gz
2. Extract the archive:
tar -xvf redis-shake.tar.gz
3. Edit the redis-shake.conf configuration file; mainly change the source and target options (see the excerpt after these steps):
vim redis-shake.conf   # fill in source and target just like ordinary Redis connections
4. Run the redis-shake sync:
./redis-shake.linux64 -type=rump -conf=redis-shake.conf
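For step 3, the lines that normally need changing are just the connection settings for the two instances; the addresses and passwords below are the placeholders from the full config at the end of this post:

source.address = r-a.redis.rds.aliyuncs.com:6379
source.password_raw = 123456
target.address = r-b.redis.rds.aliyuncs.com:6379
target.password_raw = 123456

After step 4 finishes, a quick sanity check (assuming redis-cli is installed and can reach both instances) is to compare the key counts on the two ends:

redis-cli -h r-a.redis.rds.aliyuncs.com -a 123456 dbsize
redis-cli -h r-b.redis.rds.aliyuncs.com -a 123456 dbsize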
Other notes:
1. The sync may throw errors, often because data is being written to the source while the sync is running. The fix is simple: just run the sync again.
2. For large data volumes you still need to pay some extra attention (see the two settings below).
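In rump mode, two settings in redis-shake.conf are the ones that matter most for large data sets; the values shown are simply the ones from the config below:

scan.key_number = 50            # number of keys fetched per SCAN
big_key_threshold = 524288000   # values larger than ~500 MB are split and written to the target in batches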
The redis-shake.conf file used here is as follows:
# this is the configuration of redis-shake.
# if you have any problem, please visit https://github.com/alibaba/RedisShake/wiki/FAQ

# id
id = redis-shake

# log file; if left empty, logs are printed to stdout (e.g. /var/log/redis-shake.log)
log.file =
# log level: "none", "error", "warn", "info", "all". default is "info".
log.level = info
# pid path (e.g. /var/run/); if left empty, defaults to the working directory.
# note this is a directory; the actual pid file is `{pid_path}/{id}.pid`
pid_path =

# pprof port
system_profile = 9310
# restful port for viewing metrics
http_profile = 9320

# runtime.GOMAXPROCS, 0 means use cpu core number: runtime.NumCPU()
ncpu = 0

# parallel routines number used in RDB file syncing. default is 64.
parallel = 32

# source redis configuration.
# used in `dump`, `sync` and `rump`.
# source redis type, e.g. "standalone" (default), "sentinel" or "cluster".
#   1. "standalone": standalone db mode.
#   2. "sentinel": the redis address is read from sentinel.
#   3. "cluster": the source redis has several db.
#   4. "proxy": the proxy address; currently only used in "rump" mode.
source.type = standalone
# ip:port
# the source address can be the following:
#   1. single db address. for "standalone" type.
#   2. ${sentinel_master_name}:${master or slave}@sentinel single/cluster address, e.g., mymaster:master@127.0.0.1:26379;127.0.0.1:26380. for "sentinel" type.
#   3. cluster that has several db nodes split by semicolon(;). for "cluster" type. e.g., 10.1.1.1:20331;10.1.1.2:20441.
#   4. proxy address (used in "rump" mode only). for "proxy" type.
source.address = r-a.redis.rds.aliyuncs.com:6379
# password.
source.password_raw = 123456
# auth type, don't modify it
source.auth_type = auth
# tls enable, true or false. Currently only standalone is supported.
# open source redis does NOT support tls so far, but some cloud versions do.
source.tls_enable = false

# target redis configuration. used in `restore`, `sync` and `rump`.
# the type of target redis can be "standalone", "proxy" or "cluster".
#   1. "standalone": standalone db mode.
#   2. "sentinel": the redis address is read from sentinel.
#   3. "cluster": open source cluster (not supported currently).
#   4. "proxy": proxy layer ahead of redis. data is inserted in a round-robin way if more than 1 proxy is given.
target.type = standalone
# ip:port
# the target address can be the following:
#   1. single db address. for "standalone" type.
#   2. sentinel_master_name@sentinel single/cluster address, e.g., mymaster@127.0.0.1:26379;127.0.0.1:26380. for "sentinel" type.
#   3. cluster that has several db nodes split by semicolon(;). for "cluster" type.
#   4. proxy address (used in "rump" mode only). for "proxy" type.
target.address = r-b.redis.rds.aliyuncs.com:6379
# password.
target.password_raw = 123456
# auth type, don't modify it
target.auth_type = auth
# all the data will be written into this db. < 0 means disable.
target.db = -1
# tls enable, true or false. Currently only standalone is supported.
# open source redis does NOT support tls so far, but some cloud versions do.
target.tls_enable = false

# input RDB file.
# used in `decode` and `restore`.
# if the input is a list split by semicolon(;), e.g. rdb.0;rdb.1;rdb.2,
# redis-shake will restore them one by one.
rdb.input = local
# output RDB file prefix.
# used in `decode` and `dump`.
# e.g. if the input has 3 dbs, the dumps are ${output_rdb}.0, ${output_rdb}.1, ${output_rdb}.2
rdb.output = local_dump
# the concurrency of fetching data, default is len(source.address) or len(rdb.input).
# used in `dump`, `sync` and `restore`. 0 means default.
# e.g. with 5 db nodes / input rdb files and rdb.parallel=3, only 3 full dumps are pulled
# concurrently; the 4th starts once one of them finishes, and so on.
rdb.parallel = 0
# for special cloud vendor: ucloud
# the ucloud cluster edition prefixes rdb entries with a slot, which needs special handling: ucloud_cluster.
rdb.special_cloud =

# used for expiring keys: the time gap added on the target when source and target timestamps differ.
fake_time =

# force rewrite when the key already exists on the destination.
# used in `restore`, `sync` and `rump`.
rewrite = true

# filter db or key or slot
# choose these db, e.g., 5, only choose db5. default is all.
# used in `restore` and `sync`.
filter.db =
# filter keys with prefix string. multiple prefixes are separated by ';'.
# e.g., a;b;c
# default is all.
# used in `restore` and `sync`.
filter.key =
# filter given slots, multiple slots are separated by ';'.
# e.g., 1;2;3
# used in `sync`.
filter.slot =

# big key threshold, the default is 500 * 1024 * 1024 bytes. normal keys are written to the
# target with a single restore call; if a value is bigger than this threshold, its fields are
# split and written into the target in order, batch by batch.
big_key_threshold = 524288000

# use psync command.
# used in `sync`.
# the sync command is used by default; enabling this switches to psync.
psync = false

# enable metric
# used in `sync`.
metric = true
# print metrics in the log
metric.print_log = false

# heartbeat
# url to which redis-shake sends heartbeats
# used in `sync`.
#heartbeat.url = http://127.0.0.1:8000
heartbeat.url =
# heartbeat interval in seconds
heartbeat.interval = 3
# external info which will be included in heartbeat data.
heartbeat.external = test external
# local network card to get the ip address from, e.g., "lo", "eth0", "en0"
heartbeat.network_interface =

# sender information.
# sender flush buffer size in bytes; the buffer is flushed once it exceeds this threshold.
# used in `sync`.
sender.size = 104857600
# sender flush buffer size in number of oplogs; the buffer is flushed once it exceeds this threshold.
# used in `sync`.
sender.count = 5000
# delay channel size. once an oplog is sent to the target redis, its id and timestamp are also
# stored in this delay queue. the timestamp is used to calculate the delay when the ack from
# the target redis is received; used for metric latency statistics.
# used in `sync`.
sender.delay_channel_size = 65535

# enable keep_alive option in TCP when connecting to redis.
# the unit is seconds. 0 means disable.
keep_alive = 0

# used in `rump`.
# number of keys captured by each scan. default is 100.
scan.key_number = 50
# used in `rump`.
# some cloud editions use a special format that differs from the normal `scan` command; currently
# the Tencent Cloud cluster edition "tencent_cluster" and the Alibaba Cloud cluster edition
# "aliyun_cluster" are supported.
scan.special_cloud =
# used in `rump`.
# some cloud editions support neither sync/psync nor scan; in that case the key list can be
# read from a file, one key per line.
scan.key_file =

# ----------------splitter----------------
# below variables are useless for the current open source version so don't set them.

# replace hash tag.
# used in `sync`.
replace_hash_tag = false
# used in `restore` and `dump`.
extra = false
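One more note: the rump mode used above pulls data with SCAN, which is the mode intended for instances that do not expose sync/psync. If the source does allow sync/psync (a self-managed Redis usually does), the same config can be run in sync mode instead, which also keeps pulling the incremental writes; this is just the alternative invocation, not something the steps above depend on:

./redis-shake.linux64 -type=sync -conf=redis-shake.conf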