一.前言
1.為何部署sentinel哨兵
前文redis主從架構中,當主服務故障時,需要手動將從服務切換為主服務,sentinel服務就是將這個過程自動化。
主要功能有:
1)持續監控主從服務是否正常運行
2)可以通過腳本、API接口發出報警
3)自動故障遷移
當然sentinel不止以上這些,更多的功能參考https://redis.io/topics/sentinel
二.redis部署sentinel
1.環境說明
系統基於CentOS-6.7-x86_64-minimal.iso
源碼安裝包redis-3.2.4版本,sentinel功能2.8版本開始支持
192.168.56.101 host101
192.168.56.102 host102
192.168.56.103 host103
master 192.168.56.101:6379
slave1 192.168.56.102:6379
slave2 192.168.56.103:6379
sentinel1 192.168.56.101:26379
sentinel2 192.168.56.102:26379
sentinel3 192.168.56.103:26379
2.redis配置
#master端配置
#復制一份默認的配置文件再修改以下幾個地方
[root@host101 ~]# cp /usr/local/src/redis-3.2.4/redis.conf /etc/redis/6379.conf
[root@host101 ~]# vim /etc/redis/6379.conf
bind 0.0.0.0 #監聽地址
daemonize yes #daemon形式運行
pidfile /var/run/redis_6379.pid #pid文件
logfile "/var/log/redis_6379.log" #啟動日志
dbfilename dump.rdb #數據文件
dir /var/lib/redis #數據目錄
requirepass mima #配置連接密碼
#slave1端配置
[root@host102 ~]# vim /etc/redis/6379.conf
bind 0.0.0.0 #監聽地址
daemonize yes #daemon形式運行
pidfile /var/run/redis_6379.pid #pid文件
logfile "/var/log/redis_6379.log" #啟動日志
dbfilename dump.rdb #數據文件
dir /var/lib/redis #數據目錄
requirepass mima #配置連接密碼
slaveof 192.168.56.101 6379 #指定master
masterauth mima #slave認證master密碼
slave-read-only yes #設置slave為只讀模式
#slave2端配置
[root@host103 ~]# vim /etc/redis/6379.conf
bind 0.0.0.0 #監聽地址
daemonize yes #daemon形式運行
pidfile /var/run/redis_6379.pid #pid文件
logfile "/var/log/redis_6379.log" #啟動日志
dbfilename dump.rdb #數據文件
dir /var/lib/redis #數據目錄
requirepass mima #配置連接密碼
slaveof 192.168.56.101 6379 #指定master
masterauth mima #slave認證master密碼
slave-read-only yes #設置slave為只讀模式
3.啟動redis並查看主從狀態
#啟動redis服務,三台都操作 [root@host101 redis]# /usr/local/redis/bin/redis-server /etc/redis/6379.conf #查看主從狀態 [root@host101 redis]# /usr/local/redis/bin/redis-cli -a mima 127.0.0.1:6379> info replication # Replication role:master connected_slaves:2 slave0:ip=192.168.56.102,port=6379,state=online,offset=15,lag=0 slave1:ip=192.168.56.103,port=6379,state=online,offset=15,lag=1 master_repl_offset:15 repl_backlog_active:1 repl_backlog_size:1048576 repl_backlog_first_byte_offset:2 repl_backlog_histlen:14 #測試讀寫 [root@host101 redis]# /usr/local/redis/bin/redis-cli -a mima 127.0.0.1:6379> set name marry OK 127.0.0.1:6379> get name "marry" 127.0.0.1:6379> exit [root@host101 redis]# /usr/local/redis/bin/redis-cli -h 192.168.56.103 -a mima 192.168.56.103:6379> get name "marry" 192.168.56.103:6379> set name marry3 (error) READONLY You can't write against a read only slave. 192.168.56.103:6379> exit [root@host101 redis]# /usr/local/redis/bin/redis-cli -h 192.168.56.102 -a mima 192.168.56.102:6379> info replication # Replication role:slave master_host:192.168.56.101 master_port:6379 master_link_status:up master_last_io_seconds_ago:2 master_sync_in_progress:0 slave_repl_offset:537 slave_priority:100 slave_read_only:1 connected_slaves:0 master_repl_offset:0 repl_backlog_active:0 repl_backlog_size:1048576 repl_backlog_first_byte_offset:0 repl_backlog_histlen:0
4.配置sentinel並啟動
#三台都修改為以下同樣配置(注意:各行行尾的#說明僅為本文注釋,redis配置文件不支持行內注釋,實際寫入配置文件時請去掉)
[root@host101 ~]# grep -Ev "^$|#" /usr/local/src/redis-3.2.4/sentinel.conf > /etc/redis/sentinel.conf
[root@host101 ~]# vim /etc/redis/sentinel.conf
port 26379 #sentinel監聽端口
daemonize yes #以daemon形式運行在后台
logfile /var/log/sentinel.log #日志文件
dir /tmp
sentinel monitor mymaster 192.168.56.101 6379 2 #監控mymaster組,master地址,端口,quorum次數
sentinel down-after-milliseconds mymaster 5000 #5000毫秒即5秒連續不能連通master,認為master掛掉
sentinel parallel-syncs mymaster 1
sentinel failover-timeout mymaster 60000 #故障切換超時時間
sentinel auth-pass mymaster mima #密碼認證
protected-mode no #protected-mode開啟時,若未配置bind和密碼,sentinel只接受來自環回地址的連接,這樣就導致sentinel之間不能通信,可以改為bind網卡地址或者關閉protected-mode
#三台都啟動
[root@host101 ~]# /usr/local/redis/bin/redis-sentinel /etc/redis/sentinel.conf
#查看sentinel啟動日志
#正常情況下可以看到+sentinel-address-switch字樣
[root@host101 ~]# more /var/log/sentinel.log
24470:X 08 Dec 10:53:12.205 * Increased maximum number of open files to 10032 (it was originally set to 1024).
_._
_.-``__ ''-._
_.-`` `. `_. ''-._ Redis 3.2.4 (00000000/0) 64 bit
.-`` .-```. ```\/ _.,_ ''-._
( ' , .-` | `, ) Running in sentinel mode
|`-._`-...-` __...-.``-._|'` _.-'| Port: 26379
| `-._ `._ / _.-' | PID: 24470
`-._ `-._ `-./ _.-' _.-'
|`-._`-._ `-.__.-' _.-'_.-'|
| `-._`-._ _.-'_.-' | http://redis.io
`-._ `-._`-.__.-'_.-' _.-'
|`-._`-._ `-.__.-' _.-'_.-'|
| `-._`-._ _.-'_.-' |
`-._ `-._`-.__.-'_.-' _.-'
`-._ `-.__.-' _.-'
`-._ _.-'
`-.__.-'
24470:X 08 Dec 10:53:12.206 # Sentinel ID is 106e22fad7ad280b2c38542c164f7060b6587d68
24470:X 08 Dec 10:53:12.206 # +monitor master mymaster 192.168.56.101 6379 quorum 2
24470:X 08 Dec 10:53:14.321 * +sentinel-address-switch master mymaster 192.168.56.101 6379 ip 192.168.56.102 port 26379 for fae94df5596af315af0f5f97fe7ade3fad0b8a98
24470:X 08 Dec 10:53:14.336 * +sentinel-address-switch master mymaster 192.168.56.101 6379 ip 192.168.56.103 port 26379 for 8ea722390cabf3ad304b20f8cc42157603d21d84
#需要注意的地方,一旦啟動sentinel服務后,服務會自動修改sentinel配置文件/etc/redis/sentinel.conf
[root@host101 ~]# cat /etc/redis/sentinel.conf
port 26379
daemonize yes
dir "/tmp"
logfile "/var/log/sentinel.log"
sentinel myid 106e22fad7ad280b2c38542c164f7060b6587d68
sentinel monitor mymaster 192.168.56.101 6379 2
sentinel down-after-milliseconds mymaster 5000
sentinel failover-timeout mymaster 60000
sentinel auth-pass mymaster mima
protected-mode no
# Generated by CONFIG REWRITE
sentinel config-epoch mymaster 0
sentinel leader-epoch mymaster 0
sentinel known-slave mymaster 192.168.56.102 6379
sentinel known-slave mymaster 192.168.56.103 6379
sentinel known-sentinel mymaster 192.168.56.103 26379 8ea722390cabf3ad304b20f8cc42157603d21d84
sentinel known-sentinel mymaster 192.168.56.102 26379 fae94df5596af315af0f5f97fe7ade3fad0b8a98
sentinel current-epoch 0
4.詳細介紹以上幾個參數的用途
sentinel monitor mymaster 192.168.56.101 6379 2
4.1這一行代表sentinel監控的master的名字叫做mymaster,地址為192.168.56.101:6379,行尾最后的一個2代表什么意思呢?我們知道,網絡是不可靠的,有時候一個sentinel會因為網絡堵塞而誤以為一個master redis已經死掉了,當sentinel以集群方式部署時,解決這個問題的方法就變得很簡單,只需要多個sentinel互相溝通來確認某個master是否真的死了,這個2代表,當集群中至少有2個sentinel認為master死了時,才能真正認為該master已經不可用了。(sentinel集群中各個sentinel也有互相通信,通過gossip協議)。
sentinel down-after-milliseconds mymaster 5000
4.2sentinel會向master發送心跳PING來確認master是否存活,如果master在“一定時間范圍”內不回應PONG 或者是回復了一個錯誤消息,那么這個sentinel會主觀地(單方面地)認為這個master已經不可用了(subjectively down, 也簡稱為SDOWN)。而這個down-after-milliseconds就是用來指定這個“一定時間范圍”的,單位是毫秒。
不過需要注意的是,這個時候sentinel並不會馬上進行failover主備切換,這個sentinel還需要參考sentinel集群中其他sentinel的意見,如果達到指定數量(quorum)的sentinel也主觀地認為該master死了,那么這個master就會被客觀地(注意哦,這次不是主觀,是客觀,與剛才的subjectively down相對,這次是objectively down,簡稱為ODOWN)認為已經死了。需要一起做出決定的sentinel數量(quorum)在上一條sentinel monitor配置中進行配置。
sentinel failover-timeout mymaster 60000
4.3failover過期時間,當failover開始后,在此時間內仍然沒有觸發任何failover操作,當前sentinel將會認為此次failover失敗。
5.測試故障轉移
#任意sentinel節點,查看狀態 [root@host101 ~]# /usr/local/redis/bin/redis-cli -p 26379 127.0.0.1:26379> info sentinel # Sentinel sentinel_masters:1 sentinel_tilt:0 sentinel_running_scripts:0 sentinel_scripts_queue_length:0 sentinel_simulate_failure_flags:0 master0:name=mymaster,status=ok,address=192.168.56.101:6379,slaves=2,sentinels=3 #slave監聽sentinel日志 [root@host102 redis]# tail -f /var/log/sentinel.log #master殺掉redis-server進程 [root@host101 ~]# killall redis-server #slave監聽sentinel日志,大概5秒后刷出日志 [root@host102 redis]# tail -f /var/log/sentinel.log 6711:X 08 Dec 11:16:15.164 # +sdown master mymaster 192.168.56.101 6379 6711:X 08 Dec 11:16:15.223 # +odown master mymaster 192.168.56.101 6379 #quorum 2/2 6711:X 08 Dec 11:16:15.224 # +new-epoch 1 6711:X 08 Dec 11:16:15.224 # +try-failover master mymaster 192.168.56.101 6379 6711:X 08 Dec 11:16:15.226 # +vote-for-leader fae94df5596af315af0f5f97fe7ade3fad0b8a98 1 6711:X 08 Dec 11:16:15.232 # 8ea722390cabf3ad304b20f8cc42157603d21d84 voted for fae94df5596af315af0f5f97fe7ade3fad0b8a98 1 6711:X 08 Dec 11:16:15.232 # 106e22fad7ad280b2c38542c164f7060b6587d68 voted for fae94df5596af315af0f5f97fe7ade3fad0b8a98 1 6711:X 08 Dec 11:16:15.293 # +elected-leader master mymaster 192.168.56.101 6379 6711:X 08 Dec 11:16:15.293 # +failover-state-select-slave master mymaster 192.168.56.101 6379 6711:X 08 Dec 11:16:15.346 # +selected-slave slave 192.168.56.102:6379 192.168.56.102 6379 @ mymaster 192.168.56.101 6379 6711:X 08 Dec 11:16:15.346 * +failover-state-send-slaveof-noone slave 192.168.56.102:6379 192.168.56.102 6379 @ mymaster 192.168.56.101 6379 6711:X 08 Dec 11:16:15.447 * +failover-state-wait-promotion slave 192.168.56.102:6379 192.168.56.102 6379 @ mymaster 192.168.56.101 6379 6711:X 08 Dec 11:16:16.273 # +promoted-slave slave 192.168.56.102:6379 192.168.56.102 6379 @ mymaster 192.168.56.101 6379 6711:X 08 Dec 11:16:16.273 # +failover-state-reconf-slaves master mymaster 192.168.56.101 6379 6711:X 08 Dec 11:16:16.333 * +slave-reconf-sent 
slave 192.168.56.103:6379 192.168.56.103 6379 @ mymaster 192.168.56.101 6379 6711:X 08 Dec 11:16:17.283 * +slave-reconf-inprog slave 192.168.56.103:6379 192.168.56.103 6379 @ mymaster 192.168.56.101 6379 6711:X 08 Dec 11:16:17.283 * +slave-reconf-done slave 192.168.56.103:6379 192.168.56.103 6379 @ mymaster 192.168.56.101 6379 6711:X 08 Dec 11:16:17.358 # -odown master mymaster 192.168.56.101 6379 6711:X 08 Dec 11:16:17.358 # +failover-end master mymaster 192.168.56.101 6379 6711:X 08 Dec 11:16:17.358 # +switch-master mymaster 192.168.56.101 6379 192.168.56.102 6379 6711:X 08 Dec 11:16:17.358 * +slave slave 192.168.56.103:6379 192.168.56.103 6379 @ mymaster 192.168.56.102 6379 6711:X 08 Dec 11:16:17.358 * +slave slave 192.168.56.101:6379 192.168.56.101 6379 @ mymaster 192.168.56.102 6379 6711:X 08 Dec 11:16:22.402 # +sdown slave 192.168.56.101:6379 192.168.56.101 6379 @ mymaster 192.168.56.102 6379 #再次查看sentinel狀態,可以看到master節點已變為原192.168.56.102:6379 [root@host101 ~]# /usr/local/redis/bin/redis-cli -p 26379 127.0.0.1:26379> info sentinel # Sentinel sentinel_masters:1 sentinel_tilt:0 sentinel_running_scripts:0 sentinel_scripts_queue_length:0 sentinel_simulate_failure_flags:0 master0:name=mymaster,status=ok,address=192.168.56.102:6379,slaves=2,sentinels=3
6.恢復原master192.168.56.101:6379
#原master恢復后會作為slave連接新的master,因此需要添加masterauth認證密碼(一開始原master並沒有配置這個選項),然后啟動服務
[root@host101 ~]# echo "masterauth mima" >> /etc/redis/6379.conf
[root@host101 ~]# /usr/local/redis/bin/redis-server /etc/redis/6379.conf
#檢查sentinel日志,檢查replication狀態,測試讀寫
總結:sentinel只是實現了redis的高可用,並沒有實現前端服務的高可用。常見的redis高可用方案有以下幾種:
1,keepalived:通過keepalived的虛擬IP,提供主從的統一訪問,在主出現問題時,通過keepalived運行腳本將從提升為主,待主恢復后先同步后自動變為主,該方案的好處是主從切換后,應用程序不需要知道(因為訪問的虛擬IP不變),壞處是引入keepalived增加部署復雜性, 而且keepalived的應用場景有限,比如它的核心協議VRRP只能工作在局域網內,不能工作在局域網外(網間、廣域網),而且在網絡不受自己控制時基本不能用,除非設定好的VIP是供局域網使用;
2,zookeeper:通過zookeeper來監控主從實例,維護最新有效的IP,應用通過zookeeper取得IP,對Redis進行訪問;
3,sentinel:通過Sentinel監控主從實例,自動進行故障恢復,該方案有個缺陷:因為主從實例地址(IP PORT)是不同的,當故障發生進行主從切換后,應用程序無法知道新地址,故在Jedis2.2.2中新增了對Sentinel的支持,應用通過redis.clients.jedis.JedisSentinelPool.getResource()取得的Jedis實例會及時更新到新的主實例地址。
