[root@monitor yum.repos.d]# cat CentOS-Base.repo
[base]
name=CentOS-$releasever - Base
failovermethod=priority
baseurl=http://mirrors.aliyun.com/centos/$releasever/os/$basearch/
        http://mirrors.aliyuncs.com/centos/$releasever/os/$basearch/
gpgcheck=1
gpgkey=http://mirrors.aliyun.com/centos/RPM-GPG-KEY-CentOS-6
       http://mirrors.aliyuncs.com/centos/RPM-GPG-KEY-CentOS-6
#released updates
[updates]
name=CentOS-$releasever - Updates
failovermethod=priority
baseurl=http://mirrors.aliyun.com/centos/$releasever/updates/$basearch/
        http://mirrors.aliyuncs.com/centos/$releasever/updates/$basearch/
gpgcheck=1
gpgkey=http://mirrors.aliyun.com/centos/RPM-GPG-KEY-CentOS-6
       http://mirrors.aliyuncs.com/centos/RPM-GPG-KEY-CentOS-6
#additional packages that may be useful
[extras]
name=CentOS-$releasever - Extras
failovermethod=priority
baseurl=http://mirrors.aliyun.com/centos/$releasever/extras/$basearch/
        http://mirrors.aliyuncs.com/centos/$releasever/extras/$basearch/
gpgcheck=1
gpgkey=http://mirrors.aliyun.com/centos/RPM-GPG-KEY-CentOS-6
       http://mirrors.aliyuncs.com/centos/RPM-GPG-KEY-CentOS-6
#additional packages that extend functionality of existing packages
[centosplus]
name=CentOS-$releasever - Plus
failovermethod=priority
baseurl=http://mirrors.aliyun.com/centos/$releasever/centosplus/$basearch/
        http://mirrors.aliyuncs.com/centos/$releasever/centosplus/$basearch/
gpgcheck=1
enabled=0
gpgkey=http://mirrors.aliyun.com/centos/RPM-GPG-KEY-CentOS-6
       http://mirrors.aliyuncs.com/centos/RPM-GPG-KEY-CentOS-6
#contrib - packages by Centos Users
[contrib]
name=CentOS-$releasever - Contrib
failovermethod=priority
baseurl=http://mirrors.aliyun.com/centos/$releasever/contrib/$basearch/
        http://mirrors.aliyuncs.com/centos/$releasever/contrib/$basearch/
gpgcheck=1
enabled=0
gpgkey=http://mirrors.aliyun.com/centos/RPM-GPG-KEY-CentOS-6
       http://mirrors.aliyuncs.com/centos/RPM-GPG-KEY-CentOS-6
---------------------------------------------------------------------------------------------------
http://www.cnblogs.com/xiaochaohuashengmi/archive/2011/10/08/2203153.html
-ivh:安裝顯示安裝進度--install--verbose--hash
-Uvh:升級軟件包--Update;
-qpl:列出RPM軟件包內的文件信息[Query Package list];
-qpi:列出RPM軟件包的描述信息[Query Package Info];
-qf:查找指定文件屬於哪個RPM軟件包[Query File];
-Va:校驗所有的RPM軟件包,查找丟失的文件[Verify All];
-e:刪除包
--------------------------------------------------------------------------------------------------
安裝MHA 節點包:
[root@monitor ~]# rpm -ivh mha4mysql-node-0.56-0.el6.noarch.rpm
error: Failed dependencies:
perl(DBD::mysql) is needed by mha4mysql-node-0.56-0.el6.noarch
perl(DBI) is needed by mha4mysql-node-0.56-0.el6.noarch
yum install perl-DBD-MySQL
Total 1.1 MB/s | 839 kB
00:00
Running rpm_check_debug
Running Transaction Test
Transaction Test Succeeded
Running Transaction
Installing : perl-DBI-1.609-4.el6.x86_64
1/2
Installing : perl-DBD-MySQL-4.013-3.el6.x86_64
2/2
Verifying : perl-DBD-MySQL-4.013-3.el6.x86_64
1/2
Verifying : perl-DBI-1.609-4.el6.x86_64
rpm -ivh mha4mysql-node-0.56-0.el6.noarch.rpm
--------------------------------------------------------------------------------------------
安裝MHA管理器:
[root@monitor ~]# rpm -ivh mha4mysql-manager-0.56-0.el6.noarch.rpm
error: Failed dependencies:
perl(Config::Tiny) is needed by mha4mysql-manager-0.56-0.el6.noarch
perl(Log::Dispatch) is needed by mha4mysql-manager-0.56-0.el6.noarch
perl(Log::Dispatch::File) is needed by mha4mysql-manager-0.56-0.el6.noarch
perl(Log::Dispatch::Screen) is needed by mha4mysql-manager-0.56-0.el6.noarch
perl(Parallel::ForkManager) is needed by mha4mysql-manager-0.56-0.el6.noarch
perl(Time::HiRes) is needed by mha4mysql-manager-0.56-0.el6.noarch
yum install perl-Config-Tiny
yum install perl-Log-Dispatch
yum install perl-Parallel-ForkManager
yum install perl-Time-HiRes
rpm -ivh mha4mysql-manager-0.56-0.el6.noarch.rpm
----------------------------------------------------------------------------------------------
安裝位置
[root@server1 ~]# rpm -ql mha4mysql-node-0.56-0.el6.noarch
/usr/bin/apply_diff_relay_logs
/usr/bin/filter_mysqlbinlog
/usr/bin/purge_relay_logs
/usr/bin/save_binary_logs
/usr/share/man/man1/apply_diff_relay_logs.1.gz
/usr/share/man/man1/filter_mysqlbinlog.1.gz
/usr/share/man/man1/purge_relay_logs.1.gz
/usr/share/man/man1/save_binary_logs.1.gz
/usr/share/perl5/vendor_perl/MHA/BinlogHeaderParser.pm
/usr/share/perl5/vendor_perl/MHA/BinlogManager.pm
/usr/share/perl5/vendor_perl/MHA/BinlogPosFindManager.pm
/usr/share/perl5/vendor_perl/MHA/BinlogPosFinder.pm
/usr/share/perl5/vendor_perl/MHA/BinlogPosFinderElp.pm
/usr/share/perl5/vendor_perl/MHA/BinlogPosFinderXid.pm
/usr/share/perl5/vendor_perl/MHA/NodeConst.pm
/usr/share/perl5/vendor_perl/MHA/NodeUtil.pm
/usr/share/perl5/vendor_perl/MHA/SlaveUtil.pm
文件作用: apply_diff_relay_logs: APPLY_DIFF_RELAY_LOGS(1) User Contributed Perl Documentation APPLY_DIFF_RELAY_LOGS(1) NAME apply_diff_relay_logs - Generating differential relay logs between the latest slave and target slave, and applying all binlog/relay log files. This command is automatically executed from MHA Manager on failover, and manual execution should not be needed normally. SYNOPSIS # For checking apply_diff_relay_logs --command=test --target_version=5.1.56 --relay_log_info=s --slave_user=s --slave_host=s --slave_ip=s --slave_port=i --workdir=s # For generating differential log events apply_diff_relay_logs --command=generate_and_send --target_version=5.1.56 --scp_user=s --scp_host=s --latest_mlf=s --target_mlf=s --target_rmlp=i --relay_log_info=s --server_id=i --diff_file_readtolatest=s --target_version=s --workdir=s --timestamp=s # For applying log files apply_diff_relay_logs --command=apply --target_version=5.1.56 --slave_user=s --slave_host=s --slave_ip=s --slave_port=i --apply_files=file1,file2.. --workdir=s --timestamp=s --slave_pass=xxx perl v5.8.8 2014-03-31 APPLY_DIFF_RELAY_LOGS(1) filter_mysqlbinlog : FILTER_MYSQLBINLOG(1) User Contributed Perl Documentation FILTER_MYSQLBINLOG(1) NAME filter_mysqlbinlog - Trimming ROLLBACK statements and equivalent BINLOG events added by mysqlbinlog. This script is now obsolete. SYNOPSIS mysqlbinlog binary_or_relay_log_file | filter_mysqlbinlog Note that this script is now obsolete and not used by MHA by default. DESCRIPTION mysqlbinlog command provided by Oracle implicitly adds ROLLBACK statements and equivalent BINLOG events. But this causes problems when recovering slave servers. To recover slaves, MHA might need to apply the following binlog events. 1) Relay log events from Relay_Log_Pos to the end of the relay log file 2) Differential relay log events from the latest slave 3) Differential binary log events from the dead master mysqlbinlog command needs to be executed on these files separately.
If a transaction does not end by 1) or 2), implicit ROLLBACK event rolls back the transaction, which will result in inconsistency. filter_mysqlbinlog is a tool to fix this issue. Note that ROLLBACK statements themselves are added in usual situations. For example, when you execute 1. BEGIN; 2. Updating transactional tables 3. Updating non-transactional tables 4. ROLLBACK, a ROLLBACK statement is written to the binary log to rollback transactional queries. This is normal situation so filter_mysqlbinlog must not remove all ROLLBACK events. Note that this script is now obsolete and not used by MHA by default. purge_relay_logs: PURGE_RELAY_LOGS(1) User Contributed Perl Documentation PURGE_RELAY_LOGS(1) NAME purge_relay_logs - Deleting relay logs without blocking SQL threads SYNOPSIS purge_relay_logs --user=root --password=rootpass --host=127.0.0.1 save_binary_logs: SAVE_BINARY_LOGS(1) User Contributed Perl Documentation SAVE_BINARY_LOGS(1) NAME save_binary_logs - Concatenating binary or relay logs from the specified file/position to the end of the log. This command is automatically executed from MHA Manager on failover, and manual execution should not be needed normally. SYNOPSIS # Test $ save_binary_logs --command=test --binlog_dir=/var/lib/mysql --start_file=mysqld-bin.000002 # Saving binary logs $ save_binary_logs --command=save --binlog_dir=/var/lib/mysql --start_file=mysqld-bin.000002 --start_pos=312 --output_file=/var/tmp/aggregate.binlog # Saving relay logs $ save_binary_logs --command=save --start_file=mysqld-relay-bin.000002 --start_pos=312 --relay_log_info=/var/lib/mysql/relay-log.info --output_file=/var/tmp/aggregate.binlog save_binary_logs concatenates binary or relay logs from the specified log file/position to the end of the log. This tool is intended to be invoked from the master failover script(MHA Manager), and manual execution is normally not needed.
DESCRIPTION Suppose that master is crashed and the latest slave server has received binary logs up to mysqld-bin.000002:312. It is likely that master has more binary logs. If it is not sent to the slave, slaves will lose all binlogs from mysqld-bin.000002:312. The purpose of the save_binary_logs is to save binary logs that are not replicated to slaves. If master is reachable through SSH and binary logs are readable, saving binary logs is possible. Here is an example: $ save_binary_logs --command=save --start_file=mysqld-bin.000002 --start_pos=312 --output_file=/var/tmp/aggregate.binlog Then all binary logs starting from mysqld-bin.000002:312 are concatenated and stored into /var/tmp/aggregate.binlog. If you have binary logs up to mysqld-bin.000004, the following mysqlbinlog outputs are written. mysqld-bin.000002:Format Description Event(FDE), plus from 312 to the tail mysqld-bin.000003:from 0 to the tail, excluding FDE mysqld-bin.000004:from 0 to the tail, excluding FDE
[root@monitor ~]# rpm -ql mha4mysql-manager-0.56-0.el6.noarch
/usr/bin/masterha_check_repl
/usr/bin/masterha_check_ssh
/usr/bin/masterha_check_status
/usr/bin/masterha_conf_host
/usr/bin/masterha_manager
/usr/bin/masterha_master_monitor
/usr/bin/masterha_master_switch
/usr/bin/masterha_secondary_check
/usr/bin/masterha_stop
/usr/share/man/man1/masterha_check_repl.1.gz
/usr/share/man/man1/masterha_check_ssh.1.gz
/usr/share/man/man1/masterha_check_status.1.gz
/usr/share/man/man1/masterha_conf_host.1.gz
/usr/share/man/man1/masterha_manager.1.gz
/usr/share/man/man1/masterha_master_monitor.1.gz
/usr/share/man/man1/masterha_master_switch.1.gz
/usr/share/man/man1/masterha_secondary_check.1.gz
/usr/share/man/man1/masterha_stop.1.gz
/usr/share/perl5/vendor_perl/MHA/Config.pm
/usr/share/perl5/vendor_perl/MHA/DBHelper.pm
/usr/share/perl5/vendor_perl/MHA/FileStatus.pm
/usr/share/perl5/vendor_perl/MHA/HealthCheck.pm
/usr/share/perl5/vendor_perl/MHA/ManagerAdmin.pm
/usr/share/perl5/vendor_perl/MHA/ManagerAdminWrapper.pm
/usr/share/perl5/vendor_perl/MHA/ManagerConst.pm
/usr/share/perl5/vendor_perl/MHA/ManagerUtil.pm
/usr/share/perl5/vendor_perl/MHA/MasterFailover.pm
/usr/share/perl5/vendor_perl/MHA/MasterMonitor.pm
/usr/share/perl5/vendor_perl/MHA/MasterRotate.pm
/usr/share/perl5/vendor_perl/MHA/SSHCheck.pm
/usr/share/perl5/vendor_perl/MHA/Server.pm
/usr/share/perl5/vendor_perl/MHA/ServerManager.pm
masterha_check_repl: MASTERHA_CHECK_REPL(1) User Contributed Perl Documentation MASTERHA_CHECK_REPL(1) NAME masterha_check_repl - Checking MySQL replication health SYNOPSIS masterha_check_repl --conf=/usr/local/masterha/conf/app1.cnf See online reference (http://code.google.com/p/mysql-master-ha/wiki/masterha_check_repl) for details. masterha_check_ssh: MASTERHA_CHECK_SSH(1) User Contributed Perl Documentation MASTERHA_CHECK_SSH(1) NAME masterha_check_ssh - Checking SSH connections SYNOPSIS masterha_check_ssh --global_conf=/etc/masterha_default.cnf --conf=/etc/conf/masterha/app1.cnf masterha_secondary_check: masterha_secondary_check - Checking master availability from additional network routes SYNOPSIS masterha_secondary_check -s secondary_host1 -s secondary_host2 .. --user=ssh_username --master_host=host --master_ip=ip --master_port=port masterha_check_status: MASTERHA_CHECK_STATUS(1) User Contributed Perl Documentation MASTERHA_CHECK_STATUS(1) NAME masterha_check_status - Returns target MySQL master status monitored by MHA Manager SYNOPSIS masterha_check_status --conf=/usr/local/masterha/conf/app1.cnf masterha_conf_host: MASTERHA_CONF_HOST(1) User Contributed Perl Documentation MASTERHA_CONF_HOST(1) NAME masterha_conf_host - Adding new host entry to, or removing existing host entry from a config file SYNOPSIS masterha_conf_host --command=add --conf=/etc/conf/masterha/app1.cnf --hostname=db101 The following lines will be added to the conf file. [server_db101] hostname=db101 masterha_conf_host --command=add --conf=/etc/conf/masterha/app1.cnf --hostname=db101 --block=100 --params="no_master=1;ignore_fail=1"
The following lines will be added to the conf file. [server_100] hostname=db101 no_master=1 ignore_fail=1 masterha_conf_host --command=delete --conf=/etc/conf/masterha/app1.cnf --block=server100 Then entire block [server100] will be removed.
masterha_manager: MASTERHA_MANAGER(1) User Contributed Perl Documentation MASTERHA_MANAGER(1) NAME masterha_manager - Monitoring MySQL master server availability and do failover if it detects master failure SYNOPSIS masterha_manager --global_conf=/etc/masterha_default.cnf --conf=/usr/local/masterha/conf/app1.cnf
masterha_master_monitor: MASTERHA_MASTER_MONITOR(1) User Contributed Perl Documentation MASTERHA_MASTER_MONITOR(1) NAME masterha_master_monitor - Monitoring MySQL master server availability SYNOPSIS masterha_master_monitor --global_conf=/etc/masterha_default.cnf --conf=/usr/local/masterha/app1.cnf DESCRIPTION masterha_master_monitor is intended to be invoked from masterha_manager.
masterha_master_switch: MASTERHA_MASTER_SWITCH(1) User Contributed Perl Documentation MASTERHA_MASTER_SWITCH(1) NAME masterha_master_switch - Switching MySQL master server to one of other slave servers SYNOPSIS # For master failover
masterha_master_switch --master_state=dead --global_conf=/etc/masterha_default.cnf --conf=/usr/local/masterha/conf/app1.cnf --dead_master_host=host1 # For online master switch masterha_master_switch --master_state=alive --global_conf=/etc/masterha_default.cnf --conf=/usr/local/masterha/conf/app1.cnf
masterha_stop: MASTERHA_STOP(1) User Contributed Perl Documentation MASTERHA_STOP(1) NAME masterha_stop - Stopping MHA Manager process SYNOPSIS masterha_stop --conf=/usr/local/masterha/conf/app1.cnf See online reference (http://code.google.com/p/mysql-master-ha/wiki/masterha_stop) for details.
文件說明:
monitor工具包
masterha_check_ssh 檢查MHA的SSH配置狀況 masterha_check_repl 檢查MySQL復制狀況 masterha_manager 啟動MHA masterha_check_status 檢測當前MHA運行狀態 masterha_master_monitor 檢測master是否宕機 masterha_master_switch 控制故障轉移(自動或者手動) masterha_conf_host 添加或刪除配置的server信息
Node工具包(這些工具通常由MHA Manager的腳本觸發,無需人為操作)主要包括以下幾個工具:
save_binary_logs 保存和復制master的二進制日志 apply_diff_relay_logs 識別差異的中繼日志事件並將其差異的事件應用於其他的slave filter_mysqlbinlog 去除不必要的ROLLBACK事件(MHA已不再使用這個工具) purge_relay_logs 清除中繼日志(不會阻塞SQL線程)
內網IP:eth0
角色 ip地址 主機名 server_id 類型 Monitor host 10.169.216.172 monitor - 監控復制組 Master 10.24.220.232 master 1 寫入 Candidate master 10.24.220.70 slave1 2 讀 Slave 10.169.214.33 slave2 3 讀
監控主機:10.169.216.172
解壓包不安裝:
cd /root
rpm2cpio mha4mysql-manager-0.56-0.el6.noarch.rpm | cpio -div   # 注意:RPM 包內無 samples/scripts,示例腳本需從下面的源碼包中獲取
tar -zxvf mha4mysql-manager-0.56.tar.gz
實例腳本:
[root@monitor scripts]# pwd
/root/mha4mysql-manager-0.56/samples/scripts
[root@monitor scripts]# ls
master_ip_failover
master_ip_online_change
power_manager
send_report
腳本作用:
master_ip_failover
#自動切換時vip管理的腳本,
#不是必須;如果我們使用keepalived,可以自己編寫腳本完成對vip的管理,比如監控MySQL,如果mysql異常,
#我們停止keepalived就行,這樣vip就會自動漂移
master_ip_online_change
#在線切換時vip的管理,不是必須,同樣可以自行編寫簡單的shell完成
power_manager
#故障發生后關閉主機的腳本,不是必須
send_report
#因故障切換后發送報警的腳本,不是必須,可自行編寫簡單的shell完成。
[root@monitor scripts]#cp * /usr/local/bin/
實例配制:
[root@monitor conf]# pwd
/root/mha4mysql-manager-0.56/samples/conf
[root@monitor conf]# ls
app1.cnf masterha_default.cnf
mkdir -p /etc/masterha
cp /root/mha4mysql-manager-0.56/samples/conf/app1.cnf /etc/masterha/app1.conf
[root@monitor local]# cd /etc/masterha [root@monitor masterha]# ll total 4 -rw-r--r-- 1 root root 2697 May 8 18:28 app1.conf [root@monitor masterha]# cat app1.conf [server default] # 這兩個參數需要根據不同的集群進行修改 manager_workdir=/var/log/masterha/app1 manager_log=/var/log/masterha/app1/manager.log # 按照master服務器存放binlog的實際路徑進行修改,主要為了讓MHA拉取binlog master_binlog_dir=/data/mysql-bing-log/bin.log //每台機子都這樣設 # 設置自動failover的腳本 master_ip_failover_script= /usr/local/bin/master_ip_failover # 設置手動切換時候的腳本 (供masterha_master_switch使用) master_ip_online_change_script= /usr/local/bin/master_ip_online_change log_level=debug # 監控的用戶 user=root //master slave1 slave2 都要 給 monitor訪問 # 監控用戶的密碼 password=123 # 監控主庫的時間間隔,默認是3秒,嘗試三次沒有回應的時候自動進行failover ping_interval=3 # 檢測方式是insert,MHA-0.56開始支持insert # 會在Master中生成一個 infra 數據庫 ping_type=INSERT # 設置遠端mysql在發生切換時binlog的保存位置 remote_workdir=/tmp # 復制用的密碼 repl_password=123 # 復制的用戶 repl_user=rpl # 告警腳本,可自行修改,這里沒有使用 #report_script=/usr/local/send_report # 通過從機進行二次探測的腳本, IP地址按照實際的情況進行修改 secondary_check_script=/usr/local/bin/masterha_secondary_check -s 10.169.214.33 -s 10.24.220.70 --user=root --master_host=10.24.220.232 --master_port=3306 # 設置故障發生后關閉故障主機的腳本(主要作用是關閉主機防止發生腦裂,這里沒有使用,類似Fence功能) #shutdown_script="/usr/local/bin/power_manager --command=stopssh2 --host=test-1 --ssh_user=root" # 定義ssh的用戶 ssh_user=root [server1] # 這個hostname也可以配置成IP地址,同 ip 參數一樣 # 如果這里寫名字,需要DNS配合,或者使用 /etc/hosts hostname=server1 ip=10.24.220.232 port=3306 # candidate_master參數的意思為:設置為候選Master,如果發生主從切換,該主機會被提升為Master,即使這個服務器上的數據不是最新的(會用relay-log補全) candidate_master=1 [server2] hostname=Slave1 ip=10.24.220.70 port=3306 candidate_master=1 # check_repl_delay參數的意思為:默認情況下如果一個slave落后master 100M的relay logs的話,MHA將不會選擇該slave作為一個新的master; # 因為對於這個slave的恢復需要花費很長時間; # 通過設置check_repl_delay=0,MHA觸發切換在選擇一個新的master的時候將會忽略復制延時; # 這個參數對於設置了candidate_master=1的主機非常有用,因為這個候選主在切換的過程中一定是新的master check_repl_delay=0 [server3] hostname=Slave2 ip=10.169.214.33 port=3306 # no_master 表示該主機不會被提升為Master no_master=1
1.server1 slave1 slave2 monitor中執行 cd /root ssh-keygen -t rsa
2 分發公鑰
1.monitor
ssh-copy-id -i ~/.ssh/id_rsa.pub root@10.24.220.232
ssh-copy-id -i ~/.ssh/id_rsa.pub root@10.24.220.70
ssh-copy-id -i ~/.ssh/id_rsa.pub root@10.169.214.33
2.server1
ssh-copy-id -i ~/.ssh/id_rsa.pub root@10.24.220.70
ssh-copy-id -i ~/.ssh/id_rsa.pub root@10.169.214.33
3.slave1
ssh-copy-id -i ~/.ssh/id_rsa.pub root@10.24.220.232
ssh-copy-id -i ~/.ssh/id_rsa.pub root@10.169.214.33
4.slave2
ssh-copy-id -i ~/.ssh/id_rsa.pub root@10.24.220.232
ssh-copy-id -i ~/.ssh/id_rsa.pub root@10.24.220.70
MHA 需要調用 mysqlbinlog 和 mysql 命令,因此要在 /usr/bin 下為它們創建軟連接
master slave1 slave2都要執行
ln -s /usr/local/mysql/bin/mysqlbinlog /usr/bin/mysqlbinlog
ln -s /usr/local/mysql/bin/mysql /usr/bin/mysql
SSH互通檢測:
[root@monitor masterha]# masterha_check_ssh --conf=/etc/masterha/app1.conf Mon May 9 10:20:44 2016 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping. Mon May 9 10:20:44 2016 - [info] Reading application default configuration from /etc/masterha/app1.conf.. Mon May 9 10:20:44 2016 - [info] Reading server configuration from /etc/masterha/app1.conf.. Mon May 9 10:20:44 2016 - [info] Starting SSH connection tests.. Mon May 9 10:20:45 2016 - [debug] Mon May 9 10:20:44 2016 - [debug] Connecting via SSH from root@server1(10.24.220.232:22) to root@Slave1(10.24.220.70:22).. Mon May 9 10:20:45 2016 - [debug] ok. Mon May 9 10:20:45 2016 - [debug] Connecting via SSH from root@server1(10.24.220.232:22) to root@Slave2(10.169.214.33:22).. Mon May 9 10:20:45 2016 - [debug] ok. Mon May 9 10:20:45 2016 - [debug] Mon May 9 10:20:45 2016 - [debug] Connecting via SSH from root@Slave1(10.24.220.70:22) to root@server1(10.24.220.232:22).. Mon May 9 10:20:45 2016 - [debug] ok. Mon May 9 10:20:45 2016 - [debug] Connecting via SSH from root@Slave1(10.24.220.70:22) to root@Slave2(10.169.214.33:22).. Mon May 9 10:20:45 2016 - [debug] ok. Mon May 9 10:20:46 2016 - [debug] Mon May 9 10:20:45 2016 - [debug] Connecting via SSH from root@Slave2(10.169.214.33:22) to root@server1(10.24.220.232:22).. Mon May 9 10:20:46 2016 - [debug] ok. Mon May 9 10:20:46 2016 - [debug] Connecting via SSH from root@Slave2(10.169.214.33:22) to root@Slave1(10.24.220.70:22).. Mon May 9 10:20:46 2016 - [debug] ok. Mon May 9 10:20:46 2016 - [info] All SSH connection tests passed successfully.
MYSQL 集群中每台機需要兩種賬號
1.復制帳號
2.monitor帳號(root)
[root@monitor ~]# masterha_check_repl --conf=/etc/masterha/app1.conf Tue May 10 22:09:15 2016 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping. Tue May 10 22:09:15 2016 - [info] Reading application default configuration from /etc/masterha/app1.conf.. Tue May 10 22:09:15 2016 - [info] Reading server configuration from /etc/masterha/app1.conf.. Tue May 10 22:09:15 2016 - [info] MHA::MasterMonitor version 0.56. Tue May 10 22:09:15 2016 - [debug] Connecting to servers.. Tue May 10 22:09:15 2016 - [debug] Connected to: server1(10.24.220.232:3306), user=monitor Tue May 10 22:09:16 2016 - [debug] Number of slave worker threads on host server1(10.24.220.232:3306): 0 Tue May 10 22:09:16 2016 - [debug] Connected to: Slave1(10.24.220.70:3306), user=monitor Tue May 10 22:09:16 2016 - [debug] Number of slave worker threads on host Slave1(10.24.220.70:3306): 0 Tue May 10 22:09:16 2016 - [debug] Connected to: Slave2(10.169.214.33:3306), user=monitor Tue May 10 22:09:16 2016 - [debug] Number of slave worker threads on host Slave2(10.169.214.33:3306): 0 Tue May 10 22:09:16 2016 - [debug] Comparing MySQL versions.. Tue May 10 22:09:16 2016 - [debug] Comparing MySQL versions done. Tue May 10 22:09:16 2016 - [debug] Connecting to servers done. 
Tue May 10 22:09:16 2016 - [info] GTID failover mode = 1 Tue May 10 22:09:16 2016 - [info] Dead Servers: Tue May 10 22:09:16 2016 - [info] Alive Servers: Tue May 10 22:09:16 2016 - [info] server1(10.24.220.232:3306) Tue May 10 22:09:16 2016 - [info] Slave1(10.24.220.70:3306) Tue May 10 22:09:16 2016 - [info] Slave2(10.169.214.33:3306) Tue May 10 22:09:16 2016 - [info] Alive Slaves: Tue May 10 22:09:16 2016 - [info] Slave1(10.24.220.70:3306) Version=5.7.11-log (oldest major version between slaves) log-bin:enabled Tue May 10 22:09:16 2016 - [info] GTID ON Tue May 10 22:09:16 2016 - [debug] Relay log info repository: FILE Tue May 10 22:09:16 2016 - [info] Replicating from 10.24.220.232(10.24.220.232:3306) Tue May 10 22:09:16 2016 - [info] Primary candidate for the new Master (candidate_master is set) Tue May 10 22:09:16 2016 - [info] Slave2(10.169.214.33:3306) Version=5.7.11-log (oldest major version between slaves) log-bin:enabled Tue May 10 22:09:16 2016 - [info] GTID ON Tue May 10 22:09:16 2016 - [debug] Relay log info repository: FILE Tue May 10 22:09:16 2016 - [info] Replicating from 10.24.220.232(10.24.220.232:3306) Tue May 10 22:09:16 2016 - [info] Not candidate for the new Master (no_master is set) Tue May 10 22:09:16 2016 - [info] Current Alive Master: server1(10.24.220.232:3306) Tue May 10 22:09:16 2016 - [info] Checking slave configurations.. Tue May 10 22:09:16 2016 - [info] Checking replication filtering settings.. Tue May 10 22:09:16 2016 - [info] binlog_do_db= , binlog_ignore_db= Tue May 10 22:09:16 2016 - [info] Replication filtering check ok. Tue May 10 22:09:16 2016 - [info] GTID (with auto-pos) is supported. Skipping all SSH and Node package checking. Tue May 10 22:09:16 2016 - [info] Checking SSH publickey authentication settings on the current master.. 
Tue May 10 22:09:16 2016 - [debug] SSH connection test to server1, option -o StrictHostKeyChecking=no -o PasswordAuthentication=no -o BatchMode=yes -o ConnectTimeout=5, timeout 5 Tue May 10 22:09:16 2016 - [info] HealthCheck: SSH to server1 is reachable. Tue May 10 22:09:16 2016 - [info] server1(10.24.220.232:3306) (current master) +--Slave1(10.24.220.70:3306) +--Slave2(10.169.214.33:3306) Tue May 10 22:09:16 2016 - [info] Checking replication health on Slave1.. Tue May 10 22:09:16 2016 - [info] ok. Tue May 10 22:09:16 2016 - [info] Checking replication health on Slave2.. Tue May 10 22:09:16 2016 - [info] ok. Tue May 10 22:09:16 2016 - [warning] master_ip_failover_script is not defined. Tue May 10 22:09:16 2016 - [warning] shutdown_script is not defined. Tue May 10 22:09:16 2016 - [debug] Disconnected from server1(10.24.220.232:3306) Tue May 10 22:09:16 2016 - [debug] Disconnected from Slave1(10.24.220.70:3306) Tue May 10 22:09:16 2016 - [debug] Disconnected from Slave2(10.169.214.33:3306) Tue May 10 22:09:16 2016 - [info] Got exit code 0 (Not master dead).
MySQL Replication Health is OK.
[root@monitor ~]# masterha_check_repl --conf=/etc/masterha/app1.conf Tue May 10 22:30:01 2016 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping. Tue May 10 22:30:01 2016 - [info] Reading application default configuration from /etc/masterha/app1.conf.. Tue May 10 22:30:01 2016 - [info] Reading server configuration from /etc/masterha/app1.conf.. Tue May 10 22:30:01 2016 - [info] MHA::MasterMonitor version 0.56. Tue May 10 22:30:01 2016 - [debug] Connecting to servers.. Tue May 10 22:30:01 2016 - [debug] Connected to: server1(10.24.220.232:3306), user=monitor Tue May 10 22:30:01 2016 - [debug] Number of slave worker threads on host server1(10.24.220.232:3306): 0 Tue May 10 22:30:01 2016 - [debug] Connected to: Slave1(10.24.220.70:3306), user=monitor Tue May 10 22:30:01 2016 - [debug] Number of slave worker threads on host Slave1(10.24.220.70:3306): 0 Tue May 10 22:30:01 2016 - [debug] Connected to: Slave2(10.169.214.33:3306), user=monitor Tue May 10 22:30:01 2016 - [debug] Number of slave worker threads on host Slave2(10.169.214.33:3306): 0 Tue May 10 22:30:01 2016 - [debug] Comparing MySQL versions.. Tue May 10 22:30:01 2016 - [debug] Comparing MySQL versions done. Tue May 10 22:30:01 2016 - [debug] Connecting to servers done. 
Tue May 10 22:30:01 2016 - [info] GTID failover mode = 1 Tue May 10 22:30:01 2016 - [info] Dead Servers: Tue May 10 22:30:01 2016 - [info] Alive Servers: Tue May 10 22:30:01 2016 - [info] server1(10.24.220.232:3306) Tue May 10 22:30:01 2016 - [info] Slave1(10.24.220.70:3306) Tue May 10 22:30:01 2016 - [info] Slave2(10.169.214.33:3306) Tue May 10 22:30:01 2016 - [info] Alive Slaves: Tue May 10 22:30:01 2016 - [info] Slave1(10.24.220.70:3306) Version=5.7.11-log (oldest major version between slaves) log-bin:enabled Tue May 10 22:30:01 2016 - [info] GTID ON Tue May 10 22:30:01 2016 - [debug] Relay log info repository: FILE Tue May 10 22:30:01 2016 - [info] Replicating from 10.24.220.232(10.24.220.232:3306) Tue May 10 22:30:01 2016 - [info] Primary candidate for the new Master (candidate_master is set) Tue May 10 22:30:01 2016 - [info] Slave2(10.169.214.33:3306) Version=5.7.11-log (oldest major version between slaves) log-bin:enabled Tue May 10 22:30:01 2016 - [info] GTID ON Tue May 10 22:30:01 2016 - [debug] Relay log info repository: FILE Tue May 10 22:30:01 2016 - [info] Replicating from 10.24.220.232(10.24.220.232:3306) Tue May 10 22:30:01 2016 - [info] Not candidate for the new Master (no_master is set) Tue May 10 22:30:01 2016 - [info] Current Alive Master: server1(10.24.220.232:3306) Tue May 10 22:30:01 2016 - [info] Checking slave configurations.. Tue May 10 22:30:01 2016 - [info] Checking replication filtering settings.. Tue May 10 22:30:01 2016 - [info] binlog_do_db= , binlog_ignore_db= Tue May 10 22:30:01 2016 - [info] Replication filtering check ok. Tue May 10 22:30:01 2016 - [info] GTID (with auto-pos) is supported. Skipping all SSH and Node package checking. Tue May 10 22:30:01 2016 - [info] Checking SSH publickey authentication settings on the current master.. 
Tue May 10 22:30:01 2016 - [debug] SSH connection test to server1, option -o StrictHostKeyChecking=no -o PasswordAuthentication=no -o BatchMode=yes -o ConnectTimeout=5, timeout 5 Tue May 10 22:30:01 2016 - [info] HealthCheck: SSH to server1 is reachable. Tue May 10 22:30:01 2016 - [info] server1(10.24.220.232:3306) (current master) +--Slave1(10.24.220.70:3306) +--Slave2(10.169.214.33:3306) Tue May 10 22:30:01 2016 - [info] Checking replication health on Slave1.. Tue May 10 22:30:01 2016 - [info] ok. Tue May 10 22:30:01 2016 - [info] Checking replication health on Slave2.. Tue May 10 22:30:01 2016 - [info] ok. Tue May 10 22:30:01 2016 - [info] Checking master_ip_failover_script status: Tue May 10 22:30:01 2016 - [info] /usr/local/bin/master_ip_failover --command=status --ssh_user=root --orig_master_host=server1 --orig_master_ip=10.24.220.232 --orig_master_port=3306 IN SCRIPT TEST====/sbin/ifconfig eth0:88 down==/sbin/ifconfig eth0:88 10.169.214.100/24=== Checking the Status of the script.. OK Tue May 10 22:30:01 2016 - [info] OK. Tue May 10 22:30:01 2016 - [warning] shutdown_script is not defined. Tue May 10 22:30:01 2016 - [debug] Disconnected from server1(10.24.220.232:3306) Tue May 10 22:30:01 2016 - [debug] Disconnected from Slave1(10.24.220.70:3306) Tue May 10 22:30:01 2016 - [debug] Disconnected from Slave2(10.169.214.33:3306) Tue May 10 22:30:01 2016 - [info] Got exit code 0 (Not master dead). MySQL Replication Health is OK.
[root@monitor ~]# nohup masterha_manager --conf=/etc/masterha/app1.conf --remove_dead_master_conf --ignore_last_failover < /dev/null > /var/log/masterha/app1/manager.log 2>&1 & [3] 6485 [root@monitor ~]# masterha_check_status --conf=/etc/masterha/app1.conf app1 (pid:6485) is running(0:PING_OK), master:server1
自動Failover測試
service mysql.server stop
[root@monitor app1]# cat manager.log 0 Sat May 14 21:26:34 2016 - [warning] Got error on MySQL select ping: 2006 (MySQL server has gone away) Sat May 14 21:26:34 2016 - [info] Executing SSH check script: exit 0 Sat May 14 21:26:34 2016 - [debug] SSH connection test to server1, option -o StrictHostKeyChecking=no -o PasswordAuthentication=no -o BatSat May 14 21:26:43 2016 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping. Sat May 14 21:26:43 2016 - [info] Reading application default configuration from /etc/masterha/app1.conf.. Sat May 14 21:26:43 2016 - [info] Reading server configuration from /etc/masterha/app1.conf.. aster_port=3306 --master_user=root --master_password=971448976 --ping_type=SELECT Sat May 14 21:26:35 2016 - [info] HealthCheck: SSH to server1 is reachable. Monitoring server 10.169.214.33 is reachable, Master is not reachable from 10.169.214.33. OK. Monitoring server 10.24.220.70 is reachable, Master is not reachable from 10.24.220.70. OK. Sat May 14 21:26:36 2016 - [info] Master is not reachable from all other monitoring servers. Failover should start. Sat May 14 21:26:37 2016 - [warning] Got error on MySQL connect: 2013 (Lost connection to MySQL server at 'reading initial communication packet', system error: 111) Sat May 14 21:26:37 2016 - [warning] Connection failed 2 time(s).. Sat May 14 21:26:40 2016 - [warning] Got error on MySQL connect: 2013 (Lost connection to MySQL server at 'reading initial communication packet', system error: 111) Sat May 14 21:26:40 2016 - [warning] Connection failed 3 time(s).. Sat May 14 21:26:43 2016 - [warning] Got error on MySQL connect: 2013 (Lost connection to MySQL server at 'reading initial communication packet', system error: 111) Sat May 14 21:26:43 2016 - [warning] Connection failed 4 time(s).. Sat May 14 21:26:43 2016 - [warning] Master is not reachable from health checker! Sat May 14 21:26:43 2016 - [warning] Master server1(10.24.220.232:3306) is not reachable! 
Sat May 14 21:26:43 2016 - [warning] SSH is reachable. Sat May 14 21:26:43 2016 - [info] Connecting to a master server failed. Reading configuration file /etc/masterha_default.cnf and /etc/masterha/app1.conf again, and trying to connect to all servers to check server status.. Sat May 14 21:26:43 2016 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping. Sat May 14 21:26:43 2016 - [info] Reading application default configuration from /etc/masterha/app1.conf.. Sat May 14 21:26:43 2016 - [info] Reading server configuration from /etc/masterha/app1.conf.. Sat May 14 21:26:43 2016 - [debug] Skipping connecting to dead master server1(10.24.220.232:3306). Sat May 14 21:26:43 2016 - [debug] Connecting to servers.. Sat May 14 21:26:43 2016 - [debug] Connected to: Slave1(10.24.220.70:3306), user=root Sat May 14 21:26:43 2016 - [debug] Number of slave worker threads on host Slave1(10.24.220.70:3306): 0 Sat May 14 21:26:43 2016 - [debug] Connected to: Slave2(10.169.214.33:3306), user=root Sat May 14 21:26:43 2016 - [debug] Number of slave worker threads on host Slave2(10.169.214.33:3306): 0 Sat May 14 21:26:43 2016 - [debug] Comparing MySQL versions.. Sat May 14 21:26:43 2016 - [debug] Comparing MySQL versions done. Sat May 14 21:26:43 2016 - [debug] Connecting to servers done. 
Sat May 14 21:26:43 2016 - [info] GTID failover mode = 1 Sat May 14 21:26:43 2016 - [info] Dead Servers: Sat May 14 21:26:43 2016 - [info] server1(10.24.220.232:3306) Sat May 14 21:26:43 2016 - [info] Alive Servers: Sat May 14 21:26:43 2016 - [info] Slave1(10.24.220.70:3306) Sat May 14 21:26:43 2016 - [info] Slave2(10.169.214.33:3306) Sat May 14 21:26:43 2016 - [info] Alive Slaves: Sat May 14 21:26:43 2016 - [info] Slave1(10.24.220.70:3306) Version=5.7.11-log (oldest major version between slaves) log-bin:enabled Sat May 14 21:26:43 2016 - [info] GTID ON Sat May 14 21:26:43 2016 - [debug] Relay log info repository: FILE Sat May 14 21:26:43 2016 - [info] Replicating from 10.24.220.232(10.24.220.232:3306) Sat May 14 21:26:43 2016 - [info] Primary candidate for the new Master (candidate_master is set) Sat May 14 21:26:43 2016 - [info] Slave2(10.169.214.33:3306) Version=5.7.11-log (oldest major version between slaves) log-bin:enabled Sat May 14 21:26:43 2016 - [info] GTID ON Sat May 14 21:26:43 2016 - [debug] Relay log info repository: FILE Sat May 14 21:26:43 2016 - [info] Replicating from 10.24.220.232(10.24.220.232:3306) Sat May 14 21:26:43 2016 - [info] Not candidate for the new Master (no_master is set) Sat May 14 21:26:43 2016 - [info] Checking slave configurations.. Sat May 14 21:26:43 2016 - [info] Checking replication filtering settings.. Sat May 14 21:26:43 2016 - [info] Replication filtering check ok. Sat May 14 21:26:43 2016 - [info] Master is down! Sat May 14 21:26:43 2016 - [info] Terminating monitoring script. Sat May 14 21:26:43 2016 - [info] Got exit code 20 (Master dead). Sat May 14 21:26:43 2016 - [info] MHA::MasterFailover version 0.56. Sat May 14 21:26:43 2016 - [info] Starting master failover. Sat May 14 21:26:43 2016 - [info] Sat May 14 21:26:43 2016 - [info] * Phase 1: Configuration Check Phase.. Sat May 14 21:26:43 2016 - [info] Sat May 14 21:26:43 2016 - [debug] Skipping connecting to dead master server1. 
Sat May 14 21:26:43 2016 - [debug] Connecting to servers.. Sat May 14 21:26:43 2016 - [debug] Connected to: Slave1(10.24.220.70:3306), user=root Sat May 14 21:26:44 2016 - [debug] Number of slave worker threads on host Slave1(10.24.220.70:3306): 0 Sat May 14 21:26:44 2016 - [debug] Connected to: Slave2(10.169.214.33:3306), user=root Sat May 14 21:26:44 2016 - [debug] Number of slave worker threads on host Slave2(10.169.214.33:3306): 0 Sat May 14 21:26:44 2016 - [debug] Comparing MySQL versions.. Sat May 14 21:26:44 2016 - [debug] Comparing MySQL versions done. Sat May 14 21:26:44 2016 - [debug] Connecting to servers done. Sat May 14 21:26:44 2016 - [info] GTID failover mode = 1 Sat May 14 21:26:44 2016 - [info] Dead Servers: Sat May 14 21:26:44 2016 - [info] server1(10.24.220.232:3306) Sat May 14 21:26:44 2016 - [info] Checking master reachability via MySQL(double check)... Sat May 14 21:26:44 2016 - [info] ok. Sat May 14 21:26:44 2016 - [info] Alive Servers: Sat May 14 21:26:44 2016 - [info] Slave1(10.24.220.70:3306) Sat May 14 21:26:44 2016 - [info] Slave2(10.169.214.33:3306) Sat May 14 21:26:44 2016 - [info] Alive Slaves: Sat May 14 21:26:44 2016 - [info] Slave1(10.24.220.70:3306) Version=5.7.11-log (oldest major version between slaves) log-bin:enabled Sat May 14 21:26:44 2016 - [info] GTID ON Sat May 14 21:26:44 2016 - [debug] Relay log info repository: FILE Sat May 14 21:26:44 2016 - [info] Replicating from 10.24.220.232(10.24.220.232:3306) Sat May 14 21:26:44 2016 - [info] Primary candidate for the new Master (candidate_master is set) Sat May 14 21:26:44 2016 - [info] Slave2(10.169.214.33:3306) Version=5.7.11-log (oldest major version between slaves) log-bin:enabled Sat May 14 21:26:44 2016 - [info] GTID ON Sat May 14 21:26:44 2016 - [debug] Relay log info repository: FILE Sat May 14 21:26:44 2016 - [info] Replicating from 10.24.220.232(10.24.220.232:3306) Sat May 14 21:26:44 2016 - [info] Not candidate for the new Master (no_master is set) Sat May 14 
21:26:44 2016 - [info] Starting GTID based failover. Sat May 14 21:26:44 2016 - [info] Sat May 14 21:26:44 2016 - [info] ** Phase 1: Configuration Check Phase completed. Sat May 14 21:26:44 2016 - [info] Sat May 14 21:26:44 2016 - [info] * Phase 2: Dead Master Shutdown Phase.. Sat May 14 21:26:44 2016 - [info] Sat May 14 21:26:44 2016 - [info] Forcing shutdown so that applications never connect to the current master.. Sat May 14 21:26:44 2016 - [info] Executing master IP deactivation script: Sat May 14 21:26:44 2016 - [info] /usr/local/bin/master_ip_failover --orig_master_host=server1 --orig_master_ip=10.24.220.232 --orig_master_port=3306 --command=stopssh --ssh_user=root Sat May 14 21:26:44 2016 - [debug] Stopping IO thread on Slave2(10.169.214.33:3306).. Sat May 14 21:26:44 2016 - [debug] Stopping IO thread on Slave1(10.24.220.70:3306).. Sat May 14 21:26:44 2016 - [debug] Stop IO thread on Slave1(10.24.220.70:3306) done. IN SCRIPT TEST====/sbin/ifconfig eth0:88 down==/sbin/ifconfig eth0:88 10.24.220.100/24=== Disabling the VIP on old master: server1 Sat May 14 21:26:44 2016 - [debug] Stop IO thread on Slave2(10.169.214.33:3306) done. SIOCSIFFLAGS: Cannot assign requested address Sat May 14 21:26:44 2016 - [info] done. Sat May 14 21:26:44 2016 - [warning] shutdown_script is not set. Skipping explicit shutting down of the dead master. Sat May 14 21:26:44 2016 - [info] * Phase 2: Dead Master Shutdown Phase completed. Sat May 14 21:26:44 2016 - [info] Sat May 14 21:26:44 2016 - [info] * Phase 3: Master Recovery Phase.. Sat May 14 21:26:44 2016 - [info] Sat May 14 21:26:44 2016 - [info] * Phase 3.1: Getting Latest Slaves Phase.. Sat May 14 21:26:44 2016 - [info] Sat May 14 21:26:44 2016 - [debug] Fetching current slave status.. Sat May 14 21:26:44 2016 - [debug] Fetching current slave status done. 
Sat May 14 21:26:44 2016 - [info] The latest binary log file/position on all slaves is log.000003:1234 Sat May 14 21:26:44 2016 - [info] Retrieved Gtid Set: 191f7a9f-ffa2-11e5-a825-00163e00242a:1-5 Sat May 14 21:26:44 2016 - [info] Latest slaves (Slaves that received relay log files to the latest): Sat May 14 21:26:44 2016 - [info] Slave1(10.24.220.70:3306) Version=5.7.11-log (oldest major version between slaves) log-bin:enabled Sat May 14 21:26:44 2016 - [info] GTID ON Sat May 14 21:26:44 2016 - [debug] Relay log info repository: FILE Sat May 14 21:26:44 2016 - [info] Replicating from 10.24.220.232(10.24.220.232:3306) Sat May 14 21:26:44 2016 - [info] Primary candidate for the new Master (candidate_master is set) Sat May 14 21:26:44 2016 - [info] Slave2(10.169.214.33:3306) Version=5.7.11-log (oldest major version between slaves) log-bin:enabled Sat May 14 21:26:44 2016 - [info] GTID ON Sat May 14 21:26:44 2016 - [debug] Relay log info repository: FILE Sat May 14 21:26:44 2016 - [info] Replicating from 10.24.220.232(10.24.220.232:3306) Sat May 14 21:26:44 2016 - [info] Not candidate for the new Master (no_master is set) Sat May 14 21:26:44 2016 - [info] The oldest binary log file/position on all slaves is log.000003:1234 Sat May 14 21:26:44 2016 - [info] Retrieved Gtid Set: 191f7a9f-ffa2-11e5-a825-00163e00242a:1-5 Sat May 14 21:26:44 2016 - [info] Oldest slaves: Sat May 14 21:26:44 2016 - [info] Slave1(10.24.220.70:3306) Version=5.7.11-log (oldest major version between slaves) log-bin:enabled Sat May 14 21:26:44 2016 - [info] GTID ON Sat May 14 21:26:44 2016 - [debug] Relay log info repository: FILE Sat May 14 21:26:44 2016 - [info] Replicating from 10.24.220.232(10.24.220.232:3306) Sat May 14 21:26:44 2016 - [info] Primary candidate for the new Master (candidate_master is set) Sat May 14 21:26:44 2016 - [info] Slave2(10.169.214.33:3306) Version=5.7.11-log (oldest major version between slaves) log-bin:enabled Sat May 14 21:26:44 2016 - [info] GTID ON Sat May 14 
21:26:44 2016 - [debug] Relay log info repository: FILE Sat May 14 21:26:44 2016 - [info] Replicating from 10.24.220.232(10.24.220.232:3306) Sat May 14 21:26:44 2016 - [info] Not candidate for the new Master (no_master is set) Sat May 14 21:26:44 2016 - [info] Sat May 14 21:26:44 2016 - [info] * Phase 3.3: Determining New Master Phase.. Sat May 14 21:26:44 2016 - [info] Sat May 14 21:26:44 2016 - [info] Searching new master from slaves.. Sat May 14 21:26:44 2016 - [info] Candidate masters from the configuration file: Sat May 14 21:26:44 2016 - [info] Slave1(10.24.220.70:3306) Version=5.7.11-log (oldest major version between slaves) log-bin:enabled Sat May 14 21:26:44 2016 - [info] GTID ON Sat May 14 21:26:44 2016 - [debug] Relay log info repository: FILE Sat May 14 21:26:44 2016 - [info] Replicating from 10.24.220.232(10.24.220.232:3306) Sat May 14 21:26:44 2016 - [info] Primary candidate for the new Master (candidate_master is set) Sat May 14 21:26:44 2016 - [info] Non-candidate masters: Sat May 14 21:26:44 2016 - [info] Slave2(10.169.214.33:3306) Version=5.7.11-log (oldest major version between slaves) log-bin:enabled Sat May 14 21:26:44 2016 - [info] GTID ON Sat May 14 21:26:44 2016 - [debug] Relay log info repository: FILE Sat May 14 21:26:44 2016 - [info] Replicating from 10.24.220.232(10.24.220.232:3306) Sat May 14 21:26:44 2016 - [info] Not candidate for the new Master (no_master is set) Sat May 14 21:26:44 2016 - [info] Searching from candidate_master slaves which have received the latest relay log events.. Sat May 14 21:26:44 2016 - [info] New master is Slave1(10.24.220.70:3306) Sat May 14 21:26:44 2016 - [info] Starting master failover.. 
Sat May 14 21:26:44 2016 - [info] From: server1(10.24.220.232:3306) (current master) +--Slave1(10.24.220.70:3306) +--Slave2(10.169.214.33:3306) To: Slave1(10.24.220.70:3306) (new master) +--Slave2(10.169.214.33:3306) Sat May 14 21:26:44 2016 - [info] Sat May 14 21:26:44 2016 - [info] * Phase 3.3: New Master Recovery Phase.. Sat May 14 21:26:44 2016 - [info] Sat May 14 21:26:44 2016 - [info] Waiting all logs to be applied.. Sat May 14 21:26:44 2016 - [info] done. Sat May 14 21:26:44 2016 - [debug] Stopping slave IO/SQL thread on Slave1(10.24.220.70:3306).. Sat May 14 21:26:44 2016 - [debug] done. Sat May 14 21:26:44 2016 - [info] Getting new master's binlog name and position.. Sat May 14 21:26:44 2016 - [info] log.000001:1207 Sat May 14 21:26:44 2016 - [info] All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST='Slave1 or 10.24.220.70', MASTER_PORT=3306, MASTER_AUTO_POSITION=1, MASTER_USER='repl', MASTER_PASSWORD='xxx'; Sat May 14 21:26:44 2016 - [info] Master Recovery succeeded. File:Pos:Exec_Gtid_Set: log.000001, 1207, 191f7a9f-ffa2-11e5-a825-00163e00242a:1-5 Sat May 14 21:26:44 2016 - [info] Executing master IP activate script: Sat May 14 21:26:44 2016 - [info] /usr/local/bin/master_ip_failover --command=start --ssh_user=root --orig_master_host=server1 --orig_master_ip=10.24.220.232 --orig_master_port=3306 --new_master_host=Slave1 --new_master_ip=10.24.220.70 --new_master_port=3306 --new_master_user='root' --new_master_password='971448976' Unknown option: new_master_user Unknown option: new_master_password IN SCRIPT TEST====/sbin/ifconfig eth0:88 down==/sbin/ifconfig eth0:88 10.24.220.100/24=== Enabling the VIP - 10.24.220.100/24 on the new master - Slave1 Sat May 14 21:26:44 2016 - [info] OK. Sat May 14 21:26:44 2016 - [info] Setting read_only=0 on Slave1(10.24.220.70:3306).. Sat May 14 21:26:44 2016 - [info] ok. Sat May 14 21:26:44 2016 - [info] ** Finished master recovery successfully. 
Sat May 14 21:26:44 2016 - [info] * Phase 3: Master Recovery Phase completed. Sat May 14 21:26:44 2016 - [info] Sat May 14 21:26:44 2016 - [info] * Phase 4: Slaves Recovery Phase.. Sat May 14 21:26:44 2016 - [info] Sat May 14 21:26:44 2016 - [info] Sat May 14 21:26:44 2016 - [info] * Phase 4.1: Starting Slaves in parallel.. Sat May 14 21:26:44 2016 - [info] Sat May 14 21:26:44 2016 - [info] -- Slave recovery on host Slave2(10.169.214.33:3306) started, pid: 1817. Check tmp log /var/log/masterha/app1/Slave2_3306_20160514212643.log if it takes time.. Sat May 14 21:26:45 2016 - [info] Sat May 14 21:26:45 2016 - [info] Log messages from Slave2 ... Sat May 14 21:26:45 2016 - [info] Sat May 14 21:26:44 2016 - [info] Resetting slave Slave2(10.169.214.33:3306) and starting replication from the new master Slave1(10.24.220.70:3306).. Sat May 14 21:26:44 2016 - [debug] Stopping slave IO/SQL thread on Slave2(10.169.214.33:3306).. Sat May 14 21:26:44 2016 - [debug] done. Sat May 14 21:26:44 2016 - [info] Executed CHANGE MASTER. Sat May 14 21:26:44 2016 - [debug] Starting slave IO/SQL thread on Slave2(10.169.214.33:3306).. Sat May 14 21:26:45 2016 - [debug] done. Sat May 14 21:26:45 2016 - [info] Slave started. Sat May 14 21:26:45 2016 - [info] gtid_wait(191f7a9f-ffa2-11e5-a825-00163e00242a:1-5) completed on Slave2(10.169.214.33:3306). Executed 0 events. Sat May 14 21:26:45 2016 - [info] End of log messages from Slave2. Sat May 14 21:26:45 2016 - [info] -- Slave on host Slave2(10.169.214.33:3306) started. Sat May 14 21:26:45 2016 - [info] All new slave servers recovered successfully. Sat May 14 21:26:45 2016 - [info] Sat May 14 21:26:45 2016 - [info] * Phase 5: New master cleanup phase.. Sat May 14 21:26:45 2016 - [info] Sat May 14 21:26:45 2016 - [info] Resetting slave info on the new master.. Sat May 14 21:26:45 2016 - [debug] Clearing slave info.. Sat May 14 21:26:45 2016 - [debug] Stopping slave IO/SQL thread on Slave1(10.24.220.70:3306).. 
Sat May 14 21:26:45 2016 - [debug] done. Sat May 14 21:26:46 2016 - [debug] SHOW SLAVE STATUS shows new master does not replicate from anywhere. OK. Sat May 14 21:26:46 2016 - [info] Slave1: Resetting slave info succeeded. Sat May 14 21:26:46 2016 - [info] Master failover to Slave1(10.24.220.70:3306) completed successfully. Sat May 14 21:26:46 2016 - [info] Deleted server1 entry from /etc/masterha/app1.conf . Sat May 14 21:26:46 2016 - [debug] Disconnected from Slave1(10.24.220.70:3306) Sat May 14 21:26:46 2016 - [debug] Disconnected from Slave2(10.169.214.33:3306) Sat May 14 21:26:46 2016 - [info] ----- Failover Report ----- app1: MySQL Master failover server1(10.24.220.232:3306) to Slave1(10.24.220.70:3306) succeeded Master server1(10.24.220.232:3306) is down! Check MHA Manager logs at monitor:/var/log/masterha/app1/manager.log for details. Started automated(non-interactive) failover. Invalidated master IP address on server1(10.24.220.232:3306) Selected Slave1(10.24.220.70:3306) as a new master. Slave1(10.24.220.70:3306): OK: Applying all logs succeeded. Slave1(10.24.220.70:3306): OK: Activated master IP address. Slave2(10.169.214.33:3306): OK: Slave started, replicating from Slave1(10.24.220.70:3306) Slave1(10.24.220.70:3306): Resetting slave info succeeded. Master failover to Slave1(10.24.220.70:3306) completed successfully.
[root@monitor ~]# cat /etc/masterha/app1.conf
[server default]
log_level=debug
manager_log=/var/log/masterha/app1/manager.log
manager_workdir=/var/log/masterha/app1
master_binlog_dir=/data/mysql-bin-log/
master_ip_failover_script=/usr/local/bin/master_ip_failover
master_ip_online_change_script=/usr/local/bin/master_ip_online_change
password=971448976
ping_interval=3
remote_workdir=/tmp
repl_password=123
repl_user=repl
secondary_check_script=/usr/bin/masterha_secondary_check -s 10.169.214.33 -s 10.24.220.70 --user=root --master_host=10.24.220.232 --master_port=3306
ssh_user=root
user=root

[server2]
candidate_master=1
check_repl_delay=0
hostname=Slave1
ip=10.24.220.70
port=3306

[server3]
hostname=Slave2
ip=10.169.214.33
no_master=1
port=3306