[MySQL - MHA 구성 + failover/switchover] part 2

1편 MHA 구성에 이어, 이번 편에서는 VIP 전환 스크립트를 생성하고 failover 및 switchover 테스트를 진행하겠습니다.

 

 

 

 사전 구성

1편 URL :  https://jhdatabase.tistory.com/19

 

[Mysql - MHA 구성 + VIP생성] part 1

MHA란 Master DB가 장애로 서비스가 불가능한 상태가 되면, 자동으로 failover를 수행하여 slave DB를 master DB로 승격시켜 서비스 다운타임을 최소화하는 auto failover 솔루션입니다. Master와 Sla..

jhdatabase.tistory.com

최종 아키텍처

 

 

 테스트 시작

 

 Master ip_online_change 설정

[root@jh-mha-manager bin]# cp /root/mha4mysql-manager-0.57/samples/scripts/master_ip_online_change  /var/log/masterha/app1/custom_scripts


[root@jh-mha-manager bin]# vi /var/log/masterha/app1/custom_scripts/master_ip_online_change
:set nu
150, 151, 152, 245, 246, 247, 248 라인 주석 추가(#)
248 라인 밑에 아래 스크립트 추가 후 저장


###vip change
# Run the helper script that matches the new master's IP address.
# system() returns 0 only when the helper ran and exited successfully, so a
# missing or failing helper is reported instead of being silently ignored.
if ( $new_master_ip eq "192.168.100.50" ) {
  system("/bin/sh /var/log/masterha/app1/custom_scripts/master_vip_up.sh") == 0
    or warn "VIP change failed: master_vip_up.sh (wait status $?)\n";
}
elsif ( $new_master_ip eq "192.168.100.51" ) {
  system("/bin/sh /var/log/masterha/app1/custom_scripts/slave_vip_up.sh") == 0
    or warn "VIP change failed: slave_vip_up.sh (wait status $?)\n";
}
else {
  # No helper script is mapped to this address; make that visible in the log.
  warn "VIP change skipped: no VIP script defined for new master $new_master_ip\n";
}

 

 

 Master ip_failover 설정

[root@jh-mha-manager scripts]# cp /root/mha4mysql-manager-0.57/samples/scripts/master_ip_failover  /var/log/masterha/app1/custom_scripts/


[root@jh-mha-manager scripts]# vi /var/log/masterha/app1/custom_scripts/master_ip_failover
:set nu
87, 88, 89, 90, 93 라인 주석 추가(#)
93 라인 밑에 아래 스크립트 추가 후 저장


###vip change
# Run the helper script that matches the new master's IP address.
# system() returns 0 only when the helper ran and exited successfully, so a
# missing or failing helper is reported instead of being silently ignored.
if ( $new_master_ip eq "192.168.100.50" ) {
  system("/bin/sh /var/log/masterha/app1/custom_scripts/master_vip_up.sh") == 0
    or warn "VIP change failed: master_vip_up.sh (wait status $?)\n";
}
elsif ( $new_master_ip eq "192.168.100.51" ) {
  system("/bin/sh /var/log/masterha/app1/custom_scripts/slave_vip_up.sh") == 0
    or warn "VIP change failed: slave_vip_up.sh (wait status $?)\n";
}
else {
  # No helper script is mapped to this address; make that visible in the log.
  warn "VIP change skipped: no VIP script defined for new master $new_master_ip\n";
}

 

 

 

 Master_vip_up_sh 생성

[root@jh-mha-manager masterha]# vi /var/log/masterha/app1/custom_scripts/master_vip_up.sh

#!/bin/sh
# Move the VIP alias interface onto 192.168.100.50.
#   master : 192.168.100.50  (VIP alias is brought up here)
#   slave  : 192.168.100.51  (VIP alias is taken down here)
#   VIP alias interface : eth1:1
# Called from master_ip_failover / master_ip_online_change when the new
# master is 192.168.100.50.

UP_HOST=192.168.100.50
DOWN_HOST=192.168.100.51
VIP_IF=eth1:1

# BatchMode: never stop at a password prompt.
# ConnectTimeout: do not stall the failover when a host is unreachable.
SSH_OPTS="-o BatchMode=yes -o ConnectTimeout=5"

# Best effort: the host that currently holds the VIP may already be down.
# shellcheck disable=SC2086 # SSH_OPTS is split into separate options on purpose
ssh $SSH_OPTS "root@${DOWN_HOST}" sudo /sbin/ifdown "${VIP_IF}" ||
  echo "warning: could not bring down ${VIP_IF} on ${DOWN_HOST}" >&2

# Last command: the script's exit status reflects whether the VIP came up.
# shellcheck disable=SC2086
ssh $SSH_OPTS "root@${UP_HOST}" sudo /sbin/ifup "${VIP_IF}"

 

 

 

 Slave_vip_up_sh 생성

[root@jh-mha-manager custom_scripts]# vi slave_vip_up.sh

#!/bin/sh
# Move the VIP alias interface onto 192.168.100.51.
#   master : 192.168.100.50  (VIP alias is taken down here)
#   slave  : 192.168.100.51  (VIP alias is brought up here)
#   VIP alias interface : eth1:1
# Called from master_ip_failover / master_ip_online_change when the new
# master is 192.168.100.51.

UP_HOST=192.168.100.51
DOWN_HOST=192.168.100.50
VIP_IF=eth1:1

# BatchMode: never stop at a password prompt.
# ConnectTimeout: do not stall the failover when a host is unreachable.
SSH_OPTS="-o BatchMode=yes -o ConnectTimeout=5"

# Best effort: the host that currently holds the VIP may already be down.
# shellcheck disable=SC2086 # SSH_OPTS is split into separate options on purpose
ssh $SSH_OPTS "root@${DOWN_HOST}" sudo /sbin/ifdown "${VIP_IF}" ||
  echo "warning: could not bring down ${VIP_IF} on ${DOWN_HOST}" >&2

# Last command: the script's exit status reflects whether the VIP came up.
# shellcheck disable=SC2086
ssh $SSH_OPTS "root@${UP_HOST}" sudo /sbin/ifup "${VIP_IF}"

 

 

 

스크립트 확인

[root@jh-mha-manager custom_scripts]# ll
total 24
-rwxr-xr-x 1 root root  3917 Aug  6 11:20 master_ip_failover
-rwxr-xr-x 1 root root 10141 Aug  6 11:27 master_ip_online_change
-rwxr-xr-x 1 root root   178 Aug  6 13:44 master_vip_up.sh
-rwxr-xr-x 1 root root   178 Aug  6 10:51 slave_vip_up.sh

 

 

 

 MHA start

[root@jh-mha-manager bin]# start
[3] 31584
[root@jh-mha-manager bin]# Fri Aug  6 11:21:42 2021 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Fri Aug  6 11:21:42 2021 - [info] Reading application default configuration from /etc/mha.cnf..
Fri Aug  6 11:21:42 2021 - [info] Reading server configuration from /etc/mha.cnf..



[root@jh-mha-manager bin]# status
mha (pid:31584) is running(0:PING_OK), master:192.168.100.50

 

 

 

 

 

Failover TEST

[root@jh-mha1 app5]# ps -ef | grep mysql
root      2091 30107  0 10:58 pts/1    00:00:00 /bin/sh /mysql/bin/mysqld_safe --user=mysql
mysql     2196  2091  0 10:58 pts/1    00:00:00 /mysql/bin/mysqld --basedir=/mysql --datadir=/data --plugin-dir=/mysql/lib/plugin --user=mysql --log-error=/log/mariadb.log --open-files-limit=5000 --pid-file=/log/mariadb.pid --socket=/tmp/mysql.sock
root      3773 30107  0 11:22 pts/1    00:00:00 grep --color=auto mysql


[root@jh-mha1 app5]# kill -9 2091
[root@jh-mha1 app5]# kill -9 2196
[1]+  Killed                  mysqld_safe --user=mysql




##vip 확인

[root@jh-mha1 ~]# ifconfig
eth0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet 10.41.181.201  netmask 255.255.254.0  broadcast 10.41.181.255
        ether f2:20:cd:59:d8:93  txqueuelen 1000  (Ethernet)
        RX packets 1165720  bytes 1096685649 (1.0 GiB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 325160  bytes 98930106 (94.3 MiB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

eth1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet 192.168.100.50  netmask 255.255.255.0  broadcast 192.168.100.255
        ether f2:d7:cf:4d:e0:af  txqueuelen 1000  (Ethernet)
        RX packets 17025  bytes 1678589 (1.6 MiB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 14086  bytes 1953754 (1.8 MiB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

lo: flags=73<UP,LOOPBACK,RUNNING>  mtu 65536
        inet 127.0.0.1  netmask 255.0.0.0
        loop  txqueuelen 1  (Local Loopback)
        RX packets 87  bytes 8040 (7.8 KiB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 87  bytes 8040 (7.8 KiB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0




[root@jh-mha2 network-scripts]# ifconfig
eth0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet 10.41.180.182  netmask 255.255.254.0  broadcast 10.41.181.255
        ether f2:20:cd:dc:92:d4  txqueuelen 1000  (Ethernet)
        RX packets 1066121  bytes 587472881 (560.2 MiB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 293129  bytes 91489858 (87.2 MiB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

eth1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet 192.168.100.51  netmask 255.255.255.0  broadcast 192.168.100.255
        ether f2:d7:cf:7e:50:45  txqueuelen 1000  (Ethernet)
        RX packets 12369  bytes 1410079 (1.3 MiB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 11509  bytes 1743089 (1.6 MiB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

eth1:1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet 192.168.100.53  netmask 255.255.255.0  broadcast 192.168.100.255
        ether f2:d7:cf:7e:50:45  txqueuelen 1000  (Ethernet)

lo: flags=73<UP,LOOPBACK,RUNNING>  mtu 65536
        inet 127.0.0.1  netmask 255.0.0.0
        loop  txqueuelen 1  (Local Loopback)
        RX packets 550  bytes 47286 (46.1 KiB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 550  bytes 47286 (46.1 KiB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0




[root@jh-mha-manager bin]# log
Fri Aug  6 11:23:15 2021 - [warning] Connection failed 3 time(s)..
Fri Aug  6 11:23:20 2021 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '192.168.100.50' (111))
Fri Aug  6 11:23:20 2021 - [warning] Connection failed 4 time(s)..
Fri Aug  6 11:23:20 2021 - [warning] Master is not reachable from health checker!
Fri Aug  6 11:23:20 2021 - [warning] Master 192.168.100.50(192.168.100.50:3306) is not reachable!
Fri Aug  6 11:23:20 2021 - [warning] SSH is reachable.
Fri Aug  6 11:23:20 2021 - [info] Connecting to a master server failed. Reading configuration file /etc/masterha_default.cnf and /etc/mha.cnf again, and trying to connect to all servers to check server status..
Fri Aug  6 11:23:20 2021 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Fri Aug  6 11:23:20 2021 - [info] Reading application default configuration from /etc/mha.cnf..
Fri Aug  6 11:23:20 2021 - [info] Reading server configuration from /etc/mha.cnf..
Fri Aug  6 11:23:21 2021 - [info] GTID failover mode = 0
Fri Aug  6 11:23:21 2021 - [info] Dead Servers:
Fri Aug  6 11:23:21 2021 - [info]   192.168.100.50(192.168.100.50:3306)
Fri Aug  6 11:23:21 2021 - [info] Alive Servers:
Fri Aug  6 11:23:21 2021 - [info]   192.168.100.51(192.168.100.51:3306)
Fri Aug  6 11:23:21 2021 - [info] Alive Slaves:
Fri Aug  6 11:23:21 2021 - [info]   192.168.100.51(192.168.100.51:3306)  Version=10.2.12-MariaDB-log (oldest major version between slaves) log-bin:enabled
Fri Aug  6 11:23:21 2021 - [info]     Replicating from 192.168.100.50(192.168.100.50:3306)
Fri Aug  6 11:23:21 2021 - [info] Checking slave configurations..
Fri Aug  6 11:23:21 2021 - [info] Checking replication filtering settings..
Fri Aug  6 11:23:21 2021 - [info]  Replication filtering check ok.
Fri Aug  6 11:23:21 2021 - [info] Master is down!
Fri Aug  6 11:23:21 2021 - [info] Terminating monitoring script.
Fri Aug  6 11:23:21 2021 - [info] Got exit code 20 (Master dead).
Fri Aug  6 11:23:21 2021 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Fri Aug  6 11:23:21 2021 - [info] Reading application default configuration from /etc/mha.cnf..
Fri Aug  6 11:23:21 2021 - [info] Reading server configuration from /etc/mha.cnf..
Fri Aug  6 11:23:21 2021 - [info] MHA::MasterFailover version 0.57.
Fri Aug  6 11:23:21 2021 - [info] Starting master failover.
Fri Aug  6 11:23:21 2021 - [info]
Fri Aug  6 11:23:21 2021 - [info] * Phase 1: Configuration Check Phase..
Fri Aug  6 11:23:21 2021 - [info]
Fri Aug  6 11:23:22 2021 - [info] GTID failover mode = 0
Fri Aug  6 11:23:22 2021 - [info] Dead Servers:
Fri Aug  6 11:23:22 2021 - [info]   192.168.100.50(192.168.100.50:3306)
Fri Aug  6 11:23:22 2021 - [info] Checking master reachability via MySQL(double check)...
Fri Aug  6 11:23:22 2021 - [info]  ok.
Fri Aug  6 11:23:22 2021 - [info] Alive Servers:
Fri Aug  6 11:23:22 2021 - [info]   192.168.100.51(192.168.100.51:3306)
Fri Aug  6 11:23:22 2021 - [info] Alive Slaves:
Fri Aug  6 11:23:22 2021 - [info]   192.168.100.51(192.168.100.51:3306)  Version=10.2.12-MariaDB-log (oldest major version between slaves) log-bin:enabled
Fri Aug  6 11:23:22 2021 - [info]     Replicating from 192.168.100.50(192.168.100.50:3306)
Fri Aug  6 11:23:22 2021 - [info] Starting Non-GTID based failover.
Fri Aug  6 11:23:22 2021 - [info]
Fri Aug  6 11:23:22 2021 - [info] ** Phase 1: Configuration Check Phase completed.
Fri Aug  6 11:23:22 2021 - [info]
Fri Aug  6 11:23:22 2021 - [info] * Phase 2: Dead Master Shutdown Phase..
Fri Aug  6 11:23:22 2021 - [info]
Fri Aug  6 11:23:22 2021 - [info] Forcing shutdown so that applications never connect to the current master..
Fri Aug  6 11:23:22 2021 - [info] Executing master IP deactivation script:
Fri Aug  6 11:23:22 2021 - [info]   /var/log/masterha/app1/custom_scripts/master_ip_failover --orig_master_host=192.168.100.50 --orig_master_ip=192.168.100.50 --orig_master_port=3306 --command=stopssh --ssh_user=root  
Fri Aug  6 11:23:22 2021 - [info]  done.
Fri Aug  6 11:23:22 2021 - [warning] shutdown_script is not set. Skipping explicit shutting down of the dead master.
Fri Aug  6 11:23:22 2021 - [info] * Phase 2: Dead Master Shutdown Phase completed.
Fri Aug  6 11:23:22 2021 - [info]
Fri Aug  6 11:23:22 2021 - [info] * Phase 3: Master Recovery Phase..
Fri Aug  6 11:23:22 2021 - [info]
Fri Aug  6 11:23:22 2021 - [info] * Phase 3.1: Getting Latest Slaves Phase..
Fri Aug  6 11:23:22 2021 - [info]
Fri Aug  6 11:23:22 2021 - [info] The latest binary log file/position on all slaves is mysql.000012:338
Fri Aug  6 11:23:22 2021 - [info] Latest slaves (Slaves that received relay log files to the latest):
Fri Aug  6 11:23:22 2021 - [info]   192.168.100.51(192.168.100.51:3306)  Version=10.2.12-MariaDB-log (oldest major version between slaves) log-bin:enabled
Fri Aug  6 11:23:22 2021 - [info]     Replicating from 192.168.100.50(192.168.100.50:3306)
Fri Aug  6 11:23:22 2021 - [info] The oldest binary log file/position on all slaves is mysql.000012:338
Fri Aug  6 11:23:22 2021 - [info] Oldest slaves:
Fri Aug  6 11:23:22 2021 - [info]   192.168.100.51(192.168.100.51:3306)  Version=10.2.12-MariaDB-log (oldest major version between slaves) log-bin:enabled
Fri Aug  6 11:23:22 2021 - [info]     Replicating from 192.168.100.50(192.168.100.50:3306)
Fri Aug  6 11:23:22 2021 - [info]
Fri Aug  6 11:23:22 2021 - [info] * Phase 3.2: Saving Dead Master's Binlog Phase..
Fri Aug  6 11:23:22 2021 - [info]
Fri Aug  6 11:23:22 2021 - [info] Fetching dead master's binary logs..
Fri Aug  6 11:23:22 2021 - [info] Executing command on the dead master 192.168.100.50(192.168.100.50:3306): save_binary_logs --command=save --start_file=mysql.000012  --start_pos=338 --binlog_dir=/data --output_file=/var/log/masterha/app5/saved_master_binlog_from_192.168.100.50_3306_20210806112321.binlog --handle_raw_binlog=1 --disable_log_bin=0 --manager_version=0.57
  Creating /var/log/masterha/app5 if not exists..    ok.
Concat binary/relay logs from mysql.000012 pos 338 to mysql.000012 EOF into /var/log/masterha/app5/saved_master_binlog_from_192.168.100.50_3306_20210806112321.binlog ..
Binlog Checksum enabled
  Dumping binlog format description event, from position 0 to 256.. ok.
  No need to dump effective binlog data from /data/mysql.000012 (pos starts 338, filesize 338). Skipping.
Binlog Checksum enabled
/var/log/masterha/app5/saved_master_binlog_from_192.168.100.50_3306_20210806112321.binlog has no effective data events.
Event not exists.
Fri Aug  6 11:23:22 2021 - [info] Additional events were not found from the orig master. No need to save.
Fri Aug  6 11:23:22 2021 - [info]
Fri Aug  6 11:23:22 2021 - [info] * Phase 3.3: Determining New Master Phase..
Fri Aug  6 11:23:22 2021 - [info]
Fri Aug  6 11:23:22 2021 - [info] Finding the latest slave that has all relay logs for recovering other slaves..
Fri Aug  6 11:23:22 2021 - [info] All slaves received relay logs to the same position. No need to resync each other.
Fri Aug  6 11:23:22 2021 - [info] Searching new master from slaves..
Fri Aug  6 11:23:22 2021 - [info]  Candidate masters from the configuration file:
Fri Aug  6 11:23:22 2021 - [info]  Non-candidate masters:
Fri Aug  6 11:23:22 2021 - [info] New master is 192.168.100.51(192.168.100.51:3306)
Fri Aug  6 11:23:22 2021 - [info] Starting master failover..
Fri Aug  6 11:23:22 2021 - [info]
From:
192.168.100.50(192.168.100.50:3306) (current master)
+--192.168.100.51(192.168.100.51:3306)

To:
192.168.100.51(192.168.100.51:3306) (new master)
Fri Aug  6 11:23:22 2021 - [info]
Fri Aug  6 11:23:22 2021 - [info] * Phase 3.3: New Master Diff Log Generation Phase..
Fri Aug  6 11:23:22 2021 - [info]
Fri Aug  6 11:23:22 2021 - [info]  This server has all relay logs. No need to generate diff files from the latest slave.
Fri Aug  6 11:23:22 2021 - [info]
Fri Aug  6 11:23:22 2021 - [info] * Phase 3.4: Master Log Apply Phase..
Fri Aug  6 11:23:22 2021 - [info]
Fri Aug  6 11:23:22 2021 - [info] *NOTICE: If any error happens from this phase, manual recovery is needed.
Fri Aug  6 11:23:22 2021 - [info] Starting recovery on 192.168.100.51(192.168.100.51:3306)..
Fri Aug  6 11:23:22 2021 - [info]  This server has all relay logs. Waiting all logs to be applied..
Fri Aug  6 11:23:22 2021 - [info]   done.
Fri Aug  6 11:23:22 2021 - [info]  All relay logs were successfully applied.
Fri Aug  6 11:23:22 2021 - [info] Getting new master's binlog name and position..
Fri Aug  6 11:23:22 2021 - [info]  mysql.000001:1229
Fri Aug  6 11:23:22 2021 - [info]  All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST='192.168.100.51', MASTER_PORT=3306, MASTER_LOG_FILE='mysql.000001', MASTER_LOG_POS=1229, MASTER_USER='test', MASTER_PASSWORD='xxx';
Fri Aug  6 11:23:22 2021 - [info] Executing master IP activate script:
Fri Aug  6 11:23:22 2021 - [info]   /var/log/masterha/app1/custom_scripts/master_ip_failover --command=start --ssh_user=root --orig_master_host=192.168.100.50 --orig_master_ip=192.168.100.50 --orig_master_port=3306 --new_master_host=192.168.100.51 --new_master_ip=192.168.100.51 --new_master_port=3306 --new_master_user='test'   --new_master_password=xxx
Set read_only=0 on the new master.
/bin/sh: /var/log/masterha/app1/custom_scripts/slave_vip_up.sh: No such file or directory
Fri Aug  6 11:23:22 2021 - [info]  OK.
Fri Aug  6 11:23:22 2021 - [info] ** Finished master recovery successfully.
Fri Aug  6 11:23:22 2021 - [info] * Phase 3: Master Recovery Phase completed.
Fri Aug  6 11:23:22 2021 - [info]
Fri Aug  6 11:23:22 2021 - [info] * Phase 4: Slaves Recovery Phase..
Fri Aug  6 11:23:22 2021 - [info]
Fri Aug  6 11:23:22 2021 - [info] * Phase 4.1: Starting Parallel Slave Diff Log Generation Phase..
Fri Aug  6 11:23:22 2021 - [info]
Fri Aug  6 11:23:22 2021 - [info] Generating relay diff files from the latest slave succeeded.
Fri Aug  6 11:23:22 2021 - [info]
Fri Aug  6 11:23:22 2021 - [info] * Phase 4.2: Starting Parallel Slave Log Apply Phase..
Fri Aug  6 11:23:22 2021 - [info]
Fri Aug  6 11:23:22 2021 - [info] All new slave servers recovered successfully.
Fri Aug  6 11:23:22 2021 - [info]
Fri Aug  6 11:23:22 2021 - [info] * Phase 5: New master cleanup phase..
Fri Aug  6 11:23:22 2021 - [info]
Fri Aug  6 11:23:22 2021 - [info] Resetting slave info on the new master..
Fri Aug  6 11:23:22 2021 - [info]  192.168.100.51: Resetting slave info succeeded.
Fri Aug  6 11:23:22 2021 - [info] Master failover to 192.168.100.51(192.168.100.51:3306) completed successfully.
Fri Aug  6 11:23:22 2021 - [info]

----- Failover Report -----

mha: MySQL Master failover 192.168.100.50(192.168.100.50:3306) to 192.168.100.51(192.168.100.51:3306) succeeded

Master 192.168.100.50(192.168.100.50:3306) is down!

Check MHA Manager logs at jh-mha-manager:/var/log/masterha/app1/app1.log for details.

Started automated(non-interactive) failover.
Invalidated master IP address on 192.168.100.50(192.168.100.50:3306)
The latest slave 192.168.100.51(192.168.100.51:3306) has all relay logs for recovery.
Selected 192.168.100.51(192.168.100.51:3306) as a new master.
192.168.100.51(192.168.100.51:3306): OK: Applying all logs succeeded.
192.168.100.51(192.168.100.51:3306): OK: Activated master IP address.
Generating relay diff files from the latest slave succeeded.
192.168.100.51(192.168.100.51:3306): Resetting slave info succeeded.
Master failover to 192.168.100.51(192.168.100.51:3306) completed successfully.

 

 

 

Switchover TEST

## MHA Manager(모니터링)를 중지한 상태에서 진행

MariaDB [(none)]> show slave status\G;

*************************** 1. row ***************************
               Slave_IO_State: Waiting for master to send event
                  Master_Host: 192.168.100.50
                  Master_User: test
                  Master_Port: 3306
                Connect_Retry: 10
              Master_Log_File: mysql.000015
          Read_Master_Log_Pos: 338
               Relay_Log_File: mysql-relay-bin.000004
                Relay_Log_Pos: 633
        Relay_Master_Log_File: mysql.000015
             Slave_IO_Running: Yes
            Slave_SQL_Running: Yes
              Replicate_Do_DB:
          Replicate_Ignore_DB:
           Replicate_Do_Table:
       Replicate_Ignore_Table:
      Replicate_Wild_Do_Table:
  Replicate_Wild_Ignore_Table:
                   Last_Errno: 0
                   Last_Error:
                 Skip_Counter: 0
          Exec_Master_Log_Pos: 338
              Relay_Log_Space: 1319
              Until_Condition: None
               Until_Log_File:
                Until_Log_Pos: 0
           Master_SSL_Allowed: No
           Master_SSL_CA_File:
           Master_SSL_CA_Path:
              Master_SSL_Cert:
            Master_SSL_Cipher:
               Master_SSL_Key:
        Seconds_Behind_Master: 0
Master_SSL_Verify_Server_Cert: No
                Last_IO_Errno: 0
                Last_IO_Error:
               Last_SQL_Errno: 0
               Last_SQL_Error:
  Replicate_Ignore_Server_Ids:
             Master_Server_Id: 1
               Master_SSL_Crl:
           Master_SSL_Crlpath:
                   Using_Gtid: No
                  Gtid_IO_Pos:
      Replicate_Do_Domain_Ids:
  Replicate_Ignore_Domain_Ids:
                Parallel_Mode: conservative
                    SQL_Delay: 0
          SQL_Remaining_Delay: NULL
      Slave_SQL_Running_State: Slave has read all relay log; waiting for the slave I/O thread to update it
1 row in set (0.00 sec)

ERROR: No query specified





[root@jh-mha-manager custom_scripts]# replcheck
Fri Aug  6 14:48:55 2021 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Fri Aug  6 14:48:55 2021 - [info] Reading application default configuration from /etc/mha.cnf..
Fri Aug  6 14:48:55 2021 - [info] Reading server configuration from /etc/mha.cnf..
Fri Aug  6 14:48:55 2021 - [info] MHA::MasterMonitor version 0.57.
Fri Aug  6 14:48:57 2021 - [info] GTID failover mode = 0
Fri Aug  6 14:48:57 2021 - [info] Dead Servers:
Fri Aug  6 14:48:57 2021 - [info] Alive Servers:
Fri Aug  6 14:48:57 2021 - [info]   192.168.100.50(192.168.100.50:3306)
Fri Aug  6 14:48:57 2021 - [info]   192.168.100.51(192.168.100.51:3306)
Fri Aug  6 14:48:57 2021 - [info] Alive Slaves:
Fri Aug  6 14:48:57 2021 - [info]   192.168.100.51(192.168.100.51:3306)  Version=10.2.12-MariaDB-log (oldest major version between slaves) log-bin:enabled
Fri Aug  6 14:48:57 2021 - [info]     Replicating from 192.168.100.50(192.168.100.50:3306)
Fri Aug  6 14:48:57 2021 - [info] Current Alive Master: 192.168.100.50(192.168.100.50:3306)
Fri Aug  6 14:48:57 2021 - [info] Checking slave configurations..
Fri Aug  6 14:48:57 2021 - [info]  read_only=1 is not set on slave 192.168.100.51(192.168.100.51:3306).
Fri Aug  6 14:48:57 2021 - [warning]  relay_log_purge=0 is not set on slave 192.168.100.51(192.168.100.51:3306).
Fri Aug  6 14:48:57 2021 - [info] Checking replication filtering settings..
Fri Aug  6 14:48:57 2021 - [info]  binlog_do_db= , binlog_ignore_db=
Fri Aug  6 14:48:57 2021 - [info]  Replication filtering check ok.
Fri Aug  6 14:48:57 2021 - [info] GTID (with auto-pos) is not supported
Fri Aug  6 14:48:57 2021 - [info] Starting SSH connection tests..
Fri Aug  6 14:48:58 2021 - [info] All SSH connection tests passed successfully.
Fri Aug  6 14:48:58 2021 - [info] Checking MHA Node version..
Fri Aug  6 14:48:58 2021 - [info]  Version check ok.
Fri Aug  6 14:48:58 2021 - [info] Checking SSH publickey authentication settings on the current master..
Fri Aug  6 14:48:58 2021 - [info] HealthCheck: SSH to 192.168.100.50 is reachable.
Fri Aug  6 14:48:58 2021 - [info] Master MHA Node version is 0.57.
Fri Aug  6 14:48:58 2021 - [info] Checking recovery script configurations on 192.168.100.50(192.168.100.50:3306)..
Fri Aug  6 14:48:58 2021 - [info]   Executing command: save_binary_logs --command=test --start_pos=4 --binlog_dir=/data --output_file=/var/log/masterha/app5/save_binary_logs_test --manager_version=0.57 --start_file=mysql.000015
Fri Aug  6 14:48:58 2021 - [info]   Connecting to root@192.168.100.50(192.168.100.50:22)..
  Creating /var/log/masterha/app5 if not exists..    ok.
  Checking output directory is accessible or not..
   ok.
  Binlog found at /data, up to mysql.000015
Fri Aug  6 14:48:58 2021 - [info] Binlog setting check done.
Fri Aug  6 14:48:58 2021 - [info] Checking SSH publickey authentication and checking recovery script configurations on all alive slave servers..
Fri Aug  6 14:48:58 2021 - [info]   Executing command : apply_diff_relay_logs --command=test --slave_user='test' --slave_host=192.168.100.51 --slave_ip=192.168.100.51 --slave_port=3306 --workdir=/var/log/masterha/app5 --target_version=10.2.12-MariaDB-log --manager_version=0.57 --relay_log_info=/data/relay-log.info  --relay_dir=/data/  --slave_pass=xxx
Fri Aug  6 14:48:58 2021 - [info]   Connecting to root@192.168.100.51(192.168.100.51:22)..
  Checking slave recovery environment settings..
    Opening /data/relay-log.info ... ok.
    Relay log found at /data, up to mysql-relay-bin.000004
    Temporary relay log file is /data/mysql-relay-bin.000004
    Testing mysql connection and privileges.. done.
    Testing mysqlbinlog output.. done.
    Cleaning up test file(s).. done.
Fri Aug  6 14:48:58 2021 - [info] Slaves settings check done.
Fri Aug  6 14:48:58 2021 - [info]
192.168.100.50(192.168.100.50:3306) (current master)
+--192.168.100.51(192.168.100.51:3306)

Fri Aug  6 14:48:58 2021 - [info] Checking replication health on 192.168.100.51..
Fri Aug  6 14:48:58 2021 - [info]  ok.
Fri Aug  6 14:48:58 2021 - [info] Checking master_ip_failover_script status:
Fri Aug  6 14:48:58 2021 - [info]   /var/log/masterha/app1/custom_scripts/master_ip_failover --command=status --ssh_user=root --orig_master_host=192.168.100.50 --orig_master_ip=192.168.100.50 --orig_master_port=3306
Fri Aug  6 14:48:58 2021 - [info]  OK.
Fri Aug  6 14:48:58 2021 - [warning] shutdown_script is not defined.
Fri Aug  6 14:48:58 2021 - [info] Got exit code 0 (Not master dead).

MySQL Replication Health is OK.





## switchover
[root@jh-mha-manager custom_scripts]# masterha_master_switch --master_state=alive --conf=/etc/mha.cnf --orig_master_is_new_slave
Fri Aug  6 14:51:22 2021 - [info] MHA::MasterRotate version 0.57.
Fri Aug  6 14:51:22 2021 - [info] Starting online master switch..
Fri Aug  6 14:51:22 2021 - [info]
Fri Aug  6 14:51:22 2021 - [info] * Phase 1: Configuration Check Phase..
Fri Aug  6 14:51:22 2021 - [info]
Fri Aug  6 14:51:22 2021 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Fri Aug  6 14:51:22 2021 - [info] Reading application default configuration from /etc/mha.cnf..
Fri Aug  6 14:51:22 2021 - [info] Reading server configuration from /etc/mha.cnf..
Fri Aug  6 14:51:23 2021 - [info] GTID failover mode = 0
Fri Aug  6 14:51:23 2021 - [info] Current Alive Master: 192.168.100.50(192.168.100.50:3306)
Fri Aug  6 14:51:23 2021 - [info] Alive Slaves:
Fri Aug  6 14:51:23 2021 - [info]   192.168.100.51(192.168.100.51:3306)  Version=10.2.12-MariaDB-log (oldest major version between slaves) log-bin:enabled
Fri Aug  6 14:51:23 2021 - [info]     Replicating from 192.168.100.50(192.168.100.50:3306)

It is better to execute FLUSH NO_WRITE_TO_BINLOG TABLES on the master before switching. Is it ok to execute on 192.168.100.50(192.168.100.50:3306)? (YES/no): YES      
Fri Aug  6 14:51:32 2021 - [info] Executing FLUSH NO_WRITE_TO_BINLOG TABLES. This may take long time..
Fri Aug  6 14:51:32 2021 - [info]  ok.
Fri Aug  6 14:51:32 2021 - [info] Checking MHA is not monitoring or doing failover..
Fri Aug  6 14:51:32 2021 - [info] Checking replication health on 192.168.100.51..
Fri Aug  6 14:51:32 2021 - [info]  ok.
Fri Aug  6 14:51:32 2021 - [info] Searching new master from slaves..
Fri Aug  6 14:51:32 2021 - [info]  Candidate masters from the configuration file:
Fri Aug  6 14:51:32 2021 - [info]  Non-candidate masters:
Fri Aug  6 14:51:32 2021 - [info]
From:
192.168.100.50(192.168.100.50:3306) (current master)
+--192.168.100.51(192.168.100.51:3306)

To:
192.168.100.51(192.168.100.51:3306) (new master)
+--192.168.100.50(192.168.100.50:3306)

Starting master switch from 192.168.100.50(192.168.100.50:3306) to 192.168.100.51(192.168.100.51:3306)? (yes/NO): yes
Fri Aug  6 14:51:34 2021 - [info] Checking whether 192.168.100.51(192.168.100.51:3306) is ok for the new master..
Fri Aug  6 14:51:34 2021 - [info]  ok.
Fri Aug  6 14:51:34 2021 - [info] 192.168.100.50(192.168.100.50:3306): SHOW SLAVE STATUS returned empty result. To check replication filtering rules, temporarily executing CHANGE MASTER to a dummy host.
Fri Aug  6 14:51:34 2021 - [info] 192.168.100.50(192.168.100.50:3306): Resetting slave pointing to the dummy host.
Fri Aug  6 14:51:34 2021 - [info] ** Phase 1: Configuration Check Phase completed.
Fri Aug  6 14:51:34 2021 - [info]
Fri Aug  6 14:51:34 2021 - [info] * Phase 2: Rejecting updates Phase..
Fri Aug  6 14:51:34 2021 - [info]
Fri Aug  6 14:51:34 2021 - [info] Executing master ip online change script to disable write on the current master:
Fri Aug  6 14:51:34 2021 - [info]   /var/log/masterha/app1/custom_scripts/master_ip_online_change --command=stop --orig_master_host=192.168.100.50 --orig_master_ip=192.168.100.50 --orig_master_port=3306 --orig_master_user='test' --new_master_host=192.168.100.51 --new_master_ip=192.168.100.51 --new_master_port=3306 --new_master_user='test' --orig_master_ssh_user=root --new_master_ssh_user=root   --orig_master_is_new_slave --orig_master_password=xxx --new_master_password=xxx
Fri Aug  6 14:51:34 2021 207808 Set read_only on the new master.. ok.
Fri Aug  6 14:51:34 2021 210270 Set read_only=1 on the orig master.. ok.
Fri Aug  6 14:51:34 2021 211260 Killing all application threads..
Fri Aug  6 14:51:34 2021 211277 done.
Fri Aug  6 14:51:34 2021 - [info]  ok.
Fri Aug  6 14:51:34 2021 - [info] Locking all tables on the orig master to reject updates from everybody (including root):
Fri Aug  6 14:51:34 2021 - [info] Executing FLUSH TABLES WITH READ LOCK..
Fri Aug  6 14:51:34 2021 - [info]  ok.
Fri Aug  6 14:51:34 2021 - [info] Orig master binlog:pos is mysql.000015:338.
Fri Aug  6 14:51:34 2021 - [info]  Waiting to execute all relay logs on 192.168.100.51(192.168.100.51:3306)..
Fri Aug  6 14:51:34 2021 - [info]  master_pos_wait(mysql.000015:338) completed on 192.168.100.51(192.168.100.51:3306). Executed 0 events.
Fri Aug  6 14:51:34 2021 - [info]   done.
Fri Aug  6 14:51:34 2021 - [info] Getting new master's binlog name and position..
Fri Aug  6 14:51:34 2021 - [info]  mysql.000002:338
Fri Aug  6 14:51:34 2021 - [info]  All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST='192.168.100.51', MASTER_PORT=3306, MASTER_LOG_FILE='mysql.000002', MASTER_LOG_POS=338, MASTER_USER='test', MASTER_PASSWORD='xxx';
Fri Aug  6 14:51:34 2021 - [info] Executing master ip online change script to allow write on the new master:
Fri Aug  6 14:51:34 2021 - [info]   /var/log/masterha/app1/custom_scripts/master_ip_online_change --command=start --orig_master_host=192.168.100.50 --orig_master_ip=192.168.100.50 --orig_master_port=3306 --orig_master_user='test' --new_master_host=192.168.100.51 --new_master_ip=192.168.100.51 --new_master_port=3306 --new_master_user='test' --orig_master_ssh_user=root --new_master_ssh_user=root   --orig_master_is_new_slave --orig_master_password=xxx --new_master_password=xxx
Fri Aug  6 14:51:34 2021 346582 Set read_only=0 on the new master.
Fri Aug  6 14:51:35 2021 - [info]  ok.
Fri Aug  6 14:51:35 2021 - [info]
Fri Aug  6 14:51:35 2021 - [info] * Switching slaves in parallel..
Fri Aug  6 14:51:35 2021 - [info]
Fri Aug  6 14:51:35 2021 - [info] Unlocking all tables on the orig master:
Fri Aug  6 14:51:35 2021 - [info] Executing UNLOCK TABLES..
Fri Aug  6 14:51:35 2021 - [info]  ok.
Fri Aug  6 14:51:35 2021 - [info] Starting orig master as a new slave..
Fri Aug  6 14:51:35 2021 - [info]  Resetting slave 192.168.100.50(192.168.100.50:3306) and starting replication from the new master 192.168.100.51(192.168.100.51:3306)..
Fri Aug  6 14:51:35 2021 - [info]  Executed CHANGE MASTER.
Fri Aug  6 14:51:35 2021 - [info]  Slave started.
Fri Aug  6 14:51:35 2021 - [info] All new slave servers switched successfully.
Fri Aug  6 14:51:35 2021 - [info]
Fri Aug  6 14:51:35 2021 - [info] * Phase 5: New master cleanup phase..
Fri Aug  6 14:51:35 2021 - [info]
Fri Aug  6 14:51:35 2021 - [info]  192.168.100.51: Resetting slave info succeeded.
Fri Aug  6 14:51:35 2021 - [info] Switching master to 192.168.100.51(192.168.100.51:3306) completed successfully.







<db1>
[root@jh-mha1 ~]# mysql -uroot -proot
Welcome to the MariaDB monitor.  Commands end with ; or \g.
Your MariaDB connection id is 25
Server version: 10.2.12-MariaDB-log MariaDB Server
 
Copyright (c) 2000, 2017, Oracle, MariaDB Corporation Ab and others.
 
Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.
 
MariaDB [(none)]> show slave status\G;
*************************** 1. row ***************************
               Slave_IO_State: Waiting for master to send event
                  Master_Host: 192.168.100.51
                  Master_User: test
                  Master_Port: 3306
                Connect_Retry: 60
              Master_Log_File: mysql.000002
          Read_Master_Log_Pos: 338
               Relay_Log_File: mysql-relay-bin.000002
                Relay_Log_Pos: 551
        Relay_Master_Log_File: mysql.000002
             Slave_IO_Running: Yes
            Slave_SQL_Running: Yes
              Replicate_Do_DB:
          Replicate_Ignore_DB:
           Replicate_Do_Table:
       Replicate_Ignore_Table:
      Replicate_Wild_Do_Table:
  Replicate_Wild_Ignore_Table:
                   Last_Errno: 0
                   Last_Error:
                 Skip_Counter: 0
          Exec_Master_Log_Pos: 338
              Relay_Log_Space: 860
              Until_Condition: None
               Until_Log_File:
                Until_Log_Pos: 0
           Master_SSL_Allowed: No
           Master_SSL_CA_File:
           Master_SSL_CA_Path:
              Master_SSL_Cert:
            Master_SSL_Cipher:
               Master_SSL_Key:
        Seconds_Behind_Master: 0
Master_SSL_Verify_Server_Cert: No
                Last_IO_Errno: 0
                Last_IO_Error:
               Last_SQL_Errno: 0
               Last_SQL_Error:
  Replicate_Ignore_Server_Ids:
             Master_Server_Id: 2
               Master_SSL_Crl:
           Master_SSL_Crlpath:
                   Using_Gtid: No
                  Gtid_IO_Pos:
      Replicate_Do_Domain_Ids:
  Replicate_Ignore_Domain_Ids:
                Parallel_Mode: conservative
                    SQL_Delay: 0
          SQL_Remaining_Delay: NULL
      Slave_SQL_Running_State: Slave has read all relay log; waiting for the slave I/O thread to update it
1 row in set (0.00 sec)
 
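ERROR: No query specified
(※ `\G` 자체가 구문 종결자이므로 뒤에 붙인 `;`가 빈 쿼리로 처리되어 출력되는 메시지입니다. 조회 결과에는 영향이 없으며, `show slave status\G`처럼 `;` 없이 실행하면 나타나지 않습니다.)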
 
 
 
 
 
[root@jh-mha1 ~]# ifconfig
eth0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet 10.41.181.201  netmask 255.255.254.0  broadcast 10.41.181.255
        ether f2:20:cd:59:d8:93  txqueuelen 1000  (Ethernet)
        RX packets 1170168  bytes 1097037588 (1.0 GiB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 326034  bytes 99352252 (94.7 MiB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0
 
eth1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet 192.168.100.50  netmask 255.255.255.0  broadcast 192.168.100.255
        ether f2:d7:cf:4d:e0:af  txqueuelen 1000  (Ethernet)
        RX packets 17391  bytes 1727470 (1.6 MiB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 14403  bytes 2023206 (1.9 MiB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0
 
lo: flags=73<UP,LOOPBACK,RUNNING>  mtu 65536
        inet 127.0.0.1  netmask 255.0.0.0
        loop  txqueuelen 1  (Local Loopback)
        RX packets 87  bytes 8040 (7.8 KiB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 87  bytes 8040 (7.8 KiB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0
 
 
 
 
 
<db2> (192.168.100.51 — switchover 후 새 master, VIP 192.168.100.53이 eth1:1에 올라온 상태)
MariaDB [(none)]> show master status\G;
*************************** 1. row ***************************
            File: mysql.000002
        Position: 338
    Binlog_Do_DB:
Binlog_Ignore_DB:
1 row in set (0.00 sec)
 
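ERROR: No query specified
(※ `\G` 자체가 구문 종결자이므로 뒤에 붙인 `;`가 빈 쿼리로 처리되어 출력되는 메시지입니다. 조회 결과에는 영향이 없으며, `show master status\G`처럼 `;` 없이 실행하면 나타나지 않습니다.)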
 
 
 
 
 
 
[root@jh-mha2 network-scripts]# ifconfig
eth0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet 10.41.180.182  netmask 255.255.254.0  broadcast 10.41.181.255
        ether f2:20:cd:dc:92:d4  txqueuelen 1000  (Ethernet)
        RX packets 1070148  bytes 587787579 (560.5 MiB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 293925  bytes 91893626 (87.6 MiB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0
 
eth1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet 192.168.100.51  netmask 255.255.255.0  broadcast 192.168.100.255
        ether f2:d7:cf:7e:50:45  txqueuelen 1000  (Ethernet)
        RX packets 12779  bytes 1460222 (1.3 MiB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 11859  bytes 1832558 (1.7 MiB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0
 
eth1:1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet 192.168.100.53  netmask 255.255.255.0  broadcast 192.168.100.255
        ether f2:d7:cf:7e:50:45  txqueuelen 1000  (Ethernet)
 
lo: flags=73<UP,LOOPBACK,RUNNING>  mtu 65536
        inet 127.0.0.1  netmask 255.0.0.0
        loop  txqueuelen 1  (Local Loopback)
        RX packets 576  bytes 49515 (48.3 KiB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 576  bytes 49515 (48.3 KiB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

 

 

 

이상으로 MHA failover 및 switchover 테스트까지 진행하였습니다.