1. 程式人生 > >在Azure雲上實現postgres主備切換

在Azure雲上實現postgres主備切換

  以下是工作上實現postgres主備切換功能所用到的程式碼和步驟,中間走了不少彎路,在此記錄下。所用到的作業系統為centos 7.5,安裝了兩臺伺服器,hostname為VM7的為Master,VM8則為Slave。

 

  1、安裝pg10

  vm7(Mater),vm8(Slave)均需安裝:

[root@springcloud-vm7 ~]# yum install –y https://download.postgresql.org/pub/repos/yum/reporpms/EL-7-x86_64/pgdg-redhat-repo-latest.noarch.rpm 
[root@springcloud-vm7 ~]# yum install postgresql10 -y
[root@springcloud-vm7 ~]# yum install postgresql10-server -y
[root@springcloud-vm7 ~]# systemctl enable postgresql-10
[root@springcloud-vm7 ~]# /usr/pgsql-10/bin/postgresql-10-setup initdb
[root@springcloud-vm7 ~]# systemctl start postgresql-10
[root@springcloud-vm7 ~]# systemctl status postgresql-10

 

  其他環境設定

  vm7(Mater),vm8(Slave)均需安裝:

# hosts
[root@springcloud-vm7 ~]# vi /etc/hosts
...
10.0.0.14 springcloud-vm7 vm7
10.0.0.15 springcloud-vm8 vm8

# disable selinux
[root@springcloud-vm7 ~]# sed -i '7s/=.*$/=disabled/' /etc/selinux/config
[root@springcloud-vm7 ~]# setenforce 0

# set timezone
[root@springcloud-vm7 ~]# timedatectl set-timezone Asia/Shanghai
# postgres⽤使用者環境
[root@springcloud-vm7 ~]# su - postgres
-bash-4.2$ vi ~/.bash_profile
。。。
PATH=$PATH:/usr/pgsql-10/bin;export PATH
-bash-4.2$ . ~/.bash_profile

#注意:~/.bash_profile改後為:
PGDATA=/usr/local/pgsql/data
PATH=/usr/local/pgsql/bin:$PATH
export PGDATA PATH

 

  安裝nginx充當給azure做探測的服務(probe):

[root@springcloud-vm7 ~]# yum install -y epel-release
[root@springcloud-vm7 ~]# yum install -y nginx
[root@springcloud-vm7 ~]# vi /etc/nginx/nginx.conf
...
listen 5999 default_server;   #修改監聽埠在5999上

 

  由於Azure上LB的floatingIP限制,它會將流量量導向probe成功的後端機器器,並且只能有⼀一臺probe成功。

  參考官⽅方例例⼦子,它是⽤用iptable來阻⽌止probe:https://github.com/Azure/azure-quickstart-templates/tree/master/haproxy-redundant-floatingip-ubuntu

 

  azure的LSB設定:

  注意:負載均衡的probe端⼝口設定成5999,floatingIP設定啟⽤用

 

  2、配置master

[root@springcloud-vm7 ~]# su – postgres

# 建立複製⽤使用者
-bash-4.2$ psql -c "create role repl replication login password 'postgres'"

# 建立⼀一個slot
-bash-4.2$ psql -c "select pg_create_physical_replication_slot('slot_vm7')"

pg_create_physical_replication_slot
-------------------------------------
(slot_vm7,)
(1 row)

# 配置引數
-bash-4.2$ cd $PGDATA
-bash-4.2$ vi postgresql.conf
。。。
listen_addresses = '*'
archive_mode = on
archive_command = 'cp -n %p $PGDATA/arch/%f'
log_timezone = 'Asia/Shanghai'
timezone = 'Asia/Shanghai'
。。。
# 建立arch⽬目錄
-bash-4.2$ mkdir $PGDATA/arch/

# 配置pg_hba.conf
-bash-4.2$ vi pg_hba.conf
。。。
host replication repl 10.0.0.0/24 md5

# 重啟
-bash-4.2$ pg_ctl restart


注意:如果是首次安裝,需要在防火牆中開放5432埠
#檢視各埠網路連線情況
[root@springcloud-vm7 ~]# netstat –na

#安裝iptables防火牆
[root@springcloud-vm7 ~]# yum install iptables-services 

#編輯iptables防火牆配置
[root@springcloud-vm7 ~]# vi /etc/sysconfig/iptables 
。。。
-A INPUT -m state --state NEW -m tcp -p tcp --dport 5432 -j ACCEPT

 

  3、配置slave

 [root@springcloud-vm8 ~]# su – postgres

# 直接⽤用repl⽤使用者備份到$PGDATA⽬目錄
-bash-4.2$ rm -rf /var/lib/pgsql/10/data
-bash-4.2$ /usr/pgsql-10/bin/pg_basebackup -R -Pv -h vm7 -U repl -D $PGDATA
Password: #postgres

pg_basebackup: initiating base backup, waiting for checkpoint to complete
pg_basebackup: checkpoint completed
pg_basebackup: write-ahead log start point: 0/2000028 on timeline 1
pg_basebackup: starting background WAL receiver
24421/24421 kB (100%), 1/1 tablespace
pg_basebackup: write-ahead log end point: 0/20000F8
pg_basebackup: waiting for background process to finish streaming ...
pg_basebackup: base backup completed

# 修改recovery.conf⽂檔案
-bash-4.2$ vi $PGDATA/recovery.conf

standby_mode = 'on'
primary_conninfo = 'user=repl password=postgres host=vm7 port=5432 sslmode=prefer sslcompression=1 krbsrvname=postgres target_session_attrs=any'
primary_slot_name = 'slot_vm7'
restore_command = 'cp $PGDATA/arch/%f %p'
archive_cleanup_command = 'pg_archivecleanup $PGDATA/arch %r'
recovery_target_timeline = 'latest'

# 重啟
-bash-4.2$ pg_ctl restart

 

  4、檢查

#主庫上檢查下:
-bash-4.2$ psql -xc "select * from pg_stat_replication"
-[ RECORD 1 ]----+------------------------------
pid | 90512
usesysid | 16384
usename | repl
application_name | walreceiver
client_addr | 10.0.0.11
client_hostname |
client_port | 44464
backend_start | 2019-07-09 18:36:25.005202+08
backend_xmin |
state | streaming
sent_lsn | 0/F0004A0
write_lsn | 0/F0004A0
flush_lsn | 0/F0004A0
replay_lsn | 0/F0004A0
write_lag |
flush_lag |
replay_lag |
sync_priority | 0
sync_state | async
-bash-4.2$ pg_controldata
...
-bash-4.2$ psql -xc "select * from pg_replication_slots"
...

#被庫上檢查:
-bash-4.2$ psql -xc "select * from pg_stat_wal_receiver"
...
-bash-4.2$ /usr/pgsql-10/bin/pg_controldata
...
-bash-4.2$ psql -xc "select pg_is_in_recovery()"
-[ RECORD 1 ]-----+--
pg_is_in_recovery | t

 

  5、安裝keepalived

  vm7(Mater),vm8(Slave)均需安裝:

[root@springcloud-vm7 data]# yum install keepalived -y
[root@springcloud-vm7 data]# systemctl enable keepalived
[root@springcloud-vm7 data]# systemctl start keepalived
[root@springcloud-vm7 data]# cd /etc/keepalived/
[root@springcloud-vm7 ~]# vi /etc/keepalived/keepalived.conf

! Configuration File for keepalived
global_defs {
notification_email {
[email protected]
}
notification_email_from [email protected]
smtp_server 127.0.0.1
smtp_connect_timeout 30
router_id PG_HA     #主備庫需要一致
}

vrrp_script chk_pg_alived {
script "/sbin/ss -ntlp4 | grep :5432 > /dev/null" # 探測端⼝口判斷資料庫存活,1分鐘失敗則認為失敗
interval 10
weight 20
fall 6
}

vrrp_instance VI_1 {
state MASTER # 主庫上填MASTER, 備庫上為BACKUP
interface eth0  # 填寫當前網絡卡名稱,可以用IP Ad命令檢視
virtual_router_id 61
priority 100 #備庫的優先順序設為90
advert_int 3
!nopreempt
preempt_delay 60
unicast_src_ip 10.0.0.14 # 雲主機只能使⽤用單播⽅方式,這⾥裡里填本機ip
unicast_peer {
10.0.0.15 #另⼀臺ip
}

authentication {
auth_type PASS
auth_pass 1111
}

virtual_ipaddress {
139.217.92.247  #虛擬ip
}

track_script {
chk_pg_alived
}

notify_master "/etc/keepalived/master.sh"
notify_backup "/etc/keepalived/backup.sh"
}

# 切換成master時會執⾏行行的指令碼,通過判斷資料庫狀態決定是否promote
[root@springcloud-vm7 keepalived]# vi master.sh

#!/bin/bash
export PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin

# allow probe from azure
systemctl restart nginx
dbstate=`su - postgres -c "psql -Atc 'select pg_is_in_recovery()'"`
if [ $dbstate != "t" ]; then
exit 0
fi

# promote the slave to master
su - postgres -c "/usr/pgsql-10/bin/pg_ctl promote"
sleep 5

echo "select pg_create_physical_replication_slot('slot_vm7')" | su – postgres -c "psql" #注意slot的名字主備庫要相應修改⼀⼀對應(Slave中改為slot_vm8)


#儲存後修改指令碼檔案許可權
[root@springcloud-vm7 keepalived]# chmod 777 master.sh

#切換成slave時會執⾏行行的指令碼
[root@springcloud-vm7 keepalived]# vi backup.sh

#!/bin/bash
export PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin
# block probe from azure
systemctl stop nginx
# check pg state
dbstate=`su - postgres -c "psql -Atc 'select pg_is_in_recovery()'"`
if [ $dbstate = "t" ]; then
exit 0
fi
# change master to slave
if [ ! -f /var/lib/pgsql/10/data/recovery.conf ] ; then
cat > /var/lib/pgsql/10/data/recovery.conf << EOF
standby_mode = 'on'
primary_conninfo = 'user=repl password=postgres host=vm8 port=5432 sslmode=prefer sslcompression=1 krbsrvname=postgres target_session_attrs=any'  #注意slot的名字主備庫要相應修改⼀⼀對應(Slave中改為vm7)
primary_slot_name = 'slot_vm8' #注意slot的名字主備庫要相應修改⼀⼀對應(Slave中改為slot_vm7)
restore_command = 'cp $PGDATA/arch/%f %p'
archive_cleanup_command = 'pg_archivecleanup $PGDATA/arch %r'
recovery_target_timeline = 'latest'
EOF
fi
sleep 60
su - postgres -c "/usr/pgsql-10/bin/pg_ctl stop"
systemctl start postgresql-10

#注意slot的名字主備庫要相應修改⼀⼀對應

#儲存後修改指令碼檔案許可權
[root@springcloud-vm7 keepalived]# chmod 777 backup.sh

 

  6、測試

  6.1、  關閉Master伺服器

 

 

  6.2、 在Slave伺服器中監控keepalived:  journalctl -f -u keepalived

 

  6.3、  在Slave伺服器中查詢postgres日誌:tail -fn20 /var/lib/pgsql/10/data/log/postgresql-Tue.log

 

  6.4、 在Slave伺服器中監控IP是否發生了漂移: ip a

 

 

  6.5、  在Slave伺服器中檢視主備狀態:  psql -xc "select pg_is_in_recovery()"

  

 

  6.6、在Slave伺服器中建立新表:psql -c 'create table t2 (id integer)'

 

 

  6.7、  Master伺服器啟動後再看主備狀態:psql -xc "select pg_is_in_recovery()"

 

&n