Building a Highly Available Kubernetes Cluster in Practice
阿新 · Published 2022-04-10
1. Installation requirements
Before you begin, the machines used to deploy the Kubernetes cluster must meet the following requirements:
- One or more machines running CentOS 7.x x86_64
- Hardware: 2 GB of RAM or more, 2 or more CPUs, 30 GB of disk or more
- Internet access is needed to pull images; if the servers cannot reach the Internet, download the images in advance and import them on each node
- Swap must be disabled
| Role | IP |
| --- | --- |
| master1 | 192.168.0.155 |
| master2 | 192.168.0.156 |
| node1 | 192.168.0.157 |
| VIP (virtual IP) | 192.168.0.158 |
2. Environment preparation (run on all nodes)
```bash
# Turn off the firewall
systemctl stop firewalld
systemctl disable firewalld

# Disable SELinux
sed -i 's/enforcing/disabled/' /etc/selinux/config   # permanent
setenforce 0                                         # temporary

# Disable swap
swapoff -a                                           # temporary
sed -ri 's/.*swap.*/#&/' /etc/fstab                  # permanent

# Set the hostname according to the plan
hostnamectl set-hostname <hostname>

# Add hosts entries on all machines
cat >> /etc/hosts << EOF
192.168.0.158 master.k8s.io k8s-vip
192.168.0.155 master01.k8s.io master1
192.168.0.156 master02.k8s.io master2
192.168.0.157 node01.k8s.io node1
EOF

# Pass bridged IPv4 traffic to iptables chains
cat <<EOF | tee /etc/modules-load.d/k8s.conf
br_netfilter
EOF
modprobe br_netfilter   # load the module now so the sysctl settings below can apply

cat > /etc/sysctl.d/k8s.conf << EOF
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
EOF
sysctl --system   # apply

# Time synchronization
yum install ntpdate -y
ntpdate time.windows.com
# Add the following entry to crontab (crontab -e) to resync every 5 minutes
*/5 * * * * /sbin/ntpdate time.windows.com > /dev/null 2>&1
```
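A quick way to confirm the preparation took effect on each node; a minimal sketch, assuming the standard CentOS 7 tools are present:

```bash
# Swap should show all zeros in the "Swap:" row
free -m
# SELinux should report Permissive now (and stay disabled after a reboot)
getenforce
# Both bridge sysctls should print 1
sysctl net.bridge.bridge-nf-call-iptables net.bridge.bridge-nf-call-ip6tables
```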
3. Deploy keepalived on all master nodes
3.1 Install the required packages and keepalived
```bash
yum install -y conntrack-tools libseccomp libtool-ltdl
yum install -y keepalived
```
3.2 Configure the master nodes
master1 configuration:
```bash
cat > /etc/keepalived/keepalived.conf <<EOF
! Configuration File for keepalived

global_defs {
    router_id k8s
}

vrrp_script check_haproxy {
    script "killall -0 haproxy"
    interval 3
    weight -2
    fall 10
    rise 2
}

vrrp_instance VI_1 {
    state MASTER
    interface ens33
    virtual_router_id 51
    priority 250
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass ceb1b3ec013d66163d6ab
    }
    virtual_ipaddress {
        192.168.0.158
    }
    track_script {
        check_haproxy
    }
}
EOF
```
master2 configuration:
```bash
cat > /etc/keepalived/keepalived.conf <<EOF
! Configuration File for keepalived

global_defs {
    router_id k8s
}

vrrp_script check_haproxy {
    script "killall -0 haproxy"
    interval 3
    weight -2
    fall 10
    rise 2
}

vrrp_instance VI_1 {
    state BACKUP
    interface ens33
    virtual_router_id 51
    priority 200
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass ceb1b3ec013d66163d6ab
    }
    virtual_ipaddress {
        192.168.0.158
    }
    track_script {
        check_haproxy
    }
}
EOF
```
Compared with master1, only state (BACKUP) and priority (200) differ, so master1 holds the VIP as long as its haproxy check passes.
Write a health check script
```bash
[root@k8s-master01 ~]# vim /etc/keepalived/check-apiserver.sh
#!/bin/bash

# Check whether kube-apiserver is running, retrying up to 5 times.
function check_apiserver(){
    for ((i=0;i<5;i++))
    do
        apiserver_job_id=$(pgrep kube-apiserver)
        if [[ ! -z ${apiserver_job_id} ]];then
            return
        else
            sleep 2
        fi
    done
    apiserver_job_id=0
}

# non-empty PID -> running, 0 -> stopped
check_apiserver
if [[ $apiserver_job_id -eq 0 ]];then
    /usr/bin/systemctl stop keepalived
    exit 1
else
    exit 0
fi
```
```bash
### Script 2 (alternative)
[root@k8s-master01 ~]# vim /etc/keepalived/check_apiserver.sh
#!/bin/sh

errorExit() {
    echo "*** $*" 1>&2
    exit 1
}

curl --silent --max-time 2 --insecure https://localhost:16443/ -o /dev/null || errorExit "Error GET https://localhost:16443/"
if ip addr | grep -q 192.168.0.158; then
    # This node currently holds the VIP, so also check the apiserver through it
    curl --silent --max-time 2 --insecure https://192.168.0.158:16443/ -o /dev/null || errorExit "Error GET https://192.168.0.158:16443/"
fi
```
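Note that the keepalived configuration above only tracks haproxy through check_haproxy; the apiserver check script is not referenced by it. If you also want keepalived to act on that script, a minimal sketch (assuming the script is saved as /etc/keepalived/check_apiserver.sh and made executable with chmod +x) is to add another vrrp_script block and track it:

```
# Extra vrrp_script block in /etc/keepalived/keepalived.conf
vrrp_script check_apiserver {
    script "/etc/keepalived/check_apiserver.sh"
    interval 3
    weight -2
    fall 3
    rise 2
}

# ...and reference it next to check_haproxy inside vrrp_instance VI_1:
#    track_script {
#        check_haproxy
#        check_apiserver
#    }
```

Restart keepalived afterwards for the change to take effect.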
3.3 Start and verify
Run the following on both master nodes:
```bash
# Start keepalived
systemctl start keepalived.service
# Enable it at boot
systemctl enable keepalived.service
# Check its status
systemctl status keepalived.service
```
After starting, check the NIC information on master1:
```bash
ip a s ens33
```
On master1 you should see the VIP 192.168.0.158 attached to ens33.
4. Deploy haproxy
4.1 Installation
```bash
yum install -y haproxy
```
4.2 Configuration
The configuration is identical on both master nodes. It declares the two master API servers as backends and makes haproxy listen on port 16443, so port 16443 becomes the entry point of the cluster.
```bash
cat > /etc/haproxy/haproxy.cfg << EOF
#---------------------------------------------------------------------
# Global settings
#---------------------------------------------------------------------
global
    # to have these messages end up in /var/log/haproxy.log you will
    # need to:
    # 1) configure syslog to accept network log events. This is done
    #    by adding the '-r' option to the SYSLOGD_OPTIONS in
    #    /etc/sysconfig/syslog
    # 2) configure local2 events to go to the /var/log/haproxy.log
    #    file. A line like the following can be added to
    #    /etc/sysconfig/syslog
    #
    #    local2.*                       /var/log/haproxy.log
    #
    log         127.0.0.1 local2

    chroot      /var/lib/haproxy
    pidfile     /var/run/haproxy.pid
    maxconn     4000
    user        haproxy
    group       haproxy
    daemon

    # turn on stats unix socket
    stats socket /var/lib/haproxy/stats
#---------------------------------------------------------------------
# common defaults that all the 'listen' and 'backend' sections will
# use if not designated in their block
#---------------------------------------------------------------------
defaults
    mode                    http
    log                     global
    option                  httplog
    option                  dontlognull
    option http-server-close
    option forwardfor       except 127.0.0.0/8
    option                  redispatch
    retries                 3
    timeout http-request    10s
    timeout queue           1m
    timeout connect         10s
    timeout client          1m
    timeout server          1m
    timeout http-keep-alive 10s
    timeout check           10s
    maxconn                 3000
#---------------------------------------------------------------------
# kubernetes apiserver frontend which proxies to the backends
#---------------------------------------------------------------------
frontend kubernetes-apiserver
    mode                 tcp
    bind                 *:16443
    option               tcplog
    default_backend      kubernetes-apiserver
#---------------------------------------------------------------------
# round robin balancing between the various backends
#---------------------------------------------------------------------
backend kubernetes-apiserver
    mode        tcp
    balance     roundrobin
    server      master01.k8s.io   192.168.0.155:6443 check
    server      master02.k8s.io   192.168.0.156:6443 check
#---------------------------------------------------------------------
# collection haproxy statistics message
#---------------------------------------------------------------------
listen stats
    bind                 *:1080
    stats auth           admin:awesomePassword
    stats refresh        5s
    stats realm          HAProxy\ Statistics
    stats uri            /admin?stats
EOF
```
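Before starting haproxy, it can be worth validating the file syntax; a minimal sketch using haproxy's check mode (-c checks the configuration given with -f without starting the service):

```bash
haproxy -c -f /etc/haproxy/haproxy.cfg
```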
4.3 Start and verify
Start haproxy on both masters:
```bash
# Enable at boot
$ systemctl enable haproxy
# Start haproxy
$ systemctl start haproxy
# Check its status
$ systemctl status haproxy
```
Check the port:
```bash
netstat -lntup | grep haproxy
```
5. Install Docker/kubeadm/kubelet on all nodes
Kubernetes uses Docker as its default CRI (container runtime), so install Docker first.
5.1 Install Docker
```bash
$ yum install -y yum-utils
$ yum-config-manager \
    --add-repo \
    http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
$ yum -y install docker-ce-20.10.10-3.el7
$ systemctl enable docker && systemctl start docker
$ docker --version
Docker version 20.10.12, build e91ed57
```
```bash
$ cat > /etc/docker/daemon.json << EOF
{
  "registry-mirrors": ["https://b9pmyelo.mirror.aliyuncs.com"]
}
EOF
```
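The registry mirror only takes effect after Docker reloads its configuration; the simplest way is to restart the daemon and confirm the mirror is listed:

```bash
systemctl restart docker
# The mirror URL should appear under "Registry Mirrors"
docker info | grep -A1 "Registry Mirrors"
```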
5.2 Add the Alibaba Cloud YUM repository
```bash
$ cat > /etc/yum.repos.d/kubernetes.repo << EOF
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64
enabled=1
gpgcheck=0
repo_gpgcheck=0
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
```
5.3 Install kubeadm, kubelet and kubectl
Because new versions are released frequently, pin the version numbers here:
```bash
yum install -y kubelet-1.20.13 kubeadm-1.20.13 kubectl-1.20.13
systemctl enable kubelet
```
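A quick sanity check that the pinned versions were installed (output wording may differ slightly between releases):

```bash
kubeadm version -o short           # expected: v1.20.13
kubelet --version                  # expected: Kubernetes v1.20.13
kubectl version --client --short
```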
6. Deploy the Kubernetes masters
6.1 Create the kubeadm configuration file
Work on the master node that holds the VIP, which here is master1.
```bash
$ mkdir -p /usr/local/kubernetes/manifests
$ cd /usr/local/kubernetes/manifests/
$ vi kubeadm-config.yaml
```
```yaml
apiServer:
  certSANs:
    - master1
    - master2
    - master.k8s.io
    - 192.168.0.158
    - 192.168.0.155
    - 192.168.0.156
    - 127.0.0.1
  extraArgs:
    authorization-mode: Node,RBAC
  timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta2
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controlPlaneEndpoint: "master.k8s.io:16443"
controllerManager: {}
dns:
  type: CoreDNS
etcd:
  local:
    dataDir: /var/lib/etcd
imageRepository: registry.aliyuncs.com/google_containers
kind: ClusterConfiguration
kubernetesVersion: v1.20.13
networking:
  dnsDomain: cluster.local
  podSubnet: 10.244.0.0/16
  serviceSubnet: 10.1.0.0/16
scheduler: {}
```
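Optionally, the control-plane images can be pre-pulled using the same config file before running init (kubeadm provides a subcommand for this), which makes the init step itself faster:

```bash
kubeadm config images pull --config kubeadm-config.yaml
```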
6.2 Run on the master1 node
```bash
# Initialize the master
$ kubeadm init --config kubeadm-config.yaml
```
```
Your Kubernetes control-plane has initialized successfully!

To start using your cluster, you need to run the following as a regular user:

  mkdir -p $HOME/.kube
  sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
  sudo chown $(id -u):$(id -g) $HOME/.kube/config

Alternatively, if you are the root user, you can run:

  export KUBECONFIG=/etc/kubernetes/admin.conf

You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
  https://kubernetes.io/docs/concepts/cluster-administration/addons/

You can now join any number of control-plane nodes by copying certificate authorities
and service account keys on each node and then running the following as root:

  kubeadm join master.k8s.io:16443 --token ikc2c3.dr1hpaznmn1jljca \
    --discovery-token-ca-cert-hash sha256:fe47660c44cdae8b870a67d50920bea9306ec6a81fdebcaf70e4d8c86619c136 \
    --control-plane

Then you can join any number of worker nodes by running the following on each as root:

  kubeadm join master.k8s.io:16443 --token ikc2c3.dr1hpaznmn1jljca \
    --discovery-token-ca-cert-hash sha256:fe47660c44cdae8b870a67d50920bea9306ec6a81fdebcaf70e4d8c86619c136
```
Configure the environment variables as prompted so that the kubectl tool can be used:
```bash
$ mkdir -p $HOME/.kube
$ sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
$ sudo chown $(id -u):$(id -g) $HOME/.kube/config
$ kubectl get nodes
$ kubectl get pods -n kube-system
```
Save the following join command from the output as prompted; it will be needed shortly:
```bash
kubeadm join master.k8s.io:16443 --token ikc2c3.dr1hpaznmn1jljca \
    --discovery-token-ca-cert-hash sha256:fe47660c44cdae8b870a67d50920bea9306ec6a81fdebcaf70e4d8c86619c136 \
    --control-plane
```
Check the cluster status:
```bash
kubectl get cs
kubectl get pods -n kube-system
```
7. Install the cluster network
Fetch the flannel YAML from the official address and run the following on master1:
```bash
mkdir flannel && cd flannel
wget -c https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
# If the download fails, use the file provided in the appendix
```
Install the flannel network:
```bash
kubectl apply -f kube-flannel.yml
```
Verify:
```bash
kubectl get pods -n kube-system
```
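To watch specifically for the flannel pods becoming Ready (the app=flannel label comes from the manifest in the appendix), something like the following can be used:

```bash
kubectl -n kube-system get pods -l app=flannel -w
```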
8. Join the master2 node to the cluster
8.1 Copy certificates and related files
Copy the certificates and related files from master1 to master2:
```bash
# ssh root@192.168.0.156 mkdir -p /etc/kubernetes/pki/etcd
# scp /etc/kubernetes/admin.conf root@192.168.0.156:/etc/kubernetes
# scp /etc/kubernetes/pki/{ca.*,sa.*,front-proxy-ca.*} root@192.168.0.156:/etc/kubernetes/pki
# scp /etc/kubernetes/pki/etcd/ca.* root@192.168.0.156:/etc/kubernetes/pki/etcd
```
8.2 Join master2 to the cluster
On master2, run the join command printed by kubeadm init on master1. It must include the --control-plane flag, which marks the node as a control-plane (master) node joining the cluster:
```bash
kubeadm join master.k8s.io:16443 --token ikc2c3.dr1hpaznmn1jljca \
    --discovery-token-ca-cert-hash sha256:fe47660c44cdae8b870a67d50920bea9306ec6a81fdebcaf70e4d8c86619c136 \
    --control-plane
```
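To run kubectl on master2 itself, set up the admin kubeconfig there as well (admin.conf was already copied over in step 8.1; these are the same commands used on master1):

```bash
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
```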
Check the status:
```bash
kubectl get node
kubectl get pods --all-namespaces
```
9. Join a Kubernetes worker node
Run the following on node1. To add a new node to the cluster, run the kubeadm join command from the kubeadm init output:
```bash
kubeadm join master.k8s.io:16443 --token ikc2c3.dr1hpaznmn1jljca \
    --discovery-token-ca-cert-hash sha256:fe47660c44cdae8b870a67d50920bea9306ec6a81fdebcaf70e4d8c86619c136
```
Because a new node has been added, re-apply the cluster network (see the sketch below).
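A minimal way to re-apply the network is to run kubectl apply against the same manifest again from master1 (re-applying is idempotent; the path below assumes the `mkdir flannel && cd flannel` step in section 7 was run from root's home directory):

```bash
kubectl apply -f ~/flannel/kube-flannel.yml
```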
Check the status:
```bash
kubectl get node
kubectl get pods --all-namespaces
```
10. Test the Kubernetes cluster
Create a pod in the Kubernetes cluster and verify that it runs correctly:
```bash
kubectl create deployment nginx --image=nginx
kubectl expose deployment nginx --port=80 --type=NodePort
kubectl get pod,svc
```
Access URL: http://NodeIP:Port
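For example (the NodePort value is assigned by Kubernetes; read it from the kubectl get svc output rather than copying the placeholder below):

```bash
# Show the assigned NodePort, e.g. 80:3xxxx/TCP
kubectl get svc nginx
# Access nginx through any node's IP, e.g. node1
curl http://192.168.0.157:<NodePort>
```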
11. Appendix: kube-flannel.yml
```yaml
---
apiVersion: policy/v1beta1
kind: PodSecurityPolicy
metadata:
  name: psp.flannel.unprivileged
  annotations:
    seccomp.security.alpha.kubernetes.io/allowedProfileNames: docker/default
    seccomp.security.alpha.kubernetes.io/defaultProfileName: docker/default
    apparmor.security.beta.kubernetes.io/allowedProfileNames: runtime/default
    apparmor.security.beta.kubernetes.io/defaultProfileName: runtime/default
spec:
  privileged: false
  volumes:
    - configMap
    - secret
    - emptyDir
    - hostPath
  allowedHostPaths:
    - pathPrefix: "/etc/cni/net.d"
    - pathPrefix: "/etc/kube-flannel"
    - pathPrefix: "/run/flannel"
  readOnlyRootFilesystem: false
  # Users and groups
  runAsUser:
    rule: RunAsAny
  supplementalGroups:
    rule: RunAsAny
  fsGroup:
    rule: RunAsAny
  # Privilege Escalation
  allowPrivilegeEscalation: false
  defaultAllowPrivilegeEscalation: false
  # Capabilities
  allowedCapabilities: ['NET_ADMIN']
  defaultAddCapabilities: []
  requiredDropCapabilities: []
  # Host namespaces
  hostPID: false
  hostIPC: false
  hostNetwork: true
  hostPorts:
    - min: 0
      max: 65535
  # SELinux
  seLinux:
    # SELinux is unused in CaaSP
    rule: 'RunAsAny'
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: flannel
rules:
  - apiGroups: ['extensions']
    resources: ['podsecuritypolicies']
    verbs: ['use']
    resourceNames: ['psp.flannel.unprivileged']
  - apiGroups:
      - ""
    resources:
      - pods
    verbs:
      - get
  - apiGroups:
      - ""
    resources:
      - nodes
    verbs:
      - list
      - watch
  - apiGroups:
      - ""
    resources:
      - nodes/status
    verbs:
      - patch
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: flannel
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: flannel
subjects:
  - kind: ServiceAccount
    name: flannel
    namespace: kube-system
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: flannel
  namespace: kube-system
---
kind: ConfigMap
apiVersion: v1
metadata:
  name: kube-flannel-cfg
  namespace: kube-system
  labels:
    tier: node
    app: flannel
data:
  cni-conf.json: |
    {
      "cniVersion": "0.2.0",
      "name": "cbr0",
      "plugins": [
        {
          "type": "flannel",
          "delegate": {
            "hairpinMode": true,
            "isDefaultGateway": true
          }
        },
        {
          "type": "portmap",
          "capabilities": {
            "portMappings": true
          }
        }
      ]
    }
  net-conf.json: |
    {
      "Network": "10.244.0.0/16",
      "Backend": {
        "Type": "vxlan"
      }
    }
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: kube-flannel-ds-amd64
  namespace: kube-system
  labels:
    tier: node
    app: flannel
spec:
  selector:
    matchLabels:
      app: flannel
  template:
    metadata:
      labels:
        tier: node
        app: flannel
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: beta.kubernetes.io/os
                    operator: In
                    values:
                      - linux
                  - key: beta.kubernetes.io/arch
                    operator: In
                    values:
                      - amd64
      hostNetwork: true
      tolerations:
        - operator: Exists
          effect: NoSchedule
      serviceAccountName: flannel
      initContainers:
        - name: install-cni
          image: easzlab/flannel:v0.15.1
          command:
            - cp
          args:
            - -f
            - /etc/kube-flannel/cni-conf.json
            - /etc/cni/net.d/10-flannel.conflist
          volumeMounts:
            - name: cni
              mountPath: /etc/cni/net.d
            - name: flannel-cfg
              mountPath: /etc/kube-flannel/
      containers:
        - name: kube-flannel
          image: easzlab/flannel:v0.15.1
          command:
            - /opt/bin/flanneld
          args:
            - --ip-masq
            - --kube-subnet-mgr
          resources:
            requests:
              cpu: "100m"
              memory: "50Mi"
            limits:
              cpu: "100m"
              memory: "50Mi"
          securityContext:
            privileged: false
            capabilities:
              add: ["NET_ADMIN"]
          env:
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
          volumeMounts:
            - name: run
              mountPath: /run/flannel
            - name: flannel-cfg
              mountPath: /etc/kube-flannel/
      volumes:
        - name: run
          hostPath:
            path: /run/flannel
        - name: cni
          hostPath:
            path: /etc/cni/net.d
        - name: flannel-cfg
          configMap:
            name: kube-flannel-cfg
```