System initialization
## Configure /etc/hosts resolution for each host
cat << EOF > /etc/hosts
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
172.26.105.254 node01.master.k8s.test.czlun.com node01
172.26.105.253 node02.master.k8s.test.czlun.com node02
172.26.105.252 node03.master.k8s.test.czlun.com node03
172.26.105.255 proxy.master.k8s.test.czlun.com
EOF
## Set the hostname to the FQDN that matches this host's eth0 address in /etc/hosts
hostnamectl set-hostname $(grep $(ip a list eth0 | grep "inet " | awk -F "[ /]+" '{print $3}') /etc/hosts | awk '{print $2}')
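A quick sanity check (assuming eth0 is the primary interface, as in the command above): the FQDN should now be set and should resolve through /etc/hosts.
# Show the FQDN and confirm it resolves locally
hostname -f
getent hosts $(hostname -f)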
## Set SELinux to permissive mode
setenforce 0
sed -i 's/^SELINUX=enforcing$/SELINUX=permissive/' /etc/selinux/config
## Disable swap (required by the kubelet), now and after reboots
swapoff -a
echo 'swapoff -a' >> /etc/rc.local
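The rc.local line only runs at boot if /etc/rc.d/rc.local is executable; an alternative (or additional) sketch is to disable swap permanently by commenting out its /etc/fstab entry, assuming swap is configured there.
# Make rc.local executable so the swapoff line runs at boot
chmod +x /etc/rc.d/rc.local
# Comment out any swap entry in fstab so it is not re-enabled at boot
sed -i '/[[:space:]]swap[[:space:]]/ s/^/#/' /etc/fstab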
## Adjust kernel parameters: enable packet forwarding and make bridged traffic pass through iptables so it cannot bypass it and be routed incorrectly
cat << EOF >> /etc/sysctl.d/k8s.conf
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
EOF
sysctl --system
## Load the br_netfilter kernel module
## This has kernel version requirements; update the kernel if necessary:
## yum -y install kernel kernel-devel kernel-headers
if [[ $(lsmod | grep br_netfilter) == "" ]]; then
    echo "br_netfilter module is loading"
    modprobe br_netfilter
    if [[ $(lsmod | grep br_netfilter) == "" ]]; then
        echo "Failed to load br_netfilter module"
    else
        echo "Successfully loaded br_netfilter module"
    fi
else
    echo "br_netfilter module is already loaded"
fi
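modprobe does not persist across reboots, and the net.bridge.* settings above only take effect once br_netfilter is loaded. A minimal sketch (assuming systemd's modules-load mechanism on CentOS 7) to persist the module and re-apply the sysctl settings:
# Load br_netfilter automatically at boot
echo br_netfilter > /etc/modules-load.d/br_netfilter.conf
# Re-apply sysctl now that the module is loaded
sysctl --system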
Optional:
- Install ipvsadm and ipset to provide IPVS support for kube-proxy.
yum -y install ipvsadm ipset
# Load the modules now (effective until reboot)
modprobe -- ip_vs
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_sh
modprobe -- nf_conntrack_ipv4
# Persist the module loading across reboots
cat > /etc/sysconfig/modules/ipvs.modules <<EOF
modprobe -- ip_vs
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_sh
modprobe -- nf_conntrack_ipv4
EOF
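The script above uses the conventional CentOS location, but it is only picked up at boot if it is executable (on CentOS 7, /etc/modules-load.d is the systemd-native alternative). A commonly used follow-up to make it effective now and verify the modules:
chmod 755 /etc/sysconfig/modules/ipvs.modules
bash /etc/sysconfig/modules/ipvs.modules
lsmod | grep -e ip_vs -e nf_conntrack_ipv4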
Install the Kubernetes cluster
Use kubeadm to create a highly available Kubernetes cluster with the stacked topology (etcd is co-located with the control plane rather than separated).
Kubernetes version compatibility
For kube-1.16, the validated Docker versions are 1.13.1, 17.03, 17.06, 17.09, 18.06, and 18.09.
https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.16.md#unchanged
The versions selected for this deployment are:
- k8s 1.16.10
  - 1.16.13 contains an urgent fix for a privilege escalation from a compromised node to the cluster.
  - However, with 1.16.13 we hit a bug where controller-manager and scheduler could not start cleanly, so their liveness probes kept failing and they restarted continuously; this deployment therefore stays on 1.16.10.
    1 actual_state_of_world.go:506] Failed to update statusUpdateNeeded field in actual state of world: Failed to set statusUpdateNeeded to needed true, because nodeName="node01.master.k8s.test.czlun.com" does not exist
- docker 18.09.3
Installation
Download the Aliyun docker-ce repo
curl https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo -so /etc/yum.repos.d/docker-ce.repo
Download the Aliyun Kubernetes repo
cat <<EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=0
repo_gpgcheck=0
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
Install
## Clean and rebuild the yum repo cache
yum clean all && yum makecache
## Remove any previously installed docker/kube packages
rpm -e $(rpm -qa | grep -E "^docker|^kube" | tr '\n' ' ')
### yum install
yum install -y --nogpgcheck kubelet-1.16.10 kubeadm-1.16.10 kubectl-1.16.10 docker-ce-18.09.9 docker-ce-cli-18.09.9
### Enable kubelet and docker and start them now
systemctl enable --now kubelet docker
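Note that kubelet will restart in a crash loop at this point because it has no kubeadm-generated configuration yet; this settles after kubeadm init or kubeadm join. A quick check of the installed versions:
docker --version
kubeadm version -o short
systemctl status kubelet --no-pager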
Application tuning
Modify the Docker configuration
# Set the cgroup driver to systemd
# Configure the Aliyun registry mirror
mkdir -p /etc/docker
cat << EOF > /etc/docker/daemon.json
{
  "exec-opts": ["native.cgroupdriver=systemd"],
  "log-driver": "json-file",
  "log-opts": {
    "max-size": "100m"
  },
  "storage-driver": "overlay2",
  "storage-opts": [
    "overlay2.override_kernel_check=true"
  ],
  "registry-mirrors": ["https://ydjenbck.mirror.aliyuncs.com"]
}
EOF
systemctl restart docker
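A quick check that the new daemon.json took effect (the exact output wording may differ slightly between Docker versions):
docker info | grep -i "cgroup driver"
docker info | grep -i -A1 "registry mirrors"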
Initialize the cluster
Create the load-balancer node
haproxy in TCP mode is used here as the load-balanced entry point for the API servers, listening on port 12567.
# install haproxy
yum install -y haproxy
# backup haproxy config
cp /etc/haproxy/haproxy.cfg{,.bak}
# Configure haproxy
cat << 'EOF' > /etc/haproxy/haproxy.cfg
global
    log         127.0.0.1 local2
    chroot      /var/lib/haproxy
    pidfile     /var/run/haproxy.pid
    maxconn     4000
    user        haproxy
    group       haproxy
    daemon
    stats socket /var/lib/haproxy/stats

defaults
    mode                    http
    log                     global
    option                  httplog
    option                  dontlognull
    option                  http-server-close
    option forwardfor       except 127.0.0.0/8
    option                  redispatch
    retries                 3
    timeout http-request    10s
    timeout queue           1m
    timeout connect         10s
    timeout client          1m
    timeout server          1m
    timeout http-keep-alive 10s
    timeout check           10s
    maxconn                 3000

listen stats 0.0.0.0:12345
    mode http
    log global
    maxconn 10
    stats enable
    stats hide-version
    stats refresh 5s
    stats show-node
    stats uri /stats

frontend kube-api-https
    bind 0.0.0.0:12567
    mode tcp
    default_backend kube-api-server

backend kube-api-server
    balance roundrobin
    mode tcp
    server kube-node1 172.26.105.252:6443 check
    server kube-node2 172.26.105.253:6443 check
    server kube-node3 172.26.105.254:6443 check
EOF
# Enable haproxy at boot and start it now
systemctl enable --now haproxy
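A quick check that haproxy is up and listening on the configured ports (12567 for the API-server frontend, 12345 for the stats page); the backends will stay DOWN until the API servers exist:
systemctl status haproxy --no-pager
ss -lntp | grep -E ':(12345|12567)'
curl -s http://127.0.0.1:12345/stats | head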
Initialize the first control plane
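Optionally, the control-plane images can be pre-pulled before running kubeadm init, using the same version and image repository as below; this is only a convenience to speed up initialization:
kubeadm config images pull \
--kubernetes-version 1.16.10 \
--image-repository registry.aliyuncs.com/google_containers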
- Initialize the control plane
kubeadm init \
--pod-network-cidr 192.168.0.0/16 \
--upload-certs \
--control-plane-endpoint proxy.master.k8s.test.czlun.com:12567 \
--kubernetes-version 1.16.10 \
--image-repository registry.aliyuncs.com/google_containers
# --pod-network-cidr: the pod network CIDR
# --upload-certs: upload the control-plane certificates so other control-plane nodes can retrieve them
# --control-plane-endpoint: the control-plane address, i.e. the load balancer configured above
# --kubernetes-version: the Kubernetes version to deploy
# --image-repository: the image registry used during initialization
After the command completes, you should get output similar to the following; keep a copy of it.
To start using your cluster, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/
You can now join any number of the control-plane node running the following command on each as root:
kubeadm join proxy.master.k8s.test.czlun.com:12567 --token achhjj.4cr1vm4z1k678fqm \
--discovery-token-ca-cert-hash sha256:0731d2cf02babef1d868b1109983c8cc8c9a0d2ed4dc832c0ddb74a71376cbd3 \
--control-plane --certificate-key e345ff76dede23fb9d0f714c99143d50c794a89bce7f1e215df2248e9fe04a7f
Please note that the certificate-key gives access to cluster sensitive data, keep it secret!
As a safeguard, uploaded-certs will be deleted in two hours; If necessary, you can use
"kubeadm init phase upload-certs --upload-certs" to reload certs afterward.
Then you can join any number of worker nodes by running the following on each as root:
kubeadm join proxy.master.k8s.test.czlun.com:12567 --token achhjj.4cr1vm4z1k678fqm \
--discovery-token-ca-cert-hash sha256:0731d2cf02babef1d868b1109983c8cc8c9a0d2ed4dc832c0ddb74a71376cbd3
- As instructed in the output, run the following commands to start using the cluster.
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
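kubectl should now be able to talk to the cluster; the node will usually report NotReady until the pod network add-on from the next step is installed.
kubectl get nodes
kubectl get pods -n kube-system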
- Deploy a pod network to the cluster; Calico is used here.
  Install Calico using the Kubernetes API datastore (clusters of up to 50 nodes).
  - Download the Calico manifest for the Kubernetes API datastore.
curl https://docs.projectcalico.org/manifests/calico.yaml -O
  - In the manifest, uncomment the CALICO_IPV4POOL_CIDR variable and set it to the same value as the chosen pod CIDR (see the sketch after this list).
  - Apply the manifest.
kubectl apply -f calico.yaml
  - Check the status of the Calico components and of the nodes.
kubectl get pod -n kube-system -w
kubectl get node
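For the CALICO_IPV4POOL_CIDR step above, a minimal sed sketch that uncomments the variable in calico.yaml and sets it to the pod CIDR passed to kubeadm init (192.168.0.0/16); the commented lines' exact form can differ between Calico releases, so check the result before applying:
# Uncomment CALICO_IPV4POOL_CIDR and its value (assumes the default commented form in calico.yaml)
sed -i 's|# - name: CALICO_IPV4POOL_CIDR|- name: CALICO_IPV4POOL_CIDR|' calico.yaml
sed -i 's|#   value: "192.168.0.0/16"|  value: "192.168.0.0/16"|' calico.yaml
# Verify the edit
grep -A1 CALICO_IPV4POOL_CIDR calico.yaml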
- Note:
  - The certificates of the first control plane are encrypted and uploaded into the kubeadm-certs Secret.
  - To regenerate the bootstrap token (--token) and print the complete 'kubeadm join' command needed to join the cluster, run the following on a control-plane node that has already joined the cluster:
kubeadm token create --print-join-command
  - To re-upload the certificates and generate a new decryption key, run the following on a control-plane node that has already joined the cluster:
kubeadm init phase upload-certs --upload-certs
  - You can also specify a custom --certificate-key during init, which can later be used by join. To generate such a key, run:
kubeadm alpha certs certificate-key
Optional:
- Change the kube-proxy mode to ipvs. Every Kubernetes node must have IPVS support enabled; see the IPVS section above.
# Change mode to ipvs
kubectl edit configmap kube-proxy -n kube-system
...
mode: "ipvs"
...
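Editing the ConfigMap alone does not reconfigure kube-proxy pods that are already running; a possible follow-up (kubectl rollout restart works for DaemonSets from kubectl 1.15 onward) is to roll them and confirm IPVS is in use:
kubectl -n kube-system rollout restart daemonset kube-proxy
# Once the pods are back, IPVS virtual servers should be visible
ipvsadm -Ln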
Join the remaining control planes to the cluster
Each of the remaining control-plane nodes must perform the following steps to register in the cluster with the control-plane role.
- Run the join command provided in the output of the first control plane's initialization.
kubeadm join proxy.master.k8s.test.czlun.com:12567 --token achhjj.4cr1vm4z1k678fqm \
--discovery-token-ca-cert-hash sha256:0731d2cf02babef1d868b1109983c8cc8c9a0d2ed4dc832c0ddb74a71376cbd3 \
--control-plane --certificate-key e345ff76dede23fb9d0f714c99143d50c794a89bce7f1e215df2248e9fe04a7f
The --control-plane flag registers the node with the control-plane role instead of as a worker node; --certificate-key decrypts the certificates downloaded from the control plane with the specified key.
After the command completes, you should get output similar to the following; keep a copy of it.
This node has joined the cluster and a new control plane instance was created:
* Certificate signing request was sent to apiserver and approval was received.
* The Kubelet was informed of the new secure connection details.
* Control plane (master) label and taint were applied to the new node.
* The Kubernetes control plane instances scaled up.
* A new etcd member was added to the local/stacked etcd cluster.
To start administering your cluster from this node, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Run 'kubectl get nodes' to see this node join the cluster.
- As instructed in the output, run the following commands to start using the cluster from this node.
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
- Run the following command to check that this node has joined the cluster.
kubectl get nodes
The cluster after deployment is complete:
[root@node01 ~]# kubectl get node
NAME STATUS ROLES AGE VERSION
node01.master.k8s.test.czlun.com Ready master 2d20h v1.16.10
node02.master.k8s.test.czlun.com Ready master 2d19h v1.16.10
node03.master.k8s.test.czlun.com Ready master 2d19h v1.16.10
[root@node01 ~]# kubectl get pod -n kube-system
NAME READY STATUS RESTARTS AGE
calico-kube-controllers-59d85c5c84-7g4h6 1/1 Running 4 2d20h
calico-node-9nbk7 1/1 Running 1 2d19h
calico-node-krffv 1/1 Running 2 2d20h
calico-node-ph89w 1/1 Running 1 2d19h
coredns-58cc8c89f4-d7zch 1/1 Running 1 2d20h
coredns-58cc8c89f4-zg865 1/1 Running 1 2d20h
etcd-node01.master.k8s.test.czlun.com 1/1 Running 1 2d20h
etcd-node02.master.k8s.test.czlun.com 1/1 Running 1 2d19h
etcd-node03.master.k8s.test.czlun.com 1/1 Running 1 2d19h
kube-apiserver-node01.master.k8s.test.czlun.com 1/1 Running 1 2d20h
kube-apiserver-node02.master.k8s.test.czlun.com 1/1 Running 1 2d19h
kube-apiserver-node03.master.k8s.test.czlun.com 1/1 Running 1 2d19h
kube-controller-manager-node01.master.k8s.test.czlun.com 1/1 Running 2 2d20h
kube-controller-manager-node02.master.k8s.test.czlun.com 1/1 Running 1 2d19h
kube-controller-manager-node03.master.k8s.test.czlun.com 1/1 Running 1 2d19h
kube-proxy-5v2v6 1/1 Running 1 2d20h
kube-proxy-mdmjv 1/1 Running 1 2d19h
kube-proxy-vt6kh 1/1 Running 1 2d19h
kube-scheduler-node01.master.k8s.test.czlun.com 1/1 Running 3 2d20h
kube-scheduler-node02.master.k8s.test.czlun.com 1/1 Running 1 2d19h
kube-scheduler-node03.master.k8s.test.czlun.com 1/1 Running 1 2d19h