1. 簡介
測試環境Kubernetes 1.14.2版本高可用搭建文檔,搭建方式為kubeadm
2. 服務器版本和架構信息
系統版本:CentOS Linux release 7.6.1810 (Core)
內核:4.4.184-1.el7.elrepo.x86_64(注意:後面實際安裝的內核版本有可能高於此版本)
Kubernetes: v1.14.2
Docker-ce: 18.06
網絡組件:calico
硬件配置:16核64G
Keepalived保證apiserver服務器的IP(VIP)高可用
Haproxy實現apiserver的負載均衡
3. 服務器角色規划
一定注意對應自己的服務器IP和主機名
master01/02節點上面部署了kubelet、keepalived、haproxy、controller-manager、apiserver、scheduler、docker、kube-proxy、calico組件
master03節點上面部署了kubelet、controller-manager、apiserver、scheduler、docker、kube-proxy、calico組件
node01/node03節點上面部署了kubelet、kube-proxy、docker、calico組件
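其中kubelet、docker、keepalived、haproxy以系統服務(systemd)方式運行;apiserver、controller-manager、scheduler、etcd以靜態pod模式存在;kube-proxy、calico則以DaemonSet方式運行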
節點名稱 | 角色 | IP | 安裝軟件 |
---|---|---|---|
負載VIP | VIP | 192.168.4.110 | |
master-01 | master | 192.168.4.129 | kubeadm、kubelet、kubectl、docker、haproxy、keepalived |
master-02 | master | 192.168.4.130 | kubeadm、kubelet、kubectl、docker、haproxy、keepalived |
master-03 | master | 192.168.4.133 | kubeadm、kubelet、kubectl、docker |
node-01 | node | 192.168.4.128 | kubeadm、kubelet、kubectl、docker |
node-03 | node | 192.168.4.132 | kubeadm、kubelet、kubectl、docker |
service網段 | 10.209.0.0/16 |
4. 服務器初始化
4.1 關閉Selinux/firewalld/iptables(所有機器執行)
setenforce 0 \
&& sed -i 's/^SELINUX=.*$/SELINUX=disabled/' /etc/selinux/config \
&& getenforce
systemctl stop firewalld \
&& systemctl daemon-reload \
&& systemctl disable firewalld \
&& systemctl daemon-reload \
&& systemctl status firewalld
yum install -y iptables-services \
&& systemctl stop iptables \
&& systemctl disable iptables \
&& systemctl status iptables
4.2 為每台服務器添加host解析記錄(所有機器執行)
cat >>/etc/hosts<<EOF
192.168.4.129 master01
192.168.4.130 master02
192.168.4.133 master03
192.168.4.128 node01
192.168.4.132 node03
EOF
4.3 更換阿里源(所有機器執行)
yum install wget -y
cp -r /etc/yum.repos.d /etc/yum.repos.d.bak
rm -f /etc/yum.repos.d/*.repo
wget -O /etc/yum.repos.d/CentOS-Base.repo http://mirrors.aliyun.com/repo/Centos-7.repo \
&& wget -O /etc/yum.repos.d/epel.repo http://mirrors.aliyun.com/repo/epel-7.repo
yum clean all && yum makecache
4.4 設置limits.conf(所有機器執行)
cat >> /etc/security/limits.conf <<EOF
# End of file
* soft nproc 10240000
* hard nproc 10240000
* soft nofile 10240000
* hard nofile 10240000
EOF
4.5 設置sysctl.conf(所有機器執行)
[ ! -e "/etc/sysctl.conf_bk" ] && /bin/mv /etc/sysctl.conf{,_bk} \
&& cat > /etc/sysctl.conf << EOF
fs.file-max=20480000
fs.nr_open=20480000
net.ipv4.tcp_max_tw_buckets = 180000
net.ipv4.tcp_sack = 1
net.ipv4.tcp_window_scaling = 1
net.ipv4.tcp_rmem = 4096 87380 4194304
net.ipv4.tcp_wmem = 4096 16384 4194304
net.ipv4.tcp_max_syn_backlog = 16384
net.core.netdev_max_backlog = 32768
net.core.somaxconn = 32768
net.core.wmem_default = 8388608
net.core.rmem_default = 8388608
net.core.rmem_max = 16777216
net.core.wmem_max = 16777216
net.ipv4.tcp_timestamps = 0
net.ipv4.tcp_fin_timeout = 20
net.ipv4.tcp_synack_retries = 2
net.ipv4.tcp_syn_retries = 2
net.ipv4.tcp_syncookies = 1
#net.ipv4.tcp_tw_len = 1
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_mem = 94500000 915000000 927000000
net.ipv4.tcp_max_orphans = 3276800
net.ipv4.ip_local_port_range = 1024 65000
#net.nf_conntrack_max = 6553500
#net.netfilter.nf_conntrack_max = 6553500
#net.netfilter.nf_conntrack_tcp_timeout_close_wait = 60
#net.netfilter.nf_conntrack_tcp_timeout_fin_wait = 120
#net.netfilter.nf_conntrack_tcp_timeout_time_wait = 120
#net.netfilter.nf_conntrack_tcp_timeout_established = 3600
EOF
sysctl -p
4.6 配置時間同步(所有機器執行)
ntpdate -u pool.ntp.org
crontab -e #加入定時任務
*/15 * * * * /usr/sbin/ntpdate -u pool.ntp.org >/dev/null 2>&1
4.7 配置k8s.conf(所有機器執行)
cat <<EOF > /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_nonlocal_bind = 1
net.ipv4.ip_forward = 1
vm.swappiness=0
EOF
#執行命令使其修改生效
modprobe br_netfilter \
&& sysctl -p /etc/sysctl.d/k8s.conf
4.8 關閉交換分區(所有機器執行)
swapoff -a
yes | cp /etc/fstab /etc/fstab_bak
cat /etc/fstab_bak |grep -v swap > /etc/fstab
4.9 升級系統內核(所有機器執行)
yum update -y
rpm -Uvh http://www.elrepo.org/elrepo-release-7.0-2.el7.elrepo.noarch.rpm ;yum --enablerepo=elrepo-kernel install kernel-lt-devel kernel-lt -y
查看內核修改結果
grub2-editenv list
#注意,這里執行下面的命令會出現多個內核版本
[root@master01 ~]# cat /boot/grub2/grub.cfg |grep "menuentry "
menuentry 'CentOS Linux (4.4.184-1.el7.elrepo.x86_64) 7 (Core)' --class centos --class gnu-linux --class gnu --class os --unrestricted $menuentry_id_option 'gnulinux-3.10.0-862.el7.x86_64-advanced-021a955b-781d-425a-8250-f39857437658'
設置默認內核版本,該版本必須已經存在,請注意執行命令cat /boot/grub2/grub.cfg |grep "menuentry "後生成的內容,切勿隨意復制
grub2-set-default 'CentOS Linux (4.4.184-1.el7.elrepo.x86_64) 7 (Core)'
查看內核修改結果
grub2-editenv list
# 檢查默認內核版本高於4.1,否則請調整默認啟動參數
# 查看內核修改結果
grub2-editenv list
#重啟以更換內核使其生效
reboot
4.10 加載ipvs模塊(所有機器執行)
cat > /etc/sysconfig/modules/ipvs.modules <<EOF
#!/bin/bash
modprobe -- ip_vs
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_sh
modprobe -- nf_conntrack_ipv4
EOF
chmod 755 /etc/sysconfig/modules/ipvs.modules && bash /etc/sysconfig/modules/ipvs.modules && lsmod | grep -e ip_vs -e nf_conntrack_ipv4
4.11 添加k8s yum源(所有機器執行)
cat << EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
4.12 安裝服務器必備軟件
yum -y install wget vim iftop iotop net-tools nmon telnet lsof iptraf nmap httpd-tools lrzsz mlocate ntp ntpdate strace libpcap nethogs iptraf iftop nmon bridge-utils bind-utils telnet nc nfs-utils rpcbind nfs-utils dnsmasq python python-devel tcpdump mlocate tree
5. 安裝keepalived和haproxy
5.1 在master01和master02上安裝keepalived和haproxy
master01的priority為250、state為MASTER,master02的priority為200、state為BACKUP,其他配置一樣。
master01(192.168.4.129)
vim /etc/keepalived/keepalived.conf
注意interface這個配置,配置成你服務器的網卡,切勿隨意粘貼
! Configuration File for keepalived
global_defs {
router_id LVS_DEVEL
}
vrrp_script check_haproxy {
script "killall -0 haproxy"
interval 3
weight -2
fall 10
rise 2
}
vrrp_instance VI_1 {
state MASTER
interface ens160
virtual_router_id 51
priority 250
advert_int 1
authentication {
auth_type PASS
auth_pass 35f18af7190d51c9f7f78f37300a0cbd
}
virtual_ipaddress {
192.168.4.110
}
track_script {
check_haproxy
}
}
master02(192.168.4.130)
vim /etc/keepalived/keepalived.conf
! Configuration File for keepalived
global_defs {
router_id LVS_DEVEL
}
vrrp_script check_haproxy {
script "killall -0 haproxy"
interval 3
weight -2
fall 10
rise 2
}
vrrp_instance VI_1 {
state BACKUP
interface ens160
virtual_router_id 51
priority 200
advert_int 1
authentication {
auth_type PASS
auth_pass 35f18af7190d51c9f7f78f37300a0cbd
}
virtual_ipaddress {
192.168.4.110
}
track_script {
check_haproxy
}
}
5.2 haproxy配置
master01和master02的haproxy配置是一樣的。此處haproxy監聽的是8443端口(通過VIP 192.168.4.110:8443訪問),因為haproxy和k8s apiserver部署在同一台服務器上,都用6443會沖突。
192.168.4.129
vim /etc/haproxy/haproxy.cfg
#---------------------------------------------------------------------
# Global settings
#---------------------------------------------------------------------
global
# to have these messages end up in /var/log/haproxy.log you will
# need to:
#
# 1) configure syslog to accept network log events. This is done
# by adding the '-r' option to the SYSLOGD_OPTIONS in
# /etc/sysconfig/syslog
#
# 2) configure local2 events to go to the /var/log/haproxy.log
# file. A line like the following can be added to
# /etc/sysconfig/syslog
#
# local2.* /var/log/haproxy.log
#
#log 127.0.0.1 local2
log 127.0.0.1 local0 info
chroot /var/lib/haproxy
pidfile /var/run/haproxy.pid
maxconn 4000
user haproxy
group haproxy
daemon
# turn on stats unix socket
stats socket /var/lib/haproxy/stats
#---------------------------------------------------------------------
# common defaults that all the 'listen' and 'backend' sections will
# use if not designated in their block
#---------------------------------------------------------------------
defaults
mode http
log global
option httplog
option dontlognull
option http-server-close
option forwardfor except 127.0.0.0/8
option redispatch
retries 3
timeout http-request 10s
timeout queue 1m
timeout connect 10s
timeout client 1m
timeout server 1m
timeout http-keep-alive 10s
timeout check 10s
maxconn 3000
#---------------------------------------------------------------------
# kubernetes apiserver frontend which proxys to the backends
#---------------------------------------------------------------------
frontend kubernetes-apiserver
mode tcp
bind *:8443
option tcplog
default_backend kubernetes-apiserver
#---------------------------------------------------------------------
# round robin balancing between the various backends
#---------------------------------------------------------------------
backend kubernetes-apiserver
mode tcp
balance roundrobin
server master01 192.168.4.129:6443 check
server master02 192.168.4.130:6443 check
server master03 192.168.4.133:6443 check
#---------------------------------------------------------------------
# collection haproxy statistics message
#---------------------------------------------------------------------
listen stats
bind *:1080
stats auth admin:awesomePassword
stats refresh 5s
stats realm HAProxy\ Statistics
stats uri /admin?stats
192.168.4.130
vim /etc/haproxy/haproxy.cfg
#---------------------------------------------------------------------
# Global settings
#---------------------------------------------------------------------
global
# to have these messages end up in /var/log/haproxy.log you will
# need to:
#
# 1) configure syslog to accept network log events. This is done
# by adding the '-r' option to the SYSLOGD_OPTIONS in
# /etc/sysconfig/syslog
#
# 2) configure local2 events to go to the /var/log/haproxy.log
# file. A line like the following can be added to
# /etc/sysconfig/syslog
#
# local2.* /var/log/haproxy.log
#
#log 127.0.0.1 local2
log 127.0.0.1 local0 info
chroot /var/lib/haproxy
pidfile /var/run/haproxy.pid
maxconn 4000
user haproxy
group haproxy
daemon
# turn on stats unix socket
stats socket /var/lib/haproxy/stats
#---------------------------------------------------------------------
# common defaults that all the 'listen' and 'backend' sections will
# use if not designated in their block
#---------------------------------------------------------------------
defaults
mode http
log global
option httplog
option dontlognull
option http-server-close
option forwardfor except 127.0.0.0/8
option redispatch
retries 3
timeout http-request 10s
timeout queue 1m
timeout connect 10s
timeout client 1m
timeout server 1m
timeout http-keep-alive 10s
timeout check 10s
maxconn 3000
#---------------------------------------------------------------------
# kubernetes apiserver frontend which proxys to the backends
#---------------------------------------------------------------------
frontend kubernetes-apiserver
mode tcp
bind *:8443
option tcplog
default_backend kubernetes-apiserver
#---------------------------------------------------------------------
# round robin balancing between the various backends
#---------------------------------------------------------------------
backend kubernetes-apiserver
mode tcp
balance roundrobin
server master01 192.168.4.129:6443 check
server master02 192.168.4.130:6443 check
server master03 192.168.4.133:6443 check
#---------------------------------------------------------------------
# collection haproxy statistics message
#---------------------------------------------------------------------
listen stats
bind *:1080
stats auth admin:awesomePassword
stats refresh 5s
stats realm HAProxy\ Statistics
stats uri /admin?stats
5.3 設置服務啟動順序及依賴關系(master01和master02操作)
vim /usr/lib/systemd/system/keepalived.service
[Unit]
Description=LVS and VRRP High Availability Monitor
After=syslog.target network-online.target haproxy.service
Requires=haproxy.service
5.4 啟動服務
systemctl enable keepalived && systemctl start keepalived \
&& systemctl enable haproxy && systemctl start haproxy && systemctl status keepalived && systemctl status haproxy
6.安裝docker
6.1 安裝必要的一些系統工具(所有服務器安裝)
yum install -y yum-utils device-mapper-persistent-data lvm2
6.2 添加軟件源信息(所有服務器配置)
yum-config-manager --add-repo http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
yum list docker-ce --showduplicates | sort -r
yum -y install docker-ce-18.06.3.ce-3.el7
usermod -aG docker bumblebee
6.3 配置daemon.json文件(所有服務器配置)
mkdir -p /etc/docker/ \
&& cat > /etc/docker/daemon.json << EOF
{
"registry-mirrors":[
"https://c6ai9izk.mirror.aliyuncs.com"
],
"max-concurrent-downloads":3,
"data-root":"/data/docker",
"log-driver":"json-file",
"log-opts":{
"max-size":"100m",
"max-file":"1"
},
"max-concurrent-uploads":5,
"storage-driver":"overlay2",
"storage-opts": [
"overlay2.override_kernel_check=true"
]
}
EOF
6.4 啟動檢查docker服務
systemctl enable docker \
&& systemctl restart docker \
&& systemctl status docker
7 使用kubeadm部署kubernetes
7.1 配置kubernetes.repo(每台機器都需要配置)
cat <<EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=0
repo_gpgcheck=0
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
7.2 安裝必備軟件(所有機器安裝)
yum install -y kubelet-1.14.2 kubeadm-1.14.2 kubectl-1.14.2 ipvsadm ipset
#設置kubelet開機自啟動,注意:這一步不能直接執行 systemctl start kubelet,會報錯,成功初始化完后kubelet會自動起來
systemctl enable kubelet
7.3 修改初始化配置
使用kubeadm config print init-defaults > kubeadm-init.yaml 打印出默認配置,然後再根據自己的環境修改配置
注意需要修改advertiseAddress、controlPlaneEndpoint、imageRepository、serviceSubnet
其中advertiseAddress為master01的ip,controlPlaneEndpoint為VIP+8443端口,imageRepository修改為阿里的源,serviceSubnet找網絡組要一段沒人使用的IP段
[root@master01 ~]# cat kubeadm-init.yaml
apiVersion: kubeadm.k8s.io/v1beta1
bootstrapTokens:
- groups:
- system:bootstrappers:kubeadm:default-node-token
token: abcdef.0123456789abcdef
ttl: 24h0m0s
usages:
- signing
- authentication
kind: InitConfiguration
localAPIEndpoint:
advertiseAddress: 192.168.4.129
bindPort: 6443
nodeRegistration:
criSocket: /var/run/dockershim.sock
name: master01
taints:
- effect: NoSchedule
key: node-role.kubernetes.io/master
---
apiServer:
timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta1
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controlPlaneEndpoint: "192.168.4.110:8443"
controllerManager: {}
dns:
type: CoreDNS
etcd:
local:
dataDir: /var/lib/etcd
imageRepository: registry.cn-hangzhou.aliyuncs.com/google_containers
kind: ClusterConfiguration
kubernetesVersion: v1.14.2
networking:
dnsDomain: cluster.local
podSubnet: ""
serviceSubnet: "10.209.0.0/16"
scheduler: {}
7.4 預下載鏡像
[root@master01 ~]# kubeadm config images pull --config kubeadm-init.yaml
[config/images] Pulled registry.cn-hangzhou.aliyuncs.com/google_containers/kube-apiserver:v1.14.2
[config/images] Pulled registry.cn-hangzhou.aliyuncs.com/google_containers/kube-controller-manager:v1.14.2
[config/images] Pulled registry.cn-hangzhou.aliyuncs.com/google_containers/kube-scheduler:v1.14.2
[config/images] Pulled registry.cn-hangzhou.aliyuncs.com/google_containers/kube-proxy:v1.14.2
[config/images] Pulled registry.cn-hangzhou.aliyuncs.com/google_containers/pause:3.1
[config/images] Pulled registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.3.10
[config/images] Pulled registry.cn-hangzhou.aliyuncs.com/google_containers/coredns:1.3.1
7.5 初始化
[root@master01 ~]# kubeadm init --config kubeadm-init.yaml
[init] Using Kubernetes version: v1.14.2
[preflight] Running pre-flight checks
[WARNING IsDockerSystemdCheck]: detected "cgroupfs" as the Docker cgroup driver. The recommended driver is "systemd". Please follow the guide at https://kubernetes.io/docs/setup/cri/
[preflight] Pulling images required for setting up a Kubernetes cluster
[preflight] This might take a minute or two, depending on the speed of your internet connection
[preflight] You can also perform this action in beforehand using 'kubeadm config images pull'
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Activating the kubelet service
[certs] Using certificateDir folder "/etc/kubernetes/pki"
[certs] Generating "etcd/ca" certificate and key
[certs] Generating "etcd/healthcheck-client" certificate and key
[certs] Generating "apiserver-etcd-client" certificate and key
[certs] Generating "etcd/server" certificate and key
[certs] etcd/server serving cert is signed for DNS names [master01 localhost] and IPs [192.168.4.129 127.0.0.1 ::1]
[certs] Generating "etcd/peer" certificate and key
[certs] etcd/peer serving cert is signed for DNS names [master01 localhost] and IPs [192.168.4.129 127.0.0.1 ::1]
[certs] Generating "ca" certificate and key
[certs] Generating "apiserver" certificate and key
[certs] apiserver serving cert is signed for DNS names [master01 kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local] and IPs [10.209.0.1 192.168.4.129 192.168.4.110]
[certs] Generating "apiserver-kubelet-client" certificate and key
[certs] Generating "front-proxy-ca" certificate and key
[certs] Generating "front-proxy-client" certificate and key
[certs] Generating "sa" key and public key
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
[endpoint] WARNING: port specified in controlPlaneEndpoint overrides bindPort in the controlplane address
[kubeconfig] Writing "admin.conf" kubeconfig file
[endpoint] WARNING: port specified in controlPlaneEndpoint overrides bindPort in the controlplane address
[kubeconfig] Writing "kubelet.conf" kubeconfig file
[endpoint] WARNING: port specified in controlPlaneEndpoint overrides bindPort in the controlplane address
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
[endpoint] WARNING: port specified in controlPlaneEndpoint overrides bindPort in the controlplane address
[kubeconfig] Writing "scheduler.conf" kubeconfig file
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
[control-plane] Creating static Pod manifest for "kube-apiserver"
[control-plane] Creating static Pod manifest for "kube-controller-manager"
[control-plane] Creating static Pod manifest for "kube-scheduler"
[etcd] Creating static Pod manifest for local etcd in "/etc/kubernetes/manifests"
[wait-control-plane] Waiting for the kubelet to boot up the control plane as static Pods from directory "/etc/kubernetes/manifests". This can take up to 4m0s
[apiclient] All control plane components are healthy after 17.506253 seconds
[upload-config] storing the configuration used in ConfigMap "kubeadm-config" in the "kube-system" Namespace
[kubelet] Creating a ConfigMap "kubelet-config-1.14" in namespace kube-system with the configuration for the kubelets in the cluster
[upload-certs] Skipping phase. Please see --experimental-upload-certs
[mark-control-plane] Marking the node master01 as control-plane by adding the label "node-role.kubernetes.io/master=''"
[mark-control-plane] Marking the node master01 as control-plane by adding the taints [node-role.kubernetes.io/master:NoSchedule]
[bootstrap-token] Using token: abcdef.0123456789abcdef
[bootstrap-token] Configuring bootstrap tokens, cluster-info ConfigMap, RBAC Roles
[bootstrap-token] configured RBAC rules to allow Node Bootstrap tokens to post CSRs in order for nodes to get long term certificate credentials
[bootstrap-token] configured RBAC rules to allow the csrapprover controller automatically approve CSRs from a Node Bootstrap Token
[bootstrap-token] configured RBAC rules to allow certificate rotation for all node client certificates in the cluster
[bootstrap-token] creating the "cluster-info" ConfigMap in the "kube-public" namespace
[addons] Applied essential addon: CoreDNS
[endpoint] WARNING: port specified in controlPlaneEndpoint overrides bindPort in the controlplane address
[addons] Applied essential addon: kube-proxy
Your Kubernetes control-plane has initialized successfully!
To start using your cluster, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/
You can now join any number of control-plane nodes by copying certificate authorities
and service account keys on each node and then running the following as root:
kubeadm join 192.168.4.110:8443 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:0ca0a5fd28409faecba5d2f21aeb010a945a5dae42023fe361424d621708edc1 \
--experimental-control-plane
Then you can join any number of worker nodes by running the following on each as root:
kubeadm join 192.168.4.110:8443 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:0ca0a5fd28409faecba5d2f21aeb010a945a5dae42023fe361424d621708edc1
kubeadm init主要執行了以下操作:
- [init]:指定版本進行初始化操作
- [preflight] :初始化前的檢查和下載所需要的Docker鏡像文件
- [kubelet-start] :生成kubelet的配置文件”/var/lib/kubelet/config.yaml”,沒有這個文件kubelet無法啟動,所以初始化之前的kubelet實際上啟動失敗。
- [certs]:生成Kubernetes使用的證書,存放在/etc/kubernetes/pki目錄中。
- [kubeconfig] :生成 KubeConfig 文件,存放在/etc/kubernetes目錄中,組件之間通信需要使用對應文件。
- [control-plane]:使用/etc/kubernetes/manifests目錄下的YAML文件,安裝 Master 組件。
- [etcd]:使用/etc/kubernetes/manifests/etcd.yaml安裝Etcd服務。
- [wait-control-plane]:等待control-plane部署的Master組件啟動。
- [apiclient]:檢查Master組件服務狀態。
- [upload-config]:更新配置
- [kubelet]:使用configMap配置kubelet。
- [patchnode]:更新CNI信息到Node上,通過注釋的方式記錄。
- [mark-control-plane]:為當前節點打標簽,打了角色Master,和不可調度標簽,這樣默認就不會使用Master節點來運行Pod。
- [bootstrap-token]:生成token記錄下來,后邊使用kubeadm join往集群中添加節點時會用到
- [addons]:安裝附加組件CoreDNS和kube-proxy
7.6 為kubectl准備Kubeconfig文件
kubectl默認會在執行的用戶家目錄下面的.kube目錄下尋找config文件。這里是將在初始化時[kubeconfig]步驟生成的admin.conf拷貝到.kube/config。
[root@master01 ~]# mkdir -p $HOME/.kube
[root@master01 ~]# sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
[root@master01 ~]# sudo chown $(id -u):$(id -g) $HOME/.kube/config
在該配置文件中,記錄了API Server的訪問地址,所以后面直接執行kubectl命令就可以正常連接到API Server中。
7.7 查看組件狀態
[root@master01 ~]# kubectl get cs
NAME STATUS MESSAGE ERROR
controller-manager Healthy ok
scheduler Healthy ok
etcd-0 Healthy {"health":"true"}
[root@master01 ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master01 NotReady master 4m20s v1.14.2
目前只有一個節點,角色是Master,狀態是NotReady,這是因為還沒有安裝網絡插件
7.8 其他master部署(在master01機器上執行)
在master01將證書文件拷貝至master02、master03節點
#拷貝證書至master02節點
USER=root
CONTROL_PLANE_IPS="master02"
for host in ${CONTROL_PLANE_IPS}; do
ssh "${USER}"@$host "mkdir -p /etc/kubernetes/pki/etcd"
scp /etc/kubernetes/pki/ca.* "${USER}"@$host:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/sa.* "${USER}"@$host:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/front-proxy-ca.* "${USER}"@$host:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/etcd/ca.* "${USER}"@$host:/etc/kubernetes/pki/etcd/
scp /etc/kubernetes/admin.conf "${USER}"@$host:/etc/kubernetes/
done
#拷貝證書至master03節點
USER=root
CONTROL_PLANE_IPS="master03"
for host in ${CONTROL_PLANE_IPS}; do
ssh "${USER}"@$host "mkdir -p /etc/kubernetes/pki/etcd"
scp /etc/kubernetes/pki/ca.* "${USER}"@$host:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/sa.* "${USER}"@$host:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/front-proxy-ca.* "${USER}"@$host:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/etcd/ca.* "${USER}"@$host:/etc/kubernetes/pki/etcd/
scp /etc/kubernetes/admin.conf "${USER}"@$host:/etc/kubernetes/
done
在master02上執行,注意需要帶上--experimental-control-plane參數
[root@master02 ~]# kubeadm join 192.168.4.110:8443 --token abcdef.0123456789abcdef \
> --discovery-token-ca-cert-hash sha256:0ca0a5fd28409faecba5d2f21aeb010a945a5dae42023fe361424d621708edc1 \
> --experimental-control-plane
[preflight] Running pre-flight checks
[WARNING IsDockerSystemdCheck]: detected "cgroupfs" as the Docker cgroup driver. The recommended driver is "systemd". Please follow the guide at https://kubernetes.io/docs/setup/cri/
[preflight] Reading configuration from the cluster...
[preflight] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -oyaml'
[preflight] Running pre-flight checks before initializing the new control plane instance
[preflight] Pulling images required for setting up a Kubernetes cluster
[preflight] This might take a minute or two, depending on the speed of your internet connection
[preflight] You can also perform this action in beforehand using 'kubeadm config images pull'
[certs] Using certificateDir folder "/etc/kubernetes/pki"
[certs] Generating "apiserver-kubelet-client" certificate and key
[certs] Generating "apiserver" certificate and key
[certs] apiserver serving cert is signed for DNS names [master02 kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local] and IPs [10.209.0.1 192.168.4.130 192.168.4.110]
[certs] Generating "etcd/server" certificate and key
[certs] etcd/server serving cert is signed for DNS names [master02 localhost] and IPs [192.168.4.130 127.0.0.1 ::1]
[certs] Generating "etcd/peer" certificate and key
[certs] etcd/peer serving cert is signed for DNS names [master02 localhost] and IPs [192.168.4.130 127.0.0.1 ::1]
[certs] Generating "apiserver-etcd-client" certificate and key
[certs] Generating "etcd/healthcheck-client" certificate and key
[certs] Generating "front-proxy-client" certificate and key
[certs] Valid certificates and keys now exist in "/etc/kubernetes/pki"
[certs] Using the existing "sa" key
[kubeconfig] Generating kubeconfig files
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
[endpoint] WARNING: port specified in controlPlaneEndpoint overrides bindPort in the controlplane address
[kubeconfig] Using existing kubeconfig file: "/etc/kubernetes/admin.conf"
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
[kubeconfig] Writing "scheduler.conf" kubeconfig file
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
[control-plane] Creating static Pod manifest for "kube-apiserver"
[control-plane] Creating static Pod manifest for "kube-controller-manager"
[control-plane] Creating static Pod manifest for "kube-scheduler"
[check-etcd] Checking that the etcd cluster is healthy
[kubelet-start] Downloading configuration for the kubelet from the "kubelet-config-1.14" ConfigMap in the kube-system namespace
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Activating the kubelet service
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap...
[etcd] Announced new etcd member joining to the existing etcd cluster
[etcd] Wrote Static Pod manifest for a local etcd member to "/etc/kubernetes/manifests/etcd.yaml"
[etcd] Waiting for the new etcd member to join the cluster. This can take up to 40s
[upload-config] storing the configuration used in ConfigMap "kubeadm-config" in the "kube-system" Namespace
[mark-control-plane] Marking the node master02 as control-plane by adding the label "node-role.kubernetes.io/master=''"
[mark-control-plane] Marking the node master02 as control-plane by adding the taints [node-role.kubernetes.io/master:NoSchedule]
This node has joined the cluster and a new control plane instance was created:
* Certificate signing request was sent to apiserver and approval was received.
* The Kubelet was informed of the new secure connection details.
* Control plane (master) label and taint were applied to the new node.
* The Kubernetes control plane instances scaled up.
* A new etcd member was added to the local/stacked etcd cluster.
To start administering your cluster from this node, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Run 'kubectl get nodes' to see this node join the cluster.
注意:token有效期是有限的,如果舊的token過期,可以使用
kubeadm token create --print-join-command
重新創建一條token。
mkdir -p $HOME/.kube \
&& cp -i /etc/kubernetes/admin.conf $HOME/.kube/config \
&& chown $(id -u):$(id -g) $HOME/.kube/config
在master03上執行,注意需要帶上--experimental-control-plane參數
[root@master03 ~]# kubeadm join 192.168.4.110:8443 --token abcdef.0123456789abcdef \
> --discovery-token-ca-cert-hash sha256:0ca0a5fd28409faecba5d2f21aeb010a945a5dae42023fe361424d621708edc1 \
> --experimental-control-plane
[preflight] Running pre-flight checks
[WARNING IsDockerSystemdCheck]: detected "cgroupfs" as the Docker cgroup driver. The recommended driver is "systemd". Please follow the guide at https://kubernetes.io/docs/setup/cri/
[preflight] Reading configuration from the cluster...
[preflight] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -oyaml'
[preflight] Running pre-flight checks before initializing the new control plane instance
[preflight] Pulling images required for setting up a Kubernetes cluster
[preflight] This might take a minute or two, depending on the speed of your internet connection
[preflight] You can also perform this action in beforehand using 'kubeadm config images pull'
[certs] Using certificateDir folder "/etc/kubernetes/pki"
[certs] Generating "front-proxy-client" certificate and key
[certs] Generating "etcd/peer" certificate and key
[certs] etcd/peer serving cert is signed for DNS names [master03 localhost] and IPs [192.168.4.133 127.0.0.1 ::1]
[certs] Generating "etcd/healthcheck-client" certificate and key
[certs] Generating "etcd/server" certificate and key
[certs] etcd/server serving cert is signed for DNS names [master03 localhost] and IPs [192.168.4.133 127.0.0.1 ::1]
[certs] Generating "apiserver-etcd-client" certificate and key
[certs] Generating "apiserver" certificate and key
[certs] apiserver serving cert is signed for DNS names [master03 kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local] and IPs [10.209.0.1 192.168.4.133 192.168.4.110]
[certs] Generating "apiserver-kubelet-client" certificate and key
[certs] Valid certificates and keys now exist in "/etc/kubernetes/pki"
[certs] Using the existing "sa" key
[kubeconfig] Generating kubeconfig files
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
[endpoint] WARNING: port specified in controlPlaneEndpoint overrides bindPort in the controlplane address
[kubeconfig] Using existing kubeconfig file: "/etc/kubernetes/admin.conf"
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
[kubeconfig] Writing "scheduler.conf" kubeconfig file
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
[control-plane] Creating static Pod manifest for "kube-apiserver"
[control-plane] Creating static Pod manifest for "kube-controller-manager"
[control-plane] Creating static Pod manifest for "kube-scheduler"
[check-etcd] Checking that the etcd cluster is healthy
[kubelet-start] Downloading configuration for the kubelet from the "kubelet-config-1.14" ConfigMap in the kube-system namespace
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Activating the kubelet service
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap...
[etcd] Announced new etcd member joining to the existing etcd cluster
[etcd] Wrote Static Pod manifest for a local etcd member to "/etc/kubernetes/manifests/etcd.yaml"
[etcd] Waiting for the new etcd member to join the cluster. This can take up to 40s
[upload-config] storing the configuration used in ConfigMap "kubeadm-config" in the "kube-system" Namespace
[mark-control-plane] Marking the node master03 as control-plane by adding the label "node-role.kubernetes.io/master=''"
[mark-control-plane] Marking the node master03 as control-plane by adding the taints [node-role.kubernetes.io/master:NoSchedule]
This node has joined the cluster and a new control plane instance was created:
* Certificate signing request was sent to apiserver and approval was received.
* The Kubelet was informed of the new secure connection details.
* Control plane (master) label and taint were applied to the new node.
* The Kubernetes control plane instances scaled up.
* A new etcd member was added to the local/stacked etcd cluster.
To start administering your cluster from this node, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Run 'kubectl get nodes' to see this node join the cluster.
[root@master03 ~]# mkdir -p $HOME/.kube \
> && cp -i /etc/kubernetes/admin.conf $HOME/.kube/config \
> && chown $(id -u):$(id -g) $HOME/.kube/config
[root@master03 ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master01 NotReady master 15m v1.14.2
master02 NotReady master 3m40s v1.14.2
master03 NotReady master 2m1s v1.14.2
8. node節點部署
在node01、node03執行,注意沒有--experimental-control-plane參數
注意:token有效期是有限的,如果舊的token過期,可以在master節點上使用
kubeadm token create --print-join-command
重新創建一條token。
在node01和node03上執行下面這條命令
kubeadm join 192.168.4.110:8443 --token lwsk91.y2ywpq0y74wt03tb --discovery-token-ca-cert-hash sha256:0ca0a5fd28409faecba5d2f21aeb010a945a5dae42023fe361424d621708edc1
9. 部署網絡插件calico
9.1 下載calico.yaml文件
wget -c https://docs.projectcalico.org/v3.6/getting-started/kubernetes/installation/hosted/kubernetes-datastore/calico-networking/1.7/calico.yaml
9.2 修改calico.yaml(根據實際情況配置)
修改CALICO_IPV4POOL_CIDR這個下面的value值,默認是192.168.0.0/16
# The default IPv4 pool to create on startup if none exists. Pod IPs will be
# chosen from this range. Changing this value after installation will have
# no effect. This should fall within `--cluster-cidr`.
- name: CALICO_IPV4POOL_CIDR
value: "10.209.0.0/16"
9.3 執行kubectl apply -f calico.yaml
[root@master01 ~]# kubectl apply -f calico.yaml
configmap/calico-config created
customresourcedefinition.apiextensions.k8s.io/felixconfigurations.crd.projectcalico.org created
customresourcedefinition.apiextensions.k8s.io/ipamblocks.crd.projectcalico.org created
customresourcedefinition.apiextensions.k8s.io/blockaffinities.crd.projectcalico.org created
customresourcedefinition.apiextensions.k8s.io/ipamhandles.crd.projectcalico.org created
customresourcedefinition.apiextensions.k8s.io/ipamconfigs.crd.projectcalico.org created
customresourcedefinition.apiextensions.k8s.io/bgppeers.crd.projectcalico.org created
customresourcedefinition.apiextensions.k8s.io/bgpconfigurations.crd.projectcalico.org created
customresourcedefinition.apiextensions.k8s.io/ippools.crd.projectcalico.org created
customresourcedefinition.apiextensions.k8s.io/hostendpoints.crd.projectcalico.org created
customresourcedefinition.apiextensions.k8s.io/clusterinformations.crd.projectcalico.org created
customresourcedefinition.apiextensions.k8s.io/globalnetworkpolicies.crd.projectcalico.org created
customresourcedefinition.apiextensions.k8s.io/globalnetworksets.crd.projectcalico.org created
customresourcedefinition.apiextensions.k8s.io/networkpolicies.crd.projectcalico.org created
clusterrole.rbac.authorization.k8s.io/calico-kube-controllers created
clusterrolebinding.rbac.authorization.k8s.io/calico-kube-controllers created
clusterrole.rbac.authorization.k8s.io/calico-node created
clusterrolebinding.rbac.authorization.k8s.io/calico-node created
daemonset.extensions/calico-node created
serviceaccount/calico-node created
deployment.extensions/calico-kube-controllers created
serviceaccount/calico-kube-controllers created
9.4 查看節點狀態
一開始沒安裝網絡組件,是顯示NotReady的,裝完calico后就變成Ready,說明集群已就緒了,可以進行下一步驗證集群是否搭建成功
[root@master01 ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master01 Ready master 23h v1.14.2
master02 Ready master 22h v1.14.2
master03 Ready master 22h v1.14.2
node01 NotReady <none> 19m v1.14.2
node03 NotReady <none> 5s v1.14.2
10. kube-proxy開啟ipvs[單個master節點執行]
10.1 修改ConfigMap的kube-system/kube-proxy中的config.conf,設置mode: "ipvs":
kubectl edit cm kube-proxy -n kube-system
10.2 之后重啟各個節點上的kube-proxy pod:
[root@master01 ~]# kubectl get pod -n kube-system | grep kube-proxy | awk '{system("kubectl delete pod "$1" -n kube-system")}'
pod "kube-proxy-8fpjb" deleted
pod "kube-proxy-dqqxh" deleted
pod "kube-proxy-mxvz2" deleted
pod "kube-proxy-np9x9" deleted
pod "kube-proxy-rtzcn" deleted
10.3 查看kube-proxy pod狀態
[root@master01 ~]# kubectl get pod -n kube-system | grep kube-proxy
kube-proxy-4fhpg 1/1 Running 0 81s
kube-proxy-9f2x6 1/1 Running 0 109s
kube-proxy-cxl5m 1/1 Running 0 89s
kube-proxy-lvp9q 1/1 Running 0 78s
kube-proxy-v4mg8 1/1 Running 0 99s
10.4 查看是否開啟了ipvs
日志中打印出了"Using ipvs Proxier",說明ipvs模式已經開啟
[root@master01 ~]# kubectl logs kube-proxy-4fhpg -n kube-system
I0705 07:53:05.254157 1 server_others.go:176] Using ipvs Proxier.
W0705 07:53:05.255130 1 proxier.go:380] clusterCIDR not specified, unable to distinguish between internal and external traffic
W0705 07:53:05.255181 1 proxier.go:386] IPVS scheduler not specified, use rr by default
I0705 07:53:05.255599 1 server.go:562] Version: v1.14.2
I0705 07:53:05.280930 1 conntrack.go:52] Setting nf_conntrack_max to 131072
I0705 07:53:05.281426 1 config.go:102] Starting endpoints config controller
I0705 07:53:05.281473 1 controller_utils.go:1027] Waiting for caches to sync for endpoints config controller
I0705 07:53:05.281523 1 config.go:202] Starting service config controller
I0705 07:53:05.281548 1 controller_utils.go:1027] Waiting for caches to sync for service config controller
I0705 07:53:05.381724 1 controller_utils.go:1034] Caches are synced for endpoints config controller
I0705 07:53:05.381772 1 controller_utils.go:1034] Caches are synced for service config controller
11. 查看ipvs狀態
[root@master01 ~]# ipvsadm -L -n
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Scheduler Flags
-> RemoteAddress:Port Forward Weight ActiveConn InActConn
TCP 10.209.0.1:443 rr
-> 192.168.4.129:6443 Masq 1 0 0
-> 192.168.4.130:6443 Masq 1 0 0
-> 192.168.4.133:6443 Masq 1 0 0
TCP 10.209.0.10:53 rr
-> 10.209.59.193:53 Masq 1 0 0
-> 10.209.59.194:53 Masq 1 0 0
TCP 10.209.0.10:9153 rr
-> 10.209.59.193:9153 Masq 1 0 0
-> 10.209.59.194:9153 Masq 1 0 0
UDP 10.209.0.10:53 rr
-> 10.209.59.193:53 Masq 1 0 0
-> 10.209.59.194:53 Masq 1 0 0
12. 測試運行一個容器
[root@master01 ~]# kubectl run nginx --image=nginx:1.14 --replicas=2
kubectl run --generator=deployment/apps.v1 is DEPRECATED and will be removed in a future version. Use kubectl run --generator=run-pod/v1 or kubectl create instead.
deployment.apps/nginx created
12.1 查看nginx pod
[root@master01 ~]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
nginx-84b67f57c4-d9k8m 1/1 Running 0 59s 10.209.196.129 node01 <none> <none>
nginx-84b67f57c4-zcrxn 1/1 Running 0 59s 10.209.186.193 node03 <none> <none>
12.2 通過curl命令測試nginx
[root@master01 ~]# curl 10.209.196.129
<!DOCTYPE html>
<html>
<head>
<title>Welcome to nginx!</title>
<style>
body {
width: 35em;
margin: 0 auto;
font-family: Tahoma, Verdana, Arial, sans-serif;
}
</style>
</head>
<body>
<h1>Welcome to nginx!</h1>
<p>If you see this page, the nginx web server is successfully installed and
working. Further configuration is required.</p>
<p>For online documentation and support please refer to
<a href="http://nginx.org/">nginx.org</a>.<br/>
Commercial support is available at
<a href="http://nginx.com/">nginx.com</a>.</p>
<p><em>Thank you for using nginx.</em></p>
</body>
</html>
[root@master01 ~]# curl 10.209.186.193
<!DOCTYPE html>
<html>
<head>
<title>Welcome to nginx!</title>
<style>
body {
width: 35em;
margin: 0 auto;
font-family: Tahoma, Verdana, Arial, sans-serif;
}
</style>
</head>
<body>
<h1>Welcome to nginx!</h1>
<p>If you see this page, the nginx web server is successfully installed and
working. Further configuration is required.</p>
<p>For online documentation and support please refer to
<a href="http://nginx.org/">nginx.org</a>.<br/>
Commercial support is available at
<a href="http://nginx.com/">nginx.com</a>.</p>
<p><em>Thank you for using nginx.</em></p>
</body>
</html>
能顯示出Welcome to nginx,說明pod運行正常,間接也說明集群可以正常使用
13. 測試dns
通過kubectl run啟動一個臨時容器,進入容器后執行nslookup kubernetes.default
[root@master01 ~]# kubectl run curl --image=radial/busyboxplus:curl -it
kubectl run --generator=deployment/apps.v1 is DEPRECATED and will be removed in a future version. Use kubectl run --generator=run-pod/v1 or kubectl create instead.
If you don't see a command prompt, try pressing enter.
[ root@curl-66bdcf564-njcqk:/ ]$ nslookup kubernetes.default
Server: 10.209.0.10
Address 1: 10.209.0.10 kube-dns.kube-system.svc.cluster.local
Name: kubernetes.default
Address 1: 10.209.0.1 kubernetes.default.svc.cluster.local #能顯示類似這樣的輸出,說明dns是okay的
至此kubernetes集群部署完成。
14.典型報錯
14.1 鏡像無法拉取報錯
ct: connection timed out
Warning FailedCreatePodSandBox 3m44s (x17 over 28m) kubelet, node01 Failed create pod sandbox: rpc error: code = Unknown desc = failed pulling image "k8s.gcr.io/pause:3.1": Error response from daemon: Get https://k8s.gcr.io/v2/: dial tcp 74.125.204.82:443: connect: connection timed out
解決辦法
先去其他渠道找到對應的鏡像,然后docker tag下
docker pull mirrorgooglecontainers/pause:3.1
docker tag mirrorgooglecontainers/pause:3.1 k8s.gcr.io/pause:3.1