本文搭建的环境不建议在生产环境使用, 因为 k8s 所有数据都保存在 etcd 中, 生产环境最起码得有个 etcd 集群; 开发者自己在本地测试、学习应该是没问题的
本文环境
docker: 20.10.8
k8s: 1.21.0
kubeadm: 1.21.4
kubelet: 1.21.4
kubectl: 1.21.4
系统版本: CentOS Linux release 8.4.2105
网络插件: flannel
两台虚拟机:
192.168.5.128 k8s-master
192.168.5.129 k8s-node-1
配置均为 2核2G/20GB
前置工作
前置工作需要在所有的节点上执行
配置要求
CPU 推荐两核或者更多
内存 不得小于 2G
MAC地址 保证唯一
交换分区 禁用
节点之间保持网络通畅
修改主机名
各个节点修改成自己的名字
hostnamectl set-hostname <name>
修改 hosts
配置各个节点的 ip 和主机名映射, 每个节点的 hosts 文件都要包含所有节点
# vim /etc/hosts
192.168.5.128 k8s-master
192.168.5.129 k8s-node-1
关闭防火墙, 开启内核网络参数
systemctl stop firewalld
systemctl disable firewalld
# vi /etc/sysctl.conf # 编辑配置文件
# 追加下面两行
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
sysctl -p # 应用配置
关闭 SELinux
setenforce 0
sed -i "s/SELINUX=enforcing/SELINUX=disabled/g" /etc/selinux/config
关闭 swap
注释掉 /etc/fstab 文件中包含 swap 的那一行, 如下文件内容示例中最后一行已被注释
# vim /etc/fstab
# Created by anaconda on Wed Jan 6 20:22:34 2021
#
# Accessible filesystems, by reference, are maintained under '/dev/disk'
# See man pages fstab(5), findfs(8), mount(8) and/or blkid(8) for more info
#
/dev/mapper/centos-root / ext4 defaults 1 1
UUID=b6a81016-1920-44c6-b713-2547ccbc9adf /boot ext4 defaults 1 2
/dev/mapper/centos-home /home ext4 defaults 1 2
# /dev/mapper/centos-swap swap swap defaults 0 0
重启
reboot
安装 Docker
所有的节点都必须安装 docker, 且设置服务为开机自动启动
# 移除机器上已经安装的 docker
yum remove docker \
docker-client \
docker-client-latest \
docker-common \
docker-latest \
docker-latest-logrotate \
docker-logrotate \
docker-engine
# 安装依赖
yum install -y yum-utils \
device-mapper-persistent-data \
lvm2
# 添加镜像源
yum-config-manager \
--add-repo \
https://mirrors.ustc.edu.cn/docker-ce/linux/centos/docker-ce.repo
# 安装
yum install docker-ce docker-ce-cli containerd.io -y
# 安装指定版本 docker, 安装其他软件也是一样
yum list docker-ce --showduplicates | sort -r
# Last metadata expiration check: 0:32:36 ago on Mon 16 Aug 2021 02:15:13 PM CST.
# Installed Packages
# docker-ce.x86_64 3:20.10.8-3.el8 docker-ce-stable
# docker-ce.x86_64 3:20.10.8-3.el8 @docker-ce-stable
# docker-ce.x86_64 3:20.10.7-3.el8 docker-ce-stable
# docker-ce.x86_64 3:20.10.6-3.el8 docker-ce-stable
# docker-ce.x86_64 3:20.10.5-3.el8 docker-ce-stable
# docker-ce.x86_64 3:20.10.4-3.el8 docker-ce-stable
# docker-ce.x86_64 3:20.10.3-3.el8 docker-ce-stable
# docker-ce.x86_64 3:20.10.2-3.el8 docker-ce-stable
# docker-ce.x86_64 3:20.10.1-3.el8 docker-ce-stable
# docker-ce.x86_64 3:20.10.0-3.el8 docker-ce-stable
# docker-ce.x86_64 3:19.03.15-3.el8 docker-ce-stable
# docker-ce.x86_64 3:19.03.14-3.el8 docker-ce-stable
# docker-ce.x86_64 3:19.03.13-3.el8 docker-ce-stable
# Available Packages
# 选择上面 列出的版本进行安装,比如这里安装最新版的 20.10.8
yum install docker-ce-20.10.8-3.el8
# 启动服务,并设置为开机自启
systemctl start docker
systemctl enable docker
# 更换 docker 的镜像源
# vim /etc/docker/daemon.json
{
"registry-mirrors" : [
"https://registry.docker-cn.com",
"https://docker.mirrors.ustc.edu.cn",
"http://hub-mirror.c.163.com",
"https://cr.console.aliyun.com/"
]
}
# 如果当前用户非 root 用户,需要加入 docker 的用户组
# 加入 docker 组后,需要重启下系统,才能不使用 sudo 使用docker命令
sudo usermod -aG docker <your username>
# 重启docker
sudo systemctl restart docker
安装 kubeadm,kubelet,kubectl
这三个组件有版本兼容性要求
具体版本要求参考官网
三个工具所有节点都需要安装
添加镜像源
# vim /etc/yum.repos.d/kubernetes.repo
# 内容, 注意 gpgkey 是一行, 两个 https 地址中间使用空格分隔
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
# 清理缓存,重建
yum clean all&&yum makecache
开始安装
当前时间: 2021年8月16日
yum install -y kubelet-1.21.4 kubeadm-1.21.4 kubectl-1.21.4
准备初始化集群<Master节点>
查看默认的初始化配置文件, 并导出成文件
kubeadm config print init-defaults > init-defaults.yaml
按照下方示例提示文字,进行修改
apiVersion: kubeadm.k8s.io/v1beta2
bootstrapTokens:
- groups:
- system:bootstrappers:kubeadm:default-node-token
token: abcdef.0123456780abcdef # token 设置
ttl: 24h0m0s
usages:
- signing
- authentication
kind: InitConfiguration
localAPIEndpoint:
advertiseAddress: 192.168.5.128 # master 对外访问ip
bindPort: 6443
nodeRegistration:
criSocket: /var/run/dockershim.sock
name: k8s-master # master节点名称, 此名称须加入 hosts 文件解析
taints: null
---
apiServer:
timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta2
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controllerManager: {}
dns:
type: CoreDNS
etcd:
local:
dataDir: /var/lib/etcd
imageRepository: registry.aliyuncs.com/k8sxio # 修改镜像源地址
kind: ClusterConfiguration
kubernetesVersion: 1.21.0 # 待安装的 k8s 版本
networking:
dnsDomain: cluster.local
podSubnet: 10.244.0.0/16 # flannel 默认的 Pod 网段
serviceSubnet: 10.96.0.0/12 # Service 网段(kubeadm 默认值), 不能与 Pod 网段重叠
scheduler: {}
查看并下载镜像
可以事先下载然后导入到自己本地的 docker 中
# 查看需要下载哪些镜像
kubeadm config images list --config init-defaults.yaml
# registry.aliyuncs.com/k8sxio/kube-apiserver:v1.21.0
# registry.aliyuncs.com/k8sxio/kube-controller-manager:v1.21.0
# registry.aliyuncs.com/k8sxio/kube-scheduler:v1.21.0
# registry.aliyuncs.com/k8sxio/kube-proxy:v1.21.0
# registry.aliyuncs.com/k8sxio/pause:3.4.1
# registry.aliyuncs.com/k8sxio/etcd:3.4.13-0
# registry.aliyuncs.com/k8sxio/coredns:v1.8.0
# 开始下载镜像,防止直接安装因为某个镜像下载失败,导致整体安装失败
kubeadm config images pull --config init-defaults.yaml
# 如果发生下面这种拉取镜像错误,尝试使用docker直接搜索镜像,然后使用 docker tag 重新打标即可
# [root@k8s-master k8s-install-file]# kubeadm config images pull --config init-defaults.yaml
# [config/images] Pulled registry.aliyuncs.com/k8sxio/kube-apiserver:v1.21.0
# [config/images] Pulled registry.aliyuncs.com/k8sxio/kube-controller-manager:v1.21.0
# [config/images] Pulled registry.aliyuncs.com/k8sxio/kube-scheduler:v1.21.0
# [config/images] Pulled registry.aliyuncs.com/k8sxio/kube-proxy:v1.21.0
# [config/images] Pulled registry.aliyuncs.com/k8sxio/pause:3.4.1
# [config/images] Pulled registry.aliyuncs.com/k8sxio/etcd:3.4.13-0
# failed to pull image "registry.aliyuncs.com/k8sxio/coredns:v1.8.0": output: Error response from daemon: manifest for registry.aliyuncs.com/k8sxio/coredns:v1.8.0 not found: manifest unknown: manifest unknown
# , error: exit status 1
# To see the stack trace of this error execute with --v=5 or higher
# 这里搜索镜像
# [root@k8s-master k8s-install-file]# docker search coredns:v1.8.0
# NAME DESCRIPTION STARS OFFICIAL AUTOMATED
# louwy001/coredns-coredns k8s.gcr.io/coredns/coredns:v1.8.0 1
# ninokop/coredns k8s.gcr.io/coredns/coredns:v1.8.0 0
# xwjh/coredns from k8s.gcr.io/coredns/coredns:v1.8.0 0
# hhhlhh/coredns-coredns FROM k8s.gcr.io/coredns/coredns:v1.8.0 0
# suxishuo/coredns k8s.gcr.io/coredns/coredns:v1.8.0 0
# fengbb/coredns k8s.gcr.io/coredns/coredns:v1.8.0 0
# 拉取镜像
# [root@k8s-master k8s-install-file]# docker pull louwy001/coredns-coredns:v1.8.0
# v1.8.0: Pulling from louwy001/coredns-coredns
# c6568d217a00: Pull complete
# 5984b6d55edf: Pull complete
# Digest: sha256:10ecc12177735e5a6fd6fa0127202776128d860ed7ab0341780ddaeb1f6dfe61
# Status: Downloaded newer image for louwy001/coredns-coredns:v1.8.0
# docker.io/louwy001/coredns-coredns:v1.8.0
# 重新打标,并取消多余的tag名称
# [root@k8s-master k8s-install-file]# docker tag louwy001/coredns-coredns:v1.8.0 registry.aliyuncs.com/k8sxio/coredns:v1.8.0
# [root@k8s-master k8s-install-file]#
# [root@k8s-master k8s-install-file]# docker rmi louwy001/coredns-coredns:v1.8.0
# Untagged: louwy001/coredns-coredns:v1.8.0
# Untagged: louwy001/coredns-coredns@sha256:10ecc12177735e5a6fd6fa0127202776128d860ed7ab0341780ddaeb1f6dfe61
# [root@k8s-master k8s-install-file]#
卸载集群
如果 初始化集群失败了,或者参数错误,直接执行下面的命令还原设置
kubeadm reset
iptables -F
iptables -X
ipvsadm -C
rm -rf /etc/cni/net.d
rm -rf $HOME/.kube/config
开始初始化
kubeadm init --config init-defaults.yaml
初始化完成后, 根据提示执行初始设置, 并记录下 加入集群的命令和参数
# 集群配置文件
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
# 开机自启 kubelet
systemctl enable kubelet.service
# 加入集群
kubeadm join 192.168.5.128:6443 --token abcdef.0123456780abcdef \
--discovery-token-ca-cert-hash sha256:d27cf2fd4a45c3ce8c59cdf0163edbf7cd4bc55a994a34404c0e175a47770798
其他节点接入集群
确认安装好 kubeadm , kubelet, kubectl
在节点机器上执行上面提示的加入集群命令, 并设置 kubelet 为开机自启
如果没复制保存上面提示的加入集群命令, 可以在 master 节点上执行下面的命令来查看加入命令
kubeadm token create --print-join-command
在 master 节点上拷贝集群配置文件给 node, 这样 node 才能正常使用 kubectl 命令, 也可以不操作这一步 (拷贝前需先在 node 上执行 mkdir -p ~/.kube, 否则 scp 会因目标目录不存在而失败)
systemctl enable kubelet.service
scp /etc/kubernetes/admin.conf k8s-node-1:~/.kube/config
配置 Flannel 网络
安装 flannel 保证各个节点的pod之间网络通信
修改集群 kube-controller-manager.yaml
文件,追加网络参数
vim /etc/kubernetes/manifests/kube-controller-manager.yaml
# 在 command 下面追加两行
--allocate-node-cidrs=true
--cluster-cidr=10.244.0.0/16
# 重启 kubelet
systemctl restart kubelet
如果是多网卡的机器, 可能需要指定下网卡, 参考这个大佬的文章, 其中 "安装 Pod Network" 一节提到:
"另外需要注意的是如果你的节点有多个网卡的话,需要在 kube-flannel.yml 中使用 --iface 参数指定集群主机内网网卡的名称,否则可能会出现 dns 无法解析。"
我猜应该是在 flannel 部署文件中下面的位置追加, 注意 kind 和 metadata 中的信息
注意: flannel 部署文件的下载方式在下面给出
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: kube-flannel-ds
namespace: kube-system
labels:
tier: node
app: flannel
spec:
selector:
matchLabels:
app: flannel
template:
metadata:
labels:
tier: node
app: flannel
spec:
....
...
..
.
containers:
- name: kube-flannel
image: quay.io/coreos/flannel:v0.14.0
command:
- /opt/bin/flanneld
args:
- --ip-masq
- --kube-subnet-mgr
- --iface=ens33 # 这里追加参数 <----------------
resources:
requests:
...
....
......
获取flannel
部署文件,并下载镜像
curl https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml > kube-flannel.yml
# 查看需要的镜像
cat kube-flannel.yml | grep image
# image: quay.io/coreos/flannel:v0.14.0
# image: quay.io/coreos/flannel:v0.14.0
# 直接下载如果失败的话,就用docker搜索下别人上传的镜像
docker search flannel:v0.14.0
# NAME DESCRIPTION STARS OFFICIAL AUTOMATED
# xwjh/flannel from quay.io/coreos/flannel:v0.14.0 1
# 下载镜像并重新进行打tag, 完事后删除多余的 tag
docker pull xwjh/flannel:v0.14.0
docker tag xwjh/flannel:v0.14.0 quay.io/coreos/flannel:v0.14.0
docker rmi xwjh/flannel:v0.14.0
# 应用配置
kubectl create -f kube-flannel.yml
# [root@k8s-master k8s-install-file]# kubectl create -f kube-flannel.yml
# Warning: policy/v1beta1 PodSecurityPolicy is deprecated in v1.21+, unavailable in v1.25+
# podsecuritypolicy.policy/psp.flannel.unprivileged created
# clusterrole.rbac.authorization.k8s.io/flannel created
# clusterrolebinding.rbac.authorization.k8s.io/flannel created
# serviceaccount/flannel created
# configmap/kube-flannel-cfg created
# daemonset.apps/kube-flannel-ds created
# [root@k8s-master k8s-install-file]#
验证&其他设置
至此 k8s 简单搭建版结束, 后续多个节点、多 master 之类的查资料设置加入集群即可
验证节点状态
执行 kubectl get node 查看集群节点状态, 如果你之前没装 flannel, 直接执行会看到如下信息
[root@k8s-master ~]# kubectl get node
NAME STATUS ROLES AGE VERSION
k8s-master NotReady control-plane,master 21h v1.21.4
k8s-node-1 NotReady <none> 21h v1.21.4
[root@k8s-master ~]#
当你正确安装 flannel 后, 会变成如下样式, 两个节点都会变成 Ready 状态
[root@k8s-master k8s-install-file]# kubectl get node
NAME STATUS ROLES AGE VERSION
k8s-master Ready control-plane,master 22h v1.21.4
k8s-node-1 Ready <none> 21h v1.21.4
[root@k8s-master k8s-install-file]#
验证 coredns 状态
安装完成后查看pod
状态可能会出现coredns
错误,无法启动:
[root@k8s-master k8s-install-file]# kubectl get pod --all-namespaces
NAMESPACE NAME READY STATUS RESTARTS AGE
kube-system coredns-67574f65b-fh2kq 0/1 ImagePullBackOff 0 22h
kube-system coredns-67574f65b-qspjm 0/1 ImagePullBackOff 0 22h
kube-system etcd-k8s-master 1/1 Running 1 22h
kube-system kube-apiserver-k8s-master 1/1 Running 1 22h
kube-system kube-controller-manager-k8s-master 1/1 Running 1 5h44m
kube-system kube-flannel-ds-h5fd6 1/1 Running 0 7m33s
kube-system kube-flannel-ds-z945p 1/1 Running 0 7m33s
kube-system kube-proxy-rmwcx 1/1 Running 1 21h
kube-system kube-proxy-vzmjw 1/1 Running 1 22h
kube-system kube-scheduler-k8s-master 1/1 Running 1 22h
[root@k8s-master k8s-install-file]#
我们查看下pod
的错误信息
[root@k8s-master k8s-install-file]# kubectl -n kube-system describe pod coredns-67574f65b-fh2kq
Name: coredns-67574f65b-fh2kq
Namespace: kube-system
Priority: 2000000000
Priority Class Name: system-cluster-critical
Node: k8s-node-1/192.168.5.129
Start Time: Tue, 17 Aug 2021 14:54:36 +0800
Labels: k8s-app=kube-dns
pod-template-hash=67574f65b
Annotations: <none>
Status: Pending
IP: 10.244.1.3
IPs:
IP: 10.244.1.3
Controlled By: ReplicaSet/coredns-67574f65b
Containers:
coredns:
Container ID:
Image: registry.aliyuncs.com/k8sxio/coredns:v1.8.0
Image ID:
Ports: 53/UDP, 53/TCP, 9153/TCP
Host Ports: 0/UDP, 0/TCP, 0/TCP
Args:
-conf
/etc/coredns/Corefile
State: Waiting
Reason: ImagePullBackOff
Ready: False
Restart Count: 0
Limits:
memory: 170Mi
Requests:
cpu: 100m
memory: 70Mi
Liveness: http-get http://:8080/health delay=60s timeout=5s period=10s #success=1 #failure=5
Readiness: http-get http://:8181/ready delay=0s timeout=1s period=10s #success=1 #failure=3
Environment: <none>
Mounts:
/etc/coredns from config-volume (ro)
/var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-trjcg (ro)
Conditions:
Type Status
Initialized True
Ready False
ContainersReady False
PodScheduled True
Volumes:
config-volume:
Type: ConfigMap (a volume populated by a ConfigMap)
Name: coredns
Optional: false
kube-api-access-trjcg:
Type: Projected (a volume that contains injected data from multiple sources)
TokenExpirationSeconds: 3607
ConfigMapName: kube-root-ca.crt
ConfigMapOptional: <nil>
DownwardAPI: true
QoS Class: Burstable
Node-Selectors: kubernetes.io/os=linux
Tolerations: CriticalAddonsOnly op=Exists
node-role.kubernetes.io/control-plane:NoSchedule
node-role.kubernetes.io/master:NoSchedule
node.kubernetes.io/not-ready:NoExecute op=Exists for 300s
node.kubernetes.io/unreachable:NoExecute op=Exists for 300s
Events:
Type Reason Age From Message
---- ------ ---- ---- -------
Warning FailedScheduling 4h53m (x1020 over 21h) default-scheduler 0/2 nodes are available: 2 node(s) had taint {node.kubernetes.io/not-ready:}, that the pod didn't tolerate.
Warning FailedScheduling 8m6s (x9 over 14m) default-scheduler 0/2 nodes are available: 2 node(s) had taint {node.kubernetes.io/not-ready:}, that the pod didn't tolerate.
Normal Scheduled 7m56s default-scheduler Successfully assigned kube-system/coredns-67574f65b-fh2kq to k8s-node-1
Normal Pulling 6m27s (x4 over 7m54s) kubelet Pulling image "registry.aliyuncs.com/k8sxio/coredns:v1.8.0"
Warning Failed 6m26s (x4 over 7m53s) kubelet Failed to pull image "registry.aliyuncs.com/k8sxio/coredns:v1.8.0": rpc error: code = Unknown desc = Error response from daemon: manifest for registry.aliyuncs.com/k8sxio/coredns:v1.8.0 not found: manifest unknown: manifest unknown
Warning Failed 6m26s (x4 over 7m53s) kubelet Error: ErrImagePull
Warning Failed 6m15s (x6 over 7m53s) kubelet Error: ImagePullBackOff
Normal BackOff 2m45s (x21 over 7m53s) kubelet Back-off pulling image "registry.aliyuncs.com/k8sxio/coredns:v1.8.0"
发现错误是拉取镜像失败, 但是 master 节点确实存在这个镜像, 说明是 node 节点上缺少镜像。我们导出 master 上的 registry.aliyuncs.com/k8sxio/coredns:v1.8.0, 拷贝给 node 节点导入即可
docker save -o coredns.zip registry.aliyuncs.com/k8sxio/coredns:v1.8.0
scp coredns.zip k8s-node-1:~
# node 节点
docker load -i coredns.zip
重新查看状态
[root@k8s-master k8s-install-file]# kubectl -n kube-system get pods
NAME READY STATUS RESTARTS AGE
coredns-67574f65b-fh2kq 1/1 Running 0 22h
coredns-67574f65b-qspjm 1/1 Running 0 22h
etcd-k8s-master 1/1 Running 1 22h
kube-apiserver-k8s-master 1/1 Running 1 22h
kube-controller-manager-k8s-master 1/1 Running 1 5h58m
kube-flannel-ds-h5fd6 1/1 Running 0 21m
kube-flannel-ds-z945p 1/1 Running 0 21m
kube-proxy-rmwcx 1/1 Running 1 22h
kube-proxy-vzmjw 1/1 Running 1 22h
kube-scheduler-k8s-master 1/1 Running 1 22h
[root@k8s-master k8s-install-file]#
node 节点角色为 none
查看节点详细信息, 可以看到 node 节点为 none 角色, 我们手动指定节点为 node
[root@k8s-master k8s-install-file]# kubectl get node -o wide
NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME
k8s-master Ready control-plane,master 22h v1.21.4 192.168.5.128 <none> CentOS Linux 8 4.18.0-305.12.1.el8_4.x86_64 docker://20.10.8
k8s-node-1 Ready <none> 22h v1.21.4 192.168.5.129 <none> CentOS Linux 8 4.18.0-305.12.1.el8_4.x86_64 docker://20.10.8
[root@k8s-master k8s-install-file]#
执行下面的命令修改节点角色
kubectl label node <node name> node-role.kubernetes.io/node=
[root@k8s-master k8s-install-file]# kubectl label node k8s-node-1 node-role.kubernetes.io/node=
node/k8s-node-1 labeled
[root@k8s-master k8s-install-file]#
[root@k8s-master k8s-install-file]# kubectl get node
NAME STATUS ROLES AGE VERSION
k8s-master Ready control-plane,master 22h v1.21.4
k8s-node-1 Ready node 22h v1.21.4
[root@k8s-master k8s-install-file]#
设置节点角色
# 设置节点为 master
kubectl label node <node name> node-role.kubernetes.io/master=
# 设置节点为 node 角色
kubectl label node <node name> node-role.kubernetes.io/node=
# 设置 master 一般情况下不接受负载
kubectl taint node <node name> node-role.kubernetes.io/master=true:NoSchedule
# 设置 master 不运行pod
kubectl taint node <node name> node-role.kubernetes.io/master=:NoSchedule
# 删除节点标签<角色>, 只要修改 = 号为 - 号即可
kubectl label node k8s-node-1 node-role.kubernetes.io/node-
所有节点都允许运行pod
kubectl taint nodes --all node-role.kubernetes.io/master-
# 多次运行后是这个提示, 正常可以参考官网提示
# [root@k8s-master k8s-install-file]# kubectl taint nodes --all node-role.kubernetes.io/master-
# taint "node-role.kubernetes.io/master" not found
# taint "node-role.kubernetes.io/master" not found
# [root@k8s-master k8s-install-file]#
修改 NodePort 端口范围
默认端口号范围是 30000-32767
修改后等一会儿就可以生效
# vim /etc/kubernetes/manifests/kube-apiserver.yaml
# 在 command 末尾追加下面的参数,设置范围为 0-65535 全端口
- --service-node-port-range=0-65535