# mkdir /container # cd /container/ # mkdir rootfs 准备容器镜像的文件系统,从 busybox 镜像中提取 # docker export $(docker create busybox) | tar -C rootfs -xvf - # ls rootfs/ bin dev etc home proc root sys tmp usr var 有了rootfs之后,我们还要按照 OCI 标准有一个配置文件 config.json 说明如何运行容器, 包括要运行的命令、权限、环境变量等等内容,runc 提供了一个命令可以自动帮我们生成 # docker-runc spec # ls config.json rootfs # docker-runc run simplebusybox #启动容器 / # ls bin dev etc home proc root sys tmp usr var / # hostname runc
root@ubuntu:~/docker/container# ls rootfs root@ubuntu:~/docker/container# ls rootfs/ bin dev etc home proc root sys tmp usr var root@ubuntu:~/docker/container# ls rootfs root@ubuntu:~/docker/container#
root@ubuntu:~/docker/container# apt install docker-runc -y
root@ubuntu:~/docker/container# docker-runc spec root@ubuntu:~/docker/container# ls config.json rootfs root@ubuntu:~/docker/container# cat config.json { "ociVersion": "1.0.0-rc2-dev", "platform": { "os": "linux", "arch": "arm64" }, "process": { "terminal": true, "consoleSize": { "height": 0, "width": 0 }, "user": { "uid": 0, "gid": 0 }, "args": [ "sh" ], "env": [ "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", "TERM=xterm" ], "cwd": "/", "capabilities": [ "CAP_AUDIT_WRITE", "CAP_KILL", "CAP_NET_BIND_SERVICE" ], "rlimits": [ { "type": "RLIMIT_NOFILE", "hard": 1024, "soft": 1024 } ], "noNewPrivileges": true }, "root": { "path": "rootfs", "readonly": true }, "hostname": "runc", "mounts": [ { "destination": "/proc", "type": "proc", "source": "proc" }, { "destination": "/dev", "type": "tmpfs", "source": "tmpfs", "options": [ "nosuid", "strictatime", "mode=755", "size=65536k" ] }, { "destination": "/dev/pts", "type": "devpts", "source": "devpts", "options": [ "nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620", "gid=5" ] }, { "destination": "/dev/shm", "type": "tmpfs", "source": "shm", "options": [ "nosuid", "noexec", "nodev", "mode=1777", "size=65536k" ] }, { "destination": "/dev/mqueue", "type": "mqueue", "source": "mqueue", "options": [ "nosuid", "noexec", "nodev" ] }, { "destination": "/sys", "type": "sysfs", "source": "sysfs", "options": [ "nosuid", "noexec", "nodev", "ro" ] }, { "destination": "/sys/fs/cgroup", "type": "cgroup", "source": "cgroup", "options": [ "nosuid", "noexec", "nodev", "relatime", "ro" ] } ], "hooks": {}, "linux": { "resources": { "devices": [ { "allow": false, "access": "rwm" } ] }, "namespaces": [ { "type": "pid" }, { "type": "network" }, { "type": "ipc" }, { "type": "uts" }, { "type": "mount" } ], "maskedPaths": [ "/proc/kcore", "/proc/latency_stats", "/proc/timer_list", "/proc/timer_stats", "/proc/sched_debug", "/sys/firmware" ], "readonlyPaths": [ "/proc/asound", "/proc/bus", "/proc/fs", "/proc/irq", 
"/proc/sys", "/proc/sysrq-trigger" ] } }root@ubuntu:~/docker/container#
root@ubuntu:~/docker/container# ls rootfs/ bin dev etc home proc root sys tmp usr var root@ubuntu:~/docker/container# docker-runc run simplebusybox / # ls bin dev etc home proc root sys tmp usr var / # hostname runc / #
/ # hostname runc / # ip a 1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue qlen 1000 link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00 inet 127.0.0.1/8 scope host lo valid_lft forever preferred_lft forever inet6 ::1/128 scope host valid_lft forever preferred_lft forever
root@ubuntu:~/docker/container# tree -d ../container ../container └── rootfs ├── bin ├── dev │ ├── pts │ └── shm ├── etc │ └── network │ ├── if-down.d │ ├── if-post-down.d │ ├── if-pre-up.d │ └── if-up.d ├── home ├── proc ├── root ├── sys ├── tmp ├── usr │ └── sbin └── var ├── spool │ └── mail └── www 22 directories root@ubuntu:~/docker/container#
root@ubuntu:/home/ubuntu# runc list load container simplebusybox: json: cannot unmarshal string into Go struct field State.init_process_start of type uint64 ID PID STATUS BUNDLE CREATED OWNER root@ubuntu:/home/ubuntu# docker-runc list ID PID STATUS BUNDLE CREATED simplebusybox 1837 running /root/docker/container 2020-10-10T02:05:29.166335818Z root@ubuntu:/home/ubuntu#
root@ubuntu:~/docker/container# docker-runc run simplebusybox / # ps aux PID USER TIME COMMAND 1 root 0:00 sh 7 root 0:00 ps aux / #
root@ubuntu:/home/ubuntu# docker ps -a CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES 5fb4a5304212 busybox "sh" 22 minutes ago Created nervous_noyce
root@ubuntu:~/docker/container# docker-runc run mycontainer / # runc --help sh: runc: not found / # ls bin dev etc home proc root sys tmp usr var / #
root@ubuntu:/home/ubuntu# ps -elf | grep docker 4 S root 2506 1 0 80 0 - 1059410 futex_ 10:26 ? 00:00:01 /usr/bin/dockerd 4 S root 3690 1212 0 80 0 - 132537 futex_ 10:41 pts/0 00:00:00 docker-runc run mycontainer 0 S root 3874 1876 0 80 0 - 1097 pipe_w 10:43 pts/2 00:00:00 grep --color=auto docker
可以看到,直接使用 docker-runc 运行容器时,进程列表中没有 containerd 和 containerd-shim 进程。
containerd
root@ubuntu:~/docker/container# docker run -d busybox sleep 100 801dd825ef221189e2844b4ad3012b6b99b4a1483562c825d0e37d182fb39496 root@ubuntu:~/docker/container#
dockerd
dockerd 本身是对容器相关操作 API 的最上层封装,直接面向用户。
containerd
dockerd 实际调用的还是 containerd 的 API 接口(通过 RPC 方式实现),containerd 是 dockerd 和 runc 之间的中间交流组件。
docker-shim
docker-shim 是每个真实运行的容器的垫片(shim)载体,每启动一个容器都会新起一个 docker-shim 进程。
它通过指定的三个参数:容器 id、bundle 目录(containerd 为对应容器生成的目录,一般位于:/var/run/docker/libcontainerd/containerID)、
运行时二进制(默认为 runc),来调用 runc 的 API 创建容器(比如创建容器时,最后拼装的命令如下:runc create ……)。
runc
runc 是一个命令行工具,它根据 OCI(Open Container Initiative,开放容器倡议)的标准来创建和运行容器。
它们之间的关系如下图:
docker 配置
docker-init
我们都知道 UNIX 系统中,1 号进程是 init 进程,也是所有孤儿进程的父进程。而使用 docker 时,如果不加 --init 参数,容器中的 1 号进程就是所给的 ENTRYPOINT,例如下面例子中的 sh。而加上 --init 之后,1 号进程就会是 init:
jiajun@ubuntu:~$ docker run -it busybox sh / # ps aux PID USER TIME COMMAND 1 root 0:00 sh 6 root 0:00 ps aux / # exit jiajun@ubuntu:~$ docker run -it --init busybox sh / # ps aux PID USER TIME COMMAND 1 root 0:00 /dev/init -- sh 6 root 0:00 sh 7 root 0:00 ps aux / # exit
root@ubuntu:/home/ubuntu# ps -elf | grep docker 4 S root 2506 1 0 80 0 - 653820 futex_ 10:26 ? 00:00:00 /usr/bin/dockerd 0 S root 3045 1212 0 80 0 - 391104 futex_ 10:27 pts/0 00:00:00 docker run -it busybox sh 0 S root 3099 17232 0 80 0 - 27055 futex_ 10:27 ? 00:00:00 containerd-shim -namespace moby -workdir /var/lib/containerd/io.containerd.runtime.v1.linux/moby/6729d7c5d3117307259a3848628d4a5e73fc20a0d19b644332bf2e1b9d18071e -address /run/containerd/containerd.sock -containerd-binary /usr/bin/containerd -runtime-root /var/run/docker/runtime-runc 0 S root 3186 1876 0 80 0 - 1097 pipe_w 10:27 pts/2 00:00:00 grep --color=auto docker root@ubuntu:/home/ubuntu#
网络
host侧
nat规则
docker-proxy
我猜测这个是用来做端口映射的,因为——名字里有proxy嘛,还能用来干啥,因此就验证一下:
jiajun@ubuntu:~$ docker run -d -p 10010:10010 busybox sleep 10000 be88279118ad7f8cfd3d418db00872aa4f3b1753278b67c28727f16d68f37ae5 jiajun@ubuntu:~$ docker ps CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES be88279118ad busybox "sleep 10000" 2 seconds ago Up 1 second 0.0.0.0:10010->10010/tcp awesome_jackson jiajun@ubuntu:~$ ps aux | grep docker root 897 0.1 3.8 736592 78444 ? Ssl 06:20 0:33 /usr/bin/dockerd -H fd:// root 1188 0.0 1.8 665876 37964 ? Ssl 06:20 0:25 docker-containerd --config /var/run/docker/containerd/containerd.toml root 5579 0.0 0.1 378868 3076 ? Sl 14:57 0:00 /usr/bin/docker-proxy -proto tcp -host-ip 0.0.0.0 -host-port 10010 -container-ip 172.17.0.2 -container-port 10010 root 5585 0.0 0.1 7376 3808 ? Sl 14:57 0:00 docker-containerd-shim -namespace moby -workdir /var/lib/docker/containerd/daemon/io.containerd.runtime.v1.linux/moby/be88279118ad7f8cfd3d418db00872aa4f3b1753278b67c28727f16d68f37ae5 -address /var/run/docker/containerd/docker-containerd.sock -containerd-binary /usr/bin/docker-containerd -runtime-root /var/run/docker/runtime-runc jiajun 5666 0.0 0.0 13136 1076 pts/0 S+ 14:57 0:00 grep --color=auto docker
可以看到这么一行 /usr/bin/docker-proxy -proto tcp -host-ip 0.0.0.0 -host-port 10010 -container-ip 172.17.0.2 -container-port 10010,而端口映射的底层是使用 iptables 来完成的,参考:https://windsock.io/the-docker-proxy/。
root@ubuntu:/home/ubuntu# ps -elf | grep docker 4 S root 2506 1 0 80 0 - 1004111 futex_ 10:26 ? 00:00:01 /usr/bin/dockerd 4 S root 3538 2506 0 80 0 - 137439 futex_ 10:36 ? 00:00:00 /usr/bin/docker-proxy -proto tcp -host-ip 0.0.0.0 -host-port 10010 -container-ip 172.17.0.2 -container-port 10010 0 S root 3547 17232 0 80 0 - 27439 futex_ 10:36 ? 00:00:00 containerd-shim -namespace moby -workdir /var/lib/containerd/io.containerd.runtime.v1.linux/moby/51deea7da623be0d0fb6a26fdcd883e7c5c9630f26852f140cd3a2ebebf13d6f -address /run/containerd/containerd.sock -containerd-binary /usr/bin/containerd -runtime-root /var/run/docker/runtime-runc 0 S root 3687 1876 0 80 0 - 1097 pipe_w 10:37 pts/2 00:00:00 grep --color=auto docker root@ubuntu:/home/ubuntu# iptables -nvL -t nat Chain PREROUTING (policy ACCEPT 115 packets, 5340 bytes) pkts bytes target prot opt in out source destination 2 190 DOCKER all -- * * 0.0.0.0/0 0.0.0.0/0 ADDRTYPE match dst-type LOCAL Chain INPUT (policy ACCEPT 5 packets, 940 bytes) pkts bytes target prot opt in out source destination Chain OUTPUT (policy ACCEPT 0 packets, 0 bytes) pkts bytes target prot opt in out source destination 0 0 DOCKER all -- * * 0.0.0.0/0 !127.0.0.0/8 ADDRTYPE match dst-type LOCAL Chain POSTROUTING (policy ACCEPT 0 packets, 0 bytes) pkts bytes target prot opt in out source destination 1 84 MASQUERADE all -- * !docker0 172.17.0.0/16 0.0.0.0/0 9 554 MASQUERADE all -- * enahisic2i0 10.10.104.83 0.0.0.0/0 0 0 MASQUERADE tcp -- * * 172.17.0.2 172.17.0.2 tcp dpt:10010 Chain DOCKER (2 references) pkts bytes target prot opt in out source destination 1 84 RETURN all -- docker0 * 0.0.0.0/0 0.0.0.0/0 0 0 DNAT tcp -- !docker0 * 0.0.0.0/0 0.0.0.0/0 tcp dpt:10010 to:172.17.0.2:10010 root@ubuntu:/home/ubuntu#