綁定前網卡驅動 [root@localhost ~]# ethtool -i enp5s0 driver: hinic version: firmware-version: expansion-rom-version: bus-info: 0000:05:00.0 supports-statistics: no supports-test: no supports-eeprom-access: no supports-register-dump: no supports-priv-flags: no [root@localhost ~]#
安裝ovs時遇到一個問題:ovs version和dpdk version不匹配。華為鯤鵬920上安裝的dpdk是dpdk-19.11。
ovs-vsctl add-br br0 -- set bridge br0 datapath_type=netdev
[root@localhost ~]# ovs-vsctl add-port br0 dpdk1 -- set Interface dpdk1 type=dpdk options:dpdk-devargs=0000:05:00.0
ovs-vsctl: Error detected while setting up 'dpdk1': could not open network device dpdk1 (Address family not supported by protocol). See ovs-vswitchd log for details.
ovs-vsctl: The default log directory is "/var/log/openvswitch".
[root@localhost ~]# ovs-vsctl show
bb6aa323-f96b-4872-98f8-7aa87fc99958
Bridge "br0"
datapath_type: netdev
Port "dpdk1"
Interface "dpdk1"
type: dpdk
options: {dpdk-devargs="0000:05:00.0"}
error: "could not open network device dpdk1 (Address family not supported by protocol)"
Port "br0"
Interface "br0"
type: internal
[root@localhost ~]#
查看網卡與dpdk綁定狀態
[root@localhost dpdk-19.11]# ./usertools/dpdk-devbind.py --status Network devices using DPDK-compatible driver ============================================ 0000:05:00.0 'Hi1822 Family (2*25GE) 0200' drv=vfio-pci unused=hinic 0000:06:00.0 'Hi1822 Family (2*25GE) 0200' drv=vfio-pci unused=hinic Network devices using kernel driver =================================== 0000:7d:00.0 'HNS GE/10GE/25GE RDMA Network Controller a222' if=enp125s0f0 drv=hns3 unused=hns_roce_hw_v2,vfio-pci *Active* 0000:7d:00.1 'HNS GE/10GE/25GE Network Controller a221' if=enp125s0f1 drv=hns3 unused=vfio-pci 0000:7d:00.2 'HNS GE/10GE/25GE RDMA Network Controller a222' if=enp125s0f2 drv=hns3 unused=hns_roce_hw_v2,vfio-pci 0000:7d:00.3 'HNS GE/10GE/25GE Network Controller a221' if=enp125s0f3 drv=hns3 unused=vfio-pci No 'Baseband' devices detected ============================== No 'Crypto' devices detected ============================ No 'Eventdev' devices detected ============================== No 'Mempool' devices detected ============================= No 'Compress' devices detected ============================== No 'Misc (rawdev)' devices detected =================================== [root@localhost dpdk-19.11]#
編譯dpdk
修改配置文件config/common_linuxapp,這裡我們測試vhost模式,所以需要把下面兩個配置項設為y。 CONFIG_RTE_BUILD_COMBINE_LIBS=y CONFIG_RTE_LIBRTE_VHOST=y
[root@localhost dpdk-19.11]# grep -n CONFIG_RTE_LIBRTE_VHOST config/common_base
1030:CONFIG_RTE_LIBRTE_VHOST=y
1031:CONFIG_RTE_LIBRTE_VHOST_NUMA=n
1032:CONFIG_RTE_LIBRTE_VHOST_DEBUG=n
[root@localhost dpdk-19.11]# mkdir -p /usr/src/dpdk [root@localhost dpdk-19.11]# make install T=arm64-armv8a-linuxapp-gcc DESTDIR=/usr/src/dpdk -j 64
將dpdk安裝到當前系統
make install T=arm64-armv8a-linuxapp-gcc DESTDIR=/usr -j 64
安裝完成後可以使用dpdk的命令了
[root@localhost ~]# dpdk-devbind --status
Network devices using DPDK-compatible driver
============================================
0000:05:00.0 'Hi1822 Family (2*25GE) 0200' drv=vfio-pci unused=hinic
0000:06:00.0 'Hi1822 Family (2*25GE) 0200' drv=vfio-pci unused=hinic
Network devices using kernel driver
===================================
0000:7d:00.0 'HNS GE/10GE/25GE RDMA Network Controller a222' if=enp125s0f0 drv=hns3 unused=hns_roce_hw_v2,vfio-pci *Active*
0000:7d:00.1 'HNS GE/10GE/25GE Network Controller a221' if=enp125s0f1 drv=hns3 unused=vfio-pci
0000:7d:00.2 'HNS GE/10GE/25GE RDMA Network Controller a222' if=enp125s0f2 drv=hns3 unused=hns_roce_hw_v2,vfio-pci
0000:7d:00.3 'HNS GE/10GE/25GE Network Controller a221' if=enp125s0f3 drv=hns3 unused=vfio-pci
No 'Baseband' devices detected
==============================
No 'Crypto' devices detected
============================
No 'Eventdev' devices detected
==============================
No 'Mempool' devices detected
=============================
No 'Compress' devices detected
==============================
No 'Misc (rawdev)' devices detected
===================================
[root@localhost ~]# cd /usr/
[root@localhost usr]# find ./ -name dpdk*
./sbin/dpdk-devbind
./bin/dpdk-test-compress-perf
./bin/dpdk-pdump
./bin/dpdk-test-crypto-perf
./bin/dpdk-pmdinfo
./bin/dpdk-test-eventdev
./bin/dpdk-procinfo
./src/dpdk
./src/dpdk/sbin/dpdk-devbind
./src/dpdk/bin/dpdk-test-compress-perf
./src/dpdk/bin/dpdk-pdump
./src/dpdk/bin/dpdk-test-crypto-perf
./src/dpdk/bin/dpdk-pmdinfo
./src/dpdk/bin/dpdk-test-eventdev
./src/dpdk/bin/dpdk-procinfo
./src/dpdk/lib/modules/4.14.0-115.el7a.0.1.aarch64/extra/dpdk
./src/dpdk/include/dpdk
./src/dpdk/share/dpdk
./src/dpdk/share/dpdk/usertools/dpdk-pmdinfo.py
./src/dpdk/share/dpdk/usertools/dpdk-devbind.py
./src/dpdk/share/dpdk/usertools/dpdk-telemetry-client.py
./src/dpdk/share/dpdk/usertools/dpdk-setup.sh
./src/dpdk/share/dpdk/arm64-armv8a-linuxapp-gcc/app/dpdk-pmdinfogen
./lib/modules/4.14.0-115.el7a.0.1.aarch64/extra/dpdk
./include/dpdk
./share/dpdk
./share/dpdk/usertools/dpdk-pmdinfo.py
./share/dpdk/usertools/dpdk-devbind.py
./share/dpdk/usertools/dpdk-telemetry-client.py
./share/dpdk/usertools/dpdk-setup.sh
./share/dpdk/arm64-armv8a-linuxapp-gcc/app/dpdk-pmdinfogen
[root@localhost usr]# dpdk-devbind -u 0000:05:00.0 0000:06:00.0
編譯ovs
[root@localhost data1]# mkdir -p ~/rpmbuild/SOURCES [root@localhost data1]# cp openvswitch-2.13.1 ~/rpmbuild/SOURCES/ -r [root@localhost data1]#
[root@localhost SOURCES]# python3 -m pip install --upgrade pip Collecting pip Downloading pip-20.3.3-py2.py3-none-any.whl (1.5 MB) |████████████████████████████████| 1.5 MB 5.9 MB/s Installing collected packages: pip Attempting uninstall: pip Found existing installation: pip 20.2.4 Uninstalling pip-20.2.4: Successfully uninstalled pip-20.2.4 Successfully installed pip-20.3.3 [root@localhost SOURCES]# pip3 install sphinx -i https://pypi.tuna.tsinghua.edu.cn/simple
無法安裝openvswitch-2.13.1-1
[root@localhost SOURCES]# rpmbuild -bb --without check openvswitch-2.13.1/rhel/openvswitch.spec error: Failed build dependencies: python3-sphinx is needed by openvswitch-2.13.1-1.aarch64 unwind-devel is needed by openvswitch-2.13.1-1.aarch64
[root@localhost openvswitch-2.12.0]# ls acinclude.m4 config.log Documentation Makefile ovsdb tests windows aclocal.m4 config.status include Makefile.am package.m4 third-party xenserver appveyor.yml configure ipsec Makefile.in poc tutorial AUTHORS.rst configure.ac lib manpages.mk python utilities boot.sh CONTRIBUTING.rst libtool NEWS README.rst Vagrantfile build-aux datapath LICENSE NOTICE rhel Vagrantfile-FreeBSD config.h datapath-windows m4 ofproto selinux vswitchd config.h.in debian MAINTAINERS.rst ovn stamp-h1 vtep [root@localhost openvswitch-2.12.0]# ./configure --with-dpdk=/usr/src/dpdk --prefix=/usr --sysconfdir=/etc --localstatedir=/var
[root@localhost SOURCES]# pwd /root/rpmbuild/SOURCES [root@localhost SOURCES]# rpmbuild -bb --without check openvswitch-2.12.0/rhel/openvswitch.spec
[root@localhost ~]# ls /etc/openvswitch/
system-id.conf
[root@localhost ~]# ovsdb-tool create /etc/openvswitch/conf.db /usr/share/openvswitch/vswitch.ovsschema [root@localhost ~]# ls /etc/openvswitch/ conf.db system-id.conf [root@localhost ~]#
ovs啟動 :
ovs.sh
#啟動ovsdb server ovsdb-server /etc/openvswitch/conf.db \ -vconsole:emer -vsyslog:err -vfile:info \ --remote=punix:/var/run/openvswitch/db.sock \ --private-key=db:Open_vSwitch,SSL,private_key \ --certificate=db:Open_vSwitch,SSL,certificate \ --bootstrap-ca-cert=db:Open_vSwitch,SSL,ca_cert --no-chdir \ --log-file=/var/log/openvswitch/ovsdb-server.log \ --pidfile=/var/run/openvswitch/ovsdb-server.pid \ --detach --monitor #第一次啟動ovs需要初始化 ovs-vsctl --no-wait init #從ovs-v2.7.0開始,開啟dpdk功能已不是vswitchd進程啟動時指定--dpdk等參數了,而是通過設置ovsdb來開啟dpdk功能 ovs-vsctl --no-wait set Open_vSwitch . other_config:dpdk-init=true #啟動vswitchd進程 ovs-vswitchd unix:/var/run/openvswitch/db.sock \ -vconsole:emer -vsyslog:err -vfile:info --mlockall --no-chdir \ --log-file=/var/log/openvswitch/ovs-vswitchd.log \ --pidfile=/var/run/openvswitch/ovs-vswitchd.pid \ --detach --monitor
What DPDK version does each Open vSwitch release work with?
A: The following table lists the DPDK version against which the given versions of Open vSwitch will successfully build.
Open vSwitch DPDK 2.2.x 1.6 2.3.x 1.6 2.4.x 2.0 2.5.x 2.2 2.6.x 16.07.2 2.7.x 16.11.9 2.8.x 17.05.2 2.9.x 17.11.4 2.10.x 17.11.4 2.11.x 18.11.6 2.12.x 18.11.6 2.13.x 19.11.0
原來各個版本的ovs都有對應的dpdk版本:當前ovs版本是2.12(對應dpdk 18.11.6),而安裝的dpdk是19.11,所以要下載一個2.13版本的ovs(對應dpdk 19.11.0)。
2.13需要更高的python版本(>3.4),先安裝python
wget https://www.python.org/ftp/python/3.7.0/Python-3.7.0a1.tar.xz
要安裝python 3.7,
Python-3.7.0a1.tar.xz有問題
tar -xvf Python-3.7.0a1.tar.xz ls cd Python-3.7.0a1 ls ./configure --prefix=/usr/local/python3 make && make install yum install libffi-devel make && make install ln -s /usr/local/python3/bin/python3 /usr/local/bin/python3 ln -s /usr/local/python3/bin/pip3 /usr/local/bin/pip3 python3 -V pip3 -V
[root@localhost ~]# python3 -V
Python 3.7.0a1
[root@localhost ~]# pip3 -V
pip 9.0.1 from /usr/local/python3/lib/python3.7/site-packages (python 3.7)
[root@localhost ~]#
rte_eth_pcap.c:(.text+0x6b0): undefined reference to `pcap_dump_close' rte_eth_pcap.c:(.text+0x6c4): undefined reference to `pcap_close' rte_eth_pcap.c:(.text+0x700): undefined reference to `pcap_close' rte_eth_pcap.c:(.text+0x790): undefined reference to `pcap_close' /usr/src/dpdk/lib/librte_pmd_pcap.a(rte_eth_pcap.o): In function `open_rx_pcap': rte_eth_pcap.c:(.text+0x15b0): undefined reference to `pcap_open_offline' rte_eth_pcap.c:(.text+0x1624): undefined reference to `pcap_close' /usr/src/dpdk/lib/librte_pmd_pcap.a(rte_eth_pcap.o): In function `open_rx_tx_iface': rte_eth_pcap.c:(.text+0x1d98): undefined reference to `pcap_open_live' /usr/src/dpdk/lib/librte_pmd_pcap.a(rte_eth_pcap.o): In function `open_tx_iface': rte_eth_pcap.c:(.text+0x1e44): undefined reference to `pcap_open_live' rte_eth_pcap.c:(.text+0x1ee4): undefined reference to `pcap_close' /usr/src/dpdk/lib/librte_pmd_pcap.a(rte_eth_pcap.o): In function `open_tx_pcap': rte_eth_pcap.c:(.text+0x21ac): undefined reference to `pcap_open_dead' rte_eth_pcap.c:(.text+0x21bc): undefined reference to `pcap_dump_open' rte_eth_pcap.c:(.text+0x21cc): undefined reference to `pcap_close' rte_eth_pcap.c:(.text+0x2214): undefined reference to `pcap_dump_close' rte_eth_pcap.c:(.text+0x2230): undefined reference to `pcap_close' /usr/src/dpdk/lib/librte_pmd_pcap.a(rte_eth_pcap.o): In function `rx_iface_args_process': rte_eth_pcap.c:(.text+0x2304): undefined reference to `pcap_open_live' rte_eth_pcap.c:(.text+0x237c): undefined reference to `pcap_setdirection' rte_eth_pcap.c:(.text+0x23dc): undefined reference to `pcap_geterr' rte_eth_pcap.c:(.text+0x2464): undefined reference to `pcap_close' /usr/src/dpdk/lib/librte_pmd_pcap.a(rte_eth_pcap.o): In function `eth_dev_start': rte_eth_pcap.c:(.text+0x3230): undefined reference to `pcap_open_dead' rte_eth_pcap.c:(.text+0x3244): undefined reference to `pcap_dump_open' rte_eth_pcap.c:(.text+0x3254): undefined reference to `pcap_close' rte_eth_pcap.c:(.text+0x3300): undefined 
reference to `pcap_open_live' rte_eth_pcap.c:(.text+0x33dc): undefined reference to `pcap_open_live' rte_eth_pcap.c:(.text+0x33fc): undefined reference to `pcap_open_offline' rte_eth_pcap.c:(.text+0x345c): undefined reference to `pcap_open_live' rte_eth_pcap.c:(.text+0x3548): undefined reference to `pcap_close' /usr/src/dpdk/lib/librte_pmd_pcap.a(rte_eth_pcap.o): In function `eth_rx_queue_setup': rte_eth_pcap.c:(.text+0x3614): undefined reference to `pcap_next' rte_eth_pcap.c:(.text+0x3628): undefined reference to `pcap_close' rte_eth_pcap.c:(.text+0x3640): undefined reference to `pcap_open_offline' rte_eth_pcap.c:(.text+0x36ec): undefined reference to `pcap_next' /usr/src/dpdk/lib/librte_pmd_pcap.a(rte_eth_pcap.o): In function `eth_pcap_rx': rte_eth_pcap.c:(.text+0x4490): undefined reference to `pcap_next' /usr/src/dpdk/lib/librte_pmd_pcap.a(rte_eth_pcap.o): In function `eth_pcap_tx': rte_eth_pcap.c:(.text+0x4de8): undefined reference to `pcap_sendpacket' /usr/src/dpdk/lib/librte_pmd_pcap.a(rte_eth_pcap.o): In function `eth_pcap_tx_dumper': rte_eth_pcap.c:(.text+0x5588): undefined reference to `pcap_dump' rte_eth_pcap.c:(.text+0x5750): undefined reference to `pcap_dump_flush
更改 arm64-armv8a-linuxapp-gcc/.config
將
CONFIG_RTE_LIBRTE_PMD_PCAP=y CONFIG_RTE_LIBRTE_PDUMP=y
改為
CONFIG_RTE_LIBRTE_PMD_PCAP=n CONFIG_RTE_LIBRTE_PDUMP=n
https://docs.mellanox.com/pages/releaseview.action?pageId=15053908
./boot.sh
./configure \
--with-dpdk=/usr/src/dpdk \
--prefix=/usr \
--exec-prefix=/usr \
--sysconfdir=/etc \
--localstatedir=/var
make -j 64
make install -j 64
mkdir -p /etc/openvswitch
mkdir -p /var/run/openvswitch
[root@localhost ovs-master]# vsdb-tool create /etc/openvswitch/conf.db /usr/share/openvswitch/vswitch.ovsschema -bash: vsdb-tool: command not found [root@localhost ovs-master]# ovsdb-tool create /etc/openvswitch/conf.db /usr/share/openvswitch/vswitch.ovsschema [root@localhost ovs-master]# cd - /root [root@localhost ~]# bash ovs.sh
[root@localhost ~]# ovs-appctl --version
ovs-appctl (Open vSwitch) 2.13.90 [root@localhost ~]# ovs-vsctl add-br br0 -- set bridge br0 datapath_type=netdev [root@localhost ~]# ovs-vsctl add-port br0 dpdk1 -- set Interface dpdk1 type=dpdk options:dpdk-devargs=0000:05:00.0 [root@localhost ~]# ovs-vsctl show 073f49bf-8e93-417b-9b46-e4431099f662 Bridge br0 datapath_type: netdev Port dpdk1 Interface dpdk1 type: dpdk options: {dpdk-devargs="0000:05:00.0"} Port br0 Interface br0 type: internal [root@localhost ~]#
[root@localhost ~]# dpdk-devbind -s Network devices using DPDK-compatible driver ============================================ 0000:05:00.0 'Hi1822 Family (2*25GE) 0200' drv=vfio-pci unused=hinic 0000:06:00.0 'Hi1822 Family (2*25GE) 0200' drv=vfio-pci unused=hinic Network devices using kernel driver =================================== 0000:7d:00.0 'HNS GE/10GE/25GE RDMA Network Controller a222' if=enp125s0f0 drv=hns3 unused=hns_roce_hw_v2,vfio-pci *Active* 0000:7d:00.1 'HNS GE/10GE/25GE Network Controller a221' if=enp125s0f1 drv=hns3 unused=vfio-pci 0000:7d:00.2 'HNS GE/10GE/25GE RDMA Network Controller a222' if=enp125s0f2 drv=hns3 unused=hns_roce_hw_v2,vfio-pci 0000:7d:00.3 'HNS GE/10GE/25GE Network Controller a221' if=enp125s0f3 drv=hns3 unused=vfio-pci No 'Baseband' devices detected ============================== No 'Crypto' devices detected ============================ No 'Eventdev' devices detected ============================== No 'Mempool' devices detected ============================= No 'Compress' devices detected ============================== No 'Misc (rawdev)' devices detected =================================== [root@localhost ~]#
[root@localhost ~]# ovs-vsctl show 073f49bf-8e93-417b-9b46-e4431099f662 Bridge br0 datapath_type: netdev Port dpdk1 Interface dpdk1 type: dpdk options: {dpdk-devargs="0000:05:00.0"} Port br0 Interface br0 type: internal [root@localhost ~]# ip a
11: ovs-netdev: <BROADCAST,MULTICAST,PROMISC> mtu 1500 qdisc noop state DOWN group default qlen 1000
link/ether 76:ac:03:ea:46:dc brd ff:ff:ff:ff:ff:ff
12: br0: <BROADCAST,MULTICAST,PROMISC> mtu 1500 qdisc noop state DOWN group default qlen 1000
link/ether 44:a1:91:a4:9b:eb brd ff:ff:ff:ff:ff:ff
[root@localhost ~]# lshw -c network -businfo
Bus info Device Class Description
========================================================
pci@0000:05:00.0 network Hi1822 Family (2*25GE)
pci@0000:06:00.0 network Hi1822 Family (2*25GE)
pci@0000:7d:00.0 enp125s0f0 network HNS GE/10GE/25GE RDMA Network Controller
pci@0000:7d:00.1 enp125s0f1 network HNS GE/10GE/25GE Network Controller
pci@0000:7d:00.2 enp125s0f2 network HNS GE/10GE/25GE RDMA Network Controller
pci@0000:7d:00.3 enp125s0f3 network HNS GE/10GE/25GE Network Controller
ovs-netdev network Ethernet interface
br0 network Ethernet interface
[root@localhost ovs]# ovsdb-server /etc/openvswitch/conf.db --remote=punix:/var/run/openvswitch/db.sock --private-key=db:Open_vSwitch,SSL,private_key --certificate=db:Open_vSwitch,SSL,certificate --bootstrap-ca-cert=db:Open_vSwitch,SSL,ca_cert --no-chdir --log-file=/var/log/openvswitch/ovsdb-server.log --pidfile=/var/run/openvswitch/ovsdb-server.pid --detach --monitor 2020-12-17T01:15:33Z|00001|vlog|INFO|opened log file /var/log/openvswitch/ovsdb-server.log [root@localhost ovs]# ps -elf | grep ovsdb-server 1 S root 10307 1 0 80 0 - 294 do_wai 20:15 ? 00:00:00 ovsdb-server: monitoring pid 10308 (healthy) 1 S root 10308 10307 0 80 0 - 302 poll_s 20:15 ? 00:00:00 ovsdb-server /etc/openvswitch/conf.db --remote=punix:/var/run/openvswitch/db.sock --private-key=db:Open_vSwitch,SSL,private_key --certificate=db:Open_vSwitch,SSL,certificate --bootstrap-ca-cert=db:Open_vSwitch,SSL,ca_cert --no-chdir --log-file=/var/log/openvswitch/ovsdb-server.log --pidfile=/var/run/openvswitch/ovsdb-server.pid --detach --monitor 0 S root 10310 10236 0 80 0 - 1729 pipe_w 20:15 pts/0 00:00:00 grep --color=auto ovsdb-server [root@localhost ovs]#
[root@localhost ovs]# ovs-vsctl --no-wait set Open_vSwitch . other_config:dpdk-init=true [root@localhost ovs]# ovs-vswitchd unix:/var/run/openvswitch/db.sock \ > -vconsole:emer -vsyslog:err -vfile:info --mlockall --no-chdir \ > --log-file=/var/log/openvswitch/ovs-vswitchd.log \ > --pidfile=/var/run/openvswitch/ovs-vswitchd.pid \ > --detach --monitor [root@localhost ovs]#
參考https://docs.mellanox.com/pages/releaseview.action?pageId=15053908
[root@localhost ovs]# ovs-vsctl --no-wait set Open_vSwitch . other_config:dpdk-socket-mem="2048,0" [root@localhost ovs]# ovs-vsctl --no-wait set Open_vSwitch . other_config:pmd-cpu-mask=0xe [root@localhost ovs]# ovs-vsctl add-br br0 -- set bridge br0 datapath_type=netdev [root@localhost ovs]# ovs-vsctl add-port br0 dpdk0 -- set Interface dpdk0 type=dpdk options:dpdk-devargs=0000:07:00.0,n_rxq_desc=1024,n_txq_desc=1024,n_rxq=1,pmd-rxq-affinity="0:1" ofport_request=1 ovs-vsctl: Error detected while setting up 'dpdk0': Error attaching device '0000:07:00.0,n_rxq_desc=1024,n_txq_desc=1024,n_rxq=1,pmd-rxq-affinity=0:1' to DPDK. See ovs-vswitchd log for details. ovs-vsctl: The default log directory is "/var/log/openvswitch". [root@localhost ovs]# lspci | grep -i ehre [root@localhost ovs]# lspci | grep -i ether 05:00.0 Ethernet controller: Huawei Technologies Co., Ltd. Hi1822 Family (2*25GE) (rev 45) 06:00.0 Ethernet controller: Huawei Technologies Co., Ltd. Hi1822 Family (2*25GE) (rev 45) 7d:00.0 Ethernet controller: Huawei Technologies Co., Ltd. HNS GE/10GE/25GE RDMA Network Controller (rev 21) 7d:00.1 Ethernet controller: Huawei Technologies Co., Ltd. HNS GE/10GE/25GE Network Controller (rev 21) 7d:00.2 Ethernet controller: Huawei Technologies Co., Ltd. HNS GE/10GE/25GE RDMA Network Controller (rev 21) 7d:00.3 Ethernet controller: Huawei Technologies Co., Ltd. 
HNS GE/10GE/25GE Network Controller (rev 21) [root@localhost ovs]# ovs-vsctl add-port br0 dpdk0 -- set Interface dpdk0 type=dpdk options:dpdk-devargs=0000:05:00.0,n_rxq_desc=1024,n_txq_desc=1024,n_rxq=1,pmd-rxq-affinity="0:1" ofport_request=1 ovs-vsctl: cannot create a port named dpdk0 because a port named dpdk0 already exists on bridge br0 [root@localhost ovs]# ovs-vsctl del-port br0 dpdk0 [root@localhost ovs]# ovs-vsctl add-port br0 dpdk0 -- set Interface dpdk0 type=dpdk options:dpdk-devargs=0000:05:00.0,n_rxq_desc=1024,n_txq_desc=1024,n_rxq=1,pmd-rxq-affinity="0:1" ofport_request=1 ovs-vsctl: Error detected while setting up 'dpdk0': Error attaching device '0000:05:00.0,n_rxq_desc=1024,n_txq_desc=1024,n_rxq=1,pmd-rxq-affinity=0:1' to DPDK. See ovs-vswitchd log for details. ovs-vsctl: The default log directory is "/var/log/openvswitch". [root@localhost ovs]#
2020-12-17T01:21:12.026Z|00122|dpdk|INFO|EAL: Releasing pci mapped resource for 0000:05:00.0 2020-12-17T01:21:12.026Z|00123|dpdk|INFO|EAL: Calling pci_unmap_resource for 0000:05:00.0 at 0x8300260000 2020-12-17T01:21:12.026Z|00124|dpdk|INFO|EAL: Calling pci_unmap_resource for 0000:05:00.0 at 0x8300290000 2020-12-17T01:21:12.183Z|00125|dpdk|ERR|EAL: Driver cannot attach the device (0000:05:00.0) 2020-12-17T01:21:12.183Z|00126|dpdk|ERR|EAL: Failed to attach device on primary process 2020-12-17T01:21:12.183Z|00127|netdev_dpdk|WARN|Error attaching device '0000:05:00.0,n_rxq_desc=1024,n_txq_desc=1024,n_rxq=1,pmd-rxq-affinity=0:1' to DPDK 2020-12-17T01:21:12.183Z|00128|netdev|WARN|dpdk0: could not set configuration (Invalid argument) 2020-12-17T01:21:12.183Z|00129|dpdk|ERR|Invalid port_id=32
[root@localhost ovs]# ovs-vsctl get Open_vSwitch . dpdk_initialized true [root@localhost ovs]#
[root@localhost ovs]# ovs-vsctl show
c013fe69-c1a7-40dd-833b-bef8cd04d43e
Bridge br0
datapath_type: netdev
Port dpdk0
Interface dpdk0
type: dpdk
options: {dpdk-devargs="class=eth,mac=44:a1:91:a4:9b:eb"}
error: "Error attaching device 'class=eth,mac=44:a1:91:a4:9b:eb' to DPDK"
Port br0
Interface br0
type: internal
[root@localhost ovs]# ovs-vsctl del-port br0 dpdk0
[root@localhost ovs]# ovs-vsctl show
c013fe69-c1a7-40dd-833b-bef8cd04d43e
Bridge br0
datapath_type: netdev
Port br0
Interface br0
type: internal
[root@localhost ovs]#
原來是鯤鵬服務器的網卡出問題了,重啟服務器解決
[root@localhost ovs]# ovs-vsctl add-br br0 -- set bridge br0 datapath_type=netdev ovs-vsctl: cannot create a bridge named br0 because a bridge named br0 already exists [root@localhost ovs]# ovs-vsctl show c013fe69-c1a7-40dd-833b-bef8cd04d43e Bridge br0 datapath_type: netdev Port br0 Interface br0 type: internal [root@localhost ovs]# ovs-vsctl add-port br0 dpdk1 -- set Interface dpdk1 type=dpdk options:dpdk-devargs=0000:05:00.0 [root@localhost ovs]#
centos下安裝qemu5.1
yum -y install libcap-ng-devel
yum -y install librbd\*
target/arm/translate-a64.c:12998: int rmode= -1;
[root@localhost ovs]# bash kp_succ.sh qemu-system-aarch64: -chardev socket,id=char1,path=/var/run/openvswitch/dpdk1: Failed to connect to '/var/run/openvswitch/dpdk1': No such file or directory
沒有添加虛擬端口 [root@localhost ovs]# ls /var/run/openvswitch/ br0.mgmt br0.snoop db.sock ovsdb-server.14326.ctl ovsdb-server.pid ovs-vswitchd.14338.ctl ovs-vswitchd.pid [root@localhost ovs]# ovs-vsctl add-port br0 vhost-user1 -- set Interface vhost-user1 type=dpdkvhostuser [root@localhost ovs]# ls /var/run/openvswitch/ br0.mgmt br0.snoop db.sock ovsdb-server.14326.ctl ovsdb-server.pid ovs-vswitchd.14338.ctl ovs-vswitchd.pid vhost-user1 [root@localhost ovs]#
[root@localhost dpdk-19.11]# ovs-vsctl list open_vswitch _uuid : c013fe69-c1a7-40dd-833b-bef8cd04d43e bridges : [c4b37097-436c-4c07-9f0f-8979354dcd93] cur_cfg : 90 datapath_types : [netdev, system] datapaths : {} db_version : [] dpdk_initialized : true dpdk_version : "DPDK 19.11.0" external_ids : {} iface_types : [dpdk, dpdkr, dpdkvhostuser, dpdkvhostuserclient, erspan, geneve, gre, internal, ip6erspan, ip6gre, lisp, patch, stt, system, tap, vxlan] manager_options : [] next_cfg : 90 other_config : {dpdk-init="true", dpdk-socket-mem="2048,2048", pmd-cpu-mask="0xe"} ovs_version : [] ssl : [] statistics : {} system_type : [] system_version : []
-object memory-backend-file,id=mem,size=6144M,mem-path=/dev/hugepages,share=on \
qemu-system-aarch64: total memory for NUMA nodes (0x180000000) should equal RAM size (0x100000000) [root@localhost ovs]#
[root@localhost ovs]# numactl --hardware available: 4 nodes (0-3) node 0 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 node 0 size: 130055 MB node 0 free: 94853 MB node 1 cpus: 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 node 1 size: 130937 MB node 1 free: 97351 MB node 2 cpus: 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 node 2 size: 130937 MB node 2 free: 89758 MB node 3 cpus: 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 node 3 size: 130935 MB node 3 free: 87078 MB node distances: node 0 1 2 3 0: 10 16 32 33 1: 16 10 25 32 2: 32 25 10 16 3: 33 32 16 10 [root@localhost ovs]#
qemu-system-aarch64: -device virtio-net-pci,netdev=mynet1,mac=00:00:00:00:00:01,mrg_rxbuf=off: failed to find romfile "efi-virtio.rom"
Installing /data1/qemu5.1/qemu/pc-bios/efi-eepro100.rom to /usr/share/qemu/qemu Installing /data1/qemu5.1/qemu/pc-bios/efi-ne2k_pci.rom to /usr/share/qemu/qemu Installing /data1/qemu5.1/qemu/pc-bios/efi-pcnet.rom to /usr/share/qemu/qemu Installing /data1/qemu5.1/qemu/pc-bios/efi-rtl8139.rom to /usr/share/qemu/qemu Installing /data1/qemu5.1/qemu/pc-bios/efi-virtio.rom to /usr/share/qemu/qemu Installing /data1/qemu5.1/qemu/pc-bios/efi-e1000e.rom to /usr/share/qemu/qemu Installing /data1/qemu5.1/qemu/pc-bios/efi-vmxnet3.rom to /usr/share/qemu/qemu Installing /data1/qemu5.1/qemu/pc-bios/qemu-nsis.bmp to /usr/share/qemu/qemu
ln -s /usr/share/qemu/qemu/ /usr/local/share/qemu/qemu
qemu-system-aarch64 -name vm2 -nographic \ -enable-kvm -M virt,usb=off -cpu host -smp 2 -m 4096 \ -global virtio-blk-device.scsi=off \ -device virtio-scsi-device,id=scsi \ -kernel vmlinuz-4.18 --append "/lib/systemd/systemd console=ttyAMA0 root=UUID=6a09973e-e8fd-4a6d-a8c0-1deb9556f477" \ -initrd initramfs-4.18 \ -drive file=vhuser-test1.qcow2 \ -m 2048M -numa node,memdev=mem -mem-prealloc \ -object memory-backend-file,id=mem,size=2048M,mem-path=/dev/hugepages,share=on \ -chardev socket,id=char1,path=$VHOST_SOCK_DIR/vhost-user1 \ -netdev type=vhost-user,id=mynet1,chardev=char1,vhostforce \ -device virtio-net-pci,netdev=mynet1,mac=00:00:00:00:00:01,mrg_rxbuf=off \
Adding vhost-user ports to the guest (QEMU) To begin, you must attach the vhost-user device sockets to the guest. To do this, you must pass the following parameters to QEMU: -chardev socket,id=char1,path=/usr/local/var/run/openvswitch/vhost-user-1 -netdev type=vhost-user,id=mynet1,chardev=char1,vhostforce -device virtio-net-pci,mac=00:00:00:00:00:01,netdev=mynet1 where vhost-user-1 is the name of the vhost-user port added to the switch. Repeat the above parameters for multiple devices, changing the chardev path and id as necessary. Note that a separate and different chardev path needs to be specified for each vhost-user device.
For example you have a second vhost-user port named vhost-user-2, you append your QEMU command line with an additional set of parameters: -chardev socket,id=char2,path=/usr/local/var/run/openvswitch/vhost-user-2 -netdev type=vhost-user,id=mynet2,chardev=char2,vhostforce -device virtio-net-pci,mac=00:00:00:00:00:02,netdev=mynet2 In addition, QEMU must allocate the VM’s memory on hugetlbfs. vhost-user ports access a virtio-net device’s virtual rings and packet buffers mapping the VM’s physical memory on hugetlbfs.
To enable vhost-user ports to map the VM’s memory into their process address space, pass the following parameters to QEMU: -object memory-backend-file,id=mem,size=4096M,mem-path=/dev/hugepages,share=on -numa node,memdev=mem -mem-prealloc Finally, you may wish to enable multiqueue support. This is optional but, should you wish to enable it, run: -chardev socket,id=char2,path=/usr/local/var/run/openvswitch/vhost-user-2 -netdev type=vhost-user,id=mynet2,chardev=char2,vhostforce,queues=$q -device virtio-net-pci,mac=00:00:00:00:00:02,netdev=mynet2,mq=on,vectors=$v
[root@localhost ovs]# lsof -nn | grep vhost-user1 | awk '{print $1}' | uniq ovs-vswit eal-intr- rte_mp_ha dpdk_watc urcu2 ct_clean3 ipf_clean handler13 handler14 handler15 handler16 handler17 handler18 handler19 handler20 handler21 handler22 revalidat vhost-eve pmd-c03/i pmd-c01/i pmd-c02/i
沒有名稱為 vhost-$pid 的 vhost 工作進程,其中 pid 為 QEMU 的進程號
[root@localhost ovs]# ps -elf | grep vhost- 2 S root 114707 114706 1 80 0 - 62390 poll_s 21:38 pts/1 00:00:10 qemu-system-aarch64 -name vm2 -nographic -enable-kvm -M virt,usb=off -cpu host -smp 2 -m 4096 -global virtio-blk-device.scsi=off -device virtio-scsi-device,id=scsi -kernel vmlinuz-4.18 --append console=ttyAMA0 root=UUID=6a09973e-e8fd-4a6d-a8c0-1deb9556f477 -initrd initramfs-4.18 -drive file=vhuser-test1.qcow2 -m 2048M -numa node,memdev=mem -mem-prealloc -object memory-backend-file,id=mem,size=2048M,mem-path=/dev/hugepages,share=on -chardev socket,id=char1,path=/var/run/openvswitch/vhost-user1 -netdev type=vhost-user,id=mynet1,chardev=char1,vhostforce -device virtio-net-pci,netdev=mynet1,mac=00:00:00:00:00:01,mrg_rxbuf=off 0 S root 114948 113424 0 80 0 - 1729 pipe_w 21:51 pts/0 00:00:00 grep --color=auto vhost- [root@localhost ovs]# ps -elf | grep qemu 2 S root 114707 114706 1 80 0 - 62390 poll_s 21:38 pts/1 00:00:11 qemu-system-aarch64 -name vm2 -nographic -enable-kvm -M virt,usb=off -cpu host -smp 2 -m 4096 -global virtio-blk-device.scsi=off -device virtio-scsi-device,id=scsi -kernel vmlinuz-4.18 --append console=ttyAMA0 root=UUID=6a09973e-e8fd-4a6d-a8c0-1deb9556f477 -initrd initramfs-4.18 -drive file=vhuser-test1.qcow2 -m 2048M -numa node,memdev=mem -mem-prealloc -object memory-backend-file,id=mem,size=2048M,mem-path=/dev/hugepages,share=on -chardev socket,id=char1,path=/var/run/openvswitch/vhost-user1 -netdev type=vhost-user,id=mynet1,chardev=char1,vhostforce -device virtio-net-pci,netdev=mynet1,mac=00:00:00:00:00:01,mrg_rxbuf=off 0 S root 114959 113424 0 80 0 - 1729 pipe_w 21:51 pts/0 00:00:00 grep --color=auto qemu [root@localhost ovs]# ps -elf | grep 114707 2 S root 114707 114706 1 80 0 - 62390 poll_s 21:38 pts/1 00:00:11 qemu-system-aarch64 -name vm2 -nographic -enable-kvm -M virt,usb=off -cpu host -smp 2 -m 4096 -global virtio-blk-device.scsi=off -device virtio-scsi-device,id=scsi -kernel vmlinuz-4.18 --append console=ttyAMA0 
root=UUID=6a09973e-e8fd-4a6d-a8c0-1deb9556f477 -initrd initramfs-4.18 -drive file=vhuser-test1.qcow2 -m 2048M -numa node,memdev=mem -mem-prealloc -object memory-backend-file,id=mem,size=2048M,mem-path=/dev/hugepages,share=on -chardev socket,id=char1,path=/var/run/openvswitch/vhost-user1 -netdev type=vhost-user,id=mynet1,chardev=char1,vhostforce -device virtio-net-pci,netdev=mynet1,mac=00:00:00:00:00:01,mrg_rxbuf=off 0 S root 114963 113424 0 80 0 - 1729 pipe_w 21:51 pts/0 00:00:00 grep --color=auto 114707 [root@localhost ovs]#
[root@localhost ovs]# ps -eLo pid,psr,comm | grep pmd 13435 3 pmd-c03/id:133 13435 1 pmd-c01/id:134 13435 2 pmd-c02/id:135
軟件版本
[root@localhost ovs]# gcc --version gcc (GCC) 5.4.0 Copyright (C) 2015 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. [root@localhost ovs]# qemu-system-aarch -version -bash: qemu-system-aarch: command not found [root@localhost ovs]# qemu-system-aarch64 -version QEMU emulator version 5.2.0 (v5.2.0-dirty) Copyright (c) 2003-2020 Fabrice Bellard and the QEMU Project developers [root@localhost ovs]# g++ --version g++ (GCC) 5.4.0 Copyright (C) 2015 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. [root@localhost ovs]#
test case 2
vm
qemu-system-aarch64 -name vm2 -nographic \ -enable-kvm -M virt,usb=off -cpu host -smp 2 -m 4096 \ -global virtio-blk-device.scsi=off \ -device virtio-scsi-device,id=scsi \ -kernel vmlinuz-4.18 --append "console=ttyAMA0 root=UUID=6a09973e-e8fd-4a6d-a8c0-1deb9556f477" \ -initrd initramfs-4.18 \ -drive file=vhuser-test1.qcow2 \ -m 2048M -numa node,memdev=mem -mem-prealloc \ -object memory-backend-file,id=mem,size=2048M,mem-path=/dev/hugepages,share=on \ -chardev socket,id=char1,path=/tmp/vhost1,server \ -netdev type=vhost-user,id=mynet1,chardev=char1,vhostforce \ -device virtio-net-pci,netdev=mynet1,mac=00:00:00:00:00:01,mrg_rxbuf=off \
[root@localhost ~]# ip a 1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000 link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00 inet 127.0.0.1/8 scope host lo valid_lft forever preferred_lft forever inet6 ::1/128 scope host valid_lft forever preferred_lft forever 2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000 link/ether 00:00:00:00:00:01 brd ff:ff:ff:ff:ff:ff inet6 fe80::200:ff:fe00:1/64 scope link valid_lft forever preferred_lft forever [root@localhost ~]# ip a add 10.10.103.229/24 dev eth0 [root@localhost ~]# ip a add 10.10.104.229/24 dev eth0 [root@localhost ~]# ping 10.10.104.81 PING 10.10.104.81 (10.10.104.81) 56(84) bytes of data. 64 bytes from 10.10.104.81: icmp_seq=1 ttl=64 time=0.473 ms 64 bytes from 10.10.104.81: icmp_seq=2 ttl=64 time=0.262 ms 64 bytes from 10.10.104.81: icmp_seq=3 ttl=64 time=0.248 ms 64 bytes from 10.10.104.81: icmp_seq=4 ttl=64 time=0.248 ms 64 bytes from 10.10.104.81: icmp_seq=5 ttl=64 time=0.302 ms 64 bytes from 10.10.104.81: icmp_seq=6 ttl=64 time=0.276 ms 64 bytes from 10.10.104.81: icmp_seq=7 ttl=64 time=0.273 ms 64 bytes from 10.10.104.81: icmp_seq=8 ttl=64 time=0.243 ms 64 bytes from 10.10.104.81: icmp_seq=9 ttl=64 time=0.272 ms 64 bytes from 10.10.104.81: icmp_seq=10 ttl=64 time=0.279 ms 64 bytes from 10.10.104.81: icmp_seq=11 ttl=64 time=0.221 ms 64 bytes from 10.10.104.81: icmp_seq=12 ttl=64 time=0.214 ms 64 bytes from 10.10.104.81: icmp_seq=13 ttl=64 time=0.259 ms 64 bytes from 10.10.104.81: icmp_seq=14 ttl=64 time=0.268 ms --- 10.10.104.81 ping statistics --- 14 packets transmitted, 14 received, 0% packet loss, time 13021ms rtt min/avg/max/mdev = 0.214/0.274/0.473/0.060 ms [root@localhost ~]#
switch
https://github.com/magnate3/LearningSwitch-DPDK
build/app/switch -c 3 -m 256
源碼分析
netdev_dpdk_vhost_construct
2021-02-02T02:22:33.857Z|00147|dpdk|INFO|VHOST_CONFIG: Linear buffers requested without external buffers, disabling host segmentation offloading support 2021-02-02T02:22:33.857Z|00148|dpdk|INFO|VHOST_CONFIG: vhost-user server: socket created, fd: 156 2021-02-02T02:22:33.857Z|00149|netdev_dpdk|INFO|Socket /var/run/openvswitch/vhost-user1 created for vhost-user port vhost-user1 2021-02-02T02:22:33.858Z|00150|dpdk|ERR|VHOST_CONFIG: failed to bind to /var/run/openvswitch/vhost-user1: Address already in use; remove it and try again 2021-02-02T02:22:33.858Z|00151|netdev_dpdk|ERR|rte_vhost_driver_start failed for vhost user port: vhost-user1 2021-02-02T02:22:33.858Z|00152|bridge|WARN|could not open network device vhost-user1 (Unknown error -1)
lib/librte_vhost/socket.c
2021-02-02T02:22:33.857Z|00148|dpdk|INFO|VHOST_CONFIG: vhost-user server: socket created, fd: 156
創建unix socket server
create_unix_socket(struct vhost_user_socket *vsocket) { int fd; struct sockaddr_un *un = &vsocket->un; fd = socket(AF_UNIX, SOCK_STREAM, 0); if (fd < 0) return -1; RTE_LOG(INFO, VHOST_CONFIG, "vhost-user %s: socket created, fd: %d\n", vsocket->is_server ? "server" : "client", fd); if (!vsocket->is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) { RTE_LOG(ERR, VHOST_CONFIG, "vhost-user: can't set nonblocking mode for socket, fd: " "%d (%s)\n", fd, strerror(errno)); close(fd); return -1; } memset(un, 0, sizeof(*un)); un->sun_family = AF_UNIX; strncpy(un->sun_path, vsocket->path, sizeof(un->sun_path)); un->sun_path[sizeof(un->sun_path) - 1] = '\0'; vsocket->socket_fd = fd; return 0; }
vhost_user_start_server
lib/librte_vhost/socket.c
2021-02-02T02:23:14.571Z|00183|dpdk|INFO|VHOST_CONFIG: bind to /var/run/openvswitch/vhost-user1
static int vhost_user_start_server(struct vhost_user_socket *vsocket) { int ret; int fd = vsocket->socket_fd; const char *path = vsocket->path; /* * bind () may fail if the socket file with the same name already * exists. But the library obviously should not delete the file * provided by the user, since we can not be sure that it is not * being used by other applications. Moreover, many applications form * socket names based on user input, which is prone to errors. * * The user must ensure that the socket does not exist before * registering the vhost driver in server mode. */ ret = bind(fd, (struct sockaddr *)&vsocket->un, sizeof(vsocket->un)); if (ret < 0) { RTE_LOG(ERR, VHOST_CONFIG, "failed to bind to %s: %s; remove it and try again\n", path, strerror(errno)); goto err; } RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path); ret = listen(fd, MAX_VIRTIO_BACKLOG); if (ret < 0) goto err; ret = fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection, NULL, vsocket); if (ret < 0) { RTE_LOG(ERR, VHOST_CONFIG, "failed to add listen fd %d to vhost server fdset\n", fd); goto err; } return 0; err: close(fd); return -1; }
poll_block
(gdb) bt #0 0x0000ffff8be54dc0 in poll () from /lib64/libc.so.6 #1 0x0000000000436eec in time_poll (pollfds=pollfds@entry=0x3b9a2f30, n_pollfds=2, handles=handles@entry=0x0, timeout_when=9223372036854775807, elapsed=elapsed@entry=0xffffc2b2e07c) at lib/timeval.c:326 #2 0x000000000042c8f0 in poll_block () at lib/poll-loop.c:364 #3 0x000000000040d3a4 in do_vsctl (idl=0x3b962900, n_commands=<optimized out>, commands=0x3b962490, args=0x3b9623f0 "ovs-vsctl add-port br0 vhost-user1 -- set Interface vhost-user1 type=dpdkvhostuser") at utilities/ovs-vsctl.c:2995 #4 main (argc=<optimized out>, argv=<optimized out>) at utilities/ovs-vsctl.c:203
(gdb) bt #0 0x0000ffff8a044dc0 in poll () from /lib64/libc.so.6 #1 0x0000000000436eec in time_poll (pollfds=pollfds@entry=0x3a513360, n_pollfds=2, handles=handles@entry=0x0, timeout_when=9223372036854775807, elapsed=elapsed@entry=0xfffffb48156c) at lib/timeval.c:326 #2 0x000000000042c8f0 in poll_block () at lib/poll-loop.c:364 #3 0x000000000040d3a4 in do_vsctl (idl=0x3a4d2d70, n_commands=<optimized out>, commands=0x3a4d2470, args=0x3a4d23f0 "ovs-vsctl del-port br0 vhost-user1") at utilities/ovs-vsctl.c:2995 #4 main (argc=<optimized out>, argv=<optimized out>) at utilities/ovs-vsctl.c:203 (gdb)
問題
問題1
could not create netdev dpdk1 of unknown type dpdk
ovs-vsctl --no-wait set Open_vSwitch . other_config:dpdk-init=true
dpdk|ERR|VHOST_CONFIG: failed to bind to /var/run/openvswitch/vhost-user1: Address already in use; remove it and try again
刪掉舊的
rm /var/run/openvswitch/vhost-user1
問題2
[root@localhost ovs]# ovs-vsctl show c013fe69-c1a7-40dd-833b-bef8cd04d43e Bridge br0 datapath_type: netdev Port vhost-user1 Interface vhost-user1 type: dpdkvhostuser error: "could not open network device vhost-user1 (Unknown error -1)" Port dpdk1 Interface dpdk1 type: dpdk options: {dpdk-devargs="0000:05:00.0"} Port br0 Interface br0 type: internal [root@localhost ovs]#
智能網卡
Representors
DPDK representors enable configuring a phy port to a guest (VM) machine.
OVS resides in the hypervisor which has one or more physical interfaces also known as the physical functions (PFs). If a PF supports SR-IOV it can be used to enable communication with the VMs via Virtual Functions (VFs). The VFs are virtual PCIe devices created from the physical Ethernet controller.
DPDK models a physical interface as a rte device on top of which an eth device is created. DPDK (version 18.xx) introduced the representors eth devices. A representor device represents the VF eth device (VM side) on the hypervisor side and operates on top of a PF. Representors are multi devices created on top of one PF.
For more information, refer to the DPDK documentation.
Prior to port representors there was a one-to-one relationship between the PF and the eth device. With port representors the relationship becomes one PF to many eth devices. In the case of two representor ports, when one of the ports is closed, the PCI bus cannot be detached until the second representor port is closed as well.
When configuring a PF-based port, OVS traditionally assigns the device PCI address in devargs. For an existing bridge called br0 and PCI address 0000:08:00.0, an add-port command is written as:
$ ovs-vsctl add-port br0 dpdk-pf -- set Interface dpdk-pf type=dpdk \
options:dpdk-devargs=0000:08:00.0
When configuring a VF-based port, DPDK uses an extended devargs syntax which has the following format:
BDBF,representor=[<representor id>]
This syntax shows that a representor is an enumerated eth device (with a representor ID) which uses the PF PCI address. The following commands add representors 3 and 5 using PCI device address 0000:08:00.0:
$ ovs-vsctl add-port br0 dpdk-rep3 -- set Interface dpdk-rep3 type=dpdk \
options:dpdk-devargs=0000:08:00.0,representor=[3]
$ ovs-vsctl add-port br0 dpdk-rep5 -- set Interface dpdk-rep5 type=dpdk \
options:dpdk-devargs=0000:08:00.0,representor=[5]
Important
Representor ports are configured prior to OVS invocation and independently of it, or by other means as well. Please consult the NIC vendor's instructions on how to establish representors.
Intel NICs ixgbe and i40e
In the following example we create one representor on PF address 0000:05:00.0. Once the NIC is bound to a DPDK-compatible PMD the representor is created:
# echo 1 > /sys/bus/pci/devices/0000\:05\:00.0/max_vfs
Mellanox NICs ConnectX-4, ConnectX-5 and ConnectX-6
In the following example we create two representors on PF address 0000:05:00.0 and net device name enp3s0f0.
- Ensure SR-IOV is enabled on the system.
Enable IOMMU in Linux by adding intel_iommu=on to kernel parameters, for example, using GRUB (see /etc/grub/grub.conf).
-
Verify the PF PCI address prior to representors creation:
# lspci | grep Mellanox 05:00.0 Ethernet controller: Mellanox Technologies MT27700 Family [ConnectX-4] 05:00.1 Ethernet controller: Mellanox Technologies MT27700 Family [ConnectX-4]
-
Create the two VFs on the compute node:
# echo 2 > /sys/class/net/enp3s0f0/device/sriov_numvfs
Verify the VFs creation:
# lspci | grep Mellanox 05:00.0 Ethernet controller: Mellanox Technologies MT27700 Family [ConnectX-4] 05:00.1 Ethernet controller: Mellanox Technologies MT27700 Family [ConnectX-4] 05:00.2 Ethernet controller: Mellanox Technologies MT27700 Family [ConnectX-4 Virtual Function] 05:00.3 Ethernet controller: Mellanox Technologies MT27700 Family [ConnectX-4 Virtual Function]
-
Unbind the relevant VFs 0000:05:00.2..0000:05:00.3:
# echo 0000:05:00.2 > /sys/bus/pci/drivers/mlx5_core/unbind
# echo 0000:05:00.3 > /sys/bus/pci/drivers/mlx5_core/unbind
-
Change e-switch mode.
The Mellanox NIC has an e-switch on it. Change the e-switch mode from legacy to switchdev using the PF PCI address:
# sudo devlink dev eswitch set pci/0000:05:00.0 mode switchdev
This will create the VF representors network devices in the host OS.
-
After setting the PF to switchdev mode bind back the relevant VFs:
# echo 0000:05:00.2 > /sys/bus/pci/drivers/mlx5_core/bind
# echo 0000:05:00.3 > /sys/bus/pci/drivers/mlx5_core/bind
-
Restart Open vSwitch
To verify representors correct configuration, execute:
$ ovs-vsctl show
and make sure no errors are indicated.
Port representors are an example of multi devices. There are NICs which support multi devices by other methods than representors for which a generic devargs syntax is used. The generic syntax is based on the device mac address:
class=eth,mac=<MAC address>
For example, the following command adds a port to a bridge called br0 using an eth device whose mac address is 00:11:22:33:44:55:
$ ovs-vsctl add-port br0 dpdk-mac -- set Interface dpdk-mac type=dpdk \
options:dpdk-devargs="class=eth,mac=00:11:22:33:44:55"
Representor specific configuration
In some topologies, a VF must be configured before being assigned to a guest (VM) machine. This configuration is done through VF-specific fields in the options column of the Interface table.
Important
Some DPDK ports use bifurcated drivers, which means that a kernel netdevice remains when Open vSwitch is stopped.
In such case, any configuration applied to a VF would remain set on the kernel netdevice, and be inherited from it when Open vSwitch is restarted, even if the options described in this section are unset from Open vSwitch.
-
Configure the VF MAC address:
$ ovs-vsctl set Interface dpdk-rep0 options:dpdk-vf-mac=00:11:22:33:44:55
The requested MAC address is assigned to the port and is listed as part of its options:
$ ovs-appctl dpctl/show
[...]
port 3: dpdk-rep0 (dpdk: configured_rx_queues=1, ..., dpdk-vf-mac=00:11:22:33:44:55, ...)
$ ovs-vsctl show
[...]
Port dpdk-rep0
Interface dpdk-rep0
type: dpdk
options: {dpdk-devargs="<representor devargs>", dpdk-vf-mac="00:11:22:33:44:55"}
$ ovs-vsctl get Interface dpdk-rep0 status
{dpdk-vf-mac="00:11:22:33:44:55", ...}
$ ovs-vsctl list Interface dpdk-rep0 | grep 'mac_in_use\|options'
mac_in_use : "00:11:22:33:44:55"
options : {dpdk-devargs="<representor devargs>", dpdk-vf-mac="00:11:22:33:44:55"}
The value listed as dpdk-vf-mac is only a request from the user and is possibly not yet applied.
When the requested configuration is successfully applied to the port, this MAC address is then also shown in the column mac_in_use of the Interface table. On failure however, mac_in_use will keep its previous value, which will thus differ from dpdk-vf-mac.
Open vSwitch* with Data Plane Development Kit: vHost User Multiqueue Configuration and Use
https://software.intel.com/content/www/us/en/develop/articles/configure-vhost-user-multiqueue-for-ovs-with-dpdk.html?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+ISNMain+%28Intel+Developer+Zone+Articles+Feed%29