概念
系統總線(System Bus)
系統總線是用來連接計算機硬件系統中若干主要部件(如:CPU、主存、I/O模塊)的總線。Intel公司新推出的芯片組中,對系統總線賦予了特定的含義,把CPU連接到北橋芯片的總線稱為系統總線,也稱為處理器總線,或叫前端總線(Front Side Bus)。CPU通過前端總線(FSB)連接到北橋芯片,進而通過北橋芯片和內存、顯卡交換數據。在系統總線上傳輸的有數據、地址和控制信息(控制信息包括:命令/定時/總線請求/總線允許/中斷請求/中斷允許/……等)。所以把系統總線也分成三組傳輸線:數據線、地址線、控制線。有時也把它們分別稱為:數據總線、地址總線、控制總線。
數據結構
Memory Control hub
設置MMCONFIG區域
DeviceClass --- DeviceState --- device_type_info
PCIDeviceClass --- PCIDevice --- pci_device_type_info
nullptr --- MCHPCIState --- mch_info
北橋
DeviceClass --- DeviceState --- device_type_info
SysBusDeviceClass --- SysBusDevice --- sysbus_device_type_info
PCIHostBridgeClass --- PCIHostState --- pci_host_type_info
nullptr --- PCIExpressHost --- pcie_host_type_info
nullptr --- Q35PCIHost --- q35_host_info
其中 main_system_bus 是系統總線,在pc_q35_init中初始化的q35_host是北橋芯片
北橋芯片連接內存、顯卡等高速設備
總線
BusClass --- BusState --- bus_info
PCIBusClass --- PCIBus --- pci_bus_info
nullptr --- nullptr --- pcie_bus_info
設備
DeviceClass --- DeviceState --- device_type_info
PCIDeviceClass --- PCIDevice --- pci_device_type_info
橋
DeviceClass --- DeviceState --- device_type_info
PCIDeviceClass --- PCIDevice --- pci_device_type_info
nullptr --- PCIBridge --- pci_bridge_type_info
PCIE
PCI Express Root Port
DeviceClass --- DeviceState --- device_type_info
PCIDeviceClass --- PCIDevice --- pci_device_type_info
nullptr --- PCIBridge --- pci_bridge_type_info
nullptr --- PCIEPort --- pcie_port_type_info
nullptr --- PCIESlot --- pcie_slot_type_info
PCIERootPortClass --- nullptr --- rp_info # 設置配置空間寫函數為rp_write_config,PCIDeviceClass的realize為rp_realize
nullptr --- nullptr --- ioh3420_info # 設置廠商標識
PCI Upstream Port
DeviceClass --- DeviceState --- device_type_info
PCIDeviceClass --- PCIDevice --- pci_device_type_info
nullptr --- PCIBridge --- pci_bridge_type_info
nullptr --- PCIEPort --- pcie_port_type_info
nullptr --- nullptr --- xio3130_upstream_info # 設置廠商標識,設置配置空間寫函數為xio3130_upstream_write_config,PCIDeviceClass的realize為xio3130_upstream_realize
PCI Downstream Port
DeviceClass --- DeviceState --- device_type_info
PCIDeviceClass --- PCIDevice --- pci_device_type_info
nullptr --- PCIBridge --- pci_bridge_type_info
nullptr --- PCIEPort --- pcie_port_type_info
nullptr --- PCIESlot --- pcie_slot_type_info
nullptr --- nullptr --- xio3130_downstream_info # 設置廠商標識,設置配置空間寫函數為xio3130_downstream_write_config,PCIDeviceClass的realize為xio3130_downstream_realize
函數分析
PCIDevice *pci_create_simple_multifunction(PCIBus *bus, int devfn,
bool multifunction,
const char *name);
在bus上掛載為name的設備
devfn為設備號(slot)與功能號(func)組合而成的編號(可以為負數,表示由總線自動分配),一般通過如下宏計算:
#define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07))
因此通過該宏算出的 devfn 是一個8bit的數字,高5bit為slot號,低3bit為func號
pci 設備初始化過程:先調用pci_qdev_realize(DeviceClass中的realize),再調用PCIDeviceClass中的realize
pci_qdev_realize:主要是分配配置空間,並設置設備配置空間的默認讀寫函數:pci_default_read_config、pci_default_write_config
PCIDeviceClass中的realize主要是(以網卡為例:e1000e_pci_realize):調用pci_register_bar關聯配置空間中的BAR與MemoryRegion(地址空間)
對配置空間的操作是通過0xcf8 和 0xcfc 端口實現的。0xcf8指定地址,0xcfc指定讀取或寫入的值
這兩個地址的映射是通過
/*
 * Abbreviated excerpt of q35_host_realize(): the declarations of the locals
 * `sbd` and `pci` are not shown here.
 * It attaches the two PCI configuration-access regions to the I/O ports
 * named by MCH_HOST_BRIDGE_CONFIG_ADDR and MCH_HOST_BRIDGE_CONFIG_DATA
 * (the surrounding notes give these as 0xcf8 and 0xcfc).
 */
static void q35_host_realize(DeviceState *dev, Error **errp)
{
/* Address register: conf_mem at MCH_HOST_BRIDGE_CONFIG_ADDR, with a size argument of 4. */
sysbus_add_io(sbd, MCH_HOST_BRIDGE_CONFIG_ADDR, &pci->conf_mem);
sysbus_init_ioports(sbd, MCH_HOST_BRIDGE_CONFIG_ADDR, 4);
/* Data register: data_mem at MCH_HOST_BRIDGE_CONFIG_DATA, with a size argument of 4. */
sysbus_add_io(sbd, MCH_HOST_BRIDGE_CONFIG_DATA, &pci->data_mem);
sysbus_init_ioports(sbd, MCH_HOST_BRIDGE_CONFIG_DATA, 4);
}
和
/*
 * Abbreviated excerpt of q35_host_initfn(): the declaration of the local
 * `phb` is not shown here.
 * It initialises the two 4-byte I/O memory regions used for PCI
 * configuration access and binds each one to its operation table.
 */
static void q35_host_initfn(Object *obj)
{
/* "pci-conf-idx": accesses are handled by pci_host_conf_le_ops, with phb as opaque. */
memory_region_init_io(&phb->conf_mem, obj, &pci_host_conf_le_ops, phb,
"pci-conf-idx", 4);
/* "pci-conf-data": accesses are handled by pci_host_data_le_ops, with phb as opaque. */
memory_region_init_io(&phb->data_mem, obj, &pci_host_data_le_ops, phb,
"pci-conf-data", 4);
}
其指定了對應端口的操作集合:pci_host_conf_le_ops、pci_host_data_le_ops
mmconfig的設置
PCIE總線是通過內存訪問PCIE設備的配置地址空間的,這段地址空間的起始位置就是MMCONFIG。可以通過 cat /proc/iomem 找到該區域的起始地址,然後用 ioremap 映射後進行訪問,使用完畢後用 iounmap 解除映射。
在QEMU中,這一映射是通過MCH實現的。首先在TYPE_PCIE_HOST_BRIDGE的對象初始化函數pcie_host_init中初始化MMCONFIG這段內存空間(主要是將該MemoryRegion的ops函數集合設為pcie_mmcfg_ops,也就是該段空間的讀寫函數)。然後在TYPE_MCH_PCI_DEVICE的config_write(mch_write_config)函數中,將該地址空間映射到物理地址空間。
自定義PCIE設備
#include "qemu/osdep.h"
#include "hw/pci/msi.h"
#include "stdio.h"
#include "qemu/timer.h"
/* QOM type name under which the example device is registered. */
#define TYPE_MYHW "my_hw"
/* Cast an object pointer to MYHWState via OBJECT_CHECK against TYPE_MYHW. */
#define MYHW(obj) OBJECT_CHECK(MYHWState,(obj),TYPE_MYHW)
/* Per-instance state of the example PCI device. */
typedef struct MYHWState{
/* Embedded parent PCI device object. */
PCIDevice parent_obj;
/* MMIO region that myhw_pci_realize() registers as BAR 0. */
MemoryRegion mmio;
/* Timer that my_timer_func() re-arms 2000 ms ahead each time it fires. */
QEMUTimer my_timer;
/* Set when my_timer_func() raises the interrupt; cleared by guest writes to offsets 0x10 and 0x30. */
bool irq_raise;
}MYHWState;
/*
 * MMIO read handler for the device's BAR 0 region.
 *
 * The device exposes no readable registers: every read is logged and
 * returns 0.
 *
 * @opaque: the MYHWState passed to memory_region_init_io() (unused here)
 * @addr:   byte offset of the access inside the region
 * @size:   access width in bytes
 *
 * Returns: always 0.
 */
static uint64_t my_hw_mmio_read(void *opaque, hwaddr addr, unsigned size)
{
    /*
     * hwaddr is a 64-bit type, so it is printed with PRIx64 rather than
     * "%lx", which is only correct where long is 64 bits wide.  size is
     * unsigned, hence %u.
     */
    printf("my_hw_mmio_read: addr:0x%" PRIx64 " , size: %u\n",
           (uint64_t)addr, size);
    return 0;
}
/*
 * Timer callback.
 *
 * Logs the tick, re-arms the timer 2000 ms ahead on the virtual clock,
 * and raises the PCI interrupt unless it is already marked as raised.
 *
 * @opaque: the MYHWState that owns the timer
 */
static void my_timer_func(void *opaque)
{
    MYHWState *state = opaque;
    int64_t next_deadline;

    printf("my_timer_func\n");

    /* Keep ticking: schedule the next expiry before touching the IRQ. */
    next_deadline = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 2000;
    timer_mod(&state->my_timer, next_deadline);

    /* The interrupt stays asserted until the guest clears it. */
    if (state->irq_raise) {
        return;
    }

    pci_set_irq(&state->parent_obj, 1);
    state->irq_raise = true;
}
/*
 * MMIO write handler for the device's BAR 0 region.
 *
 * The write offset selects a command; the written value is only logged:
 *   0x10  clear the raised flag, (re)initialise the timer and arm it
 *         2000 ms ahead
 *   0x20  delete the timer
 *   0x30  clear the raised flag and lower the interrupt line
 * Any other offset is reported as unknown.
 *
 * @opaque: the MYHWState passed to memory_region_init_io()
 * @addr:   byte offset of the access inside the region
 * @val:    value written by the guest
 * @size:   access width in bytes (unused)
 */
static void my_hw_mmio_write(void *opaque, hwaddr addr,
                             uint64_t val, unsigned size)
{
    MYHWState *myhw = opaque;

    /*
     * addr and val are 64-bit quantities; PRIx64 is correct on every host,
     * whereas "%lx" is only correct where long is 64 bits wide.
     */
    printf("my_hw_mmio_write: addr:0x%" PRIx64 " , val 0x%" PRIx64 "\n",
           (uint64_t)addr, val);

    switch (addr) {
    case 0x10:
        printf("timer run\n");
        myhw->irq_raise = false;
        timer_init_ms(&myhw->my_timer, QEMU_CLOCK_VIRTUAL, my_timer_func, myhw);
        timer_mod(&myhw->my_timer,
                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 2000);
        break;
    case 0x20:
        printf("del timer\n");
        timer_del(&myhw->my_timer);
        break;
    case 0x30:
        /* Interrupt acknowledge from the guest driver. */
        myhw->irq_raise = false;
        pci_set_irq(&myhw->parent_obj, 0);
        break;
    default:
        printf("unkown opcode\n");
        break;
    }
}
/* Access callbacks for the BAR 0 MMIO region; all other fields keep their zero defaults. */
static const MemoryRegionOps mmio_ops = {
    .write = my_hw_mmio_write,
    .read  = my_hw_mmio_read,
};
/*
 * Configuration-space write hook.
 *
 * Adds no device-specific behaviour: the access is forwarded unchanged to
 * pci_default_write_config().
 */
static void my_hw_write_config(PCIDevice *pci_dev,
                               uint32_t address,
                               uint32_t val,
                               int len)
{
    pci_default_write_config(pci_dev, address, val, len);
}
/*
 * Configuration-space read hook.
 *
 * Adds no device-specific behaviour: returns whatever
 * pci_default_read_config() yields for the same arguments.
 */
static uint32_t my_hw_read_config(PCIDevice *pci_dev,
                                  uint32_t address,
                                  int len)
{
    uint32_t value = pci_default_read_config(pci_dev, address, len);

    return value;
}
static void myhw_pci_realize(PCIDevice *pci_dev,Error **errp)
{
MYHWState *s = MYHW(pci_dev);
pci_dev->config_write = my_hw_write_config;
pci_dev->config_read = my_hw_read_config;
pci_dev->config[PCI_INTERRUPT_PIN] = 1;
memory_region_init_io(&s->mmio,OBJECT(s),&mmio_ops,s,"MY_HW-mmio",1024*1024);
pci_register_bar(pci_dev, 0,PCI_BASE_ADDRESS_SPACE_MEMORY, &s->mmio);
}
/*
 * Class initialiser: sets the PCI identity of the device, its realize
 * hook, and the description string.
 *
 * @class: the class being initialised
 * @data:  class data (unused)
 */
static void myhw_class_init(ObjectClass *class, void *data)
{
    PCIDeviceClass *pci_class = PCI_DEVICE_CLASS(class);
    DeviceClass *dev_class = DEVICE_CLASS(class);

    /* Identity reported in configuration space; the driver's ID table uses the same pair. */
    pci_class->vendor_id = 0x4c52;
    pci_class->device_id = 0x6c72;
    pci_class->revision  = 0;
    pci_class->class_id  = PCI_CLASS_NOT_DEFINED;

    pci_class->realize = myhw_pci_realize;

    dev_class->desc = "Renli & lizhixi";
}
/* Instance initialiser: intentionally empty, the device has no per-instance setup here. */
static void myhw_instance_init(Object *obj)
{
    /* nothing to do */
}
/*
 * QOM type description of the example device: a TYPE_PCI_DEVICE subclass
 * whose instances are MYHWState objects and which declares
 * INTERFACE_PCIE_DEVICE.
 */
static const TypeInfo myhw_info = {
    .parent        = TYPE_PCI_DEVICE,
    .name          = TYPE_MYHW,
    .instance_size = sizeof(MYHWState),
    .instance_init = myhw_instance_init,
    .class_init    = myhw_class_init,
    .interfaces    = (InterfaceInfo[]) {
        { INTERFACE_PCIE_DEVICE },
        { }    /* empty terminating entry */
    },
};
/* Registers the myhw_info type description. */
static void myhw_register_types(void)
{
    type_register_static(&myhw_info);
}

/* Hand the registration function to type_init. */
type_init(myhw_register_types);
對應的驅動程序
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/init.h>
/*
 * PCI identity of the device this driver binds to.  The vendor and device
 * IDs are the values the emulated device sets in its class initialiser.
 */
#define MY_PCI_VENDOR_ID 0x4c52
#define MY_PCI_DEVICE_ID 0x6c72
#define MY_PCI_REVISION_ID 0x0

/*
 * Device ID table, terminated by an all-zero entry.  It is never modified
 * at run time, so it is declared const.
 */
static const struct pci_device_id ids[] = {
    { PCI_DEVICE(MY_PCI_VENDOR_ID, MY_PCI_DEVICE_ID), },
    { 0, }
};
/* Driver-wide state for the single supported device. */
struct my_pci_info {
    struct pci_dev *dev;        /* device recorded by probe() */
    void __iomem *address_io;   /* BAR 0 mapping obtained with pci_iomap() */
};

/* The one instance; also passed as dev_id to request_irq()/free_irq(). */
static struct my_pci_info pci_info;

MODULE_DEVICE_TABLE(pci, ids);
static irqreturn_t my_pci_irq_handler(int irq,void *dev_id)
{
struct my_pci_info *pci_info = dev_id;
*((uint8_t *)pci_info->address_io + 0x30) = 0x01;
printk("my pci:receive irq\n");
return 0;
}
//return 0 means success
static int probe(struct pci_dev *dev,const struct pci_device_id *id)
{
int bar = 0;
int ret;
resource_size_t len;
ret = pci_enable_device(dev);
if(ret) return ret;
len = pci_resource_len(dev,bar);
pci_info.address_io = pci_iomap(dev,bar,len);
pci_info.dev = dev;
//interrupt
ret = request_irq(dev->irq,my_pci_irq_handler,IRQF_SHARED,"my_pci",&pci_info);
if(ret){
printk("request IRQ failed\n");
return 1;
}
*((uint8_t *)pci_info.address_io+0x10) = 0x01;
return 0;
}
static void remove(struct pci_dev *dev)
{
*((uint8_t *)pci_info.address_io+0x20) = 0x01;
free_irq(dev->irq,&pci_info);
pci_iounmap(dev,pci_info.address_io);
pci_disable_device(dev);
}
/* Driver description handed to the PCI core: name, ID table, and bind/unbind callbacks. */
static struct pci_driver pci_driver = {
    .name     = "my_hw",
    .probe    = probe,
    .remove   = remove,
    .id_table = ids,
};
/* Module load entry point: registers the PCI driver and returns the registration result. */
static int __init my_pci_init(void)
{
    int rc = pci_register_driver(&pci_driver);

    return rc;
}
/* Module unload entry point: unregisters the PCI driver. */
static void __exit my_pci_exit(void)
{
    pci_unregister_driver(&pci_driver);
}
/* Declare the module licence. */
MODULE_LICENSE("GPL");
/* Use my_pci_init() and my_pci_exit() as the module's load and unload entry points. */
module_init(my_pci_init);
module_exit(my_pci_exit);
可以通過lspci獲得myhw的BAR地址,然後用ioremap映射該BAR指向的地址並進行讀寫,使用完畢後用iounmap解除映射
Tips
MMCONFIG:PCIE設備的總的配置空間位置
資料
Qemu X86架構的Machine Type
https://remimin.github.io/2019/07/09/qemu_machine_type/
KVM虛擬機代碼揭秘——QEMU的PCI總線與設備(上)
https://blog.csdn.net/yearn520/article/details/6576875
KVM虛擬機代碼揭秘——QEMU的PCI總線與設備(下)
https://blog.csdn.net/yearn520/article/details/6577988
QEMU學習筆記——Q35
https://www.binss.me/blog/qemu-note-of-Q35-machine/
PCI設備的創建與初始化
https://github.com/GiantVM/doc/blob/master/pci.md
概念術語
http://media.njude.com.cn/vclass/Courses/15201A/CourseDetail.aspx?id=4612&name=概念術語
PCI EXPRESS GUIDELINES
https://github.com/qemu/qemu/blob/master/docs/pcie.txt
在qemu中增加pci設備並用linux驅動驗證
https://blog.csdn.net/XscKernel/article/details/8298195
qemu PCI edu.c
https://github.com/qemu/qemu/blob/v2.7.0/hw/misc/edu.c
https://github.com/qemu/qemu/blob/v2.7.0/docs/specs/edu.txt
淺談Linux PCI設備驅動