iommu_dma_mmap + mmap

本文轉載自查看原文 2020-09-24 15:04 680 os/ pci

https://www.codeleading.com/article/67454397455/

https://blog.csdn.net/weixin_43503508/article/details/107924027

/*
 * DMA-mapping callback table whose entries are all implemented by the
 * iommu_dma_* routines. The .mmap entry points at iommu_dma_mmap().
 * A device is switched to this table with
 * arch_set_dma_ops(dev, &iommu_dma_ops).
 */
static const struct dma_map_ops iommu_dma_ops = {
    .alloc            = iommu_dma_alloc,
    .free            = iommu_dma_free,
 .mmap = iommu_dma_mmap,
    .get_sgtable        = iommu_dma_get_sgtable,
    .map_page        = iommu_dma_map_page,
    .unmap_page        = iommu_dma_unmap_page,
    .map_sg            = iommu_dma_map_sg,
    .unmap_sg        = iommu_dma_unmap_sg,
    .sync_single_for_cpu    = iommu_dma_sync_single_for_cpu,
    .sync_single_for_device    = iommu_dma_sync_single_for_device,
    .sync_sg_for_cpu    = iommu_dma_sync_sg_for_cpu,
    .sync_sg_for_device    = iommu_dma_sync_sg_for_device,
    .map_resource        = iommu_dma_map_resource,
    .unmap_resource        = iommu_dma_unmap_resource,
    .get_merge_boundary    = iommu_dma_get_merge_boundary,
};

/*
 * iommu_dma_mmap - back the user mapping described by @vma with a DMA buffer.
 * @dev:      device handed to dma_pgprot() and dma_mmap_from_dev_coherent()
 * @vma:      user mapping to populate; its vm_page_prot is rewritten first
 * @cpu_addr: CPU-side address of the buffer
 * @dma_addr: not referenced by this function
 * @size:     buffer size in bytes, rounded up to whole pages
 * @attrs:    DMA attributes forwarded to dma_pgprot()
 *
 * Returns the status filled in by dma_mmap_from_dev_coherent() when that
 * helper claims the request, -ENXIO when the page offset plus the VMA length
 * does not fit inside the buffer, and otherwise whatever __iommu_dma_mmap()
 * or remap_pfn_range() returns.
 */
static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
        void *cpu_addr, dma_addr_t dma_addr, size_t size,
        unsigned long attrs)
{
    const unsigned long buf_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
    const unsigned long pgoff = vma->vm_pgoff;
    unsigned long first_pfn;
    int status;

    vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs);

    /* A per-device coherent pool may service the request on its own. */
    if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &status))
        return status;

    /* The requested window must start and end inside the buffer. */
    if (pgoff >= buf_pages)
        return -ENXIO;
    if (vma_pages(vma) > buf_pages - pgoff)
        return -ENXIO;

    if (!IS_ENABLED(CONFIG_DMA_REMAP) || !is_vmalloc_addr(cpu_addr)) {
        first_pfn = page_to_pfn(virt_to_page(cpu_addr));
    } else {
        struct page **page_list = dma_common_find_pages(cpu_addr);

        /* A page array is available: map it page by page. */
        if (page_list)
            return __iommu_dma_mmap(page_list, size, vma);

        first_pfn = vmalloc_to_pfn(cpu_addr);
    }

    /* Remaining cases are mapped as one PFN range starting at first_pfn. */
    return remap_pfn_range(vma, vma->vm_start, first_pfn + pgoff,
                   vma->vm_end - vma->vm_start,
                   vma->vm_page_prot);
}

IOMMU核心框架層

IOMMU核心框架是管理IOMMU設備的一個通用框架，IOMMU設備通過實現特定的回調函數並將自身注冊到IOMMU核心框架中，以此通過IOMMU核心框架提供的API向整個內核提供IOMMU功能。所有的IOMMU設備都嵌入了一個struct iommu_device，iommu的核心代碼只會操作這個結構體。可以看到，我們唯一需要關心的就是ops，這是iommu驅動注冊到core中的回調函數：

/*
 * Handle embedded in every IOMMU device; the IOMMU core code only operates
 * on this structure.
 *
 * list:   list linkage (presumably the entry on iommu_device_list — confirm)
 * ops:    callbacks the IOMMU driver registers with the core
 * fwnode: firmware node handle for this IOMMU
 * dev:    associated struct device
 */
struct iommu_device {
	struct list_head list;
	const struct iommu_ops *ops;
	struct fwnode_handle *fwnode;
	struct device *dev;
};

/**
 * struct iommu_ops - iommu ops and capabilities
 * @capable: check capability
 * @domain_alloc: allocate iommu domain
 * @domain_free: free iommu domain
 * @attach_dev: attach device to an iommu domain
 * @detach_dev: detach device from an iommu domain
 * @map: map a physically contiguous memory region to an iommu domain
 * @unmap: unmap a physically contiguous memory region from an iommu domain
 * @flush_tlb_all: Synchronously flush all hardware TLBs for this domain
 * @tlb_range_add: Add a given iova range to the flush queue for this domain
 * @tlb_sync: Flush all queued ranges from the hardware TLBs and empty flush
 *            queue
 * @iova_to_phys: translate iova to physical address
 * @add_device: add device to iommu grouping
 * @remove_device: remove device from iommu grouping
 * @device_group: find iommu group for a particular device
 * @domain_get_attr: Query domain attributes
 * @domain_set_attr: Change domain attributes
 * @get_resv_regions: Request list of reserved regions for a device
 * @put_resv_regions: Free list of reserved regions for a device
 * @apply_resv_region: Temporary helper call-back for iova reserved ranges
 * @domain_window_enable: Configure and enable a particular window for a domain
 * @domain_window_disable: Disable a particular window for a domain
 * @domain_set_windows: Set the number of windows for a domain
 * @domain_get_windows: Return the number of windows for a domain
 * @of_xlate: add OF master IDs to iommu grouping
 * @pgsize_bitmap: bitmap of all possible supported page sizes
 */

iommu中的核心代碼在drivers/iommu/iommu.c中實現，下面從一些基本的方面分析核心層提供的功能。由於一個運行的系統中只會同時存在幾個iommu設備，因此設備管理實現比較簡單，是由鏈表實現的。

/* Linked list used by the IOMMU core to keep track of IOMMU devices. */
static LIST_HEAD(iommu_device_list);
/* NOTE(review): presumably serialises updates to iommu_device_list — confirm. */
static DEFINE_SPINLOCK(iommu_device_lock);

注冊和注銷設備實質上就是操作這個鏈表。iommu向外提供的API不多，后面主要分析：

iommu_map && iommu_unmap
iommu_domain_alloc && iommu_domain_free
iommu_attach_device && iommu_detach_device

iommu_domain_alloc

在我的理解中，domain這個詞是從intel的VT-d文檔中繼承下來的，其他平台有各自的叫法如ARM下叫context。一個domain應該是指一個獨立的iommu映射上下文。處於同一個domain中的設備使用同一套映射做地址轉換（對於mmio來說就是獨立的頁表）。core層中使用struct iommu_domain表示一個domain：

/*
 * Core representation of one IOMMU domain, i.e. one independent mapping
 * context shared by the devices placed in it.
 *
 * type:          domain type (IOMMU_DOMAIN_BLOCKED, _IDENTITY, _UNMANAGED
 *                or _DMA)
 * ops:           IOMMU driver callbacks used for this domain
 * handler:       fault handler callback
 * handler_token: opaque pointer kept next to @handler (presumably passed
 *                back to it — confirm)
 * geometry:      struct iommu_domain_geometry for this domain
 * iova_cookie:   opaque pointer; iommu_get_dma_cookie() stores the IOVA
 *                domain data here
 */
struct iommu_domain {
	unsigned type;
	const struct iommu_ops *ops;
	unsigned long pgsize_bitmap;	/* Bitmap of page sizes in use */
	iommu_fault_handler_t handler;
	void *handler_token;
	struct iommu_domain_geometry geometry;
	void *iova_cookie;
};

后面分析各個字段的含義。注釋中提到了內核支持的domain類型：

/*
 * This are the possible domain-types
 *
 *	IOMMU_DOMAIN_BLOCKED	- All DMA is blocked, can be used to isolate
 *				  devices
 *	IOMMU_DOMAIN_IDENTITY	- DMA addresses are system physical addresses
 *	IOMMU_DOMAIN_UNMANAGED	- DMA mappings managed by IOMMU-API user, used
 *				  for VMs
 *	IOMMU_DOMAIN_DMA	- Internally used for DMA-API implementations.
 *				  This flag allows IOMMU drivers to implement
 *				  certain optimizations for these domain
 */

這個函數僅僅就是調用ops中驅動注冊的domain_alloc回調函數分配一個iommu_domain，從這里看出每個驅動也是要提供自己的domain類型並把struct iommu_domain嵌入進去的。

iommu_attach_device

int iommu_attach_device(struct iommu_domain *domain, struct device *dev);

從函數原型中可以看出該函數的操作對象是一個domain和一個設備，聯想函數名稱可以認為該函數是將一個設備添加到一個domain中。但事實上還是有些偏差的，該函數實際上將設備所在的Group與該domain綁定。值得一提的是，如果函數發現設備存在的Group中存在多個設備，則不進行綁定操作。總結下來，該操作針對獨立設備（即所在Group里只有自己），將設備所在Group與domain進行綁定。

	if (iommu_group_device_count(group) != 1)
		goto out_unlock;

	ret = __iommu_attach_group(domain, group);

__iommu_attach_group遍歷Group中所有的設備，並調用__iommu_attach_device。該函數首先通過domain->ops中的is_attach_deferred檢查是否延后進行attach操作。然后調用ops中的attach_dev函數將設備綁定到該domain中去。這里需要注意區分Group中default_domain和domain的概念：domain指group當前所在的domain，而default_domain指Group默認應該在的domain。進行attach操作時，會檢查default_domain是否與domain相同，以此判斷該Group是否已經attach到別的domain上了，在該情況下返回-EBUSY。

iommu_detach_device

該函數與上面的iommu_attach_device幾乎完全相反，並且該函數也是操作獨立設備。這里注意如果Group有自己的default_domain，那么該函數在detach完成之后會重新attach到default_domain上。

iommu_map

int iommu_map(struct iommu_domain *domain, unsigned long iova,
	      phys_addr_t paddr, size_t size, int prot);

函數原型上可以看出來是用於映射domain內的iova，將長度為size以iova為起始地址的iova區域映射到以paddr為起始地址的物理地址。該函數只能用於UNMANAGED類型和DMA類型的domain。domain->pgsize_bitmap是一個bitmap，用於記錄domain支持的各種page大小（其中最低的置位即為最小page大小）。iommu_map函數進行操作時，是以page為單位的，page大小不固定可以根據需要使用不同大小的page，在同一次iommu_map操作中也不要求page大小一致。最終一個page的映射是調用domain->ops中的map回調函數實現的。

iommu_iova_to_phys

該函數調用domain->ops中提供的iova_to_phys回調函數將iova轉換成物理地址。

TODO dma integration

IOMMU Group

啃了兩天PCIE協議，對IOMMU的Group概念也有了一定的理解。從內核角度來看Group是一組設備，是IOMMU可以辨別的最小單位，即IOMMU無法區分出一個Group中的設備。區分標准是什么呢，IO地址空間。以PCIE總線來舉一個例子，我們知道PCIE是一個點對點的協議，如果一個多function設備掛到了一個不支持ACS的bridge下，那么這兩個function可以通過該bridge進行通信。這樣的通信直接由bridge進行轉發而無需通過Root Complex，自然也就無需通過IOMMU。這種情況下，這兩個function的IOVA無法完全通過IOMMU隔離開，所以他們需要分到同一個Group中。同一個Group的設備應該是公用一個domain的。

/*
 * A group of devices that the IOMMU cannot tell apart; devices in one group
 * share a domain.
 *
 * devices:        list of the devices that belong to this group
 * notifier:       notifier chain on which other kernel components register
 *                 listeners for this group
 * default_domain: the domain the group should be in by default
 * domain:         the domain the group is currently in
 * iommu_data / iommu_data_release: opaque per-group pointer and the
 *                 callback that receives it (presumably invoked when the
 *                 group is released — confirm)
 */
struct iommu_group {
	struct kobject kobj;
	struct kobject *devices_kobj;
	struct list_head devices;
	struct mutex mutex;
	struct blocking_notifier_head notifier;
	void *iommu_data;
	void (*iommu_data_release)(void *iommu_data);
	char *name;
	int id;
	struct iommu_domain *default_domain;
	struct iommu_domain *domain;
};

從iommu_group的結構中可以發現，devices列表保存group中設備。一個group需要關聯兩個iommu_domain，除此之外支持內核中其他組件向該group中注冊listener。default_domain保存的是默認該設備應該位於的domain，而domain字段保存設備當前所在的domain。

iommu_group_add_device

該函數將一個設備加入一個Group，函數的主要操作如下：

處理sysfs相關的事項，如建立iommu_group符號鏈接
將設備的iommu_group字段設置為這個Group
調用iommu_group_create_direct_mappings建立設備的iova映射
將設備加入到Group內的list里
通知所有注冊到Group里的listener有設備加入

TODO: iommu_group_create_direct_mappings

iommu_group_get_for_dev

該函數獲取一個設備所在的Group，如果設備不屬於任何一個Group，則調用IOMMU驅動提供的device_group回調函數嘗試進行獲取。

	group = iommu_group_get(dev);
	if (group)
		return group;

	if (!ops)
		return ERR_PTR(-EINVAL);

	group = ops->device_group(dev);

隨后，為獲取的Group設置domain，最后將設備加入Group。

	if (!group->default_domain) {
		struct iommu_domain *dom;

		dom = __iommu_domain_alloc(dev->bus, iommu_def_domain_type);
		if (!dom && iommu_def_domain_type != IOMMU_DOMAIN_DMA) {
			dev_warn(dev,
				 "failed to allocate default IOMMU domain of type %u; falling back to IOMMU_DOMAIN_DMA",
				 iommu_def_domain_type);
			dom = __iommu_domain_alloc(dev->bus, IOMMU_DOMAIN_DMA);
		}

		group->default_domain = dom;
		if (!group->domain)
			group->domain = dom;
	}

Bus integration

每一個struct device中保存了一個struct iommu_group的指針，用以獲取該設備處於的group。除此之外，內核需要其他方式將iommu功能集成到總線中。很明顯，一個iommu設備是作用於一個或者多個總線上的，那么就需要一個自然的方式管理與iommu相關的功能。首先明確struct bus中存在一個iommu_ops用於保存當前bus上生效的iommu驅動注冊的iommu_ops。同時可以根據這個指針是否為NULL確認這個bus中是否支持iommu功能。

iommu核心框架中提供了bus_set_iommu函數，該函數可以被iommu驅動調用，用以將自身掛入到對應總線中。函數中除了設置iommu_ops指針之外，還進行了兩個工作：

向bus中注冊一個listener：對於bus上設備的插入與移除的設備，調用iommu_ops中對應的add_device和remove_device回調函數。對於bus接收到的其他設備事件（如bind，unbind等），則將其傳播給該設備所處於的group中。
對於bus中已經存在的設備，則挨個調用add_device將其納入iommu的管轄，並設置其group

iommu_fwspec

/*
 * Per-device IOMMU instance data.
 *
 * ops:          ops for this device's IOMMU
 * iommu_fwnode: firmware handle for this device's IOMMU
 * iommu_priv:   IOMMU driver private data for this device
 * num_ids:      number of associated device IDs
 * ids:          IDs which this device may present to the IOMMU
 */
struct iommu_fwspec {
	const struct iommu_ops	*ops;
	struct fwnode_handle	*iommu_fwnode;
	void			*iommu_priv;
	unsigned int		num_ids;
	u32			ids[1];
};

IOMMU Domain

每一個domain即代表一個iommu映射地址空間，即一個page table。一個Group邏輯上是需要與domain進行綁定的，即一個Group中的所有設備都位於一個domain中。

/*
 * One IOMMU mapping address space (one page table). A group is bound to a
 * domain, so every device in the group lives in that domain.
 *
 * type:        domain type
 * ops:         IOMMU driver callbacks used for this domain
 * handler:     fault handler callback, stored together with handler_token
 * geometry:    struct iommu_domain_geometry for this domain
 * iova_cookie: opaque pointer; iommu_get_dma_cookie() stores the IOVA
 *              domain data here
 */
struct iommu_domain {
	unsigned type;
	const struct iommu_ops *ops;
	unsigned long pgsize_bitmap;	/* Bitmap of page sizes in use */
	iommu_fault_handler_t handler;
	void *handler_token;
	struct iommu_domain_geometry geometry;
	void *iova_cookie;
};

SMMU硬件及驅動分析

看代碼要點：

一定要看文檔，SMMU是一個比較簡單的設備，他的Spec只有300頁
fault分為global和context，global基本上就是smmu本身的一些fault，而context是smmu在進行地址轉換時出現的fault

開始分析代碼，我認為需要從中斷處理入手，這也是錯誤信息的入口。先看context fault的處理函數，代碼不貼了基本就是打出相關的寄存器信息，沒有什么參考意義。我認為有意義的地方是這個中斷是怎么注冊的，即驅動是如何管理io domain的。這就涉及通用的IOMMU框架代碼了。

可以發現arm_smmu_attach_dev函數中將一個設備添加到一個特定的iommu domain中。函數中調用arm_smmu_init_domain_context函數注冊了這個中斷。對於每一個struct device，其內部有一個iommu_group字段保存其所在的Group。

/*
 * Callback table the ARM SMMU driver registers with the IOMMU core.
 *
 * Both .flush_iotlb_all and .iotlb_sync are served by arm_smmu_iotlb_sync().
 * No .detach_dev callback is set in this table.
 */
static struct iommu_ops arm_smmu_ops = {
	.capable		= arm_smmu_capable,
	.domain_alloc		= arm_smmu_domain_alloc,
	.domain_free		= arm_smmu_domain_free,
	.attach_dev		= arm_smmu_attach_dev,
	.map			= arm_smmu_map,
	.unmap			= arm_smmu_unmap,
	.flush_iotlb_all	= arm_smmu_iotlb_sync,
	.iotlb_sync		= arm_smmu_iotlb_sync,
	.iova_to_phys		= arm_smmu_iova_to_phys,
	.add_device		= arm_smmu_add_device,
	.remove_device		= arm_smmu_remove_device,
	.device_group		= arm_smmu_device_group,
	.domain_get_attr	= arm_smmu_domain_get_attr,
	.domain_set_attr	= arm_smmu_domain_set_attr,
	.of_xlate		= arm_smmu_of_xlate,
	.get_resv_regions	= arm_smmu_get_resv_regions,
	.put_resv_regions	= arm_smmu_put_resv_regions,
	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
};

由於前面已經分析了IOMMU核心框架，熟悉了IOMMU核心框架與IOMMU驅動如何互動。這里分析流程即為以一個設備的IOMMU操作周期為基准分析SMMU驅動向IOMMU核心框架注冊回調函數。

Stream Mapping管理

首先提及一些Spec中定義的名詞：

Stream翻譯成中文是流的意思。在SMMU中特指Master設備向SMMU發起的請求流。StreamID即為SMMU用以辨別不同Stream用的編號，注意Stream和設備不是一一對應的關系。
Stream Mapping在Spec中特指將StreamID映射到Stream Context（接近domain的概念）這一操作行為。

代碼中SME應該是Stream Mapping Entry的縮寫。Spec中提及到三種Stream Mapping的方式，這里主要提及Stream Indexing和Stream Matching兩種。

arm_smmu_add_device

這個函數即為add_device回調函數。回憶前面的分析，IOMMU核心框架向bus中注冊listener，每當bus中新增設備時，即會調用該函數。從這里看，該函數的主要功能就是將一個設備納入到IOMMU驅動的管理中。該函數的核心參數就是被傳入的struct device結構體中保存的struct iommu_fwspec。

/**
 * struct iommu_fwspec - per-device IOMMU instance data
 * @ops: ops for this device's IOMMU
 * @iommu_fwnode: firmware handle for this device's IOMMU
 * @iommu_priv: IOMMU driver private data for this device
 * @num_ids: number of associated device IDs
 * @ids: IDs which this device may present to the IOMMU
 *
 * NOTE(review): @ids is declared with a single element while @num_ids
 * counts the entries; presumably the structure is over-allocated when a
 * device has more than one ID — confirm against the allocation site.
 */
struct iommu_fwspec {
	const struct iommu_ops	*ops;
	struct fwnode_handle	*iommu_fwnode;
	void			*iommu_priv;
	unsigned int		num_ids;
	u32			ids[1];
};

該參數是從ACPI或者設備樹中得到的，用以描述設備綁定的IOMMU及拓撲關系。這里需要注意該關系必須遵循硬件設計，不然很明顯是無法正常工作的。函數為設備內分配了一個arm_smmu_master_cfg結構體，如下：

/*
 * Per-master (bus master device) configuration allocated by the SMMU driver
 * and stored in the device's iommu_fwspec iommu_priv pointer.
 *
 * smmu:   the SMMU instance serving this master
 * smendx: Stream Mapping Entry indexes assigned to this master, stored as
 *         s16 in a flexible array member
 */
struct arm_smmu_master_cfg {
	struct arm_smmu_device		*smmu;
	s16				smendx[];
};

並保存在iommu_fwspec中的iommu_priv指針中。該結構體從名字上就能看出是Master設備的配置，Master這個名詞在Spec中是指Bus Master，即可以發起總線請求的設備。函數的核心操作由arm_smmu_master_alloc_smes完成，從名字可以看出是為設備分配Stream Mapping中的表項。對於每一個與設備關聯的StreamID，都需要分配一個Stream Mapping中的表項：

		ret = arm_smmu_find_sme(smmu, sid, mask);
		if (ret < 0)
			goto out_err;

		idx = ret;
		if (smrs && smmu->s2crs[idx].count == 0) {
			smrs[idx].id = sid;
			smrs[idx].mask = mask;
			smrs[idx].valid = true;
		}
		smmu->s2crs[idx].count++;
		cfg->smendx[i] = (s16)idx;

這里的操作簡明易懂，唯一需要注意的就是表項的分配方式。首先搜索整個表中是否存在完全匹配（即集合意義上的包含）的表項，如果存在則使用該表項，否則使用表中第一個發現的空表項。到這里可以發現arm_smmu_master_cfg中的smendx即為保存該Master設備對應的Stream Mapping表項。表項申請完畢后，其實質上還是沒有寫入到SMMU的mmio空間去的，寫入的話這個表項應該就生效了。但是軟件上還是沒有准備完畢的，這個設備沒有加入任何Group或者domain。這里可以看到：

	group = iommu_group_get_for_dev(dev);

這個函數是IOMMU核心框架提供的函數，函數最終還是會調用到device_group回調函數。這里我們只需要明確這個函數確定設備屬於哪一個Group。最后，為了使表項立馬生效，將其寫入到S2CR寄存器中：

	for_each_cfg_sme(fwspec, i, idx) {
		arm_smmu_write_sme(smmu, idx);
		smmu->s2crs[idx].group = group;
	}

arm_smmu_device_group

該函數為device_group回調函數，目的是獲取一個設備的Group。函數的操作也比較簡單：

sanity check：檢查設備所有SME是否都位於同一個Group
如果設備已經有一個Group，那么返回該Group
設備沒有Group的情況下，則需為設備分配一個Group。對於PCI設備調用pci_device_group，對於其他設備則為generic_device_group

arm_smmu_domain_alloc

該函數為domain_alloc回調函數，其目的是申請一個domain。SMMU驅動只支持三種domain：

 if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_IDENTITY)
		return NULL;

剩下的就是申請內存，初始化一些數據結構了，貌似有一些看着比較關鍵的字段是空着的。后面可以看到這個時候申請的domain僅僅是占位用的，沒有什么實際意義。在調用attach_dev時，會初始化domain context。驅動通過struct arm_smmu_domain里的smmu字段判斷context是否已經初始化。

arm_smmu_attach_dev

這里的實現細節是，一個設備的綁定的SMMU與其Stream Mappings已經在add_device回調函數中確定好了。attach_dev的實際操作就是根據設備保存的這些信息初始化domain。初始化domain context由arm_smmu_init_domain_context函數完成，該函數滿滿的硬件細節，后續需要專門討論。一個domain在SMMU硬件中實際對應的概念就是Context Bank，在使用Stream Matching的情況下，一共存在三層映射：StreamID && Mask -> S2CR寄存器 -> Context Bank。因此，初始化完domain（即Context Bank）后需要設置當前設備在Stream Mapping中對應的S2CR寄存器，使其指向該domain對應的Context Bank。

先說下iommu幾個名詞
iommu_group：代表共享同一個streamid的一組device，也就是多個device可以在同個group
domain ：代表一個具體的設備使用iommu的詳細spec

The kernel has had a DMA mapping API from the beginning. ARM defines an IOMMU, which can be used to present scattered physical memory as one contiguous region to devices that need contiguous addresses to work (e.g. DMA). So IOMMU implementations and CMA should work behind the kernel DMA mapping API. E.g. dma_alloc_from_contiguous can be implemented by CMA; dma_alloc_coherent can be implemented by the IOMMU or by the normal case (just call __get_free_pages). So device drivers that need DMA buffers should use the DMA mapping APIs and not call the IOMMU API directly.
說明cma可以實現函數dma_alloc_from_contiguous，iommu可以實現dma_alloc_coherent
iommu是實現在dma mapping api下層的驅動，所以我們只需要使用dma mapping的相關api，不需要直接調用iommu接口。

IOMMU，Input-Output Memory Management Unit
網上有些關於使用iommu的好處。
但是我感覺最重要的是用於將物理上分散的內存頁映射成 cif、isp可見的連續內存，如果沒有iommu需要在kernel預留比較大的cma內存
在rk芯片，所有模塊的iommu公用一個驅動

kernel\drivers\iommu\rockchip-iommu.c
kernel\drivers\iommu\rk-iommu.h
定義iommu的結構：

/*
 * Rockchip wrapper around the generic iommu_domain. It owns the page
 * directory table (dt), the DMA address obtained for that table (dt_dma),
 * and the list of rk_iommu instances linked to this domain.
 */
struct rk_iommu_domain {
	struct list_head iommus;
	struct platform_device *pdev;
	u32 *dt; /* page directory table */
	dma_addr_t dt_dma;
	struct mutex iommus_lock; /* lock for iommus list */
	struct mutex dt_lock; /* lock for modifying page directory table */

	struct iommu_domain domain;
};

/*
 * One Rockchip IOMMU instance: num_mmu register bases, num_irq interrupt
 * numbers, the master's clocks, and the domain it is currently attached to.
 */
struct rk_iommu {
	struct device *dev;
	void __iomem **bases;
	int num_mmu;
	int *irq;
	int num_irq;
	bool reset_disabled; /* isp iommu reset operation would failed */
	bool skip_read; /* rk3126/rk3128 can't read vop iommu registers */
	struct list_head node; /* entry in rk_iommu_domain.iommus */
	struct iommu_domain *domain; /* domain to which iommu is attached */
	struct clk *aclk; /* aclock belong to master */
	struct clk *hclk; /* hclock belong to master */
	struct clk *sclk; /* sclock belong to master */
	struct list_head dev_node;
};

在模塊加載的時候調用

/*
 * rk_iommu_init - module-load entry point of the Rockchip IOMMU driver.
 *
 * Does nothing (returns 0) when no node matching rk_iommu_dt_ids exists.
 * Otherwise it installs rk_iommu_ops on the platform bus and registers the
 * two platform drivers. If the second registration fails, the first driver
 * is unregistered again.
 *
 * Returns 0 on success or the error code of the first step that failed.
 */
static int __init rk_iommu_init(void)
{
	struct device_node *np;
	int ret;

	np = of_find_matching_node(NULL, rk_iommu_dt_ids);
	if (!np)
		return 0;

	/* The node was only needed as a presence check. */
	of_node_put(np);

	/* Initialise the IOMMU side of the platform bus. */
	ret = bus_set_iommu(&platform_bus_type, &rk_iommu_ops);
	if (ret)
		return ret;

	/* Register the two drivers: the domain driver first, then the IOMMU. */
	ret = platform_driver_register(&rk_iommu_domain_driver);
	if (ret)
		return ret;

	ret = platform_driver_register(&rk_iommu_driver);
	if (ret)
		platform_driver_unregister(&rk_iommu_domain_driver);

	return ret;
}

其中

bus_set_iommu
	iommu_bus_init
		err = bus_for_each_dev(bus, NULL, &cb, add_iommu_group); 添加到group后的回調 add_iommu_group ops->add_device

調用了.add_device = rk_iommu_add_device

兩個probe函數
1.rk_iommu_domain_probe
調用
/* Set dma_ops for dev, otherwise it would be dummy_dma_ops */
arch_setup_dma_ops(dev, 0, DMA_BIT_MASK(32), NULL, false);
這樣設置dma_ops的操作函數
common_iommu_setup_dma_ops
do_iommu_attach
arch_set_dma_ops(dev, &iommu_dma_ops);
設置dma操作函數為iommu_dma_ops，這里面有使用iommu分配內存
思想是，分配許多頁，可能連續，也可能不連續。然后申請iova，最后用iova去匹配物理page，這樣就生成了table。(在dts中沒有定義關鍵字，不走買這個過程在后面的do_iommu_attach執行)

2.rockchip_iommu_probe
里面先獲取寄存器資源，ioremap過來，獲取中斷，並申請中斷
之后調用
在rockchip-iommu.c里有
/*
 * Callback table of the Rockchip IOMMU driver; rk_iommu_init() hands it to
 * bus_set_iommu() for the platform bus. rk_iommu_domain_alloc() initialises
 * a struct rk_iommu_domain and returns the iommu_domain embedded in it.
 */
static const struct iommu_ops rk_iommu_ops = {
.domain_alloc = rk_iommu_domain_alloc,
.domain_free = rk_iommu_domain_free,
.attach_dev = rk_iommu_attach_device,
.detach_dev = rk_iommu_detach_device,
.map = rk_iommu_map,
.unmap = rk_iommu_unmap,
.map_sg = rk_iommu_map_sg,
.add_device = rk_iommu_add_device,
.remove_device = rk_iommu_remove_device,
.iova_to_phys = rk_iommu_iova_to_phys,
.pgsize_bitmap = RK_IOMMU_PGSIZE_BITMAP,
};
rk_iommu_domain_alloc是初始化rk_iommu_domain結構，返回的是該結構下的iommu_domain結構體。

結合isp驅動看iommu是如何使用的

在dev.c有

	if (is_iommu_enable(dev)) { rkisp1_iommu_init(isp_dev); } static int rkisp1_iommu_init(struct rkisp1_device *rkisp1_dev) { ……//最終會調用到domain_alloc，申請domain rkisp1_dev->domain = iommu_domain_alloc(&platform_bus_type);//1 ……//分配iova_domain結構保存在domain->iova_cookie iommu_get_dma_cookie(rkisp1_dev->domain); ……//獲取group，為了保證多個device綁定iommu不至於混亂 group = iommu_group_get(rkisp1_dev->dev); ……//isp設備綁定domain ret = iommu_attach_device(domain, dev);//2 …… //設置dma相關操作函數， iommu_dma_ops，以及地址空間， // iommu_dma_ops應該是在開啟iommu的時候，dma相關操作函數就是執行iommu的相關函數,如果沒開啟，dma應該是其他函數。也就是說dma的對外操作函數是一致的，只是執行到dma函數的時候調用其中的ops是iommu的。這樣dma就可以執行分段操作。后續操作dma的時候會調用到iommu的map和ova_to_phys函數 0x10000000:IOVA可映射地址空間的起始位置 SZ_2G:IOVA空間大小 do_iommu_attach調用domain->ops = ops;和arch_set_dma_ops(dev, &iommu_dma_ops);綁定兩組ops if (!common_iommu_setup_dma_ops(dev, 0x10000000, SZ_2G, domain->ops)) { …… }

1.static struct iommu_domain *rk_iommu_domain_alloc(unsigned type) { ……//alloc rk_domain結構 rk_domain = devm_kzalloc(&pdev->dev, sizeof(*rk_domain), GFP_KERNEL); ……//申請內存頁來保存dt rk_domain->dt = (u32 *)get_zeroed_page(GFP_KERNEL | GFP_DMA32); ……//dt做dma映射 rk_domain->dt_dma = dma_map_single(iommu_dev, rk_domain->dt, SPAGE_SIZE, DMA_TO_DEVICE); ……//初始化參數，和iommu的ops rk_domain->domain.geometry.aperture_start = 0; rk_domain->domain.geometry.aperture_end = DMA_BIT_MASK(32); rk_domain->domain.geometry.force_aperture = true; rk_domain->domain.ops = &rk_iommu_ops; }

iommu_attach_device 先獲取group,然后調用

__iommu_attach_group
iommu_group_do_attach_device
   	__iommu_attach_device
   		domain->ops->attach_dev(domain, dev);

這樣就跑到.attach_dev = rk_iommu_attach_device
函數里

static int rk_iommu_attach_device(struct iommu_domain *domain, struct device *dev) { ……//獲取設備iommu iommu = rk_iommu_from_dev(dev); ……//其實是設置clk rk_iommu_power_on(iommu); ……//中間是打開stall模式和復位 iommu->domain = domain; //綁定之后 ……//申請中斷 ret = devm_request_irq(iommu->dev, iommu->irq[i], rk_iommu_irq, IRQF_SHARED, dev_name(dev), iommu); ……設置mmu的寄存器和mask中斷等 for (i = 0; i < iommu->num_mmu; i++) { rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR,rk_domain->dt_dma); rk_iommu_base_command(iommu->bases[i], RK_MMU_CMD_ZAP_CACHE); rk_iommu_write(iommu->bases[i], RK_MMU_INT_MASK, RK_MMU_IRQ_MASK); } ret = rk_iommu_enable_paging(iommu); …... }

Dma map函數會調用iommu的map函數，用申請好的地址，設置iommu的映射表

static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova, phys_addr_t paddr, size_t size, int prot) { ……//為pt申請一個page page_table = rk_dte_get_page_table(rk_domain, iova); …… dte_index = rk_domain->dt[rk_iova_dte_index(iova)]; pte_index = rk_iova_pte_index(iova); pte_addr = &page_table[pte_index];//pte表的首地址 pte_dma = rk_dte_pt_address(dte_index) + pte_index * sizeof(u32); //物理地址存放到iommu的映射表中 ret = rk_iommu_map_iova(rk_domain, pte_addr, pte_dma, iova, paddr, size, prot); }

static size_t rk_iommu_map_sg(struct iommu_domain *domain, unsigned long iova, struct scatterlist *sg, unsigned int nents, int prot) { ……//應該是找到最小的size 為4k min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap); // for_each_sg(sg, s, nents, i) { phys_addr_t phys = page_to_phys(sg_page(s)) + s->offset; //這個函數應該是可以把不同幾段物理地址映射到連續地址， //里面調用domain->ops->map，其實就是做多次map ret = iommu_map(domain, iova + mapped, phys, s->length, prot | IOMMU_INV_TLB_ENTIRE); mapped += s->length; } rk_iommu_zap_tlb(domain); }

這個結構描述的是分散的內存

struct scatterlist { #ifdef CONFIG_DEBUG_SG unsigned long sg_magic; #endif unsigned long page_link;// 指示該內存塊所在的頁面。要求page最低4字節對齊 unsigned int offset;// 指示該內存塊在頁面中的偏移 unsigned int length;// 該內存塊的長度 dma_addr_t dma_address;// 該內存塊實際的起始地址 #ifdef CONFIG_NEED_SG_DMA_LENGTH unsigned int dma_length;//相應信息長度 #endif };

看這個要求，所有的地址和長度都有4k對齊要求，在iommu_map里面有體現。
rk_iommu_iova_to_phys這個函數其實就是在操作dma里的dma_map_ops函數時候會調用。關於前面調用情況，仔細去了解dma
看看

/*
 * rk_iommu_iova_to_phys - translate an IOVA into a physical address.
 * @domain: generic domain; converted with to_rk_domain()
 * @iova:   I/O virtual address to look up
 *
 * Walks the two-level table while holding dt_lock: the directory entry
 * (DTE) selects a page table, the page-table entry (PTE) selects a page,
 * and the page offset of @iova is added to that page's address.
 *
 * Returns the physical address, or 0 when the DTE or the PTE is not valid.
 * Every exit path goes through the "out" label so the lock is always
 * released.
 */
static phys_addr_t rk_iommu_iova_to_phys(struct iommu_domain *domain,
					 dma_addr_t iova)
{
	struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
	phys_addr_t pt_phys, phys = 0;
	u32 dte, pte;
	u32 *page_table;

	mutex_lock(&rk_domain->dt_lock);

	/* Find the directory entry that covers this IOVA. */
	dte = rk_domain->dt[rk_iova_dte_index(iova)];
	if (!rk_dte_is_pt_valid(dte))
		goto out;

	/* Physical address of the page table referenced by the DTE. */
	pt_phys = rk_dte_pt_address(dte);
	/* Kernel virtual address of that page table. */
	page_table = (u32 *)phys_to_virt(pt_phys);
	/*
	 * Fetch the page-table entry for this IOVA. The original note gives
	 * the index as (iova & RK_IOVA_PTE_MASK) >> RK_IOVA_PTE_SHIFT.
	 */
	pte = page_table[rk_iova_pte_index(iova)];
	if (!rk_pte_is_page_valid(pte))
		goto out;

	/* Physical address of the page plus the offset inside the page. */
	phys = rk_pte_page_address(pte) + rk_iova_page_offset(iova);
out:
	mutex_unlock(&rk_domain->dt_lock);

	return phys;
}

這樣看函數的意思是一個dt對應申請一個1024個pt，一組pt為1024*4k,就是4M
如果看不懂注釋，要讀懂這個函數就要弄懂地址結構
下面簡述rk iommu存儲結構
在這里插入圖片描述
第一個寄存器MMU_DTE_ADDR存放的是DTE表的首地址，初始化驅動代碼會調用get_zeroed_page申請一個4k頁作為DTE表，DTE表有1024個單位，每個占4Byte.
然后調用dma_map_single將這個地址虛擬地址映射到總線地址（后面有總線地址介紹）。然后把這個地址放到MMU_DTE_ADDR寄存器中。其中每個DTE指向一個PTE表的首地址（注意是PTE表的物理地址）。PTE表的頁是在rk_iommu_map中申請，先將這個物理地址虛化（我認為一般是線性的吧）。這地址里面的內容指向實際的頁表的物理地址，然后再加上偏移量就是實際頁的物理地址。（內存映射都是以頁為單位的，找物理地址實際是找的頁的物理地址）。
所以這樣指地址，那么iova連續的情況，實際的物理地址可以不連續，dma搬運在有iommu的時候傳入的是iova(總線地址)。Iommu可以實現轉換。
iommu總線地址如下
在這里插入圖片描述
所以，可以看的出來mmu的地址是由dte，pte，po組成。然后申請了專門的目錄表 pte和po的內存，每個有對下一級的指向，就知道物理地址。

小結：僅個人觀點
其實iommu都是由dma去申請內存，可能連續，也可能幾段離散的，但是地址總線是連續的，申請好之后，需調用dma的map函數，map函數會調用到iommu函數里面分配頁表，這樣，dma就可以操作分段內存，因為有iommu的映射。

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 iommu分析之---DMA remap框架實現內存映射MMAP和DMA【轉】 linux下共享內存mmap和DMA(直接訪問內存)的使用【轉】 iommu分析之---intel iommu初始化 iommu-spec 要點 mmap學習 PageCache和mmap malloc 和mmap mmap概述虛擬化底層技術之——iommu