上篇文章已經分析了探測PCI總線的部分代碼,礙於篇幅,這里另啟一篇。重點分析下pci_scan_root_bus函數
2016-10-24
pci_scan_root_bus函數
struct pci_bus *pci_scan_root_bus(struct device *parent, int bus, struct pci_ops *ops, void *sysdata, struct list_head *resources) { struct pci_host_bridge_window *window; bool found = false; struct pci_bus *b; int max; /*尋找bus的資源*/ list_for_each_entry(window, resources, list) if (window->res->flags & IORESOURCE_BUS) { found = true; break; } /*創建bus對應的結構*/ b = pci_create_root_bus(parent, bus, ops, sysdata, resources); if (!b) return NULL; if (!found) { dev_info(&b->dev, "No busn resource found for root bus, will use [bus %02x-ff]\n", bus); pci_bus_insert_busn_res(b, bus, 255); } /*遍歷子總線*/ max = pci_scan_child_bus(b); if (!found) pci_bus_update_busn_res_end(b, max); pci_bus_add_devices(b); return b; }
這里首先尋找bus總線號資源,前面在x86_pci_root_bus_resources函數中已經分配了,所以這里理論上是已經分配好了,不過還是驗證下!!內核中總是精益求精。接着調用了pci_create_root_bus函數創建了對應的bus結構,然后調用pci_scan_child_bus函數遍歷該總線下所有的子總線。最后就調用pci_bus_add_devices添加設備。總體上就是這么幾步,但是要弄清楚,還真是不小的工作量。我們一步步來:
1、pci_create_root_bus函數
1 struct pci_bus *pci_create_root_bus(struct device *parent, int bus, 2 struct pci_ops *ops, void *sysdata, struct list_head *resources) 3 { 4 int error; 5 struct pci_host_bridge *bridge; 6 struct pci_bus *b, *b2; 7 struct pci_host_bridge_window *window, *n; 8 struct resource *res; 9 resource_size_t offset; 10 char bus_addr[64]; 11 char *fmt; 12 /*創建一個pci_bus結構*/ 13 b = pci_alloc_bus(); 14 if (!b) 15 return NULL; 16 /*基本的初始化*/ 17 b->sysdata = sysdata; 18 b->ops = ops; 19 /*0號總線的總線號正是該條根總線下的總線號資源的起始號*/ 20 b->number = b->busn_res.start = bus; 21 /**/ 22 b2 = pci_find_bus(pci_domain_nr(b), bus); 23 if (b2) { 24 /* If we already got to this bus through a different bridge, ignore it */ 25 dev_dbg(&b2->dev, "bus already known\n"); 26 goto err_out; 27 } 28 29 bridge = pci_alloc_host_bridge(b); 30 if (!bridge) 31 goto err_out; 32 33 bridge->dev.parent = parent; 34 bridge->dev.release = pci_release_host_bridge_dev; 35 dev_set_name(&bridge->dev, "pci%04x:%02x", pci_domain_nr(b), bus); 36 error = pcibios_root_bridge_prepare(bridge); 37 if (error) { 38 kfree(bridge); 39 goto err_out; 40 } 41 /*橋也是作為一個設備存在*/ 42 error = device_register(&bridge->dev); 43 if (error) { 44 put_device(&bridge->dev); 45 goto err_out; 46 } 47 /*建立總線到橋的指向*/ 48 b->bridge = get_device(&bridge->dev); 49 device_enable_async_suspend(b->bridge); 50 pci_set_bus_of_node(b); 51 52 if (!parent) 53 set_dev_node(b->bridge, pcibus_to_node(b)); 54 55 b->dev.class = &pcibus_class; 56 b->dev.parent = b->bridge; 57 dev_set_name(&b->dev, "%04x:%02x", pci_domain_nr(b), bus); 58 error = device_register(&b->dev); 59 if (error) 60 goto class_dev_reg_err; 61 62 pcibios_add_bus(b); 63 64 /* Create legacy_io and legacy_mem files for this bus */ 65 pci_create_legacy_files(b); 66 67 if (parent) 68 dev_info(parent, "PCI host bridge to bus %s\n", dev_name(&b->dev)); 69 else 70 printk(KERN_INFO "PCI host bridge to bus %s\n", dev_name(&b->dev)); 71 72 /* Add initial resources to the bus */ 73 list_for_each_entry_safe(window, n, 
resources, list) { 74 /*從全局的資源鏈表摘下,加入到特定橋的windows鏈表中*/ 75 list_move_tail(&window->list, &bridge->windows); 76 77 res = window->res; 78 offset = window->offset; 79 /*如果資源是總線號資源*/ 80 if (res->flags & IORESOURCE_BUS) 81 pci_bus_insert_busn_res(b, bus, res->end); 82 else 83 pci_bus_add_resource(b, res, 0); 84 /*看總線地址到物理地址的偏移*/ 85 if (offset) { 86 if (resource_type(res) == IORESOURCE_IO) 87 fmt = " (bus address [%#06llx-%#06llx])"; 88 else 89 fmt = " (bus address [%#010llx-%#010llx])"; 90 snprintf(bus_addr, sizeof(bus_addr), fmt, 91 (unsigned long long) (res->start - offset), 92 (unsigned long long) (res->end - offset)); 93 } else 94 bus_addr[0] = '\0'; 95 dev_info(&b->dev, "root bus resource %pR%s\n", res, bus_addr); 96 } 97 98 down_write(&pci_bus_sem); 99 /*加入根總線鏈表*/ 100 list_add_tail(&b->node, &pci_root_buses); 101 up_write(&pci_bus_sem); 102 103 return b; 104 105 class_dev_reg_err: 106 put_device(&bridge->dev); 107 device_unregister(&bridge->dev); 108 err_out: 109 kfree(b); 110 return NULL; 111 }
該函數和之前的相比就略顯龐大了。不過也難怪,到了最后的階段一般都挺復雜。哈哈!這里調用pci_alloc_bus函數分配了一個pci_bus結構,然后做基本的初始化。注意一個就是
1 b->number = b->busn_res.start = bus;
總線號資源是預分配好的,且一個總線的總線號就是其對應總線號區間的起始號。
然后調用pci_find_bus檢測下本次總線號是否已經存在對應的總線結構,如果存在,則表明有錯誤,當然一般是不會存在的。
然后調用pci_alloc_host_bridge函數分配了一個pci_host_bridge結構作為主橋。然后在主橋和總線之間建立關系。因為橋也是一種設備,所以需要注冊。
所以一直到這里,代碼雖然繁瑣卻不難理解。
到下面需要給總線分配資源了,之前我們是初始化了資源,並沒有在總線和資源之間建立關系,需要分清楚。看下面的list_for_each_entry_safe
這里實現的功能就是把window從resources鏈表中取下,然后加入到剛才創建的host-bridge的windows鏈表中,這樣就算把資源分配給了主橋,回想下前面提到橋設備的窗口就可以明白了。只是這里的意思貌似只考慮了一個主橋,雖然大部分都是一個主橋。然后把資源一個個都和總線相關聯。這樣總線的資源是有了。
最后調用list_add_tail把總線加入到全局的根總線鏈表。
下面看第二個函數pci_scan_child_bus,總線的遞歸遍歷就是在這里做的。
1 unsigned int pci_scan_child_bus(struct pci_bus *bus) 2 { 3 unsigned int devfn, pass, max = bus->busn_res.start; 4 struct pci_dev *dev; 5 6 dev_dbg(&bus->dev, "scanning bus\n"); 7 8 /* Go find them, Rover! 遍歷一條總線上的所有子總線,一條總線有32個接口,一個接口有8個子功能,所以這里只能以8遞增*/ 9 for (devfn = 0; devfn < 0x100; devfn += 8) 10 /*在遍歷每一個接口,這里一個接口最多有八個function*/ 11 /*在這里,就把總線上的每一個設備都探測過了並加入到了bus對應的設備鏈表中,后面遍歷還要用到*/ 12 pci_scan_slot(bus, devfn); 13 14 /* Reserve buses for SR-IOV capability. 加上預留的總線號的數量*/ 15 max += pci_iov_bus_range(bus); 16 17 /* 18 * After performing arch-dependent fixup of the bus, look behind 19 * all PCI-to-PCI bridges on this bus. 20 */ 21 /*查找PCI橋*/ 22 if (!bus->is_added) { 23 dev_dbg(&bus->dev, "fixups for bus\n"); 24 pcibios_fixup_bus(bus); 25 bus->is_added = 1; 26 } 27 /*據說是需要調用兩次pci_scan_bridge,第一次配置,第二次遍歷*/ 28 for (pass=0; pass < 2; pass++) 29 list_for_each_entry(dev, &bus->devices, bus_list) { 30 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || 31 dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) 32 /*遍歷PCI橋*/ 33 max = pci_scan_bridge(bus, dev, max, pass); 34 } 35 36 /* 37 * We've scanned the bus and so we know all about what's on 38 * the other side of any bridges that may be on this bus plus 39 * any devices. 40 * 41 * Return how far we've got finding sub-buses. 42 */ 43 dev_dbg(&bus->dev, "bus scan returning with max=%02x\n", max); 44 return max; 45 }
這里做的工作也不難理解,先注意有個max變量,初始值是當前總線的總線號,表示目前已經探測到的最大總線號,后續會用到。
一條總線上有32個插槽,而每一個插槽都可以包含八個功能即邏輯設備,所以這里以8遞進。在循環中每次調用一下pci_scan_slot函數探測下具體的插槽。
1 int pci_scan_slot(struct pci_bus *bus, int devfn) 2 { 3 unsigned fn, nr = 0; 4 struct pci_dev *dev; 5 6 if (only_one_child(bus) && (devfn > 0)) 7 return 0; /* Already scanned the entire slot */ 8 /*遍歷了第一個功能號,即fn=0*/ 9 dev = pci_scan_single_device(bus, devfn); 10 if (!dev) 11 return 0; 12 if (!dev->is_added) 13 nr++; 14 /*fn=1開始,遍歷其他的功能*/ 15 for (fn = next_fn(bus, dev, 0); fn > 0; fn = next_fn(bus, dev, fn)) { 16 dev = pci_scan_single_device(bus, devfn + fn); 17 if (dev) { 18 /**/ 19 if (!dev->is_added) 20 nr++; 21 /*如果找到第二個設備就說明這是個多功能的設備*/ 22 dev->multifunction = 1; 23 } 24 } 25 26 /* only one slot has pcie device */ 27 if (bus->self && nr) 28 pcie_aspm_init_link_state(bus->self); 29 30 return nr; 31 }
最先開始仍然是判斷,如果only_one_child(bus)為真(即該總線下只可能有一個設備),且devfn>0,那么就表示在尋找一個不存在的設備,直接return 0,否則就調用pci_scan_single_device函數探測該插槽各個邏輯設備。pci_scan_single_device函數會先檢查下對應設備號的設備是否已經存在於總線的設備鏈表中,不存在才會往下調用pci_scan_device函數探測。
1 static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn) 2 { 3 struct pci_dev *dev; 4 u32 l; 5 /*獲取設備廠商*/ 6 if (!pci_bus_read_dev_vendor_id(bus, devfn, &l, 60*1000)) 7 return NULL; 8 /*分配一個dev結構*/ 9 dev = pci_alloc_dev(bus); 10 if (!dev) 11 return NULL; 12 13 dev->devfn = devfn; 14 dev->vendor = l & 0xffff; 15 dev->device = (l >> 16) & 0xffff; 16 17 pci_set_of_node(dev); 18 /*初始化設備*/ 19 if (pci_setup_device(dev)) { 20 pci_bus_put(dev->bus); 21 kfree(dev); 22 return NULL; 23 } 24 25 return dev; 26 }
這里就要做實質性的工作了,創建了一個設備結構並設置相關的信息如設備號,廠商等,然后調用pci_setup_device函數對設備進行全面的初始化,比較重要是地址空間的映射。這里先不說這些,后面再提。返回到pci_scan_single_device后,最后會調用pci_device_add函數把設備注冊進系統,主要還是在設備和總線之間建立聯系。回到pci_scan_child_bus函數中,經過這一步就把當前總線上的各個邏輯設備遍歷了一遍,也就是凡是存在的邏輯設備都有了對應的結構,且都存在於總線的設備鏈表中。然后開始逐個檢測這些設備,其目的在於尋找PCI-PCI 橋的存在也即1型設備。這里如果找到一個橋設備就會調用pci_scan_bridge函數遍歷橋設備:
1 int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, int pass) 2 { 3 struct pci_bus *child; 4 int is_cardbus = (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS); 5 u32 buses, i, j = 0; 6 u16 bctl; 7 u8 primary, secondary, subordinate; 8 int broken = 0; 9 /*這里是先讀設備配置空間的總線號*/ 10 pci_read_config_dword(dev, PCI_PRIMARY_BUS, &buses); 11 primary = buses & 0xFF;//父總線號 12 secondary = (buses >> 8) & 0xFF;//子總線號 13 subordinate = (buses >> 16) & 0xFF;//橋下最大的總線號 14 15 dev_dbg(&dev->dev, "scanning [bus %02x-%02x] behind bridge, pass %d\n", 16 secondary, subordinate, pass); 17 /*!primary為真兩種情況,1為空 2為0(代表根總線),加上后面的&&才表示為空*/ 18 if (!primary && (primary != bus->number) && secondary && subordinate) { 19 /*Primary bus硬件實現為0,當是root總線時,正好總線號也是0就不需要修改,而其他子總線就需要重新設置*/ 20 dev_warn(&dev->dev, "Primary bus is hard wired to 0\n"); 21 /*手動設置*/ 22 primary = bus->number; 23 } 24 25 /* Check if setup is sensible at all 監測配置是否合法*/ 26 if (!pass && 27 (primary != bus->number || secondary <= bus->number || 28 secondary > subordinate)) { 29 dev_info(&dev->dev, "bridge configuration invalid ([bus %02x-%02x]), reconfiguring\n", 30 secondary, subordinate); 31 broken = 1; 32 } 33 34 /* Disable MasterAbortMode during probing to avoid reporting 35 of bus errors (in some architectures) */ 36 pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &bctl); 37 pci_write_config_word(dev, PCI_BRIDGE_CONTROL, 38 bctl & ~PCI_BRIDGE_CTL_MASTER_ABORT); 39 40 if ((secondary || subordinate) && !pcibios_assign_all_busses() && 41 !is_cardbus && !broken) { 42 unsigned int cmax; 43 /* 44 * Bus already configured by firmware, process it in the first 45 * pass and just note the configuration. 46 */ 47 if (pass) 48 goto out; 49 50 /* 51 * If we already got to this bus through a different bridge, 52 * don't re-add it. This can happen with the i450NX chipset. 53 * 54 * However, we continue to descend down the hierarchy and 55 * scan remaining child buses. 
56 */ 57 /*得到子總線結構*/ 58 child = pci_find_bus(pci_domain_nr(bus), secondary); 59 if (!child) { 60 child = pci_add_new_bus(bus, dev, secondary); 61 if (!child) 62 goto out; 63 /*設置子總線的primary指針*/ 64 child->primary = primary; 65 /*給子總線也分配總線號資源*/ 66 pci_bus_insert_busn_res(child, secondary, subordinate); 67 child->bridge_ctl = bctl; 68 } 69 /*遞歸遍歷子總線*/ 70 cmax = pci_scan_child_bus(child); 71 if (cmax > max) 72 max = cmax; 73 if (child->busn_res.end > max) 74 max = child->busn_res.end; 75 } else { 76 /* 77 * We need to assign a number to this bus which we always 78 * do in the second pass. 79 */ 80 if (!pass) { 81 if (pcibios_assign_all_busses() || broken) 82 /* Temporarily disable forwarding of the 83 configuration cycles on all bridges in 84 this bus segment to avoid possible 85 conflicts in the second pass between two 86 bridges programmed with overlapping 87 bus ranges. */ 88 pci_write_config_dword(dev, PCI_PRIMARY_BUS, 89 buses & ~0xffffff); 90 goto out; 91 } 92 93 /* Clear errors */ 94 pci_write_config_word(dev, PCI_STATUS, 0xffff); 95 96 /* Prevent assigning a bus number that already exists. 97 * This can happen when a bridge is hot-plugged, so in 98 * this case we only re-scan this bus. */ 99 child = pci_find_bus(pci_domain_nr(bus), max+1); 100 if (!child) { 101 child = pci_add_new_bus(bus, dev, ++max); 102 if (!child) 103 goto out; 104 pci_bus_insert_busn_res(child, max, 0xff); 105 } 106 buses = (buses & 0xff000000) 107 | ((unsigned int)(child->primary) << 0) 108 | ((unsigned int)(child->busn_res.start) << 8) 109 | ((unsigned int)(child->busn_res.end) << 16); 110 111 /* 112 * yenta.c forces a secondary latency timer of 176. 113 * Copy that behaviour here. 114 */ 115 if (is_cardbus) { 116 buses &= ~0xff000000; 117 buses |= CARDBUS_LATENCY_TIMER << 24; 118 } 119 120 /* 121 * We need to blast all three values with a single write. 
122 */ 123 pci_write_config_dword(dev, PCI_PRIMARY_BUS, buses); 124 125 if (!is_cardbus) { 126 child->bridge_ctl = bctl; 127 /* 128 * Adjust subordinate busnr in parent buses. 129 * We do this before scanning for children because 130 * some devices may not be detected if the bios 131 * was lazy. 132 */ 133 /*修正父總線的總線號資源范圍*/ 134 pci_fixup_parent_subordinate_busnr(child, max); 135 /* Now we can scan all subordinate buses... */ 136 max = pci_scan_child_bus(child); 137 /* 138 * now fix it up again since we have found 139 * the real value of max. 140 */ 141 pci_fixup_parent_subordinate_busnr(child, max); 142 } else { 143 /* 144 * For CardBus bridges, we leave 4 bus numbers 145 * as cards with a PCI-to-PCI bridge can be 146 * inserted later. 147 */ 148 for (i=0; i<CARDBUS_RESERVE_BUSNR; i++) { 149 struct pci_bus *parent = bus; 150 if (pci_find_bus(pci_domain_nr(bus), 151 max+i+1)) 152 break; 153 while (parent->parent) { 154 if ((!pcibios_assign_all_busses()) && 155 (parent->busn_res.end > max) && 156 (parent->busn_res.end <= max+i)) { 157 j = 1; 158 } 159 parent = parent->parent; 160 } 161 if (j) { 162 /* 163 * Often, there are two cardbus bridges 164 * -- try to leave one valid bus number 165 * for each one. 166 */ 167 i /= 2; 168 break; 169 } 170 } 171 max += i; 172 pci_fixup_parent_subordinate_busnr(child, max); 173 } 174 /* 175 * Set the subordinate bus number to its real value. 176 */ 177 pci_bus_update_busn_res_end(child, max); 178 pci_write_config_byte(dev, PCI_SUBORDINATE_BUS, max); 179 } 180 181 sprintf(child->name, 182 (is_cardbus ? 
"PCI CardBus %04x:%02x" : "PCI Bus %04x:%02x"), 183 pci_domain_nr(bus), child->number); 184 185 /* Has only triggered on CardBus, fixup is in yenta_socket */ 186 while (bus->parent) { 187 if ((child->busn_res.end > bus->busn_res.end) || 188 (child->number > bus->busn_res.end) || 189 (child->number < bus->number) || 190 (child->busn_res.end < bus->number)) { 191 dev_info(&child->dev, "%pR %s " 192 "hidden behind%s bridge %s %pR\n", 193 &child->busn_res, 194 (bus->number > child->busn_res.end && 195 bus->busn_res.end < child->number) ? 196 "wholly" : "partially", 197 bus->self->transparent ? " transparent" : "", 198 dev_name(&bus->dev), 199 &bus->busn_res); 200 } 201 bus = bus->parent; 202 } 203 204 out: 205 pci_write_config_word(dev, PCI_BRIDGE_CONTROL, bctl); 206 207 return max; 208 }
該函數通過遞歸的方式完成了所有總線以及設備的遍歷。每一遞歸都執行兩次該函數,第一次探測是否被BIOS處理,第二次才做真正的探測工作。
首先是先讀取橋設備的配置空間,獲得橋設備的primary bus,secondary bus,subordinate bus號,然后進行判斷,如果secondary bus和subordinate bus均不為0則說明配置有效,因為初始primary bus號被硬件初始化為0,所以這里如果傳遞進來的bus number不是0,就需要重新設置。
然后檢查這些號碼是否合法。合法情況下就在首次執行pci_scan_bridge函數的時候進行子總線的遍歷。可以看到這里同樣先是調用pci_find_bus函數查找下secondary號總線是否已經存在,不存在才調用pci_add_new_bus函數new一個新的bus結構,同時在該函數中也對總線的部分變量做了初始化。接着設置總線的primary字段。隨后需要給總線分配總線號資源了。根據已有的配置,這里secondary是子總線的號,而subordinate就是總線下最大的總線號,所以這正是總線的總線號區間。然后繼續調用pci_scan_child_bus函數繼續遍歷當前子總線。就這么一層一層地遞歸下去,直到最后沒有橋了,就從pci_scan_child_bus函數返回目前探測到的最大總線號即max。而如果配置空間沒有被配置,那么就需要重新配置,這里首次執行pci_scan_bridge函數就只是把配置空間總線號區域清零。到了第二次,大體上跟前面類似,不過這里因為沒有secondary 號,所以只能按照max+1來尋找或者創建子總線結構,同時把子總線的總線號區間上限設置成0xff即255最大值。然后寫入到橋配置空間中。這個時候已經探測了一個新的總線,那么需要對父總線的總線號區間進行更新,然后執行pci_scan_child_bus函數探測當前子總線下的其他總線,在遞歸返回的時候,需要再次執行更新。並且需要把總線的總線號資源設置成正確的區間。因為開始分配的時候設置默認總線號區間最大為255.
整個遞歸流程完畢,就知道了一共存在多少總線,且總線上的設備都已經正確配置並都已經加入到了設備鏈表中。
總結:
本次分析可謂是困難重重,對於很多大牛來說,這或許根本不是事,但是筆者平時的研究沒有涉及到PCI設備這一層面,僅僅是為了分析qemu中的virtIO才着手分析PCI設備。其中可能不乏錯誤之處,還望老師們看到多多指正。筆者也正是發現只記錄不分享,久而久之就越發懶散,好的東西信手拈來雖然容易,但是后續基本不會再看。而寫下來給別人分享就不同了,因為擔心寫錯,好多模糊的地方自己需要再三斟酌,同時也是對自己基礎的強化,利人利己!