mm_init中執行mem_init,將原通過bootmem分配器管理的低端內存 及 通過meminfo得知的高端內存釋放到伙伴系統中,最后bootmem位圖本身占用的低端內存物理頁也被釋放進伙伴系統,當然對於內核、初始頁表、pkmap頁表、struct page實例、ramdisk、percpu變量、dentry_hashtable、inode_hashtable已經被占用的區域不會被釋放(對於內核開始的一段,后面會釋放).
start_kernel() |---->page_address_init() | 考慮支持高端內存 | 業務:初始化page_address_pool鏈表; | 將page_address_maps數組元素按索 | 引降序插入page_address_pool鏈表; | 初始化page_address_htable數組 | |---->setup_arch(&command_line); | |---->setup_per_cpu_areas(); | 為per-CPU變量分配空間 | |---->build_all_zonelists() | 為系統中的zone建立后備zone的列表. | 2.6.34中的建立過程與《深入Linux內核架構》中 | p_134~p_135的圖不符(即使是UMA也不同), | 書中講述是每個zone都有自己的zonelist, | 2.6.34中對於UMA,所有zone的后備列表都在 | pglist_data->node_zonelists[0]中; | | 期間也對per-CPU變量boot_pageset做了初始化. | |---->page_alloc_init() |---->hotcpu_notifier(page_alloc_cpu_notifier, 0); | 不考慮熱插拔CPU | |---->pidhash_init() | 詳見下文. | 根據低端內存頁數和散列度,分配hash空間,並賦予pid_hash | |---->vfs_caches_init_early() |---->dcache_init_early() | dentry_hashtable空間,d_hash_shift, d_hash_mask賦值; | 同pidhash_init(); | 區別: | 散列度變化了(13 - PAGE_SHIFT); | 傳入alloc_large_system_hash的最后參數值為0; | |---->inode_init_early() | inode_hashtable空間,i_hash_shift, i_hash_mask賦值; | 同pidhash_init(); | 區別: | 散列度變化了(14 - PAGE_SHIFT); | 傳入alloc_large_system_hash的最后參數值為0; | |---->mm_init() |
void mm_init(void) |---->mem_init() | 業務:bootmem遷移至伙伴系統 | |---->
void mem_init(void) |-->max_mapnr = pfn_to_page(max_pfn + PHYS_PFN_OFFSET) - mem_map; | max_pfn是物理內存的最大頁數量,PHYS_PFN_OFFSET是物理內存的起始 | 地址在4G空間中的頁幀號; | pfn_to_page(max_pfn + PHYS_PFN_OFFSET)是物理內存終結地址所在的頁 | 所對應的struct page實例虛擬地址,減去mem_map(struct page起始虛 | 擬地址),故max_mapnr是struct page實例的數量 | |-->free_unused_memmap_node(0, &meminfo) | 對於連續內存,bank之間沒有間隙,因此free_unused_memmap_node中的free_memmap不會執行. | |-->totalram_pages += free_all_bootmem_node(pgdat); | |--->return free_all_bootmem_core(pgdat->bdata); | 1、將低端內存中未被使用的頁釋放到伙伴系統中; | 2、bootmem位圖分配器占用的頁也釋放到了伙伴系統中; | |-->for_each_nodebank(i, &meminfo, node = 0) |--{ | unsigned long start = bank_pfn_start(&meminfo.bank[i]); | unsigned long end = bank_pfn_end(&meminfo.bank[i]); | | 即:只對於高端內存使用free_area(start, end, NULL) | if(start >= max_low_pfn + PHYS_PFN_OFFSET) | totalhigh_pages += free_area(start, end, NULL); |--} | |--totalram_pages += totalhigh_pages; | | |--for_each_nodebank(i, &meminfo, node) |--{ //統計已被分配的頁數(物理頁已被使用),並存入reserved_pages; //統計未被分配的頁數(物理頁未被使用),並存入free_pages; | ……………… |--} | |--num_physpages = meminfo中的各個membank下的總管理區內存大小. | |-->printk: nr_free_pages() << (PAGE_SHIFT - 10) | 關於nr_free_pages()中涉及的值,實際上是在free_one_page函數 | 中完成的--->__mod_zone_page_state(zone, NR_FREE_PAGES, 1<< order), | 其改變了zone->vm_stat[NR_FREE_PAGES]的值.
void free_unused_memmap_node(int node, struct meminfo *mi)
|-->unsigned long bank_start, prev_bank_end = 0; | unsigned int i = 0; | |-->for_each_nodebank(i, mi, node) | 遍歷屬於該node的meminfo下的所有membank; | 對於UMA,membank分為低端內存和高端內存兩個bank | | struct membank *bank = &mi->bank[i]; | bank_start = bank_pfn_start(bank); | | if(prev_bank_end && prev_bank_end != bank_start) | free_memmap(node, prev_bank_end, bank_start) | 對於連續內存,bank之間沒有間隙,因此free_memmap不會執行. | | prev_bank_end = bank_pfn_end(bank); |-- |
void free_memmap(int node, unsigned long start_pfn,
unsigned long end_pfn) |-->struct page *start_pg = NULL, *end_pg = NULL; | unsigned long pg = 0, pgend = 0; | |-->start_pg = pfn_to_page(start_pfn - 1) + 1; | 該頁幀號所對應的struct page實例的虛擬地址 | end_pg = pfn_to_page(end_pfn); | 該頁幀號所對應的struct page實例的虛擬地址 | |-->pg = PAGE_ALIGN(__pa(start_pg)); | 將start_pg(虛擬地址)轉換為物理地址並向上頁對齊,即start_pfn頁幀號所對應的struct page實例 | 的物理地址. | pgend = __pa(end_pg) & PAGE_MASK; | 將end_pg(虛擬地址)轉換為物理地址並向下頁對齊,即end_pfn頁幀號所對應的struct page實例 | 的物理地址. | |-->free_bootmem_node(&contig_page_data, pg, pgend - pg); | 將bootmem分配器中[pg,pgend)所對應的頁的bit標志位清0.
|
int free_area(unsigned long pfn, unsigned long end, char *s) |-->unsigned int pages = 0, size = (end - pfn) << (PAGE_SHIFT - 10); | |--for(; pfn < end; pfn++) |--{ | struct page *page = pfn_to_page(pfn); | ClearPageReserved(page); | init_page_count(page); | | __free_page(page); | |--->__free_pages(page, 0);
| 詳見下文 | | pages++; |--} |
unsigned long free_all_bootmem_core(bootmem_data_t *bdata) |-->unsigned long start = bdata->node_min_pfn; | 存放低端內存的起始物理頁號. | unsigned long end = bdata->node_low_pfn; | 存放低端內存的結束物理頁號. | |-->while(start < end) |--{ | unsigned long *map = bdata->node_bootmem_map; | idx = start - bdata->node_min_pfn; | 獲取物理內存頁幀相對於起始物理內存頁幀號的偏移(從0記). | vec = ~map[idx/BITS_PER_LONG]; | 取構成一個字的位圖的反碼. | | if(vec == ~0UL && start + BITS_PER_LONG < end) | 如果一個字內的位圖全為0,即一個字內的頁都可釋放 | {int order = ilog2(BITS_PER_LONG); | __free_pages_bootmem(pfn_to_page(start), order); | count += BITS_PER_LONG;} | | else //該字內的位圖不全為0 | {遍歷字內的每一bit位,該bit位在字內偏移量為off. | 若vec(位圖取反后)中該bit位值為1,即位圖中該位為0、該頁未被占用,則 : | page = pfn_to_page(start + off); | __free_pages_bootmem(page, 0); | count++;} | | start += BITS_PER_LONG; |--} | |-->page = virt_to_page(bdata->node_bootmem_map); | 獲取位圖占用的頁的相應的struct page 實例的起始虛擬地址. | | pages= bdata->node_low_pfn - bdata->node_min_pfn; | pages = bootmem_bootmap_pages(pages); | 獲取位圖所占用的頁數 | | count += pages; | 更新釋放的總頁面數 |
|-->while(pages--) | __free_pages_bootmem(page++, 0); | 將bootmem位圖分配器所占用的頁釋放到buddy system | |-->return count; | 返回釋放給buddy system總的頁面數
void __free_pages_bootmem(struct page *page, unsigned int order) |-->if(order == 0) |--{ | __ClearPageReserved(page); | 將page->flags的PG_reserved清0. | set_page_count(page, 0); | 將page->_count清0. | set_page_refcounted(page); | 將page->_count置1. | __free_page(page);
| |-->__free_pages(page, 0) |--} | |--else |--{ | int loop = 0; | for(loop = 0; loop < BITS_PER_LONG; loop++) | { struct page *p = &page[loop]; | __ClearPageReserved(p); | 將p->flags的PG_reserved清0. | set_page_count(p, 0); | 將p->_count清0.} | | set_page_refcounted(page); | //注意此處在循環外只將一個字內的第一個struct page的_count置為1. | __free_pages(page, order); | |--}
void __free_pages(struct page* page, unsigned int order) |-->if(put_page_testzero(page)) |--{ | //put_page_testzero(page)的意圖在於將page->_count值減去1,並 | //檢測page->_count的值是否為0,若為0,則執行該塊語句. | if(order == 0) | free_hot_cold_page(page, 0); | else | __free_pages_ok(page, order); |--}
//我們此處只看系統初始化時的情形 /* * Free a 0-order page * cold == 1 ? free a cold page : free a hot page */ void free_hot_cold_page(struct page *page, int cold) |-->struct zone *zone = page_zone(page) | 通過page->flags獲取該page所屬的zone. | |-->int migratetype = get_pageblock_migratetype(page) | 根據page所屬的pageblock獲取遷移類型, MIGRATE_MOVABLE | |-->set_page_private(page, migratetype); | 初始化時,page設置為MIGRATE_MOVABLE | |-->struct per_cpu_pages *pcp = NULL; | pcp = &this_cpu_ptr(zone->pageset)->pcp; | |-->if(cold) | list_add_tail(&page->lru, &pcp->lists[migratetype]); | else | list_add(&page->lru, &pcp->lists[migratetype]); | |-->pcp->count++; | | 初始化時pcp->count = 0 --> 1; pcp->high = 0; pcp->batch = 1; |-->if(pcp->count >= pcp->high) | { free_pcppages_bulk(zone, pcp->batch, pcp); | pcp->count -= pcp->batch; } | |
void free_pcppages_bulk(struct zone *zone, int count, struct per_cpu_pages *pcp) |-->我們此處回避一些問題,因為本記錄以初始化為主,所以,我只記下該函數在初始化 | 時的業務. | list_del(&page->lru); 從pcp->lists[MIGRATE_MOVABLE]鏈表上取下. | __free_one_page(page, zone, 0, page_private(page));
void __free_pages_ok(page, order) |-->free_one_page(page_zone(page), page, order,
| get_pageblock_migratetype(page)); |-->__free_one_page(page, zone, order, migratetype);
void __free_one_page(struct page* page, struct zone *zone, unsigned int order, int migratetype) |-->unsigned int page_idx = page_to_pfn(page)
| & ((1 << MAX_ORDER) - 1); | |--while(order < MAX_ORDER - 1) |--{ | unsigned long combined_idx; | struct page *buddy; | | buddy = __page_find_buddy(page, page_idx, order); | 找出可與page_idx構成的伙伴. | | 測試page_idx的伙伴頁buddy,是否在伙伴系統中(且處於同一order) | if(!page_is_buddy(page, buddy, order)) | break; | | 如果在伙伴系統內,則執行伙伴合並,有可能連鎖合並,因此用了while循環 | list_del(&buddy->lru); | | zone->free_area[order].nr_free--; | nr_free的意義:處於同一個order下,有nr_free * (2**order)個頁 | | rmv_page_order(buddy); | | combined_idx = __find_combined_index(page_idx, order); | 因為可能發生連鎖合並,所以計算了combined_idx. | page = page + (combined_idx - page_idx); | page_idx = combined_idx; | order++; | 合並后的塊以combined_idx為起始,order加1,繼續嘗試與更高一階的伙伴合並. |--} | | 對於初始化階段,均加入free_list[MIGRATE_MOVABLE]; |-->set_page_order(page, order); | list_add(&page->lru,
| &zone->free_area[order].free_list[migratetype]); | zone->free_area[order].nr_free++;