[root@localhost dpdk-19.11]# cat /sys/devices/system/node/node*/hugepages/hugepages-2048kB/nr_hugepages 0 0 0 0 [root@localhost dpdk-19.11]# cat /sys/devices/system/node/node*/hugepages/hugepages-524288kB/nr_hugepages 64 64 64 64 [root@localhost dpdk-19.11]#
Breakpoint 1, main (argc=4, argv=0xfffffffff518) at /data1/dpdk-19.11/demo/memzone/main.c:45 45 mz = rte_memzone_reserve("memzone", sizeof(int)*2, (gdb) s rte_memzone_reserve (name=0xba6358 "memzone", len=8, socket_id=-1, flags=0) at /data1/dpdk-19.11/lib/librte_eal/common/eal_common_memzone.c:240 240 return rte_memzone_reserve_thread_safe(name, len, socket_id, (gdb) s rte_memzone_reserve_thread_safe (name=0xba6358 "memzone", len=8, socket_id=-1, flags=0, align=128, bound=0) at /data1/dpdk-19.11/lib/librte_eal/common/eal_common_memzone.c:192 192 const struct rte_memzone *mz = NULL; (gdb) list 187 static const struct rte_memzone * 188 rte_memzone_reserve_thread_safe(const char *name, size_t len, int socket_id, 189 unsigned int flags, unsigned int align, unsigned int bound) 190 { 191 struct rte_mem_config *mcfg; 192 const struct rte_memzone *mz = NULL; 193 194 /* get pointer to global configuration */ 195 mcfg = rte_eal_get_configuration()->mem_config; 196 (gdb) n 195 mcfg = rte_eal_get_configuration()->mem_config; (gdb) p *mcfg Cannot access memory at address 0xfffffffff380 (gdb) n 197 rte_rwlock_write_lock(&mcfg->mlock); (gdb) p *mcfg $1 = {magic = 19820526, version = 319488099, nchannel = 0, nrank = 0, mlock = {cnt = 0}, qlock = {cnt = 0}, mplock = {cnt = 0}, tlock = {locked = 0}, memory_hotplug_lock = {cnt = 0}, memzones = { name = "memzone", '\000' <repeats 56 times>, count = 143, len = 2560, elt_sz = 72, data = 0x100010000, rwlock = {cnt = 0}}, memsegs = {{{base_va = 0x120000000, addr_64 = 4831838208}, page_sz = 536870912, socket_id = 0, version = 1, len = 34359738368, external = 0, heap = 1, memseg_arr = { name = "memseg-524288k-0-0", '\000' <repeats 45 times>, count = 1, len = 64, elt_sz = 48, data = 0x100040000, rwlock = {cnt = 0}}}, {{base_va = 0x940000000, addr_64 = 39728447488}, page_sz = 536870912, socket_id = 0, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = { name = "memseg-524288k-0-1", '\000' <repeats 45 times>, count = 0, len = 
64, elt_sz = 48, data = 0x920000000, rwlock = {cnt = 0}}}, {{base_va = 0x1160000000, addr_64 = 74625056768}, page_sz = 536870912, socket_id = 0, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = { name = "memseg-524288k-0-2", '\000' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, data = 0x1140000000, rwlock = {cnt = 0}}}, {{base_va = 0x1980000000, addr_64 = 109521666048}, page_sz = 536870912, socket_id = 0, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = { name = "memseg-524288k-0-3", '\000' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, data = 0x1960000000, rwlock = {cnt = 0}}}, {{base_va = 0x21a0000000, addr_64 = 144418275328}, page_sz = 536870912, socket_id = 1, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = { name = "memseg-524288k-1-0", '\000' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, data = 0x2180000000, rwlock = {cnt = 0}}}, {{base_va = 0x29c0000000, addr_64 = 179314884608}, page_sz = 536870912, socket_id = 1, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = { name = "memseg-524288k-1-1", '\000' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, data = 0x29a0000000, rwlock = {cnt = 0}}}, {{base_va = 0x31e0000000, addr_64 = 214211493888}, page_sz = 536870912, socket_id = 1, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = { name = "memseg-524288k-1-2", '\000' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, data = 0x31c0000000, rwlock = {cnt = 0}}}, {{base_va = 0x3a00000000, addr_64 = 249108103168}, page_sz = 536870912, socket_id = 1, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = { name = "memseg-524288k-1-3", '\000' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, data = 0x39e0000000, rwlock = {cnt = 0}}}, {{base_va = 0x4220000000, addr_64 = 284004712448}, page_sz = 536870912, socket_id = 2, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = { name = "memseg-524288k-2-0", '\000' 
<repeats 45 times>, count = 0, len = 64, elt_sz = 48, data = 0x4200000000, rwlock = {cnt = 0}}}, {{base_va = 0x4a40000000, addr_64 = 318901321728}, ---Type <return> to continue, or q <return> to quit--- page_sz = 536870912, socket_id = 2, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = { name = "memseg-524288k-2-1", '\000' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, data = 0x4a20000000, rwlock = {cnt = 0}}}, {{base_va = 0x5260000000, addr_64 = 353797931008}, page_sz = 536870912, socket_id = 2, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = { name = "memseg-524288k-2-2", '\000' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, data = 0x5240000000, rwlock = {cnt = 0}}}, {{base_va = 0x5a80000000, addr_64 = 388694540288}, page_sz = 536870912, socket_id = 2, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = { name = "memseg-524288k-2-3", '\000' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, data = 0x5a60000000, rwlock = {cnt = 0}}}, {{base_va = 0x62a0000000, addr_64 = 423591149568}, page_sz = 536870912, socket_id = 3, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = { name = "memseg-524288k-3-0", '\000' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, data = 0x6280000000, rwlock = {cnt = 0}}}, {{base_va = 0x6ac0000000, addr_64 = 458487758848}, page_sz = 536870912, socket_id = 3, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = { name = "memseg-524288k-3-1", '\000' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, data = 0x6aa0000000, rwlock = {cnt = 0}}}, {{base_va = 0x72e0000000, addr_64 = 493384368128}, page_sz = 536870912, socket_id = 3, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = { name = "memseg-524288k-3-2", '\000' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, data = 0x72c0000000, rwlock = {cnt = 0}}}, {{base_va = 0x7b00000000, addr_64 = 528280977408}, page_sz = 536870912, socket_id = 3, version = 0, 
len = 34359738368, external = 0, heap = 1, memseg_arr = { name = "memseg-524288k-3-3", '\000' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, data = 0x7ae0000000, rwlock = {cnt = 0}}}, {{base_va = 0x0, addr_64 = 0}, page_sz = 0, socket_id = 0, version = 0, len = 0, external = 0, heap = 0, memseg_arr = {name = '\000' <repeats 63 times>, count = 0, len = 0, elt_sz = 0, data = 0x0, rwlock = {cnt = 0}}} <repeats 48 times>}, tailq_head = {{tailq_head = { tqh_first = 0x0, tqh_last = 0x100002288}, name = "RTE_LPM", '\000' <repeats 24 times>}, {tailq_head = { tqh_first = 0x0, tqh_last = 0x1000022b8}, name = "RTE_LPM6", '\000' <repeats 23 times>}, {tailq_head = { tqh_first = 0x0, tqh_last = 0x1000022e8}, name = "RTE_ACL", '\000' <repeats 24 times>}, {tailq_head = { tqh_first = 0x13ff79c00, tqh_last = 0x13ff79c00}, name = "RTE_HASH", '\000' <repeats 23 times>}, { tailq_head = {tqh_first = 0x0, tqh_last = 0x100002348}, name = "RTE_FBK_HASH", '\000' <repeats 19 times>}, {tailq_head = {tqh_first = 0x0, tqh_last = 0x100002378}, name = "RTE_MEMBER", '\000' <repeats 21 times>}, { tailq_head = {tqh_first = 0x0, tqh_last = 0x1000023a8}, name = "RTE_MBUF_DYNFIELD", '\000' <repeats 14 times>}, {tailq_head = {tqh_first = 0x0, ---Type <return> to continue, or q <return> to quit--- tqh_last = 0x1000023d8}, name = "RTE_MBUF_DYNFLAG", '\000' <repeats 15 times>}, {tailq_head = { tqh_first = 0x0, tqh_last = 0x100002408}, name = "RTE_EVENT_RING", '\000' <repeats 17 times>}, { tailq_head = {tqh_first = 0x13febd800, tqh_last = 0x13febd800}, name = "RTE_MEMPOOL", '\000' <repeats 20 times>}, {tailq_head = {tqh_first = 0x0, tqh_last = 0x100002468}, name = "RTE_STACK", '\000' <repeats 22 times>}, {tailq_head = {tqh_first = 0x13ff9a080, tqh_last = 0x13febd500}, name = "RTE_RING", '\000' <repeats 23 times>}, {tailq_head = {tqh_first = 0x0, tqh_last = 0x1000024c8}, name = "RTE_REORDER", '\000' <repeats 20 times>}, {tailq_head = { tqh_first = 0x0, tqh_last = 0x1000024f8}, name = "RTE_KNI", '\000' 
<repeats 24 times>}, {tailq_head = { tqh_first = 0x13ffd4d80, tqh_last = 0x13ffd4d80}, name = "VFIO_RESOURCE_LIST", '\000' <repeats 13 times>}, {tailq_head = {tqh_first = 0x0, tqh_last = 0x100002558}, name = "UIO_RESOURCE_LIST", '\000' <repeats 14 times>}, {tailq_head = { tqh_first = 0x0, tqh_last = 0x100002588}, name = "VMBUS_RESOURCE_LIST", '\000' <repeats 12 times>}, { tailq_head = {tqh_first = 0x0, tqh_last = 0x0}, name = '\000' <repeats 31 times>} <repeats 15 times>}, malloc_heaps = {{lock = {locked = 0}, free_head = {{lh_first = 0x0}, {lh_first = 0x0}, {lh_first = 0x0}, { lh_first = 0x0}, {lh_first = 0x0}, {lh_first = 0x13fe81000}, {lh_first = 0x0}, {lh_first = 0x0}, { lh_first = 0x0}, {lh_first = 0x0}, {lh_first = 0x0}, {lh_first = 0x120000000}, {lh_first = 0x0}}, first = 0x120000000, last = 0x13fffdf80, alloc_count = 259, socket_id = 0, total_size = 536870912, name = "socket_0", '\000' <repeats 23 times>}, {lock = {locked = 0}, free_head = {{ lh_first = 0x0} <repeats 13 times>}, first = 0x0, last = 0x0, alloc_count = 0, socket_id = 1, total_size = 0, name = "socket_1", '\000' <repeats 23 times>}, {lock = {locked = 0}, free_head = {{ lh_first = 0x0} <repeats 13 times>}, first = 0x0, last = 0x0, alloc_count = 0, socket_id = 2, total_size = 0, name = "socket_2", '\000' <repeats 23 times>}, {lock = {locked = 0}, free_head = {{ lh_first = 0x0} <repeats 13 times>}, first = 0x0, last = 0x0, alloc_count = 0, socket_id = 3, total_size = 0, name = "socket_3", '\000' <repeats 23 times>}, {lock = {locked = 0}, free_head = {{ lh_first = 0x0} <repeats 13 times>}, first = 0x0, last = 0x0, alloc_count = 0, socket_id = 0, total_size = 0, name = '\000' <repeats 31 times>} <repeats 28 times>}, next_socket_id = 256, mem_cfg_addr = 4294967296, legacy_mem = 0, single_file_segments = 0, tsc_hz = 100000000, dma_maskbits = 0 '\000'} (gdb)
(gdb) n 199 mz = memzone_reserve_aligned_thread_unsafe( (gdb) s memzone_reserve_aligned_thread_unsafe (name=0xba6358 "memzone", len=8, socket_id=-1, flags=0, align=128, bound=0) at /data1/dpdk-19.11/lib/librte_eal/common/eal_common_memzone.c:69 69 mcfg = rte_eal_get_configuration()->mem_config; (gdb) n 70 arr = &mcfg->memzones; (gdb) n 73 if (arr->count >= arr->len) { (gdb) p *arr $2 = {name = "memzone", '\000' <repeats 56 times>, count = 143, len = 2560, elt_sz = 72, data = 0x100010000, rwlock = {cnt = 0}} (gdb) n 79 if (strlen(name) > sizeof(mz->name) - 1) { (gdb) n 87 if ((memzone_lookup_thread_unsafe(name)) != NULL) { (gdb) n 95 if (align && !rte_is_power_of_2(align)) { (gdb) n 103 if (align < RTE_CACHE_LINE_SIZE) (gdb) n 107 if (len > SIZE_MAX - RTE_CACHE_LINE_MASK) { (gdb) n 112 len = RTE_ALIGN_CEIL(len, RTE_CACHE_LINE_SIZE); (gdb) n 115 requested_len = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE, len); (gdb) n 118 if (bound != 0 && (requested_len > bound || !rte_is_power_of_2(bound))) { (gdb) n 123 if ((socket_id != SOCKET_ID_ANY) && socket_id < 0) { (gdb) n 131 if (!rte_eal_has_hugepages() && socket_id < RTE_MAX_NUMA_NODES) (gdb) n 134 contig = (flags & RTE_MEMZONE_IOVA_CONTIG) != 0; (gdb) n 136 flags &= ~RTE_MEMZONE_IOVA_CONTIG; (gdb) n 138 if (len == 0 && bound == 0) { (gdb) n 144 if (len == 0) (gdb) n 147 mz_addr = malloc_heap_alloc(NULL, requested_len, socket_id, ---------------------mz_addr从heap分配 (gdb) n 150 if (mz_addr == NULL) { (gdb) n 155 struct malloc_elem *elem = malloc_elem_from_data(mz_addr); (gdb) n 158 mz_idx = rte_fbarray_find_next_free(arr, 0); (gdb) n 160 if (mz_idx < 0) { (gdb) n 163 rte_fbarray_set_used(arr, mz_idx); (gdb) n 164 mz = rte_fbarray_get(arr, mz_idx); (gdb) p *mz $3 = {name = "\356o.\001c\000\v\023\000\000\000\000\000\000\000\000\377\377\377\377", '\000' <repeats 11 times>, {phys_addr = 0, iova = 0}, {addr = 0x656e6f7a6d656d, addr_64 = 28550397722191213}, len = 0, hugepage_sz = 0, socket_id = 0, flags = 0} (gdb) n 167 if (mz == 
NULL) { (gdb) n 174 strlcpy(mz->name, name, sizeof(mz->name)); (gdb) n 175 mz->iova = rte_malloc_virt2iova(mz_addr); (gdb) n 176 mz->addr = mz_addr; (gdb) n 178 elem->size - elem->pad - MALLOC_ELEM_OVERHEAD : (gdb) n 177 mz->len = requested_len == 0 ? (gdb) n 180 mz->hugepage_sz = elem->msl->page_sz; (gdb) n 181 mz->socket_id = elem->msl->socket_id; (gdb) p *elem $4 = {heap = 0x100002900, prev = 0x13fe81000, next = 0x13febc800, free_list = {le_next = 0x0, le_prev = 0x0}, msl = 0x100000088, state = ELEM_BUSY, pad = 0, size = 256, orig_elem = 0x120000000, orig_size = 536870912} (gdb) n 182 mz->flags = 0; (gdb) n 184 return mz; (gdb) p *mz $5 = {name = "memzone", '\000' <repeats 24 times>, {phys_addr = 261454808960, iova = 261454808960}, { addr = 0x13febc780, addr_64 = 5367383936}, len = 128, hugepage_sz = 536870912, socket_id = 0, flags = 0} (gdb) c
static const struct rte_memzone * memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, int socket_id, unsigned flags, unsigned align, unsigned bound) { struct rte_memzone *mz; struct rte_mem_config *mcfg; size_t requested_len; int socket, i; /* 获取全局变量rte_mem_config结构的指针 */ mcfg = rte_eal_get_configuration()->mem_config; /* no more room in config */ /*如果分配的memzone数量已经超过了最大值,则返错(数组大小是有限的)*/ if (mcfg->memzone_cnt >= RTE_MAX_MEMZONE) { RTE_LOG(ERR, EAL, "%s(): No more room in config\n", __func__); rte_errno = ENOSPC; return NULL; } /*检查memzone的名字长度是否超过了限制*/ if (strlen(name) > sizeof(mz->name) - 1) { RTE_LOG(DEBUG, EAL, "%s(): memzone <%s>: name too long\n", __func__, name); rte_errno = ENAMETOOLONG; return NULL; } /* 在mcfg->memzone[]中查找是否已有同名的memzone,如果有表示已存在,返回创建出错*/ if ((memzone_lookup_thread_unsafe(name)) != NULL) { RTE_LOG(DEBUG, EAL, "%s(): memzone <%s> already exists\n", __func__, name); rte_errno = EEXIST; return NULL; } /* 检查对齐内存大小是否是2的幂大小 */ if (align && !rte_is_power_of_2(align)) { RTE_LOG(ERR, EAL, "%s(): Invalid alignment: %u\n", __func__, align); rte_errno = EINVAL; return NULL; } /* alignment less than cache size is not allowed */ if (align < RTE_CACHE_LINE_SIZE)/*对齐大小不能小于cache_line大小*/ align = RTE_CACHE_LINE_SIZE; /* align length on cache boundary. 
Check for overflow before doing so */ if (len > SIZE_MAX - RTE_CACHE_LINE_MASK) { rte_errno = EINVAL; /* requested size too big */ return NULL; } len += RTE_CACHE_LINE_MASK; len &= ~((size_t) RTE_CACHE_LINE_MASK); /*申请内存大小进行内存对齐计算*/ /* save minimal requested length */ /*当申请的内存大小小于RTE_CACHE_LINE_SIZE时,则至少要分配RTE_CACHE_LINE_SIZE大小的内存*/ requested_len = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE, len); /* check that boundary condition is valid */ if (bound != 0 && (requested_len > bound || !rte_is_power_of_2(bound))) { rte_errno = EINVAL; return NULL; } /*检查socket_id的合法性*/ if ((socket_id != SOCKET_ID_ANY) && (socket_id >= RTE_MAX_NUMA_NODES)) { rte_errno = EINVAL; return NULL; } /*如果不使用hugepage,memzone的内存分配就不会考虑socke_id,而直接设置为SOCKET_ID_ANY*/ if (!rte_eal_has_hugepages()) socket_id = SOCKET_ID_ANY; if (len == 0) { /*申请内存大小等于0的情况,则申请申请最大的连续内存空间*/ if (bound != 0) requested_len = bound; else { requested_len = find_heap_max_free_elem(&socket_id, align); if (requested_len == 0) { rte_errno = ENOMEM; return NULL; } } } /*如果socket_id为SOCKET_ID_ANY,则先在当前cpu所在的socket上分配内存*/ if (socket_id == SOCKET_ID_ANY) socket = malloc_get_numa_socket(); else socket = socket_id; /* 尝试在当前socket对应的malloc_heap上分配内存 */ void *mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[socket], NULL, requested_len, flags, align, bound); /*如果socket_id为SOCKET_ID_ANY,且在当前socket上分配失败,就尝试在其他cpu分配*/ if ((mz_addr == NULL) && (socket_id == SOCKET_ID_ANY)) { /* try other heaps */ for (i = 0; i < RTE_MAX_NUMA_NODES; i++) { if (socket == i) continue; mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[i], NULL, requested_len, flags, align, bound); if (mz_addr != NULL) break; } } if (mz_addr == NULL) { rte_errno = ENOMEM; return NULL; } /*获取对应内存的malloc_elem结构*/ const struct malloc_elem *elem = malloc_elem_from_data(mz_addr); /* 从mcfg->memzone[]中找到一个还为使用的memzone结构 */ mz = get_next_free_memzone(); if (mz == NULL) { RTE_LOG(ERR, EAL, "%s(): Cannot find free memzone but there is room " "in config!\n", __func__); rte_errno = ENOSPC; return 
NULL; } /*增加mcfg的memzone计数*/ mcfg->memzone_cnt++; snprintf(mz->name, sizeof(mz->name), "%s", name); mz->phys_addr = rte_malloc_virt2phy(mz_addr); mz->addr = mz_addr; mz->len = (requested_len == 0 ? elem->size : requested_len); mz->hugepage_sz = elem->ms->hugepage_sz;/*memzone对应的socketid和hupagesize即为对应malloc_elem的值*/ mz->socket_id = elem->ms->socket_id; mz->flags = 0; mz->memseg_id = elem->ms - rte_eal_get_configuration()->mem_config->memseg; return mz; }
mz = get_next_free_memzone();
19.11版本
mz = rte_fbarray_get(arr, mz_idx);
	/*
	 * Excerpt (DPDK 19.11): allocation and descriptor fill-in. The
	 * enclosing function and its declarations are not part of this
	 * excerpt.
	 */
	if (len == 0 && bound == 0) {
		/* no size constraints were placed, so use malloc elem len */
		requested_len = 0;
		mz_addr = malloc_heap_alloc_biggest(NULL, socket_id, flags,
				align, contig);
	} else {
		/* only a boundary was given: reserve exactly that much */
		if (len == 0)
			requested_len = bound;
		/* allocate memory on heap */
		mz_addr = malloc_heap_alloc(NULL, requested_len, socket_id,
				flags, align, bound, contig);
	}
	if (mz_addr == NULL) {
		rte_errno = ENOMEM;
		return NULL;
	}

	/* malloc_elem header that belongs to the allocated memory */
	struct malloc_elem *elem = malloc_elem_from_data(mz_addr);

	/* fill the zone in config */
	mz_idx = rte_fbarray_find_next_free(arr, 0);

	if (mz_idx < 0) {
		/* no free slot in the memzone array */
		mz = NULL;
	} else {
		/* claim the slot, then fetch the descriptor stored in it */
		rte_fbarray_set_used(arr, mz_idx);
		mz = rte_fbarray_get(arr, mz_idx);
	}

	if (mz == NULL) {
		RTE_LOG(ERR, EAL, "%s(): Cannot find free memzone\n", __func__);
		/* release the memory again so it is not leaked */
		malloc_heap_free(elem);
		rte_errno = ENOSPC;
		return NULL;
	}

	strlcpy(mz->name, name, sizeof(mz->name));
	mz->iova = rte_malloc_virt2iova(mz_addr);
	mz->addr = mz_addr;
	/*
	 * requested_len == 0 marks the "biggest element" path above; the
	 * length is then derived from the element itself.
	 * NOTE(review): presumably size - pad - overhead is the usable
	 * payload of the element - confirm against malloc_elem.
	 */
	mz->len = requested_len == 0 ?
			elem->size - elem->pad - MALLOC_ELEM_OVERHEAD :
			requested_len;
	/* page size and socket come from the element's memseg list */
	mz->hugepage_sz = elem->msl->page_sz;
	mz->socket_id = elem->msl->socket_id;
	mz->flags = 0;

	return mz;
先看一下memzone的结构体,它包含了zone的name、起始IO地址(iova)、虚拟地址(addr)、长度、对应的大页大小、所在的socket id等。
/**
* A structure describing a memzone, which is a contiguous portion of
* physical memory identified by a name.
*/
/*
 * Descriptor of one reserved memory zone. The structure is declared
 * packed, so the order and the types of the members define its exact
 * layout.
 */
struct rte_memzone {

#define RTE_MEMZONE_NAMESIZE 32       /**< Maximum length of memory zone name.*/
	char name[RTE_MEMZONE_NAMESIZE];  /**< Name of the memory zone. */

	/* both members alias the same storage; phys_addr is the old name */
	RTE_STD_C11
	union {
		phys_addr_t phys_addr;        /**< deprecated - Start physical address. */
		rte_iova_t iova;              /**< Start IO address. */
	};
	RTE_STD_C11
	union {
		void *addr;                   /**< Start virtual address. */
		uint64_t addr_64;             /**< Makes sure addr is always 64-bits */
	};
	size_t len;                       /**< Length of the memzone. */

	uint64_t hugepage_sz;             /**< The page size of underlying memory */

	int32_t socket_id;                /**< NUMA socket ID. */

	uint32_t flags;                   /**< Characteristics of this memzone. */
	uint32_t memseg_id;               /**< Memseg it belongs. */
} __attribute__((__packed__));
接下来,我们从rte_memzone_reserve()开始看起。用户程序调用该函数申请memzone时不需要指定align和bound:align默认取RTE_CACHE_LINE_SIZE,bound默认为0(即不限制边界)。DPDK为提高内存读写效率,内部到处运用了内存对齐技术,但暴露给用户的接口并不需要像它的底层实现那样处处留意对齐细节。从这一段就可以大概看出DPDK的封装确实很好,只暴露有必要暴露的参数。
/**
 * Reserve a memzone with default alignment and no boundary constraint.
 *
 * Forwards to rte_memzone_reserve_thread_safe() with an alignment of
 * RTE_CACHE_LINE_SIZE and a bound of 0, and returns its result.
 */
const struct rte_memzone *
rte_memzone_reserve(const char *name, size_t len, int socket_id,
		unsigned flags)
{
	const unsigned default_align = RTE_CACHE_LINE_SIZE;
	const unsigned no_bound = 0;

	return rte_memzone_reserve_thread_safe(name, len, socket_id, flags,
			default_align, no_bound);
}
这里继续封装一层,上了一把锁,因此 memzone_reserve_aligned_thread_unsafe这个函数的实现将不会再考虑线程安全的问题了。
static const struct rte_memzone * rte_memzone_reserve_thread_safe(const char *name, size_t len, int socket_id, unsigned flags, unsigned align, unsigned bound) { rte_rwlock_write_lock(&mcfg->mlock); mz = memzone_reserve_aligned_thread_unsafe( name, len, socket_id, flags, align, bound); rte_rwlock_write_unlock(&mcfg->mlock); return mz; }
继续分析 memzone_reserve_aligned_thread_unsafe()。首先检查memzone数量,这个最大值是用户编译DPDK前通过配置文件指定的,因此这里也可以看到,并不是DPDK绑定的所有大页内存都拿来做memzone了,还有其他的内存模块会使用到。
	/*
	 * no more room in config: once memzone_cnt has reached
	 * RTE_MAX_MEMZONE the request fails with ENOSPC
	 */
	if (mcfg->memzone_cnt >= RTE_MAX_MEMZONE) {
		RTE_LOG(ERR, EAL, "%s(): No more room in config\n", __func__);
		rte_errno = ENOSPC;
		return NULL;
	}
检查用户申请的name是否已经存在。这个函数里面的实现很简单,在memzone数组中一个一个memzone地找过去,一个一个比较这个name是否已经存在。这里就可以看到memzone的申请确实效率很低,不适合大数量多次数地申请,只适合对申请效率要求不高的程序,或者预先规划好在程序初始化过程中一次性把需要的memzone全部申请完。
	/*
	 * zone already exists: a lookup by name found a reserved zone, so
	 * the request fails with EEXIST
	 */
	if ((memzone_lookup_thread_unsafe(name)) != NULL) {
		RTE_LOG(DEBUG, EAL, "%s(): memzone <%s> already exists\n",
			__func__, name);
		rte_errno = EEXIST;
		return NULL;
	}
如果用户不指定要申请的memzone的内存长度(即len为0,并且也没有指定bound),DPDK会在所有heap中找一个最大的空闲memseg/elem给用户。find_heap_max_free_elem()这个函数效率更低,要把每一个heap的每一个queue的每一个elem都遍历过去,全部遍历完之后才能知道哪个空闲elem的长度最大。
	/*
	 * No length was requested: size the zone after the largest free
	 * element.
	 * NOTE(review): socket_id is passed by pointer, so the helper can
	 * presumably update it - confirm.
	 */
	requested_len = find_heap_max_free_elem(&socket_id, align);
	/* a result of 0 is treated as "no memory available" */
	if (requested_len == 0) {
		rte_errno = ENOMEM;
		return NULL;
	}
如果用户指定了len,就以用户指定为准,如果没指定(即len=0),就以找到的最大长度来申请elem。
	/* allocate memory on heap: first attempt uses the heap of `socket` */
	void *mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[socket], NULL,
			requested_len, flags, align, bound);
如果用户没有指定socket id(即socket_id为SOCKET_ID_ANY),并且在当前socket对应的heap上分配失败,就依次到其他socket的heap中去尝试申请内存。但这样存在一个问题:会出现跨socket访问内存的情况,这个对效率影响非常大,程序性能甚至会降到30%左右,直接打了3折。
	/*
	 * The first heap had no suitable memory and the caller did not ask
	 * for a specific socket: fall back to the remaining heaps.
	 */
	if ((mz_addr == NULL) && (socket_id == SOCKET_ID_ANY)) {
		/* try other heaps */
		for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
			/* this heap was already tried above */
			if (socket == i)
				continue;

			mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[i],
					NULL, requested_len, flags, align, bound);
			/* stop at the first heap that can satisfy the request */
			if (mz_addr != NULL)
				break;
		}
	}
最后根据alloc到的elem和相关信息填写一下新的memzone,返回给用户。
	/* malloc_elem header that belongs to the allocated memory */
	struct malloc_elem *elem = malloc_elem_from_data(mz_addr);

	/* fill the zone in config */
	/* NOTE(review): this excerpt uses mz without checking it for NULL */
	mz = get_next_free_memzone();

	mcfg->memzone_cnt++;
	snprintf(mz->name, sizeof(mz->name), "%s", name);
	mz->iova = rte_malloc_virt2iova(mz_addr);
	mz->addr = mz_addr;
	mz->len = (requested_len == 0 ? elem->size : requested_len);
	/* page size and socket are taken from the element's memseg */
	mz->hugepage_sz = elem->ms->hugepage_sz;
	mz->socket_id = elem->ms->socket_id;
	mz->flags = 0;
	/* index of that memseg within mem_config->memseg[] */
	mz->memseg_id = elem->ms - rte_eal_get_configuration()->mem_config->memseg;
接下来看看memzone的释放流程。先用memset清空该memzone在配置数组中的描述信息,并把memzone计数减一,最后调用rte_free释放对应的内存。我们在下一篇文章再来分析这个rte_free的实现。
int rte_memzone_free(const struct rte_memzone *mz) { rte_rwlock_write_lock(&mcfg->mlock); idx = ((uintptr_t)mz - (uintptr_t)mcfg->memzone); idx = idx / sizeof(struct rte_memzone); addr = mcfg->memzone[idx].addr; if (addr == NULL) ret = -EINVAL; else if (mcfg->memzone_cnt == 0) { rte_panic("%s(): memzone address not NULL but memzone_cnt is 0!\n", __func__); } else { memset(&mcfg->memzone[idx], 0, sizeof(mcfg->memzone[idx])); mcfg->memzone_cnt--; } rte_rwlock_write_unlock(&mcfg->mlock); rte_free(addr); return ret; }