FMallocBinned2是虛幻引擎實現的第二代裝箱內存分配器,其重要的配置參數及成員變量如下:
#define BINNED2_MAX_CACHED_OS_FREES (64)
#if PLATFORM_64BITS
#define BINNED2_MAX_CACHED_OS_FREES_BYTE_LIMIT (64*1024*1024) // 64MB
#else
#define BINNED2_MAX_CACHED_OS_FREES_BYTE_LIMIT (16*1024*1024)
#endif
#define BINNED2_LARGE_ALLOC 65536 // Alignment of OS-allocated pointer - pool-allocated pointers will have a non-aligned pointer
#define BINNED2_MINIMUM_ALIGNMENT_SHIFT 4 // Alignment of blocks, expressed as a shift
#define BINNED2_MINIMUM_ALIGNMENT 16 // Alignment of blocks
#define BINNED2_MAX_SMALL_POOL_SIZE (32768-16) // Maximum block size in GMallocBinned2SmallBlockSizes
#define BINNED2_SMALL_POOL_COUNT 45
#define DEFAULT_GMallocBinned2PerThreadCaches 1
#define DEFAULT_GMallocBinned2LockFreeCaches 0
#define DEFAULT_GMallocBinned2BundleCount 64
#define DEFAULT_GMallocBinned2AllocExtra 32
#define BINNED2_MAX_GMallocBinned2MaxBundlesBeforeRecycle 8
#if !defined(AGGRESSIVE_MEMORY_SAVING)
#error "AGGRESSIVE_MEMORY_SAVING must be defined"
#endif
#if AGGRESSIVE_MEMORY_SAVING
#define DEFAULT_GMallocBinned2BundleSize 8192
#else
#define DEFAULT_GMallocBinned2BundleSize BINNED2_LARGE_ALLOC // 64KB
#endif
#define BINNED2_ALLOW_RUNTIME_TWEAKING 0
#if BINNED2_ALLOW_RUNTIME_TWEAKING
extern CORE_API int32 GMallocBinned2PerThreadCaches;
extern CORE_API int32 GMallocBinned2BundleSize = DEFAULT_GMallocBinned2BundleSize;
extern CORE_API int32 GMallocBinned2BundleCount = DEFAULT_GMallocBinned2BundleCount;
extern CORE_API int32 GMallocBinned2MaxBundlesBeforeRecycle = BINNED2_MAX_GMallocBinned2MaxBundlesBeforeRecycle;
extern CORE_API int32 GMallocBinned2AllocExtra = DEFAULT_GMallocBinned2AllocExtra;
#else
#define GMallocBinned2PerThreadCaches DEFAULT_GMallocBinned2PerThreadCaches // 1
#define GMallocBinned2BundleSize DEFAULT_GMallocBinned2BundleSize // 64KB
#define GMallocBinned2BundleCount DEFAULT_GMallocBinned2BundleCount // 64
#define GMallocBinned2MaxBundlesBeforeRecycle BINNED2_MAX_GMallocBinned2MaxBundlesBeforeRecycle // 8
#define GMallocBinned2AllocExtra DEFAULT_GMallocBinned2AllocExtra // 32
#endif

// ... ...

// Block sizes are based around getting the maximum amount of allocations per pool, with as little alignment waste as possible.
// Block sizes should be close to even divisors of the system page size, and well distributed.
// They must be 16-byte aligned as well.
// 45 size bins; a Malloc request is rounded up to the nearest bin.
static uint16 SmallBlockSizes[] =
{
	16, 32, 48, 64, 80, 96, 112, 128,
	160, 192, 224, 256, 288, 320, 384, 448,
	512, 576, 640, 704, 768, 896, 1024 - 16, 1168,
	1360, 1632, 2048 - 16, 2336, 2720, 3264, 4096 - 16, 4368,
	4672, 5040, 5456, 5952, 6544 - 16, 7280, 8192 - 16, 9360,
	10912, 13104, 16384 - 16, 21840, 32768 - 16
};
// An FFreeBlock describes one block and lives inside a block itself, at the block's
// head, so it costs space: sizeof(FFreeBlock) is 16. The largest bin is 32768-16,
// which lets two top-bin blocks fit inside one 64KB chunk.

// ... ...

class CORE_API FMallocBinned2 final : public FMalloc
{
private:
	// ... ...
	FPtrToPoolMapping PtrToPoolMapping; // parameters for hashing a pointer to its pool bucket

	// Pool tables for different pool sizes
	// One table per size bin (BINNED2_SMALL_POOL_COUNT == 45); every pool in a table shares one block size.
	FPoolTable SmallPoolTables[BINNED2_SMALL_POOL_COUNT];

	PoolHashBucket* HashBuckets;        // hash buckets used when the key is found (hit)
	PoolHashBucket* HashBucketFreeList; // free-list of buckets used when the key is not found (miss)

	uint64 NumPoolsPerPage; // pools per page: 65536 / sizeof(FPoolInfo) = 65536/32 = 2048
	// ... ...
	FCriticalSection Mutex; // critical section used with FScopeLock to serialize the slow paths
	// ... ...
public:
	// ... ...
	static uint16 SmallBlockSizesReversed[BINNED2_SMALL_POOL_COUNT]; // this is reversed to get the smallest elements on our main cache line (reverse of SmallBlockSizes)
	static FMallocBinned2* MallocBinned2; // the current Binned2 allocator instance
	static uint32 Binned2TlsSlot; // TLS slot shared by all threads; 0 means not created yet
	static uint32 PageSize; // Constants.BinnedPageSize, 64KB
	static uint32 OsAllocationGranularity; // Constants.BinnedAllocationGranularity, 4096

	// Mapping of sizes to small table indices. 2048 entries: 0,0,1,2,3,4,5,6,7,8,8, ... ,44,44,44 —
	// precomputed at init so Malloc can map a request size straight to its SmallPoolTables index.
	static uint8 MemSizeToIndex[1 + (BINNED2_MAX_SMALL_POOL_SIZE >> BINNED2_MINIMUM_ALIGNMENT_SHIFT)];
	// ... ...
};
FMallocBinned2內存分配器的初始化(在其構造函數中進行)
根據所在平台硬件和操作系統,來設置內存分配器的相關參數
// Constructor: configures the allocator from the platform's hardware/OS constants.
// Singleton-style — may run only once per process.
FMallocBinned2::FMallocBinned2()
	: HashBucketFreeList(nullptr)
{
	static bool bOnce = false;
	check(!bOnce); // this is now a singleton-like thing and you cannot make multiple copies
	bOnce = true;

	// Build SmallBlockSizesReversed as the reverse of SmallBlockSizes
	for (uint32 Index = 0; Index != BINNED2_SMALL_POOL_COUNT; ++Index)
	{
		uint32 Partner = BINNED2_SMALL_POOL_COUNT - Index - 1;
		SmallBlockSizesReversed[Index] = SmallBlockSizes[Partner];
	}

	// Pull page size, allocation granularity and address limit from the platform
	FGenericPlatformMemoryConstants Constants = FPlatformMemory::GetConstants();
	PageSize = Constants.BinnedPageSize;
	OsAllocationGranularity = Constants.BinnedAllocationGranularity ? Constants.BinnedAllocationGranularity : PageSize;
	NumPoolsPerPage = PageSize / sizeof(FPoolInfo);
	PtrToPoolMapping.Init(PageSize, NumPoolsPerPage, Constants.AddressLimit); // init the pool hash-bucket mapping parameters

	// Sanity checks
	checkf(FMath::IsPowerOfTwo(PageSize), TEXT("OS page size must be a power of two"));
	checkf(FMath::IsPowerOfTwo(Constants.AddressLimit), TEXT("OS address limit must be a power of two"));
	checkf(Constants.AddressLimit > PageSize, TEXT("OS address limit must be greater than the page size")); // Check to catch 32 bit overflow in AddressLimit
	// NOTE(review): the message says "smallest" but the expression compares the LAST (largest) entry — verify against the engine source.
	checkf(SmallBlockSizes[BINNED2_SMALL_POOL_COUNT - 1] == BINNED2_MAX_SMALL_POOL_SIZE, TEXT("BINNED2_MAX_SMALL_POOL_SIZE must equal the smallest block size"));
	checkf(PageSize % BINNED2_LARGE_ALLOC == 0, TEXT("OS page size must be a multiple of BINNED2_LARGE_ALLOC"));
	checkf(sizeof(FMallocBinned2::FFreeBlock) <= SmallBlockSizes[0], TEXT("Pool header must be able to fit into the smallest block"));
	static_assert(UE_ARRAY_COUNT(SmallBlockSizes) == BINNED2_SMALL_POOL_COUNT, "Small block size array size must match BINNED2_SMALL_POOL_COUNT");
	static_assert(UE_ARRAY_COUNT(SmallBlockSizes) <= 256, "Small block size array size must fit in a byte");
	static_assert(sizeof(FFreeBlock) <= BINNED2_MINIMUM_ALIGNMENT, "Free block struct must be small enough to fit into a block.");

	// Init pool tables: copy the 45 bin sizes from SmallBlockSizes into the per-bin tables
	for (uint32 Index = 0; Index != BINNED2_SMALL_POOL_COUNT; ++Index)
	{
		checkf(Index == 0 || SmallBlockSizes[Index - 1] < SmallBlockSizes[Index], TEXT("Small block sizes must be strictly increasing"));
		checkf(SmallBlockSizes[Index] <= PageSize, TEXT("Small block size must be small enough to fit into a page"));
		checkf(SmallBlockSizes[Index] % BINNED2_MINIMUM_ALIGNMENT == 0, TEXT("Small block size must be a multiple of BINNED2_MINIMUM_ALIGNMENT"));
		SmallPoolTables[Index].BlockSize = SmallBlockSizes[Index];
	}

	// Set up pool mappings. 2048 entries: 0,0,1,2,3,4,5,6,7,8,8, ... ,44,44,44 —
	// maps a request size to its index into SmallPoolTables.
	uint8* IndexEntry = MemSizeToIndex;
	uint32 PoolIndex = 0;
	for (uint32 Index = 0; Index != 1 + (BINNED2_MAX_SMALL_POOL_SIZE >> BINNED2_MINIMUM_ALIGNMENT_SHIFT); ++Index)
	{
		uint32 BlockSize = Index << BINNED2_MINIMUM_ALIGNMENT_SHIFT; // inverse of int32 Index = int32((Size >> BINNED2_MINIMUM_ALIGNMENT_SHIFT));
		while (SmallBlockSizes[PoolIndex] < BlockSize)
		{
			++PoolIndex;
			check(PoolIndex != BINNED2_SMALL_POOL_COUNT);
		}
		check(PoolIndex < 256);
		*IndexEntry++ = uint8(PoolIndex);
	}

	// now reverse the pool sizes for cache coherency
	// (fills SmallBlockSizesReversed again — same result as the loop at the top)
	for (uint32 Index = 0; Index != BINNED2_SMALL_POOL_COUNT; ++Index)
	{
		uint32 Partner = BINNED2_SMALL_POOL_COUNT - Index - 1;
		SmallBlockSizesReversed[Index] = SmallBlockSizes[Partner];
	}

	uint64 MaxHashBuckets = PtrToPoolMapping.GetMaxHashBuckets();
	{
		LLM_PLATFORM_SCOPE(ELLMTag::FMalloc);
		// Allocate the hash buckets used when a key hit occurs
		HashBuckets = (PoolHashBucket*)FPlatformMemory::BinnedAllocFromOS(Align(MaxHashBuckets * sizeof(PoolHashBucket), OsAllocationGranularity));
#if BINNED2_ALLOCATOR_STATS
		Binned2HashMemory += Align(MaxHashBuckets * sizeof(PoolHashBucket), OsAllocationGranularity);
#endif
	}
	DefaultConstructItems<PoolHashBucket>(HashBuckets, MaxHashBuckets); // default-construct/initialize HashBuckets
	MallocBinned2 = this;
	GFixedMallocLocationPtr = (FMalloc**)(&MallocBinned2);
}
具體數值如下:
FPoolTable // 同一Block大小內存池表
/** Pool table: groups all pools sharing one block size. sizeof(FPoolTable) is 24. */
struct FPoolTable
{
	FPoolList ActivePools;    // pools that still have free blocks to hand out
	FPoolList ExhaustedPools; // pools that are full (nothing left to allocate)
	uint32 BlockSize;         // block size shared by every pool in this table
	// ... ...
};
FPoolList // 內存池鏈表
// 內存池鏈表 sizeof(FPoolList)為8 struct FPoolList { // ... ... private: FPoolInfo* Front; };
FPoolInfo // 內存池
FPoolInfo中的所有Block為空閑時,才釋放其占用的內存頁
// 內存池 sizeof(FPoolInfo)為32 struct FMallocBinned2::FPoolInfo { // ... ... // 已分配的Block的個數 當為0時,將釋放整個內存池及其FirstMem指向的內存塊 public: uint16 Taken; // Number of allocated elements in this pool, when counts down to zero can free the entire pool public: ECanary Canary; // See ECanary // 已分配的字節數 private: uint32 AllocSize; // Number of bytes allocated // 如果是Bin模式,指向內存池可用的內存塊Block鏈表; 如果非Bin模式, 指向由操作系統直接分配的內存塊. public: FFreeBlock* FirstFreeBlock; // Pointer to first free memory in this pool or the OS Allocation Size in bytes if this allocation is not binned // 指向下一個內存池 public: FPoolInfo* Next; // Pointer to next pool public: FPoolInfo** PtrToPrevNext; // Pointer to whichever pointer points to this pool // ... ... };
FFreeBlock // 內存塊
// 內存塊 sizeof(FFreeBlock)為16 struct FFreeBlock { // ... ... uint16 BlockSize; // Size of the blocks that this list points to // 所在Pool的BlockSize uint8 PoolIndex; // Index of this pool // 所在Pool的Index uint8 Canary; // Constant value of 0xe3 // 固定常量 用於判斷內存是否越界寫 以此判斷這塊Block數據是否損壞 uint32 NumFreeBlocks; // Number of consecutive free blocks here, at least 1. // 空閑Block個數 void* NextFreeBlock; // Next free block in another pool // 釋放1個Block時,會構建該Block的FFreeMem,並插入到Pool->FirstMem鏈表的頭部 };
PoolHashBucket // 內存池哈希桶
// 內存池哈希桶的相關參數 sizeof(FPtrToPoolMapping)為32 struct FPtrToPoolMapping { // ... ... private: /** Shift to apply to a pointer to get the reference from the indirect tables */ uint64 PtrToPoolPageBitShift; /** Shift required to get required hash table key. */ uint64 HashKeyShift; /** Used to mask off the bits that have been used to lookup the indirect table */ uint64 PoolMask; // PageSize dependent constants uint64 MaxHashBuckets; }; /** 內存池哈希桶,用於存放由內存地址哈希出來的鍵對應的內存池鏈表 sizeof(PoolHashBucket)為32 */ struct FMallocBinned2::PoolHashBucket { UPTRINT BucketIndex; // 哈希鍵 Key=Ptr >> Allocator.HashKeyShift 內存地址右移27個bit位 FPoolInfo* FirstPool; // 指向內存池內存塊(大小為64KB:成員變量PageSize的值)的起始處 PoolHashBucket* Prev; // 上一個內存池哈希桶 PoolHashBucket* Next; // 下一個內存池哈希桶 // ... ... };
從內存池Pool中分配內存給Block
struct FMallocBinned2::FPoolInfo
{
	// ... ...
	// Hand out one block from this pool.
	void* AllocateRegularBlock()
	{
		check(HasFreeRegularBlock()); // pool must still have a free block
		++Taken;                      // one more block handed out
		void* Result = FirstFreeBlock->AllocateRegularBlock(); // carve the block
		ExhaustPoolIfNecessary();     // if no free block remains, move this pool to the ExhaustedPools list
		return Result;
	}
	// ... ...
};

struct FFreeBlock
{
	// ... ...
	FORCEINLINE void* AllocateRegularBlock()
	{
		--NumFreeBlocks; // one fewer free block
		if (IsAligned(this, BINNED2_LARGE_ALLOC)) // is this header at a 64KB (BINNED2_LARGE_ALLOC) boundary?
		{
			return (uint8*)this + BINNED2_LARGE_ALLOC - (NumFreeBlocks + 1) * BlockSize; // offset +64KB, then hand out blocks front-to-back
		}
		return (uint8*)this + (NumFreeBlocks)* BlockSize; // hand out blocks back-to-front
	}
	// ... ...
};
TLS Cache機制
與FMallocBinned內存分配器相比,FMallocBinned2最大的改進:
引入了TLS(Thread Local Storage,線程局部存儲。線程可以有自己的存儲空間,以鍵值對形式存儲一些自己獨有的變量)緩存,來優化內存的分配速度
各線程會記錄被free的地址,把它們保存到一個列表中,當這個線程再有malloc請求來時,如果BlockSize匹配,則直接返回之前緩存的free地址
這樣就不需要再訪問FPoolTable SmallPoolTables[BINNED2_SMALL_POOL_COUNT]了,因此也不用再加互斥鎖了
各個線程在啟動時,通過調用FMemory::SetupTLSCachesOnCurrentThread()創建自己的TLS數據FPerThreadFreeBlockLists
各個線程在創建FPerThreadFreeBlockLists后,都會把它添加到Binned2的RegisteredFreeBlockLists數組中記錄。代碼如下:
// Called by each thread at startup (via FMemory::SetupTLSCachesOnCurrentThread)
// to create its own per-thread FPerThreadFreeBlockLists cache.
void FMallocBinned2::SetupTLSCachesOnCurrentThread()
{
	if (!BINNED2_ALLOW_RUNTIME_TWEAKING && !GMallocBinned2PerThreadCaches)
	{
		return;
	}
	if (!FMallocBinned2::Binned2TlsSlot)
	{
		FMallocBinned2::Binned2TlsSlot = FPlatformTLS::AllocTlsSlot(); // runs only once: the TLS slot is globally unique
	}
	check(FMallocBinned2::Binned2TlsSlot);
	FPerThreadFreeBlockLists::SetTLS(); // each thread creates its own TLS data
}

void FMallocBinned2::FPerThreadFreeBlockLists::SetTLS()
{
	check(FMallocBinned2::Binned2TlsSlot);
	FPerThreadFreeBlockLists* ThreadSingleton = (FPerThreadFreeBlockLists*)FPlatformTLS::GetTlsValue(FMallocBinned2::Binned2TlsSlot);
	if (!ThreadSingleton)
	{
		LLM_PLATFORM_SCOPE(ELLMTag::FMalloc);
		// Placement-new the per-thread lists in memory obtained straight from the OS
		ThreadSingleton = new (FPlatformMemory::BinnedAllocFromOS(Align(sizeof(FPerThreadFreeBlockLists), FMallocBinned2::OsAllocationGranularity))) FPerThreadFreeBlockLists();
#if BINNED2_ALLOCATOR_STATS
		Binned2TLSMemory += Align(sizeof(FPerThreadFreeBlockLists), FMallocBinned2::OsAllocationGranularity);
#endif
		FPlatformTLS::SetTlsValue(FMallocBinned2::Binned2TlsSlot, ThreadSingleton);
		FMallocBinned2::Private::RegisterThreadFreeBlockLists(ThreadSingleton); // record it in the global registry
	}
}

// Global registry of every thread's free-block lists (function-local static).
static TArray<FPerThreadFreeBlockLists*>& GetRegisteredFreeBlockLists()
{
	static TArray<FPerThreadFreeBlockLists*> RegisteredFreeBlockLists;
	return RegisteredFreeBlockLists;
}

static void RegisterThreadFreeBlockLists( FPerThreadFreeBlockLists* FreeBlockLists )
{
	FScopeLock Lock(&GetFreeBlockListsRegistrationMutex());
#if BINNED2_ALLOCATOR_STATS_VALIDATION
	++RecursionCounter;
#endif
	GetRegisteredFreeBlockLists().Add(FreeBlockLists);
#if BINNED2_ALLOCATOR_STATS_VALIDATION
	--RecursionCounter;
#endif
}
各線程調用FMemory::SetupTLSCachesOnCurrentThread()的情況:
每個線程都會有一份FPerThreadFreeBlockLists副本,其中FreeLists[]數組也通過BlockSize產生,有45個元素
每個元素類型為FFreeBlockList,包含FBundle PartialBundle鏈表(未裝滿的Bundle)和FBundle FullBundle鏈表(已裝滿的Bundle)
為了讓Binned2不過多占用內存空間,給每個FBundle限定了元素數量(不能超過64)及總容量(其中FBundleNode的Count*BlockSize不能大於64KB)
// Per-thread TLS cache of freed blocks, one FFreeBlockList per size bin.
struct FPerThreadFreeBlockLists
{
	// ... ...
private:
	FFreeBlockList FreeLists[BINNED2_SMALL_POOL_COUNT]; // BINNED2_SMALL_POOL_COUNT is 45 — one list per block-size bin
};

// sizeof(FFreeBlockList) is 32
struct FFreeBlockList
{
	// ... ...
private:
	FBundle PartialBundle; // bundle not yet full
	FBundle FullBundle;    // bundle that has reached its limit
};

// List of FBundleNode. sizeof(FBundle) is 16
struct FBundle
{
	// ... ...
	FBundleNode* Head; // list head pointer
	uint32 Count;
};

// sizeof(FBundleNode) is 16
struct FBundleNode
{
	FBundleNode* NextNodeInCurrentBundle;
	union
	{
		FBundleNode* NextBundle;
		int32 Count;
	};
};
注:FBundleNode*指向的是Block內存塊區域,把Ptr指針轉換成FBundleNode*后,其size為16字節,對其修改不會影響到其他內存空間
FGlobalRecycler // 用於緩存FBundle FullBundle鏈表
struct FGlobalRecycler // sizeof(FGlobalRecycler) is 64*45 = 2880
{
	// ... ...
private:
	struct FPaddedBundlePointer // sizeof(FPaddedBundlePointer) is 8*8 = 64
	{
		FBundleNode* FreeBundles[BINNED2_MAX_GMallocBinned2MaxBundlesBeforeRecycle]; // BINNED2_MAX_GMallocBinned2MaxBundlesBeforeRecycle is 8
		// ... ...
	};
	// ... ...
	MS_ALIGN(PLATFORM_CACHE_LINE_SIZE) FPaddedBundlePointer Bundles[BINNED2_SMALL_POOL_COUNT] GCC_ALIGN(PLATFORM_CACHE_LINE_SIZE); // BINNED2_SMALL_POOL_COUNT is 45
};

// Global cache of full bundles. Each size bin holds up to 8 bundles: PushBundle
// succeeds while a slot is free and fails otherwise; PopBundle returns null when
// the bin is entirely empty.
static FGlobalRecycler GGlobalRecycler;
Malloc申請內存時TLS Cache的流程細節如下:
Free釋放內存時TLS Cache的流程細節如下:
Free掉Ptr指針的內存占用
// Release the memory behind Ptr: small binned blocks go back to their pool
// (via the TLS cache / global recycler), large allocations go back to the OS.
void FMallocBinned2::FreeExternal(void* Ptr)
{
	if (!IsOSAllocation(Ptr)) // was this allocated directly from the OS?
	{
		// Binned mode: Ptr belongs to a small-block pool
		check(Ptr); // null is 64k aligned so we should not be here
		FFreeBlock* BasePtr = GetPoolHeaderFromPointer(Ptr); // header at the start of the 64KB chunk
		BasePtr->CanaryTest();
		uint32 BlockSize = BasePtr->BlockSize;
		uint32 PoolIndex = BasePtr->PoolIndex;
		FBundleNode* BundlesToRecycle = nullptr;
		FPerThreadFreeBlockLists* Lists = GMallocBinned2PerThreadCaches ? FPerThreadFreeBlockLists::Get() : nullptr;
		if (Lists)
		{
			// If this bin's FullBundle head is non-null, try to cache it in the matching
			// slot array of GGlobalRecycler:
			//  - no free slot: BundlesToRecycle receives the FullBundle head (freed below)
			//  - free slot: the bundle is stored globally and null is returned
			BundlesToRecycle = Lists->RecycleFullBundle(BasePtr->PoolIndex);
			bool bPushed = Lists->Free(Ptr, PoolIndex, BlockSize); // push Ptr onto this bin's PartialBundle head
			check(bPushed);
#if BINNED2_ALLOCATOR_STATS
			Lists->AllocatedMemory -= BlockSize;
#endif
		}
		else
		{
			BundlesToRecycle = (FBundleNode*)Ptr;
			BundlesToRecycle->NextNodeInCurrentBundle = nullptr;
		}
		if (BundlesToRecycle) // something must actually be returned to the pools
		{
			BundlesToRecycle->NextBundle = nullptr;
			FScopeLock Lock(&Mutex);
			Private::FreeBundles(*this, BundlesToRecycle, BlockSize, PoolIndex); // return the bundled blocks to their pools
#if BINNED2_ALLOCATOR_STATS
			if (!Lists)
			{
				// lists track their own stat track them instead in the global stat if we don't have lists
				AllocatedSmallPoolMemory -= ((int64)(BlockSize));
			}
#endif
		}
	}
	else if (Ptr)
	{
		// Non-binned mode: the allocation came straight from the OS
		FScopeLock Lock(&Mutex);
		FPoolInfo* Pool = Private::FindPoolInfo(*this, Ptr);
		if (!Pool)
		{
			UE_LOG(LogMemory, Fatal, TEXT("FMallocBinned2 Attempt to free an unrecognized block %p"), Ptr);
		}
		UPTRINT PoolOsBytes = Pool->GetOsAllocatedBytes();
		SIZE_T PoolOSRequestedBytes = Pool->GetOSRequestedBytes();
#if BINNED2_ALLOCATOR_STATS
		AllocatedLargePoolMemory -= ((int64)PoolOSRequestedBytes);
		AllocatedLargePoolMemoryWAlignment -= ((int64)PoolOsBytes);
#endif
		checkf(PoolOSRequestedBytes <= PoolOsBytes, TEXT("FMallocBinned2::FreeExternal %d %d"), int32(PoolOSRequestedBytes), int32(PoolOsBytes));
		Pool->SetCanary(FPoolInfo::ECanary::Unassigned, true, false);
		// Free an OS allocation.
		CachedOSPageAllocator.Free(Ptr, PoolOsBytes);
	}
}

// Return every node of every bundle in BundlesToRecycle to its owning pool;
// a pool whose Taken count reaches zero has its whole 64KB chunk freed to the OS.
static void FreeBundles(FMallocBinned2& Allocator, FBundleNode* BundlesToRecycle, uint32 InBlockSize, uint32 InPoolIndex)
{
	FPoolTable& Table = Allocator.SmallPoolTables[InPoolIndex];

	// Walk the bundle list, freeing the memory each node points at
	FBundleNode* Bundle = BundlesToRecycle;
	while (Bundle)
	{
		FBundleNode* NextBundle = Bundle->NextBundle;
		FBundleNode* Node = Bundle;
		do
		{
			FBundleNode* NextNode = Node->NextNodeInCurrentBundle;
			FPoolInfo* NodePool = FindPoolInfo(Allocator, Node);
			if (!NodePool)
			{
				UE_LOG(LogMemory, Fatal, TEXT("FMallocBinned2 Attempt to free an unrecognized small block %p"), Node);
			}
			NodePool->CheckCanary(FPoolInfo::ECanary::FirstFreeBlockIsPtr);

			// If this pool was exhausted, move to available list.
			if (!NodePool->FirstFreeBlock) // NodePool currently sits on the ExhaustedPools list
			{
				Table.ActivePools.LinkToFront(NodePool); // move it from ExhaustedPools back to ActivePools
			}
			else
			{
				check(NodePool->FirstFreeBlock->Canary == 0 || NodePool->FirstFreeBlock->IsCanaryOk());
			}

			// Free a pooled allocation: build a fresh FFreeBlock at Node (NumFreeBlocks = 1)
			// and push it onto the head of NodePool->FirstFreeBlock
			FFreeBlock* Free = (FFreeBlock*)Node;
			Free->NumFreeBlocks = 1;
			Free->NextFreeBlock = NodePool->FirstFreeBlock;
			Free->BlockSize = InBlockSize;
			Free->Canary = FFreeBlock::CANARY_VALUE;
			Free->PoolIndex = InPoolIndex;
			NodePool->FirstFreeBlock = Free;

			// Free this pool.
			check(NodePool->Taken >= 1);
			if (--NodePool->Taken == 0) // every block in NodePool is free again
			{
				NodePool->SetCanary(FPoolInfo::ECanary::Unassigned, true, false);
				FFreeBlock* BasePtrOfNode = GetPoolHeaderFromPointer(Node);
				// Free the OS memory.
				NodePool->Unlink(); // detach NodePool from its FPoolList
				Allocator.CachedOSPageAllocator.Free(BasePtrOfNode, Allocator.PageSize); // reclaim the pool's whole chunk
#if BINNED2_ALLOCATOR_STATS
				AllocatedOSSmallPoolMemory -= ((int64)Allocator.PageSize);
#endif
			}
			Node = NextNode; // advance to the next FBundleNode
		} while (Node);
		Bundle = NextBundle;
	}
}
Malloc分配內存
// Route an allocation request to the small-block pools or to the OS.
FORCEINLINE void* MallocSelect(SIZE_T Size, uint32 Alignment)
{
	void* Result;
	if (UseSmallAlloc(Size, Alignment)) // Size <= BINNED2_MAX_SMALL_POOL_SIZE && Alignment <= BINNED2_MINIMUM_ALIGNMENT
	{
		Result = MallocExternalSmall(Size, Alignment); // allocate from the small-block pools
	}
	else
	{
		Result = MallocExternalLarge(Size, Alignment); // allocate straight from the OS and record it in HashBuckets
	}
	return Result;
}

// Small-pool allocation: try the per-thread TLS cache first, then fall back to
// the shared pool tables under the mutex.
void* FMallocBinned2::MallocExternalSmall(SIZE_T Size, uint32 Alignment)
{
	uint32 PoolIndex = BoundSizeToPoolIndex(Size); // map Size to its index in SmallPoolTables

	// Fast path: reuse a cached block from this thread's TLS cache
	FPerThreadFreeBlockLists* Lists = GMallocBinned2PerThreadCaches ? FPerThreadFreeBlockLists::Get() : nullptr;
	if (Lists)
	{
		// If this bin's PartialBundle head is empty, pop a bundle for the bin from
		// GGlobalRecycler into PartialBundle, then re-check whether the head is non-empty
		if (Lists->ObtainRecycledPartial(PoolIndex))
		{
			if (void* Result = Lists->Malloc(PoolIndex)) // pop one FBundleNode off the PartialBundle head
			{
#if BINNED2_ALLOCATOR_STATS
				uint32 BlockSize = PoolIndexToBlockSize(PoolIndex);
				Lists->AllocatedMemory += BlockSize;
#endif
				return Result; // a TLS-cached block exists for this bin — hand it out directly
			}
		}
	}

	FScopeLock Lock(&Mutex); // acquire the mutex; automatically released at scope exit

	// Allocate from small object pool.
	FPoolTable& Table = SmallPoolTables[PoolIndex]; // pool table for this bin
	FPoolInfo* Pool;
	if (!Table.ActivePools.IsEmpty()) // the table still has a pool with free blocks
	{
		Pool = &Table.ActivePools.GetFrontPool(); // take the first FPoolInfo
	}
	else
	{
		Pool = &Table.ActivePools.PushNewPoolToFront(*this, Table.BlockSize, PoolIndex); // create a new FPoolInfo
	}

	// Carve one block; AllocateRegularBlock itself moves the pool to ExhaustedPools if it fills up
	void* Result = Pool->AllocateRegularBlock();
#if BINNED2_ALLOCATOR_STATS
	AllocatedSmallPoolMemory += PoolIndexToBlockSize(PoolIndex);
#endif // BINNED2_ALLOCATOR_STATS
	if (GMallocBinned2AllocExtra) // default 32 — TLS-cache warm-up optimization
	{
		if (Lists)
		{
			// prefill the free list with some allocations so we are less likely to hit this slow path with the mutex
			for (int32 Index = 0; Index < GMallocBinned2AllocExtra && Pool->HasFreeRegularBlock(); Index++)
			{
				if (!Lists->Free(Result, PoolIndex, Table.BlockSize)) // on success, Result joins the PartialBundle head
				{
					break;
				}
				Result = Pool->AllocateRegularBlock(); // carve another block to return to the caller
			}
		}
	}
	if (!Pool->HasFreeRegularBlock()) // did the pool run out of free blocks?
	{
		Table.ExhaustedPools.LinkToFront(Pool); // move it onto the ExhaustedPools list
	}
	return Result;
}
Android(小米10)DumpPlatformAndAllocatorStats統計信息:
[2021.05.27-15.59.49:152][ 66]LogMemory: Platform Memory Stats for Android [2021.05.27-15.59.49:152][ 66]LogMemory: Process Physical Memory: 1207.08 MB used, 1254.11 MB peak [2021.05.27-15.59.49:152][ 66]LogMemory: Process Virtual Memory: 8984.62 MB used, 9077.56 MB peak [2021.05.27-15.59.49:152][ 66]LogMemory: Physical Memory: 5445.78 MB used, 2177.80 MB free, 7623.57 MB total [2021.05.27-15.59.49:153][ 66]LogMemory: Virtual Memory: 608.03 MB used, 1439.97 MB free, 2048.00 MB total [2021.05.27-15.59.49:153][ 66]LogMemory: PageSize: 4096, BinnedPageSize: 65536, BinnedAllocationGranularity: 4096, AddressLimit: 8589934592 [2021.05.27-15.59.49:154][ 66]FMallocBinned2 Mem report [2021.05.27-15.59.49:154][ 66]Constants.BinnedPageSize = 65536 [2021.05.27-15.59.49:154][ 66]Constants.BinnedAllocationGranularity = 4096 [2021.05.27-15.59.49:154][ 66]Small Pool Allocations: 388.752121mb (including block size padding) [2021.05.27-15.59.49:155][ 66]Small Pool OS Allocated: 419.000000mb [2021.05.27-15.59.49:155][ 66]Large Pool Requested Allocations: 204.530167mb [2021.05.27-15.59.49:155][ 66]Large Pool OS Allocated: 205.332031mb [2021.05.27-15.59.49:155][ 66]Requested Allocations: 204.530167mb [2021.05.27-15.59.49:155][ 66]OS Allocated: 205.332031mb [2021.05.27-15.59.49:155][ 66]PoolInfo: 1.687500mb [2021.05.27-15.59.49:155][ 66]Hash: 0.003906mb [2021.05.27-15.59.49:156][ 66]TLS: 0.066406mb [2021.05.27-15.59.49:156][ 66]Total allocated from OS: 626.089844mb [2021.05.27-15.59.49:156][ 66]Cached free OS pages: 3.894531mb
PC下DumpPlatformAndAllocatorStats統計信息:
[2021.06.04-06.12.34:488][748]LogMemory: Platform Memory Stats for Windows [2021.06.04-06.12.34:488][748]LogMemory: Process Physical Memory: 704.69 MB used, 775.71 MB peak [2021.06.04-06.12.34:488][748]LogMemory: Process Virtual Memory: 784.52 MB used, 888.80 MB peak [2021.06.04-06.12.34:488][748]LogMemory: Physical Memory: 24035.47 MB used, 8565.63 MB free, 32601.11 MB total [2021.06.04-06.12.34:488][748]LogMemory: Virtual Memory: 134206408.00 MB used, 11316.60 MB free, 134217728.00 MB total [2021.06.04-06.12.34:489][748]FMallocBinned2 Mem report [2021.06.04-06.12.34:489][748]Constants.BinnedPageSize = 65536 [2021.06.04-06.12.34:489][748]Constants.BinnedAllocationGranularity = 4096 [2021.06.04-06.12.34:489][748]Small Pool Allocations: 130.058121mb (including block size padding) [2021.06.04-06.12.34:489][748]Small Pool OS Allocated: 157.312500mb [2021.06.04-06.12.34:489][748]Large Pool Requested Allocations: 141.529739mb [2021.06.04-06.12.34:489][748]Large Pool OS Allocated: 141.667969mb [2021.06.04-06.12.34:489][748]Requested Allocations: 141.529739mb [2021.06.04-06.12.34:489][748]OS Allocated: 141.667969mb [2021.06.04-06.12.34:489][748]PoolInfo: 0.500000mb [2021.06.04-06.12.34:489][748]Hash: 0.007813mb [2021.06.04-06.12.34:489][748]TLS: 0.128906mb [2021.06.04-06.12.34:489][748]Total allocated from OS: 299.617188mb [2021.06.04-06.12.34:490][748]Cached free OS pages: 34.992188mb
參考
People Mountain People Sea(服務器篇)