FMallocBinned2內存分配器


FMallocBinned2是虛幻引擎實現的第二代裝箱內存分配器,其重要的配置參數及成員變量如下:

// Compile-time configuration for FMallocBinned2.
// NOTE(review): the two BINNED2_MAX_CACHED_OS_FREES* limits look like the entry-count and byte caps of the
// cached-OS-page allocator - confirm against its declaration, which is not part of this listing.
#define BINNED2_MAX_CACHED_OS_FREES (64)
#if PLATFORM_64BITS
    #define BINNED2_MAX_CACHED_OS_FREES_BYTE_LIMIT (64*1024*1024) // 64MB
#else
    #define BINNED2_MAX_CACHED_OS_FREES_BYTE_LIMIT (16*1024*1024) // 16MB
#endif

#define BINNED2_LARGE_ALLOC                    65536        // Alignment of OS-allocated pointer - pool-allocated pointers will have a non-aligned pointer
#define BINNED2_MINIMUM_ALIGNMENT_SHIFT        4            // Alignment of blocks, expressed as a shift
#define BINNED2_MINIMUM_ALIGNMENT            16            // Alignment of blocks
#define BINNED2_MAX_SMALL_POOL_SIZE            (32768-16)    // Maximum block size in GMallocBinned2SmallBlockSizes
#define BINNED2_SMALL_POOL_COUNT            45


// Default values for the GMallocBinned2* tunables.
#define DEFAULT_GMallocBinned2PerThreadCaches 1
#define DEFAULT_GMallocBinned2LockFreeCaches 0
#define DEFAULT_GMallocBinned2BundleCount 64
#define DEFAULT_GMallocBinned2AllocExtra 32
#define BINNED2_MAX_GMallocBinned2MaxBundlesBeforeRecycle 8

// AGGRESSIVE_MEMORY_SAVING must already be defined (to 0 or 1) before this point; otherwise the build is stopped.
#if !defined(AGGRESSIVE_MEMORY_SAVING)
    #error "AGGRESSIVE_MEMORY_SAVING must be defined"
#endif
// The default bundle size is 8KB when saving memory aggressively, otherwise BINNED2_LARGE_ALLOC (64KB).
#if AGGRESSIVE_MEMORY_SAVING
    #define DEFAULT_GMallocBinned2BundleSize 8192
#else
    #define DEFAULT_GMallocBinned2BundleSize BINNED2_LARGE_ALLOC  // 64KB
#endif


// Selects how the GMallocBinned2* tunables are provided:
//   - 0 (the value set here): each tunable is a macro that expands to its compile-time default.
//   - non-zero: each tunable is a CORE_API global that can be changed at run time.
#define BINNED2_ALLOW_RUNTIME_TWEAKING 0
#if BINNED2_ALLOW_RUNTIME_TWEAKING
    // Declarations only. An `extern` declaration that carries an initializer is a definition, so
    // initializing these here would define each variable in every translation unit that sees this
    // code. The single defining translation unit must supply the DEFAULT_* / BINNED2_MAX_* initial values.
    extern CORE_API int32 GMallocBinned2PerThreadCaches;
    extern CORE_API int32 GMallocBinned2BundleSize;
    extern CORE_API int32 GMallocBinned2BundleCount;
    extern CORE_API int32 GMallocBinned2MaxBundlesBeforeRecycle;
    extern CORE_API int32 GMallocBinned2AllocExtra;
#else
    #define GMallocBinned2PerThreadCaches DEFAULT_GMallocBinned2PerThreadCaches  // 1
    #define GMallocBinned2BundleSize DEFAULT_GMallocBinned2BundleSize  // 64KB (8KB when AGGRESSIVE_MEMORY_SAVING is non-zero)
    #define GMallocBinned2BundleCount DEFAULT_GMallocBinned2BundleCount  // 64
    #define GMallocBinned2MaxBundlesBeforeRecycle BINNED2_MAX_GMallocBinned2MaxBundlesBeforeRecycle  // 8
    #define GMallocBinned2AllocExtra DEFAULT_GMallocBinned2AllocExtra  // 32
#endif

// ... ... 

// Block sizes are based around getting the maximum amount of allocations per pool, with as little alignment waste as possible.
// Block sizes should be close to even divisors of the system page size, and well distributed.
// They must be 16-byte aligned as well.
// 45 block-size bins in total, in strictly increasing order. A request of a given size is served
// from the smallest bin whose block size can hold it.
static uint16 SmallBlockSizes[] =
{
    16, 32, 48, 64, 80, 96, 112, 128,
    160, 192, 224, 256, 288, 320, 384, 448,
    512, 576, 640, 704, 768, 896, 1024 - 16, 1168,
    1360, 1632, 2048 - 16, 2336, 2720, 3264, 4096 - 16, 4368,
    4672, 5040, 5456, 5952, 6544 - 16, 7280, 8192 - 16, 9360,
    10912, 13104, 16384 - 16, 21840, 32768 - 16 
};// Original author's note: an FFreeBlock header describes a run of blocks and itself sits at the head of a block, taking sizeof(FFreeBlock) == 16 bytes; the largest bin is 32768-16 so that a 64KB span can hold two blocks of the largest bin. // ... ...

/**
 * Second-generation binned memory allocator.
 * This is an excerpt: "// ... ..." marks members that are omitted from the listing.
 */
class CORE_API FMallocBinned2 final : public FMalloc
{

private:
    // ... ...
    FPtrToPoolMapping PtrToPoolMapping;  // Parameters used to map a pointer to its pool hash bucket

    // Pool tables for different pool sizes
    FPoolTable SmallPoolTables[BINNED2_SMALL_POOL_COUNT]; // One pool table per block-size bin (BINNED2_SMALL_POOL_COUNT is 45); every pool in a table uses the same block size

    // NOTE(review): the original article describes HashBuckets as the buckets used when a key hits and
    // HashBucketFreeList as the buckets used when a key misses; the lookup code is not shown here - confirm.
    PoolHashBucket* HashBuckets;
    PoolHashBucket* HashBucketFreeList;
    uint64 NumPoolsPerPage; // Number of FPoolInfo entries per page: PageSize / sizeof(FPoolInfo), i.e. 65536 / 32 = 2048 with the sizes quoted in the article
    // ... ...
    
    FCriticalSection Mutex; // Critical section taken through FScopeLock to serialize access to the shared allocator state
    
    // ... ...

public:
    // ... ...
    static uint16 SmallBlockSizesReversed[BINNED2_SMALL_POOL_COUNT]; // this is reversed to get the smallest elements on our main cache line (SmallBlockSizes in reverse order)
    static FMallocBinned2* MallocBinned2; // The current Binned2 allocator instance
    static uint32 Binned2TlsSlot; // TLS slot used for the per-thread caches; 0 means not allocated yet. All threads share this one slot index
    static uint32 PageSize;  // Constants.BinnedPageSize (64KB in the dumps quoted in the article)
    static uint32 OsAllocationGranularity;  // Constants.BinnedAllocationGranularity (4096 in the dumps quoted in the article)

    // Mapping of sizes to small table indices.
    // Built once in the constructor so that a request size can be turned directly into an index into
    // SmallPoolTables. With the constants above it has 2048 entries holding
    // 0,0,1,2,3,4,5,6,7,8,8, ... ,44,44,44.
    static uint8 MemSizeToIndex[1 + (BINNED2_MAX_SMALL_POOL_SIZE >> BINNED2_MINIMUM_ALIGNMENT_SHIFT)];

    // ... ...
};

 

FMallocBinned2內存分配器的初始化 // 在其構造函數中

根據所在平台硬件和操作系統,來設置內存分配器的相關參數

/**
 * Constructs the (single) allocator instance.
 *
 * Steps, in order:
 *  1. read the platform memory constants and derive PageSize, OsAllocationGranularity and NumPoolsPerPage;
 *  2. sanity-check the constants and the SmallBlockSizes table;
 *  3. copy each bin's block size into SmallPoolTables;
 *  4. build MemSizeToIndex (request size -> bin index) and SmallBlockSizesReversed;
 *  5. allocate and default-construct the pool hash buckets straight from the OS;
 *  6. publish the instance through MallocBinned2 / GFixedMallocLocationPtr.
 */
FMallocBinned2::FMallocBinned2()
    : HashBucketFreeList(nullptr)
{
    static bool bOnce = false;
    check(!bOnce); // this is now a singleton-like thing and you cannot make multiple copies
    bOnce = true;

    // Configure the allocator from the platform's memory constants.
    FGenericPlatformMemoryConstants Constants = FPlatformMemory::GetConstants();
    PageSize = Constants.BinnedPageSize;
    // Fall back to the page size when the platform reports no allocation granularity.
    OsAllocationGranularity = Constants.BinnedAllocationGranularity ? Constants.BinnedAllocationGranularity : PageSize;
    NumPoolsPerPage = PageSize / sizeof(FPoolInfo);
    PtrToPoolMapping.Init(PageSize, NumPoolsPerPage, Constants.AddressLimit); // set up the pointer -> pool hash bucket parameters

    // Sanity checks on the platform constants and on the size table.
    checkf(FMath::IsPowerOfTwo(PageSize), TEXT("OS page size must be a power of two"));
    checkf(FMath::IsPowerOfTwo(Constants.AddressLimit), TEXT("OS address limit must be a power of two"));
    checkf(Constants.AddressLimit > PageSize, TEXT("OS address limit must be greater than the page size")); // Check to catch 32 bit overflow in AddressLimit
    // The last entry of SmallBlockSizes is the largest bin, so that is what the limit must match.
    checkf(SmallBlockSizes[BINNED2_SMALL_POOL_COUNT - 1] == BINNED2_MAX_SMALL_POOL_SIZE, TEXT("BINNED2_MAX_SMALL_POOL_SIZE must equal the largest block size"));
    checkf(PageSize % BINNED2_LARGE_ALLOC == 0, TEXT("OS page size must be a multiple of BINNED2_LARGE_ALLOC"));
    checkf(sizeof(FMallocBinned2::FFreeBlock) <= SmallBlockSizes[0], TEXT("Pool header must be able to fit into the smallest block"));
    static_assert(UE_ARRAY_COUNT(SmallBlockSizes) == BINNED2_SMALL_POOL_COUNT, "Small block size array size must match BINNED2_SMALL_POOL_COUNT");
    static_assert(UE_ARRAY_COUNT(SmallBlockSizes) <= 256, "Small block size array size must fit in a byte");
    static_assert(sizeof(FFreeBlock) <= BINNED2_MINIMUM_ALIGNMENT, "Free block struct must be small enough to fit into a block.");

    // Init pool tables: give each of the 45 tables the block size of its bin.
    for (uint32 Index = 0; Index != BINNED2_SMALL_POOL_COUNT; ++Index)
    {
        checkf(Index == 0 || SmallBlockSizes[Index - 1] < SmallBlockSizes[Index], TEXT("Small block sizes must be strictly increasing"));
        checkf(SmallBlockSizes[Index] <= PageSize, TEXT("Small block size must be small enough to fit into a page"));
        checkf(SmallBlockSizes[Index] % BINNED2_MINIMUM_ALIGNMENT == 0, TEXT("Small block size must be a multiple of BINNED2_MINIMUM_ALIGNMENT"));

        SmallPoolTables[Index].BlockSize = SmallBlockSizes[Index];
    }

    // Set up pool mappings. One entry per 16-byte size step (2048 entries with the current constants),
    // each holding the index of the smallest bin that fits: 0,0,1,2,3,4,5,6,7,8,8, ... ,44,44,44.
    uint8* IndexEntry = MemSizeToIndex;
    uint32 PoolIndex = 0;
    for (uint32 Index = 0; Index != 1 + (BINNED2_MAX_SMALL_POOL_SIZE >> BINNED2_MINIMUM_ALIGNMENT_SHIFT); ++Index)
    {
        uint32 BlockSize = Index << BINNED2_MINIMUM_ALIGNMENT_SHIFT; // inverse of int32 Index = int32((Size >> BINNED2_MINIMUM_ALIGNMENT_SHIFT));
        // PoolIndex only ever moves forward because the sizes visited are increasing.
        while (SmallBlockSizes[PoolIndex] < BlockSize)
        {
            ++PoolIndex;
            check(PoolIndex != BINNED2_SMALL_POOL_COUNT);
        }
        check(PoolIndex < 256);
        *IndexEntry++ = uint8(PoolIndex);
    }

    // now reverse the pool sizes for cache coherency
    // (SmallBlockSizes is never modified in this constructor, so filling the reversed copy once is enough.)
    for (uint32 Index = 0; Index != BINNED2_SMALL_POOL_COUNT; ++Index)
    {
        uint32 Partner = BINNED2_SMALL_POOL_COUNT - Index - 1;
        SmallBlockSizesReversed[Index] = SmallBlockSizes[Partner];
    }

    uint64 MaxHashBuckets = PtrToPoolMapping.GetMaxHashBuckets();

    {
        LLM_PLATFORM_SCOPE(ELLMTag::FMalloc);
        // Allocate the pool hash bucket array directly from the OS, rounded up to the allocation granularity.
        HashBuckets = (PoolHashBucket*)FPlatformMemory::BinnedAllocFromOS(Align(MaxHashBuckets * sizeof(PoolHashBucket), OsAllocationGranularity));
#if BINNED2_ALLOCATOR_STATS
        Binned2HashMemory += Align(MaxHashBuckets * sizeof(PoolHashBucket), OsAllocationGranularity);
#endif
    }

    DefaultConstructItems<PoolHashBucket>(HashBuckets, MaxHashBuckets); // default-construct every bucket in place
    MallocBinned2 = this;
    GFixedMallocLocationPtr = (FMalloc**)(&MallocBinned2);
}

具體數值如下:

 

FPoolTable  // 同一Block大小內存池表

/** Pool table: all pools that share one block size. sizeof(FPoolTable) is 24 according to the original article. */
struct FPoolTable
{
    FPoolList ActivePools;  // List of pools that still have free blocks
    FPoolList ExhaustedPools; // List of pools that are full (nothing left to allocate)
    uint32    BlockSize;  // Block size of every pool in this table

    // ... ...
};

 

FPoolList  // 內存池鏈表

// Linked list of pools. sizeof(FPoolList) is 8 according to the original article (its only visible member is one pointer).
struct FPoolList
{
    // ... ...

private:
    FPoolInfo* Front; // First pool in the list
};

 

FPoolInfo  // 內存池

FPoolInfo中的所有Block為空閑時,才釋放其占用的內存頁

// A memory pool. sizeof(FPoolInfo) is 32 according to the original article.
struct FMallocBinned2::FPoolInfo  
{
    // ... ...
 // Number of blocks currently allocated; when it drops to 0 the whole pool and the memory it manages are released
 public:    uint16      Taken;          // Number of allocated elements in this pool, when counts down to zero can free the entire pool    
 public:    ECanary        Canary;    // See ECanary
 // Number of bytes allocated
 private:    uint32      AllocSize;      // Number of bytes allocated
 // Binned pool: head of the pool's chain of free blocks. Non-binned: describes the allocation made directly by the OS.
 public:    FFreeBlock* FirstFreeBlock; // Pointer to first free memory in this pool or the OS Allocation Size in bytes if this allocation is not binned
 // Next pool in the list
 public:    FPoolInfo*  Next;           // Pointer to next pool
 public:    FPoolInfo** PtrToPrevNext;  // Pointer to whichever pointer points to this pool
 
    // ... ...
};

 

FFreeBlock  // 內存塊

// Header describing a run of free blocks. sizeof(FFreeBlock) is 16 according to the original article.
struct FFreeBlock
{
    // ... ...
    uint16 BlockSize;                // Size of the blocks that this list points to (the block size of the owning pool)
    uint8 PoolIndex;                // Index of this pool
    uint8 Canary;                    // Constant value of 0xe3; checked to detect out-of-bounds writes that corrupted this header
    uint32 NumFreeBlocks;          // Number of consecutive free blocks here, at least 1.
    void*  NextFreeBlock;          // Next free block in another pool. When a block is freed, an FFreeBlock header is built in it and pushed on the front of the pool's FirstFreeBlock chain
};

 

PoolHashBucket  // 內存池哈希桶

// Parameters used to map a pointer to its pool hash bucket. sizeof(FPtrToPoolMapping) is 32 (four uint64 members are visible).
struct FPtrToPoolMapping
{
    // ... ...

private:
    /** Shift to apply to a pointer to get the reference from the indirect tables */
    uint64 PtrToPoolPageBitShift;

    /** Shift required to get required hash table key. */
    uint64 HashKeyShift;

    /** Used to mask off the bits that have been used to lookup the indirect table */
    uint64 PoolMask;

    // PageSize dependent constants
    uint64 MaxHashBuckets;
};

/** Pool hash bucket: holds the pools that belong to one key hashed from a memory address. sizeof(PoolHashBucket) is 32 according to the original article. */
struct FMallocBinned2::PoolHashBucket
{
    UPTRINT         BucketIndex; // Hash key: Key = Ptr >> Allocator.HashKeyShift (the original article quotes a shift of 27 bits; that value cannot be derived from this listing)
    FPoolInfo*      FirstPool; // NOTE(review): the original article says this points at the start of a 64KB (PageSize) block of pool memory - confirm
    PoolHashBucket* Prev; // Previous pool hash bucket
    PoolHashBucket* Next; // Next pool hash bucket
    
    // ... ...
};

 

從內存池Pool中分配內存給Block

struct FMallocBinned2::FPoolInfo
{
    // ... ...

    /**
     * Hands out one block from this pool.
     * The pool must still have a free regular block (asserted).
     * @return Pointer to the block taken from the FirstFreeBlock run.
     */
    void* AllocateRegularBlock()
    {
        check(HasFreeRegularBlock());

        // Take the block from the current free run and count it as allocated.
        void* const CarvedBlock = FirstFreeBlock->AllocateRegularBlock();
        ++Taken;

        // NOTE(review): the original article says this retires the pool to the ExhaustedPools list
        // once no free block is left; the helper's body is not shown here - confirm.
        ExhaustPoolIfNecessary();
        return CarvedBlock;
    }

    // ... ...
};


struct FFreeBlock
{
    // ... ...

    /**
     * Removes one block from the run described by this header and returns its address.
     * The caller is expected to have verified that at least one free block remains.
     */
    FORCEINLINE void* AllocateRegularBlock()
    {
        --NumFreeBlocks;

        uint8* const HeaderAddress = (uint8*)this;
        if (!IsAligned(this, BINNED2_LARGE_ALLOC))
        {
            // Header is not on a 64KB (BINNED2_LARGE_ALLOC) boundary: blocks are handed out from
            // the highest address downwards, the last one being the header's own block.
            return HeaderAddress + (NumFreeBlocks)* BlockSize;
        }

        // Header sits on a 64KB boundary: blocks are laid out against the end of the 64KB span
        // and handed out at increasing addresses.
        return HeaderAddress + BINNED2_LARGE_ALLOC - (NumFreeBlocks + 1) * BlockSize;
    }

    // ... ...
};

 

TLS Cache機制

與FMallocBinned內存分配器相比,FMallocBinned2最大的改進:

引入了TLS(Thread Local Storage線程局部存儲。線程可以有自己的存儲空間,以鍵值對形式存儲一些自己獨有的變量)緩存,來優化內存的分配速度

各線程會記錄被free的地址,把它們保存到一個列表中,當這個線程再有malloc請求來時,如果BlockSize匹配,則直接返回之前緩存的free地址

這樣就不需要再訪問FPoolTable SmallPoolTables[BINNED2_SMALL_POOL_COUNT]了,因此也不用再加互斥鎖了

 

各個線程在啟動時,通過調用FMemory::SetupTLSCachesOnCurrentThread()創建自己的TLS數據FPerThreadFreeBlockLists

各個線程在創建FPerThreadFreeBlockLists后,都會把它添加到Binned2的RegisteredFreeBlockLists數組中記錄。代碼如下:

/**
 * Creates the calling thread's free-block cache.
 * Does nothing when per-thread caches are disabled and run-time tweaking is compiled out.
 */
void FMallocBinned2::SetupTLSCachesOnCurrentThread()
{
    const bool bCachesCanBeUsed = BINNED2_ALLOW_RUNTIME_TWEAKING || GMallocBinned2PerThreadCaches;
    if (bCachesCanBeUsed)
    {
        // The slot index is a static shared by every thread; it is allocated only while it is still 0.
        if (FMallocBinned2::Binned2TlsSlot == 0)
        {
            FMallocBinned2::Binned2TlsSlot = FPlatformTLS::AllocTlsSlot();
        }
        check(FMallocBinned2::Binned2TlsSlot);

        // Each thread then stores its own FPerThreadFreeBlockLists in that slot.
        FPerThreadFreeBlockLists::SetTLS();
    }
}


void FMallocBinned2::FPerThreadFreeBlockLists::SetTLS()
{
    check(FMallocBinned2::Binned2TlsSlot);
    FPerThreadFreeBlockLists* ThreadSingleton = (FPerThreadFreeBlockLists*)FPlatformTLS::GetTlsValue(FMallocBinned2::Binned2TlsSlot);
    if (!ThreadSingleton)
    {
        LLM_PLATFORM_SCOPE(ELLMTag::FMalloc);
        ThreadSingleton = new (FPlatformMemory::BinnedAllocFromOS(Align(sizeof(FPerThreadFreeBlockLists), FMallocBinned2::OsAllocationGranularity))) FPerThreadFreeBlockLists();
#if BINNED2_ALLOCATOR_STATS
        Binned2TLSMemory += Align(sizeof(FPerThreadFreeBlockLists), FMallocBinned2::OsAllocationGranularity);
#endif
        FPlatformTLS::SetTlsValue(FMallocBinned2::Binned2TlsSlot, ThreadSingleton);
        FMallocBinned2::Private::RegisterThreadFreeBlockLists(ThreadSingleton);
    }
}

/** Returns the array that records every thread's FPerThreadFreeBlockLists; the array is a function-local static. */
static TArray<FPerThreadFreeBlockLists*>& GetRegisteredFreeBlockLists()
{
    static TArray<FPerThreadFreeBlockLists*> AllThreadLists;
    return AllThreadLists;
}
/**
 * Records a thread's free-block lists in the registered-lists array.
 * The registration mutex is held for the duration of the call.
 * @param FreeBlockLists Lists created for the calling thread.
 */
static void RegisterThreadFreeBlockLists( FPerThreadFreeBlockLists* FreeBlockLists )
{
    FScopeLock RegistrationGuard(&GetFreeBlockListsRegistrationMutex());
#if BINNED2_ALLOCATOR_STATS_VALIDATION
    // The counter is raised only while the array is being modified.
    ++RecursionCounter;
    GetRegisteredFreeBlockLists().Add(FreeBlockLists);
    --RecursionCounter;
#else
    GetRegisteredFreeBlockLists().Add(FreeBlockLists);
#endif
}

 

各線程調用FMemory::SetupTLSCachesOnCurrentThread()的情況:

 

每個線程都會有一份FPerThreadFreeBlockLists副本,其中FreeLists[]數組也通過BlockSize產生,有45個元素

每個元素類型為FFreeBlockList,包含FBundle PartialBundle鏈表(未裝滿的Bundle)和FBundle FullBundle鏈表(已裝滿的Bundle)

為了讓Binned2不過多占用內存空間,給每個FBundle限定了元素數量(不能超過64)及總容量(其中FBundleNode的Count*BlockSize不能大於64KB)

 

// Per-thread cache of freed blocks: one FFreeBlockList per block-size bin.
struct FPerThreadFreeBlockLists
{
    // ... ...
private:
    FFreeBlockList FreeLists[BINNED2_SMALL_POOL_COUNT]; // BINNED2_SMALL_POOL_COUNT is 45: one entry per block-size bin
};

// Free list for one block-size bin. sizeof(FFreeBlockList) is 32 according to the original article (two 16-byte FBundle members).
struct FFreeBlockList
{
    // ... ...
private:
    FBundle PartialBundle; // Bundle that is not full yet
    FBundle FullBundle;    // Bundle that has been filled
};

// A linked list of FBundleNode. sizeof(FBundle) is 16 according to the original article.
struct FBundle
{
    // ... ...
    FBundleNode* Head;  // Head of the list
    uint32       Count; // NOTE(review): presumably the number of nodes currently in the list - confirm
};

// Node of a bundle list. sizeof(FBundleNode) is 16 according to the original article.
struct FBundleNode
{
    FBundleNode* NextNodeInCurrentBundle; // Next node within the same bundle
    // The two members below share storage.
    union
    {
        FBundleNode* NextBundle; // Next bundle in a chain of bundles
        int32 Count;
    };
};

注:FBundleNode*指向的是Block內存塊區域,把Ptr指針轉換成FBundleNode*后,其size為16字節,對其修改不會影響到其他內存空間

 

FGlobalRecycler  // 用於緩存FBundle FullBundle鏈表

// Global store of full bundles. sizeof(FGlobalRecycler) is 64 * 45 = 2880 according to the original article.
struct FGlobalRecycler
{

    // ... ...

private:
    struct FPaddedBundlePointer // sizeof(FPaddedBundlePointer) is 8 * 8 = 64 according to the original article
    {
        FBundleNode* FreeBundles[BINNED2_MAX_GMallocBinned2MaxBundlesBeforeRecycle]; // BINNED2_MAX_GMallocBinned2MaxBundlesBeforeRecycle is 8

        // ... ...
    };
    
    // ... ...
    
    MS_ALIGN(PLATFORM_CACHE_LINE_SIZE) FPaddedBundlePointer Bundles[BINNED2_SMALL_POOL_COUNT] GCC_ALIGN(PLATFORM_CACHE_LINE_SIZE); // BINNED2_SMALL_POOL_COUNT is 45: one slot group per block-size bin
};

// Caches full bundles, with room for 8 per block-size bin. According to the original article, PushBundle succeeds while a slot is free and fails otherwise, and PopBundle fails and returns null when every slot is empty (neither function is shown in this listing).
static FGlobalRecycler GGlobalRecycler;

 

Malloc申請內存時TLS Cache的流程細節如下:

 

 

 

 

 

Free釋放內存時TLS Cache的流程細節如下:

 

 

 

Free掉Ptr指針的內存占用  

void FMallocBinned2::FreeExternal(void* Ptr)
{
    if (!IsOSAllocation(Ptr)) // 是否為操作系統直接分配的內存塊
    {
        // Bin模式,內存池
        check(Ptr); // null is 64k aligned so we should not be here
        FFreeBlock* BasePtr = GetPoolHeaderFromPointer(Ptr); // 將指針轉換成FFreeBlock*
        BasePtr->CanaryTest();
        uint32 BlockSize = BasePtr->BlockSize;
        uint32 PoolIndex = BasePtr->PoolIndex;

        FBundleNode* BundlesToRecycle = nullptr;
        FPerThreadFreeBlockLists* Lists = GMallocBinned2PerThreadCaches ? FPerThreadFreeBlockLists::Get() : nullptr;
        if (Lists)
        {
            // 若FPerThreadFreeBlockLists[BlockSize].FullBundle.Head不為空,會加入GGlobalRecycler對應Block檔位的數組中緩存 // 當GGlobalRecycler對應Block檔位沒有空位時,BundlesToRecycle會被賦值為FPerThreadFreeBlockLists[BlockSize].FullBundle.Head // 當GGlobalRecycler對應Block檔位有空位時,會被添加進去,並返回空
            BundlesToRecycle = Lists->RecycleFullBundle(BasePtr->PoolIndex); bool bPushed = Lists->Free(Ptr, PoolIndex, BlockSize); // 加到FPerThreadFreeBlockLists[BlockSize].PartialBundle.Head鏈表的頭部
 check(bPushed); #if BINNED2_ALLOCATOR_STATS Lists->AllocatedMemory -= BlockSize; #endif
        }
        else
        {
            BundlesToRecycle = (FBundleNode*)Ptr;
            BundlesToRecycle->NextNodeInCurrentBundle = nullptr;
        }
        if (BundlesToRecycle) // 若不為空
        {
            BundlesToRecycle->NextBundle = nullptr;
            FScopeLock Lock(&Mutex);
            Private::FreeBundles(*this, BundlesToRecycle, BlockSize, PoolIndex); // 釋放BundlesToRecycle地址所占用的Block內存
#if BINNED2_ALLOCATOR_STATS
            if (!Lists)
            {
                // lists track their own stat track them instead in the global stat if we don't have lists
                AllocatedSmallPoolMemory -= ((int64)(BlockSize));
            }
#endif
        }
    }
    else if (Ptr)
    {
        // 非Bin模式, 操作系統直接分配
        FScopeLock Lock(&Mutex);
        FPoolInfo* Pool = Private::FindPoolInfo(*this, Ptr);
        if (!Pool)
        {
            UE_LOG(LogMemory, Fatal, TEXT("FMallocBinned2 Attempt to free an unrecognized block %p"), Ptr);
        }
        UPTRINT PoolOsBytes = Pool->GetOsAllocatedBytes();
        SIZE_T PoolOSRequestedBytes = Pool->GetOSRequestedBytes();

#if BINNED2_ALLOCATOR_STATS
        AllocatedLargePoolMemory -= ((int64)PoolOSRequestedBytes);
        AllocatedLargePoolMemoryWAlignment -= ((int64)PoolOsBytes);
#endif

        checkf(PoolOSRequestedBytes <= PoolOsBytes, TEXT("FMallocBinned2::FreeExternal %d %d"), int32(PoolOSRequestedBytes), int32(PoolOsBytes));
        Pool->SetCanary(FPoolInfo::ECanary::Unassigned, true, false);
        // Free an OS allocation.
        CachedOSPageAllocator.Free(Ptr, PoolOsBytes);
    }
}


/**
 * Returns every block held in a chain of bundles to the pool it came from.
 * FreeExternal calls this while holding the allocator's Mutex.
 *
 * @param Allocator        Allocator that owns the pools.
 * @param BundlesToRecycle Head of a chain of bundles (linked through NextBundle); each bundle is a
 *                         chain of nodes linked through NextNodeInCurrentBundle.
 * @param InBlockSize      Block size written into each rebuilt FFreeBlock header.
 * @param InPoolIndex      Bin index; selects the pool table and is written into each rebuilt header.
 */
static void FreeBundles(FMallocBinned2& Allocator, FBundleNode* BundlesToRecycle, uint32 InBlockSize, uint32 InPoolIndex)
{
    FPoolTable& Table = Allocator.SmallPoolTables[InPoolIndex];

    // Release the memory referenced by every node of every bundle in the chain.
    FBundleNode* Bundle = BundlesToRecycle;
    while (Bundle)
    {
        // Read the links before the node's memory is overwritten with an FFreeBlock header below.
        FBundleNode* NextBundle = Bundle->NextBundle;

        FBundleNode* Node = Bundle;
        do
        {
            FBundleNode* NextNode = Node->NextNodeInCurrentBundle;
            FPoolInfo*   NodePool = FindPoolInfo(Allocator, Node);
            if (!NodePool)
            {
                UE_LOG(LogMemory, Fatal, TEXT("FMallocBinned2 Attempt to free an unrecognized small block %p"), Node);
            }
            NodePool->CheckCanary(FPoolInfo::ECanary::FirstFreeBlockIsPtr);

            // If this pool was exhausted, move to available list.
            if (!NodePool->FirstFreeBlock) // no free block: the pool is currently on the ExhaustedPools list
            {
                Table.ActivePools.LinkToFront(NodePool); // move the pool from ExhaustedPools to ActivePools
            }
            else
            {
                check(NodePool->FirstFreeBlock->Canary == 0 || NodePool->FirstFreeBlock->IsCanaryOk());
            }

            // Free a pooled allocation: build a new FFreeBlock header at the node's address with
            // NumFreeBlocks set to 1 and push it on the front of the pool's FirstFreeBlock chain.
            FFreeBlock* Free = (FFreeBlock*)Node;
            Free->NumFreeBlocks = 1;
            Free->NextFreeBlock = NodePool->FirstFreeBlock;
            Free->BlockSize     = InBlockSize;
            Free->Canary = FFreeBlock::CANARY_VALUE;
            Free->PoolIndex = InPoolIndex;
            NodePool->FirstFreeBlock   = Free;

            // Free this pool.
            check(NodePool->Taken >= 1);
            if (--NodePool->Taken == 0) // every block of the pool is free again
            {
                NodePool->SetCanary(FPoolInfo::ECanary::Unassigned, true, false);
                FFreeBlock* BasePtrOfNode = GetPoolHeaderFromPointer(Node);

                // Free the OS memory.
                NodePool->Unlink(); // detach the pool from its FPoolList
                Allocator.CachedOSPageAllocator.Free(BasePtrOfNode, Allocator.PageSize); // give the pool's whole page back
#if BINNED2_ALLOCATOR_STATS
                AllocatedOSSmallPoolMemory -= ((int64)Allocator.PageSize);
#endif
            }

            Node = NextNode; // advance to the next node of this bundle
        } while (Node);

        Bundle = NextBundle;
    }
}

 

Malloc分配內存

/**
 * Routes an allocation request to the small (pooled) or the large (OS) path.
 * According to the original article, UseSmallAlloc accepts a request when
 * Size <= BINNED2_MAX_SMALL_POOL_SIZE and Alignment <= BINNED2_MINIMUM_ALIGNMENT.
 *
 * @param Size      Requested size in bytes.
 * @param Alignment Requested alignment.
 * @return Pointer produced by the selected path.
 */
FORCEINLINE void* MallocSelect(SIZE_T Size, uint32 Alignment)
{
    // Small path: served from the memory pools.
    // Large path: allocated directly by the OS and, per the original article, recorded in HashBuckets.
    return UseSmallAlloc(Size, Alignment)
        ? MallocExternalSmall(Size, Alignment)
        : MallocExternalLarge(Size, Alignment);
}


/**
 * Allocates a small block from the bin that fits Size.
 *
 * Fast path: take a cached block from the calling thread's free-block lists, without locking.
 * Slow path: under Mutex, take a block from the first active pool of the bin (creating a pool when
 * none is active), then optionally pre-fill the thread cache with up to GMallocBinned2AllocExtra
 * further blocks from the same pool.
 *
 * @param Size      Requested size in bytes; determines the bin.
 * @param Alignment Not used by this function.
 * @return Pointer to the allocated block.
 */
void* FMallocBinned2::MallocExternalSmall(SIZE_T Size, uint32 Alignment)
{
    uint32 PoolIndex = BoundSizeToPoolIndex(Size); // index of the bin in SmallPoolTables

    // Prefer a block from the thread-local cache.
    FPerThreadFreeBlockLists* Lists = GMallocBinned2PerThreadCaches ? FPerThreadFreeBlockLists::Get() : nullptr;
    if (Lists)
    {
        // According to the original article: when this thread's PartialBundle for the bin is empty,
        // ObtainRecycledPartial pops a bundle from GGlobalRecycler into it, then reports whether the
        // PartialBundle is non-empty.
        if (Lists->ObtainRecycledPartial(PoolIndex))
        {
            if (void* Result = Lists->Malloc(PoolIndex)) // pop one node from the front of the PartialBundle
            {
#if BINNED2_ALLOCATOR_STATS
                uint32 BlockSize = PoolIndexToBlockSize(PoolIndex);
                Lists->AllocatedMemory += BlockSize;
#endif
                return Result; // a cached block was available for this bin
            }
        }
    }

    FScopeLock Lock(&Mutex); // held until the function returns

    // Allocate from small object pool.
    FPoolTable& Table = SmallPoolTables[PoolIndex];

    FPoolInfo* Pool;
    if (!Table.ActivePools.IsEmpty())
    {
        Pool = &Table.ActivePools.GetFrontPool(); // first pool that still has free blocks
    }
    else
    {
        Pool = &Table.ActivePools.PushNewPoolToFront(*this, Table.BlockSize, PoolIndex); // create a new pool
    }

    void* Result = Pool->AllocateRegularBlock();
#if BINNED2_ALLOCATOR_STATS
    AllocatedSmallPoolMemory += PoolIndexToBlockSize(PoolIndex);
#endif // BINNED2_ALLOCATOR_STATS
    if (GMallocBinned2AllocExtra) // 32 by default; thread-cache optimisation
    {
        if (Lists)
        {
            // prefill the free list with some allocations so we are less likely to hit this slow path with the mutex
            for (int32 Index = 0; Index < GMallocBinned2AllocExtra && Pool->HasFreeRegularBlock(); Index++)
            {
                // Hand the block just taken to the thread cache; stop when the cache refuses it, so
                // that Result is still the block returned to the caller.
                if (!Lists->Free(Result, PoolIndex, Table.BlockSize))
                {
                    break;
                }
                Result = Pool->AllocateRegularBlock();
            }
        }
    }
    if (!Pool->HasFreeRegularBlock())
    {
        Table.ExhaustedPools.LinkToFront(Pool); // the pool has no free block left: move it to ExhaustedPools
    }

    return Result;
}

 

Android(小米10)DumpPlatformAndAllocatorStats統計信息:

[2021.05.27-15.59.49:152][ 66]LogMemory: Platform Memory Stats for Android
[2021.05.27-15.59.49:152][ 66]LogMemory: Process Physical Memory: 1207.08 MB used, 1254.11 MB peak
[2021.05.27-15.59.49:152][ 66]LogMemory: Process Virtual Memory: 8984.62 MB used, 9077.56 MB peak
[2021.05.27-15.59.49:152][ 66]LogMemory: Physical Memory: 5445.78 MB used,  2177.80 MB free, 7623.57 MB total
[2021.05.27-15.59.49:153][ 66]LogMemory: Virtual Memory: 608.03 MB used,  1439.97 MB free, 2048.00 MB total
[2021.05.27-15.59.49:153][ 66]LogMemory: PageSize: 4096, BinnedPageSize: 65536, BinnedAllocationGranularity: 4096, AddressLimit: 8589934592
[2021.05.27-15.59.49:154][ 66]FMallocBinned2 Mem report
[2021.05.27-15.59.49:154][ 66]Constants.BinnedPageSize = 65536
[2021.05.27-15.59.49:154][ 66]Constants.BinnedAllocationGranularity = 4096
[2021.05.27-15.59.49:154][ 66]Small Pool Allocations: 388.752121mb  (including block size padding)
[2021.05.27-15.59.49:155][ 66]Small Pool OS Allocated: 419.000000mb
[2021.05.27-15.59.49:155][ 66]Large Pool Requested Allocations: 204.530167mb
[2021.05.27-15.59.49:155][ 66]Large Pool OS Allocated: 205.332031mb
[2021.05.27-15.59.49:155][ 66]Requested Allocations: 204.530167mb
[2021.05.27-15.59.49:155][ 66]OS Allocated: 205.332031mb
[2021.05.27-15.59.49:155][ 66]PoolInfo: 1.687500mb
[2021.05.27-15.59.49:155][ 66]Hash: 0.003906mb
[2021.05.27-15.59.49:156][ 66]TLS: 0.066406mb
[2021.05.27-15.59.49:156][ 66]Total allocated from OS: 626.089844mb
[2021.05.27-15.59.49:156][ 66]Cached free OS pages: 3.894531mb

 

PC下DumpPlatformAndAllocatorStats統計信息:

[2021.06.04-06.12.34:488][748]LogMemory: Platform Memory Stats for Windows
[2021.06.04-06.12.34:488][748]LogMemory: Process Physical Memory: 704.69 MB used, 775.71 MB peak
[2021.06.04-06.12.34:488][748]LogMemory: Process Virtual Memory: 784.52 MB used, 888.80 MB peak
[2021.06.04-06.12.34:488][748]LogMemory: Physical Memory: 24035.47 MB used,  8565.63 MB free, 32601.11 MB total
[2021.06.04-06.12.34:488][748]LogMemory: Virtual Memory: 134206408.00 MB used,  11316.60 MB free, 134217728.00 MB total
[2021.06.04-06.12.34:489][748]FMallocBinned2 Mem report
[2021.06.04-06.12.34:489][748]Constants.BinnedPageSize = 65536
[2021.06.04-06.12.34:489][748]Constants.BinnedAllocationGranularity = 4096
[2021.06.04-06.12.34:489][748]Small Pool Allocations: 130.058121mb  (including block size padding)
[2021.06.04-06.12.34:489][748]Small Pool OS Allocated: 157.312500mb
[2021.06.04-06.12.34:489][748]Large Pool Requested Allocations: 141.529739mb
[2021.06.04-06.12.34:489][748]Large Pool OS Allocated: 141.667969mb
[2021.06.04-06.12.34:489][748]Requested Allocations: 141.529739mb
[2021.06.04-06.12.34:489][748]OS Allocated: 141.667969mb
[2021.06.04-06.12.34:489][748]PoolInfo: 0.500000mb
[2021.06.04-06.12.34:489][748]Hash: 0.007813mb
[2021.06.04-06.12.34:489][748]TLS: 0.128906mb
[2021.06.04-06.12.34:489][748]Total allocated from OS: 299.617188mb
[2021.06.04-06.12.34:490][748]Cached free OS pages: 34.992188mb

 

參考

UE4 MallocBinned2分配器

People Mountain People Sea(服務器篇)

 

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM