UE4 stats性能埋點


某個Stats文件所統計到的大類(Group Name)如下:

某個Stats文件中,Group Name為Memory的分組所統計到的細項如下:

 

統計項類型:

為int或float數字類型   // 用於Stat HUD展示,如下圖所示

Stat GPU // 顯示幀的GPU統計數據   注:android平台上沒有輸出

 

為Memory類型  // 用於Stat HUD展示,如下圖所示

Stat Memory // 顯示有關虛幻引擎中各個子系統使用多少內存的統計數據

 

為hierarchy類別,可以嵌套子節點,包含CallCount、InclusiveTime、ExclusiveTime等字段   // 用於Stat HUD展示和Profiler工具CallStack展示  如下圖所示

Stat Component   // 顯示組件列表及組件性能信息

Profiler工具CallStack展示

 

 

本文重點講述如何用自定義hierarchy類別來埋點,並在Profiler工具的CallStack樹中查看數據。

 

定義分組

DECLARE_STATS_GROUP(TEXT("AI"),STATGROUP_AI, STATCAT_Advanced);   // The 3 arguments are, in order: Description, GroupName, GroupCategory

// The macro above expands to the following struct.
// It describes one stats group through compile-time flags and static accessors:
// this group is enabled by default, compiled in, and not sorted by name.
struct FStatGroup_STATGROUP_AI
{
    enum { DefaultEnable = true, CompileTimeEnable = true, SortByName = false };

    // Accessors return the stringized macro arguments (group name, category, description).
    static __forceinline const char* GetGroupName() { return "STATGROUP_AI"; }
    static __forceinline const char* GetGroupCategory() { return "STATCAT_Advanced"; }
    static __forceinline const TCHAR* GetDescription() { return L"AI"; }
    // Accessors exposing the enum flags above as bool.
    static __forceinline bool IsDefaultEnabled() { return (bool)DefaultEnable; }
    static __forceinline bool IsCompileTimeEnable() { return (bool)CompileTimeEnable; }
    static __forceinline bool GetSortByName() { return (bool)SortByName; }
};;;

 

DECLARE_STATS_GROUP_VERBOSE(TEXT("LoadTimeVerbose"), STATGROUP_LoadTimeVerbose, STATCAT_Advanced);  // Stats collection for this group is disabled by default

// The macro above expands to the following struct.
// Compared with DECLARE_STATS_GROUP, the only difference is DefaultEnable = false.
struct FStatGroup_STATGROUP_LoadTimeVerbose
{
    enum { DefaultEnable = false, CompileTimeEnable = true, SortByName = false };

    // Accessors return the stringized macro arguments (group name, category, description).
    static __forceinline const char* GetGroupName() { return "STATGROUP_LoadTimeVerbose"; }
    static __forceinline const char* GetGroupCategory() { return "STATCAT_Advanced"; }
    static __forceinline const TCHAR* GetDescription() { return L"LoadTimeVerbose"; }
    // Accessors exposing the enum flags above as bool.
    static __forceinline bool IsDefaultEnabled() { return (bool)DefaultEnable; }
    static __forceinline bool IsCompileTimeEnable() { return (bool)CompileTimeEnable; }
    static __forceinline bool GetSortByName() { return (bool)SortByName; }
};;;

 

DECLARE_STATS_GROUP_SORTBYNAME(TEXT("Streaming Overview"),STATGROUP_StreamingOverview, STATCAT_Advanced); // Entries are sorted by name, which costs somewhat more

// The macro above expands to the following struct.
// Compared with DECLARE_STATS_GROUP, the only difference is SortByName = true.
struct FStatGroup_STATGROUP_StreamingOverview
{
    enum { DefaultEnable = true, CompileTimeEnable = true, SortByName = true };

    // Accessors return the stringized macro arguments (group name, category, description).
    static __forceinline const char* GetGroupName() { return "STATGROUP_StreamingOverview"; }
    static __forceinline const char* GetGroupCategory() { return "STATCAT_Advanced"; }
    static __forceinline const TCHAR* GetDescription() { return L"Streaming Overview"; }
    // Accessors exposing the enum flags above as bool.
    static __forceinline bool IsDefaultEnabled() { return (bool)DefaultEnable; }
    static __forceinline bool IsCompileTimeEnable() { return (bool)CompileTimeEnable; }
    static __forceinline bool GetSortByName() { return (bool)SortByName; }
};;;

 

DECLARE_STATS_GROUP_MAYBE_COMPILED_OUT(TEXT("SlateVeryVerbose"), STATGROUP_SlateVeryVerbose, STATCAT_Advanced, WITH_VERY_VERBOSE_SLATE_STATS);  // Disabled by default. When the macro WITH_VERY_VERBOSE_SLATE_STATS is 0, the logic for this stat group is not compiled in

// The macro above expands to the following struct (shown for WITH_VERY_VERBOSE_SLATE_STATS == 0).
// The fourth macro argument becomes the value of CompileTimeEnable.
struct FStatGroup_STATGROUP_SlateVeryVerbose
{
    enum { DefaultEnable = false, CompileTimeEnable = 0, SortByName = false };

    // Accessors return the stringized macro arguments (group name, category, description).
    static __forceinline const char* GetGroupName() { return "STATGROUP_SlateVeryVerbose"; }
    static __forceinline const char* GetGroupCategory() { return "STATCAT_Advanced"; }
    static __forceinline const TCHAR* GetDescription() { return L"SlateVeryVerbose"; }
    // Accessors exposing the enum flags above as bool.
    static __forceinline bool IsDefaultEnabled() { return (bool)DefaultEnable; }
    static __forceinline bool IsCompileTimeEnable() { return (bool)CompileTimeEnable; }
    static __forceinline bool GetSortByName() { return (bool)SortByName; }
};;;

 

定義埋點方式1

在cpp代碼的全局區域,定義埋點結構體類型和static全局變量

DECLARE_CYCLE_STAT(TEXT("Test1"), STAT_Test1, STATGROUP_TestGroup); // The 3 arguments are, in order: Description, the stat struct type, GroupName

// The macro expands to:
// a stat descriptor struct whose TGroup typedef ties it to the group struct,
// followed by a file-static FThreadSafeStaticStat holder for that descriptor.
struct FStat_STAT_Test1
{
    typedef FStatGroup_STATGROUP_TestGroup TGroup;
    static __forceinline const char* GetStatName() { return "STAT_Test1"; }
    static __forceinline const TCHAR* GetDescription() { return L"Test1"; }
    static __forceinline EStatDataType::Type GetStatType() { return EStatDataType::ST_int64; }
    static __forceinline bool IsClearEveryFrame() { return true; }
    static __forceinline bool IsCycleStat() { return true; }

    // A cycle stat has no memory region, so the invalid region is reported.
    static __forceinline FPlatformMemory::EMemoryCounterRegion GetMemoryRegion()
    {
        return FPlatformMemory::MCR_Invalid;
    }
};;

// Static stat variable; SCOPE_CYCLE_COUNTER(STAT_Test1) later calls GetStatId() on it.
static struct FThreadSafeStaticStat<FStat_STAT_Test1> StatPtr_STAT_Test1;;

 

在函數中插入埋點

DECLARE_STATS_GROUP(TEXT("TestGroupDesc"), STATGROUP_TestGroup, STATCAT_Advanced); // 定義名為TestGroup的分組

DECLARE_CYCLE_STAT(TEXT("Test1"), STAT_Test1, STATGROUP_TestGroup); // 定義Test1的埋點類型與static埋點變量,並放在TestGroup分組中 DECLARE_CYCLE_STAT(TEXT("Test2"), STAT_Test2, STATGROUP_TestGroup); // 定義Test2的埋點類型與static埋點變量,並放在TestGroup分組中 DECLARE_CYCLE_STAT(TEXT("Test3"), STAT_Test3, STATGROUP_TestGroup); // 定義Test3的埋點類型與static埋點變量,並放在TestGroup分組中 DECLARE_CYCLE_STAT(TEXT("Test4"), STAT_Test4, STATGROUP_TestGroup); // 定義Test4的埋點類型與static埋點變量,並放在TestGroup分組中 DECLARE_CYCLE_STAT(TEXT("Test5"), STAT_Test5, STATGROUP_TestGroup); // 定義Test5的埋點類型與static埋點變量,並放在TestGroup分組中 DECLARE_CYCLE_STAT(TEXT("Test6"), STAT_Test6, STATGROUP_TestGroup); // 定義Test6的埋點類型與static埋點變量,並放在TestGroup分組中 // LoopCall(1)在我的電腦耗時約為16ms #define LoopCall(n) \ { \ uint64 sum = 1; \ for (int32 i = 1; i < 10000000*n; i++) \ { \ sum *= i; \ } \ } void AMyTest1Character::StatTest() { SCOPE_CYCLE_COUNTER(STAT_Test1); // 宏展開后的代碼為:FScopeCycleCounter CycleCount_STAT_Test1((StatPtr_STAT_Test1.GetStatId()));; FPlatformProcess::Sleep(0.002); // 統計到CPU Stall - Sleep
// 條件埋點: 條件成立時,才會埋點成功
int a = 100; CONDITIONAL_SCOPE_CYCLE_COUNTER(STAT_Test2, a > 50); // 宏展開后的代碼為:FScopeCycleCounter CycleCount_STAT_Test2(a > 50 ? (StatPtr_STAT_Test2.GetStatId()) : TStatId());; LoopCall(1); SCOPE_CYCLE_COUNTER(STAT_Test3); // 宏展開后的代碼為:FScopeCycleCounter CycleCount_STAT_Test3((StatPtr_STAT_Test3.GetStatId()));; FPlatformProcess::Sleep(0.005); // 統計到CPU Stall - Sleep { SCOPE_CYCLE_COUNTER(STAT_Test6); // 宏展開后的代碼為:FScopeCycleCounter CycleCount_STAT_Test6((StatPtr_STAT_Test6.GetStatId()));; } FPlatformProcess::Sleep(0.003); // 統計到CPU Stall - Sleep SCOPE_CYCLE_COUNTER(STAT_Test4); // 宏展開后的代碼為:FScopeCycleCounter CycleCount_STAT_Test4((StatPtr_STAT_Test4.GetStatId()));; { SCOPE_CYCLE_COUNTER(STAT_Test5); // 宏展開后的代碼為:FScopeCycleCounter CycleCount_STAT_Test5((StatPtr_STAT_Test5.GetStatId()));; LoopCall(2); } { SCOPE_CYCLE_COUNTER(STAT_Test5); // 宏展開后的代碼為:FScopeCycleCounter CycleCount_STAT_Test5((StatPtr_STAT_Test5.GetStatId()));; FPlatformProcess::SleepNoStats(0.5); // 會被統計到當前作用域埋點的IncTime中 } SCOPE_CYCLE_COUNTER(STAT_Test6); // 宏展開后的代碼為:FScopeCycleCounter CycleCount_STAT_Test6((StatPtr_STAT_Test6.GetStatId()));; }

 

定義埋點方式2

相比方式1,該方式不需要提前定義埋點類型,比較方便

DECLARE_SCOPE_CYCLE_COUNTER(TEXT("UnhashUnreachableObjects"), STAT_UnhashUnreachableObjects, STATGROUP_GC)展開為如下代碼:

// Stat descriptor generated for STAT_UnhashUnreachableObjects; the TGroup typedef
// ties it to the STATGROUP_GC group struct.
struct FStat_STAT_UnhashUnreachableObjects
{
    typedef FStatGroup_STATGROUP_GC TGroup;
    static __forceinline const char* GetStatName() { return "STAT_UnhashUnreachableObjects"; }
    static __forceinline const TCHAR* GetDescription() { return L"UnhashUnreachableObjects"; }
    static __forceinline EStatDataType::Type GetStatType() { return EStatDataType::ST_int64; }
    static __forceinline bool IsClearEveryFrame() { return true; }
    static __forceinline bool IsCycleStat() { return true; }

    // A cycle stat has no memory region, so the invalid region is reported.
    static __forceinline FPlatformMemory::EMemoryCounterRegion GetMemoryRegion()
    {
        return FPlatformMemory::MCR_Invalid;
    }
};;
// Static stat variable holding the stat for the descriptor above.
static struct FThreadSafeStaticStat<FStat_STAT_UnhashUnreachableObjects> StatPtr_STAT_UnhashUnreachableObjects;
// Scoped counter constructed from the stat id; unlike DECLARE_CYCLE_STAT, this
// macro also emits the counter itself, so no separate SCOPE_CYCLE_COUNTER is needed.
FScopeCycleCounter CycleCount_STAT_UnhashUnreachableObjects((StatPtr_STAT_UnhashUnreachableObjects.GetStatId()));;

 

在函數中,定義埋點結構體類型和static局部變量,並插入埋點

// Example of instrumentation style 2: a single statement at the top of the function
// declares the stat type, the static stat variable and the scoped counter.
bool UnhashUnreachableObjects(bool bUseTimeLimit, float TimeLimit)
{
    DECLARE_SCOPE_CYCLE_COUNTER(TEXT("UnhashUnreachableObjects"), STAT_UnhashUnreachableObjects, STATGROUP_GC);
    
    // ... ... (rest of the function body is elided in the article)
}

 

定義埋點方式3

在DECLARE_SCOPE_CYCLE_COUNTER基礎上封裝,放到Quick分組下,更易於使用

QUICK_SCOPE_CYCLE_COUNTER(STAT_QuickTest1);  // Equivalent to: DECLARE_SCOPE_CYCLE_COUNTER(TEXT("STAT_QuickTest1"),STAT_QuickTest1,STATGROUP_Quick)

// Stats defined with QUICK_SCOPE_CYCLE_COUNTER are placed in the group named STATGROUP_Quick (group struct: FStatGroup_STATGROUP_Quick)

 

QUICK_SCOPE_CYCLE_COUNTER(STAT_QuickTest1)展開為:

// Stat descriptor generated for STAT_QuickTest1; the TGroup typedef ties it to the
// STATGROUP_Quick group struct, and the description is the stat name itself.
struct FStat_STAT_QuickTest1
{
    typedef FStatGroup_STATGROUP_Quick TGroup;
    static __forceinline const char* GetStatName() { return "STAT_QuickTest1"; }
    static __forceinline const TCHAR* GetDescription() { return L"STAT_QuickTest1"; }
    static __forceinline EStatDataType::Type GetStatType() { return EStatDataType::ST_int64; }
    static __forceinline bool IsClearEveryFrame() { return true; }
    static __forceinline bool IsCycleStat() { return true; }

    // A cycle stat has no memory region, so the invalid region is reported.
    static __forceinline FPlatformMemory::EMemoryCounterRegion GetMemoryRegion()
    {
        return FPlatformMemory::MCR_Invalid;
    }
};;
// Static stat variable holding the stat for the descriptor above.
static struct FThreadSafeStaticStat<FStat_STAT_QuickTest1> StatPtr_STAT_QuickTest1;
// Scoped counter constructed from the stat id.
FScopeCycleCounter CycleCount_STAT_QuickTest1((StatPtr_STAT_QuickTest1.GetStatId()));;

 

在函數中,定義埋點結構體類型和static局部變量,並插入埋點

// Example of instrumentation style 3: one QUICK_SCOPE_CYCLE_COUNTER statement declares
// the stat type, the static stat variable and the scoped counter inside the function.
void AMyTest1Character::StatTest()
{
    QUICK_SCOPE_CYCLE_COUNTER(STAT_QuickTest1);

    // ... ... (rest of the function body is elided in the article)
}

 

對UObject對象埋點

DECLARE_STATS_GROUP(TEXT("TestGroupDesc"), STATGROUP_TestGroup, STATCAT_Advanced); // Define the stats group named TestGroup

DECLARE_CYCLE_STAT(TEXT("ObjTest1"), STAT_ObjTest1, STATGROUP_TestGroup); // Define the ObjTest1 stat type and its static stat variable, placed in the TestGroup group
DECLARE_CYCLE_STAT(TEXT("ObjTest2"), STAT_ObjTest2, STATGROUP_TestGroup); // Define the ObjTest2 stat type and its static stat variable, placed in the TestGroup group

void AMyTest1Character::StatTest()
{
    FString MyBPObjectPath = TEXT("/Game/ThirdPersonCPP/Blueprints/MyBlueprintObject.MyBlueprintObject_C");
    UClass* MyBPObjectClass = LoadClass<UObject>(nullptr, *MyBPObjectPath);
    UMyBPObject* BPObj1 = NewObject<UMyBPObject>(this, MyBPObjectClass);

    FString TexturePath1 = TEXT("/Engine/EngineMaterials/DefaultDiffuse_TC_Masks");
    UTexture2D* TextureObj1 = LoadObject<UTexture2D>(nullptr, *TexturePath1);

    {
        FScopeCycleCounterUObject ObjScope(MyBPObjectClass);

        LoopCall(1);

        FPlatformProcess::Sleep(0.002);

        {
            FScopeCycleCounterUObject ObjScope2(MyBPObjectClass);

            LoopCall(1);

            {
// 通過GET_STATID宏來獲取,會被展開為:(StatPtr_STAT_ObjTest1.GetStatId())
TStatId StatBPObj1 = GET_STATID(STAT_ObjTest1); FScopeCycleCounterUObject ObjScope3(BPObj1, StatBPObj1); FPlatformProcess::SleepNoStats(
0.5); } {
// 通過GET_STATID宏來獲取,會被展開為:(StatPtr_STAT_ObjTest2.GetStatId()) FScopeCycleCounterUObject ObjScope4(
this, GET_STATID(STAT_ObjTest2)); FPlatformProcess::Sleep(0.003); } } LoopCall(2); // 動態創建TStatId對象 TStatId StatObjTest3 = FDynamicStats::CreateStatId<FStatGroup_STATGROUP_TestGroup>(FString(TEXT("ObjTest3"))); FScopeCycleCounterUObject ObjScope3(MyBPObjectClass, StatObjTest3); } }

 

 

即使對UObject指定了埋點類型,對UObject的統計仍會放到STATGROUP_UObjects分組中,如下所示:

 

Tickable對象的Tick耗時

UCLASS()
class UMyBPObject : public UObject, public FTickableGameObject
{
    GENERATED_BODY()

public:
    UMyBPObject();
    ~UMyBPObject();

    /**
     * Stat id for this tickable, declared in the STATGROUP_Tickables group.
     * To opt out of being tracked, return a default-constructed TStatId() instead.
     */
    virtual TStatId GetStatId() const override
    {
        RETURN_QUICK_DECLARE_CYCLE_STAT(MyBPObject, STATGROUP_Tickables);
    }

    /** Tickable unless this object is a default subobject. */
    virtual bool IsTickable() const override
    {
        const bool bIsDefaultSubobject = IsDefaultSubobject();
        return !bIsDefaultSubobject;
    }

    /** Once every 300 frames, sleeps for 0.03 s (via SleepNoStats) to produce a visible spike. */
    virtual void Tick(float DeltaTime) override
    {
        const bool bSpikeThisFrame = (GFrameCounter % 300 == 0);
        if (!bSpikeThisFrame)
        {
            return;
        }

        FPlatformProcess::SleepNoStats(0.03);
    }
};


void AMyTest1Character::StatTest()
{
    FString MyBPObjectPath = TEXT("/Game/ThirdPersonCPP/Blueprints/MyBlueprintObject.MyBlueprintObject_C");
    UClass* MyBPObjectClass = LoadClass<UObject>(nullptr, *MyBPObjectPath); // MyBPObjectClass為UBlueprintGeneratedClass*類型

    // 創建UMyBPObject對象,並賦值給成員變量UMyBPObject* m_BPObj1
    m_BPObj1 = NewObject<UMyBPObject>(this, MyBPObjectClass);
}

 

Tickable對象一般會放在STATGROUP_Tickables分組中。以下為某個Stats文件收集到的Tickable對象名稱:

 

 

Task執行任務的耗時

DECLARE_STATS_GROUP(TEXT("TestGroupDesc"), STATGROUP_TestGroup, STATCAT_Advanced); // 定義名為TestGroup的分組

/** Minimal task-graph task used to show how a task's execution time is recorded. */
class FMyTestTask
{
public:
    FMyTestTask() {}

    /** Display name of the task. */
    static const TCHAR* GetTaskName() { return TEXT("FMyTestTask"); }

    /** Stat id for this task, declared in the STATGROUP_TestGroup group. */
    static FORCEINLINE TStatId GetStatId()
    {
        RETURN_QUICK_DECLARE_CYCLE_STAT(FMyTestTask, STATGROUP_TestGroup);
    }

    /** Thread this task asks to run on: any thread. */
    static ENamedThreads::Type GetDesiredThread() { return ENamedThreads::AnyThread; }

    /** Subsequents are tracked for this task. */
    static ESubsequentsMode::Type GetSubsequentsMode() { return ESubsequentsMode::TrackSubsequents; }

    /** Task body: burns CPU with LoopCall(5); both parameters are unused. */
    void DoTask(ENamedThreads::Type CurrentThread, const FGraphEventRef& MyCompletionGraphEvent)
    {
        LoopCall(5);
    }
};

void AMyTest1Character::StatTest()
{
    FGraphEventRef MyTestTaskEvent = TGraphTask<FMyTestTask>::CreateTask().ConstructAndDispatchWhenReady();
// 當前線程掛起,等待Task任務執行完成 FTaskGraphInterface::Get().WaitUntilTaskCompletes(MyTestTaskEvent); // 統計到CPU Stall - Wait For Event }

 

 

TaskGraph的任務一般會放在STATGROUP_TaskGraphTasks和STATGROUP_ThreadPoolAsyncTasks分組中。以下為某個Stats文件收集到的Task名稱:

 

 

其他統計說明

CPU停轉的統計:

 

Stat系統自己開銷的統計:

 

總結

① 通過定義線程安全的埋點變量,在其構造函數(從變量定義處開始記錄)與析構函數(結束記錄)中計時,從而得到埋點變量生命週期範圍內的耗時

② 在記錄數據時,會帶上Thread Id。因此,在Profiler工具中展示數據時,會按照線程做大類進行分類

③ Profiler工具中的CallStack樹反映的是埋點變量之間的嵌套關係,與代碼的函數調用CallStack沒有關係

 

參考

https://docs.unrealengine.com/4.26/zh-CN/TestingAndOptimization/PerformanceAndProfiling/StatCommands/StatsSystemOverview/

https://docs.unrealengine.com/4.26/en-US/TestingAndOptimization/PerformanceAndProfiling/StatCommands/StatsSystemOverview/

UE高級性能剖析技術(2) -CPU幀率瓶頸和卡頓

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM