UE4之Game、Render、RHI多線程架構


游戲線程(GameThread)

GameThread是引擎運行的心臟,承載游戲邏輯、運行流程的工作,也是其它線程的數據發起者。在FEngineLoop::Tick函數執行每幀邏輯的更新。

在引擎啟動時會把GameThread的線程id存儲到全局變量GGameThreadId中,且稍后會設置到TaskGraph系統中。

// Excerpt of engine pre-initialisation (elided parts are marked "// ... ...").
// Records the calling thread as the game thread, starts the TaskGraph system, attaches the
// game thread to it and, when threaded rendering is requested, starts the rendering thread.
// NOTE(review): the article later states that StartRenderingThread() is called from
// FEngineLoop::PreInitPostStartupScreen, so this excerpt may merge code from both functions -- confirm.
int32 FEngineLoop::PreInitPreStartupScreen(const TCHAR* CmdLine)
{
    // ... ...
    
    // Create this thread's own TLS data (FPerThreadFreeBlockLists). Note: needed by the Binned2/Binned3 allocators.
    FMemory::SetupTLSCachesOnCurrentThread();
    
    // remember thread id of the main thread
    GGameThreadId = FPlatformTLS::GetCurrentThreadId();// game thread id
    GIsGameThreadIdInitialized = true; // marks the game thread id as initialised (read by IsInGameThread)

    FPlatformProcess::SetThreadAffinityMask(FPlatformAffinity::GetMainGameMask()); // set the CPU affinity of the current thread. Note: avoids hopping between cores, which hurts performance
    FPlatformProcess::SetupGameThread(); // set up game-thread data (the article notes this is an empty implementation on many platforms)
    
    // ... ...
    
    FTaskGraphInterface::Startup(FPlatformMisc::NumberOfCores()); // initialise the TaskGraph; the core count of the machine is passed in to size the worker threads
    FTaskGraphInterface::Get().AttachToThread(ENamedThreads::GameThread); // attach this thread to the TaskGraph's GameThread named slot, linking the game thread with the TaskGraph
    

    if (GUseThreadedRendering)  // a rendering thread is requested
    {
        if (GRHISupportsRHIThread) // the current platform supports an RHI thread
        {
            // The RHI thread defaults to enabled; the command line can force it on or off.
            const bool DefaultUseRHIThread = true;
            GUseRHIThread_InternalUseOnly = DefaultUseRHIThread;
            if (FParse::Param(FCommandLine::Get(), TEXT("rhithread")))
            {
                GUseRHIThread_InternalUseOnly = true; // -rhithread: use a dedicated RHIThread registered with the TaskGraph; RHI work runs on the TaskGraph's RHIThread
            }
            else if (FParse::Param(FCommandLine::Get(), TEXT("norhithread")))
            {
                // -norhithread: no dedicated RHI thread.
                // NOTE(review): the article states RHI work then runs on the TaskGraph's AnyThread -- not verifiable from this excerpt.
                GUseRHIThread_InternalUseOnly = false;
            }
        }
            
        SCOPED_BOOT_TIMING("StartRenderingThread");
        StartRenderingThread();  // create and start the rendering thread
    }
    
    // ... ...
}

 

游戲線程和TaskGraph系統的ENamedThreads::GameThread其實是一回事,都是同一個線程!

 

經過上面的初始化和設置后,其它地方就可以通過TaskGraph系統並行地處理任務了,也可以訪問全局變量,以便判斷游戲線程是否初始化完,當前線程是否游戲線程:

// Returns true only when the game thread id has already been recorded
// (GIsGameThreadIdInitialized) and the calling thread's id equals GGameThreadId.
bool IsInGameThread()
{
    return GIsGameThreadIdInitialized && FPlatformTLS::GetCurrentThreadId() == GGameThreadId;
}

 

渲染線程(RenderThread)

RenderThread在TaskGraph系統中有一個任務隊列,其他線程(主要是GameThread)通過宏ENQUEUE_RENDER_COMMAND(Type)向該隊列中填充任務     注:Type字符串要保持唯一,否則ENQUEUE_RENDER_COMMAND(Type)會失效

RenderThread則不斷從這個隊列中取出任務來執行,從而生成與平台無關的Command List(渲染指令列表)。注:整個過程是異步的

RenderThread是其他線程(主要是GameThread)的奴隸,只是簡單地作為工作線程不斷執行它們賦予的工作。 

 

RenderingThread.h聲明了全部對外的接口,部分如下:

// Engine\Source\Runtime\RenderCore\Public\RenderingThread.h

// Whether a separate rendering thread is in use. If false, all rendering commands are executed
// immediately instead of being placed on the rendering command queue.
extern RENDERCORE_API bool GIsThreadedRendering;

// Whether the rendering thread should be created. Usually set by a command-line parameter or
// by the ToggleRenderingThread console command.
extern RENDERCORE_API bool GUseThreadedRendering;

// Enables or disables the RHI thread (dedicated thread and/or RHI on task threads, per the two flags).
extern RENDERCORE_API void SetRHIThreadEnabled(bool bEnableDedicatedThread, bool bEnableRHIOnTaskThreads);

(......)

// Starts the rendering thread.
extern RENDERCORE_API void StartRenderingThread();

// Stops the rendering thread.
extern RENDERCORE_API void StopRenderingThread();

// Checks whether the rendering thread is healthy (i.e. has not crashed); per the article, a crash is reported through UE_LOG.
extern RENDERCORE_API void CheckRenderingThreadHealth();

// Returns whether the rendering thread is healthy (i.e. has not crashed).
extern RENDERCORE_API bool IsRenderingThreadHealthy();

// Adds a task that must complete before the next scene draw or before rendering commands are flushed.
extern RENDERCORE_API void AddFrameRenderPrerequisite(const FGraphEventRef& TaskToAdd);

// Collects the frame render prerequisite tasks and makes sure all rendering commands have been enqueued.
extern RENDERCORE_API void AdvanceFrameRenderPrerequisite();

// Waits until the rendering thread has executed all of its rendering commands.
// Blocks the game thread; may only be called from the game thread.
extern RENDERCORE_API void FlushRenderingCommands(bool bFlushDeferredDeletes = false);

extern RENDERCORE_API void FlushPendingDeleteRHIResources_GameThread();
extern RENDERCORE_API void FlushPendingDeleteRHIResources_RenderThread();

extern RENDERCORE_API void TickRenderingTickables();

extern RENDERCORE_API void StartRenderCommandFenceBundler();
extern RENDERCORE_API void StopRenderCommandFenceBundler();

(......)

 

RenderingThread.h還有一個非常重要的宏ENQUEUE_RENDER_COMMAND,它的作用是向渲染線程入隊渲染指令。下面是它的聲明和實現:

// Enqueues a rendering command for the rendering thread; Type names the render operation.
// The macro declares a local struct Type##Name exposing the name as narrow (CStr) and wide (TStr)
// strings, then names EnqueueUniqueRenderCommand<Type##Name>, which the caller invokes with a lambda.
#define ENQUEUE_RENDER_COMMAND(Type) \
    struct Type##Name \
    {  \
        static const char* CStr() { return #Type; } \
        static const TCHAR* TStr() { return TEXT(#Type); } \
    }; \
    EnqueueUniqueRenderCommand<Type##Name>

 

上面最后一句使用了EnqueueUniqueRenderCommand命令,繼續追蹤之:

/* UnrealEngine\Engine\Source\Runtime\RenderCore\Public\RenderingThread.h */

/** The parent class of commands stored in the rendering command queue. */
class RENDERCORE_API FRenderCommand
{
public:
    // All render commands run on the render thread
    static ENamedThreads::Type GetDesiredThread() // every rendering command must execute on the render thread
    {
        // When threaded rendering is on, the render thread slot must not be the game thread.
        check(!GIsThreadedRendering || ENamedThreads::GetRenderThread() != ENamedThreads::GameThread);
        return ENamedThreads::GetRenderThread(); // with threaded rendering: the render thread; without it: GameThread
    }

    static ESubsequentsMode::Type GetSubsequentsMode()
    {
        // Don't support tasks having dependencies on us, reduces task graph overhead tracking and dealing with subsequents
        return ESubsequentsMode::FireAndForget;
    }
};

// TaskGraph task type wrapping one render command.
// TSTR   - struct exposing the command name through static CStr()/TStr().
// LAMBDA - callable invoked with the immediate RHI command list.
template<typename TSTR, typename LAMBDA>
class TEnqueueUniqueRenderCommandType : public FRenderCommand
{
public:
    // Stores the callable by forwarding it into the Lambda member.
    TEnqueueUniqueRenderCommandType(LAMBDA&& InLambda) : Lambda(Forward<LAMBDA>(InLambda)) {}

    // Task body: opens a CPU profiler scope named after the command, fetches the immediate
    // command list and invokes the stored callable with it.
    void DoTask(ENamedThreads::Type CurrentThread, const FGraphEventRef& MyCompletionGraphEvent)
    {
        TRACE_CPUPROFILER_EVENT_SCOPE_ON_CHANNEL_STR(TSTR::TStr(), RenderCommandsChannel);
        FRHICommandListImmediate& RHICmdList = GetImmediateCommandList_ForRenderCommand();
        Lambda(RHICmdList);
    }

    // Returns the stat id used to account for this task; an empty TStatId when STATS is off.
    FORCEINLINE_DEBUGGABLE TStatId GetStatId() const
    {
#if STATS
        static struct FThreadSafeStaticStat<FStat_EnqueueUniqueRenderCommandType> StatPtr_EnqueueUniqueRenderCommandType;
        return StatPtr_EnqueueUniqueRenderCommandType.GetStatId();
#else
        return TStatId();
#endif
    }

private:
    LAMBDA Lambda; // the stored render callback
};

/*************************************************************************************************************/

// Runs or enqueues a render command.
// TSTR   - struct type with static CStr()/TStr() methods giving the render command's name.
// LAMBDA - the callback; it receives the immediate RHI command list.
// Three paths: already on the rendering thread -> call the lambda directly; a render thread
// should execute it -> dispatch a TaskGraph task; otherwise -> build the task object and run it inline.
template<typename TSTR, typename LAMBDA>
FORCEINLINE_DEBUGGABLE void EnqueueUniqueRenderCommand(LAMBDA&& Lambda)
{
    QUICK_SCOPE_CYCLE_COUNTER(STAT_EnqueueUniqueRenderCommand);
    typedef TEnqueueUniqueRenderCommandType<TSTR, LAMBDA> EURCType;  // EURCType is shorthand for TEnqueueUniqueRenderCommandType<TSTR, LAMBDA>

#if 0 // UE_SERVER && UE_BUILD_DEBUG
    UE_LOG(LogRHI, Warning, TEXT("Render command '%s' is being executed on a dedicated server."), TSTR::TStr())
#endif

    if (IsInRenderingThread()) // already on the rendering thread: run the callback directly instead of enqueuing
    {
        FRHICommandListImmediate& RHICmdList = GetImmediateCommandList_ForRenderCommand();
        Lambda(RHICmdList); // invoke the passed-in lambda on the rendering thread
    }
    else
    {
        // Per the article, ShouldExecuteOnRenderThread() is (GIsThreadedRendering || !IsInGameThread()):
        // a render thread is in use, or the caller is not the game thread, so the command must run on the separate render thread.
        if (ShouldExecuteOnRenderThread())
        {
            CheckNotBlockedOnRenderThread();
            TGraphTask<EURCType>::CreateTask().ConstructAndDispatchWhenReady(Forward<LAMBDA>(Lambda)); // dispatch an EURCType task to the TaskGraph; the lambda is forwarded to the task's constructor
        }
        else
        {
            // Not executing on a separate render thread: construct an EURCType object and run it right here.
            EURCType TempCommand(Forward<LAMBDA>(Lambda));
            FScopeCycleCounter EURCMacro_Scope(TempCommand.GetStatId());
            TempCommand.DoTask(ENamedThreads::GameThread, FGraphEventRef());
        }
    }
}

 

EnqueueUniqueRenderCommand函數只負責向渲染線程的TaskGraph隊列里面投遞任務,它耗時統計到QuickEnqueueUniqueRenderCommand分類中

注:執行EnqueueUniqueRenderCommand函數的線程可能為:GameThread、RenderThread、RTHeartBeat、TaskGraph的AnyThread、PoolThread的FQueuedThread

 

ENQUEUE_RENDER_COMMAND(xxx),在RenderThread上執行的xxx的耗時統計在RenderThreadCommands分類中

 

為了更好理解入隊渲染命令操作,舉2個具體的例子:

例1:在GameThread執行LoadMap切地圖,在卸載掉Old World之后,會在TrimMemory()函數中使用宏ENQUEUE_RENDER_COMMAND向RenderThread的TaskGraph隊列中投遞一個FlushCommand任務

// Example 1: enqueue a FlushCommand render command. The block comment below shows what the macro expands to.
ENQUEUE_RENDER_COMMAND(FlushCommand)(
/* ---------------------------------------------- ENQUEUE_RENDER_COMMAND after macro expansion ------------------------------------------------
struct FlushCommandName
{
    static const char* CStr() { return "FlushCommand"; }
    static const TCHAR* TStr() { return L"FlushCommand"; }
};
EnqueueUniqueRenderCommand<FlushCommandName>( */
    [](FRHICommandList& RHICmdList)
    {
        // Flush the immediate command list, flush RHI resources, then flush the immediate command list again.
        GRHICommandList.GetImmediateCommandList().ImmediateFlush(EImmediateFlushType::FlushRHIThreadFlushResources);
        RHIFlushResources();
        GRHICommandList.GetImmediateCommandList().ImmediateFlush(EImmediateFlushType::FlushRHIThreadFlushResources);
    });

例2:在GameThread中執行控制台變量命令,會使用宏ENQUEUE_RENDER_COMMAND向RenderThread的TaskGraph隊列中投遞一個OnCVarChange1任務,以便將新的數值傳遞到RenderThread的邏輯中使用

// Example 2: enqueues a render command that writes NewValue into Dest.
// The address of Dest and the new value are captured by copy; the assignment itself
// happens when the enqueued lambda runs.
virtual void OnCVarChange(int32& Dest, int32 NewValue)
{
    int32* DestPtr = &Dest;
    ENQUEUE_RENDER_COMMAND(OnCVarChange1)(
    /* ---------------------------------------------- ENQUEUE_RENDER_COMMAND after macro expansion ------------------------------------------------
    struct OnCVarChange1Name
    {
        static const char* CStr() { return "OnCVarChange1"; }
        static const TCHAR* TStr() { return L"OnCVarChange1"; }
    };
    EnqueueUniqueRenderCommand<OnCVarChange1Name>( */
        [DestPtr, NewValue](FRHICommandListImmediate& RHICmdList)
        {
            *DestPtr = NewValue;
        });
}

 

FRenderingThread承載了渲染線程的主要工作,它的部分接口和實現代碼如下:

// Engine\Source\Runtime\RenderCore\Private\RenderingThread.cpp

// Runnable whose Run() enters the rendering thread main loop (RenderingThreadMain).
// Excerpt: elided parts are marked "(......)".
class FRenderingThread : public FRunnable
{
private:
    bool bAcquiredThreadOwnership;    // set to true in Init() when the RHI is not running in a separate thread and RHIAcquireThreadOwnership() is called

public:
    FEvent* TaskGraphBoundSyncEvent; // sync event passed to RenderingThreadMain, which triggers it once the render thread is attached to the TaskGraph
 FRenderingThread()
    {
        bAcquiredThreadOwnership = false;
        // Obtain the sync event from the event pool.
        TaskGraphBoundSyncEvent    = FPlatformProcess::GetSynchEventFromPool(true);
        RHIFlushResources();
    }

    // FRunnable interface.
    virtual bool Init(void) override
    {
        // Store the current thread id in the global GRenderThreadId so other code can refer to it.
        GRenderThreadId = FPlatformTLS::GetCurrentThreadId();
        
        // Acquire thread ownership when the RHI does not run in a separate thread.
        if (!IsRunningRHIInSeparateThread())
        {
            bAcquiredThreadOwnership = true;
            RHIAcquireThreadOwnership();
        }

        return true; 
    }
    
    (......)
    
    virtual uint32 Run(void) override
    {
        // Set up the TLS caches for this thread.
        FMemory::SetupTLSCachesOnCurrentThread();
        // Set up platform-specific data for the render thread.
        FPlatformProcess::SetupRenderThread();

        (......)
        
        {
            // Enter the rendering thread main loop.
            RenderingThreadMain( TaskGraphBoundSyncEvent );
        }
        
        // Main loop has returned: clear and disable this thread's TLS caches before exiting.
        FMemory::ClearAndDisableTLSCachesOnCurrentThread();
        return 0;
    }
};

可見它在運行之后會進入渲染線程邏輯,這里再進入RenderingThreadMain代碼一探究竟:

// Main function of the rendering thread (excerpt; elided parts are marked "(......)").
// Registers the calling thread as the TaskGraph render thread, signals TaskGraphBoundSyncEvent
// (when non-null), processes render-thread tasks until asked to return, then points the
// render-thread slots back at the game thread.
void RenderingThreadMain( FEvent* TaskGraphBoundSyncEvent )
{
    LLM_SCOPE(ELLMTag::RenderingThreadMemory);
    
    // Set the render thread slot and its local slot to ActualRenderingThread and ActualRenderingThread_Local.
    ENamedThreads::Type RenderThread = ENamedThreads::Type(ENamedThreads::ActualRenderingThread);

    ENamedThreads::SetRenderThread(RenderThread);
    ENamedThreads::SetRenderThread_Local(ENamedThreads::Type(ENamedThreads::ActualRenderingThread_Local));
    
    // Attach the current thread to the TaskGraph's RenderThread slot.
    FTaskGraphInterface::Get().AttachToThread(RenderThread);
    FPlatformMisc::MemoryBarrier();

    // Trigger the sync event to signal that the render thread is attached to the TaskGraph and ready to receive tasks.
    if( TaskGraphBoundSyncEvent != NULL )
    {
        TaskGraphBoundSyncEvent->Trigger();
    }

    (......)
    
    // Render thread lifetime: broadcast creation, process tasks until a return is requested, broadcast destruction.
    FCoreDelegates::PostRenderingThreadCreated.Broadcast();
    check(GIsThreadedRendering);
    FTaskGraphInterface::Get().ProcessThreadUntilRequestReturn(RenderThread);
    FPlatformMisc::MemoryBarrier();
    // By the time the processing loop returns, threaded rendering must already have been switched off.
    check(!GIsThreadedRendering);
    FCoreDelegates::PreRenderingThreadDestroyed.Broadcast();
    
    (......)
    
    // Restore the render thread slots to the game thread.
    ENamedThreads::SetRenderThread(ENamedThreads::GameThread);
    ENamedThreads::SetRenderThread_Local(ENamedThreads::GameThread_Local);
    FPlatformMisc::MemoryBarrier();
}

 

不過這里還留有一個很大的疑問,那就是FRenderingThread只是獲取當前線程作為渲染線程並附加到TaskGraph中,並沒有創建線程。

那么是哪里創建的渲染線程呢?繼續追蹤,結果發現是在StartRenderingThread()接口中創建了FRenderingThread實例,它的實現代碼如下(節選):

// Engine\Source\Runtime\RenderCore\Private\RenderingThread.cpp

// Excerpt (elided parts are marked "(......)"): creates the FRenderingThread runnable and the
// actual rendering thread, then uses a render command fence to wait on the new thread.
void StartRenderingThread()
{
    (......)

    // Turn on the threaded rendering flag.
    GIsThreadedRendering = true;

    // Create the FRenderingThread instance.
    GRenderingThreadRunnable = new FRenderingThread();

    // This is where the rendering thread is actually created.
    GRenderingThread = FRunnableThread::Create(GRenderingThreadRunnable, *BuildRenderingThreadName(ThreadCount), 0, FPlatformAffinity::GetRenderingThreadPriority(), FPlatformAffinity::GetRenderingThreadMask(), FPlatformAffinity::GetRenderingThreadFlags());
    
    (......)

    // Issue a render command fence and wait on it.
    FRenderCommandFence Fence;
    Fence.BeginFence();
    Fence.Wait();

    (......)
}

 

如果繼續追蹤,會發現StartRenderingThread()是在FEngineLoop::PreInitPostStartupScreen中調用的。

至此,渲染線程的創建、初始化以及主要接口的實現都剖析完了。

 

RHI線程(RHIThread)

RenderThread作為前端(frontend)產生的Command List是平台無關的,是抽象的圖形API調用;

而RHIThread作為后端(backend)會執行和轉換渲染線程的Command List成為指定圖形API的調用(稱為Graphical Command),並提交到GPU執行。

RHI線程的工作是轉換渲染指令到指定圖形API,創建、上傳渲染資源到GPU。實現代碼如下:

// Engine\Source\Runtime\RenderCore\Private\RenderingThread.cpp

// Runnable for the RHI thread. Unlike FRenderingThread, it creates its own thread in Start().
class FRHIThread : public FRunnable
{
public:
    FRunnableThread* Thread;    // the RHI thread object; nullptr until Start() creates it
 FRHIThread()
        : Thread(nullptr)
    {
        // The instance must be constructed on the game thread.
        check(IsInGameThread());
    }
    
    void Start()
    {
        // Create the RHI thread (the third argument, 512 * 1024, is presumably the stack size -- confirm against FRunnableThread::Create).
        Thread = FRunnableThread::Create(this, TEXT("RHIThread"), 512 * 1024, FPlatformAffinity::GetRHIThreadPriority(),
            FPlatformAffinity::GetRHIThreadMask(), FPlatformAffinity::GetRHIThreadFlags()
            );
        check(Thread);
    }

    virtual uint32 Run() override
    {
        LLM_SCOPE(ELLMTag::RHIMisc);
        
        // Set up the TLS caches for this thread.
        FMemory::SetupTLSCachesOnCurrentThread();
        // Attach the running thread to the TaskGraph system under ENamedThreads::RHIThread.
        FTaskGraphInterface::Get().AttachToThread(ENamedThreads::RHIThread);
        // Process RHI-thread tasks until a return is requested.
        FTaskGraphInterface::Get().ProcessThreadUntilRequestReturn(ENamedThreads::RHIThread);
        // Clean up the TLS caches.
        FMemory::ClearAndDisableTLSCachesOnCurrentThread();
        return 0;
    }
    
    // Singleton accessor.
    static FRHIThread& Get()
    {
        static FRHIThread Singleton; // function-local static: C++11 guarantees its initialisation is thread-safe
        return Singleton;
    }
};

可見RHI線程不同於渲染線程,是直接在FRHIThread對象內創建實際的線程。而FRHIThread的創建也是在StartRenderingThread()中:

// Second excerpt of StartRenderingThread (elided parts are marked "(......)"):
// the part that starts the RHI thread when GUseRHIThread_InternalUseOnly is set.
void StartRenderingThread()
{
    (......)

    if (GUseRHIThread_InternalUseOnly)
    {
        FRHICommandListExecutor::GetImmediateCommandList().ImmediateFlush(EImmediateFlushType::DispatchToRHIThread);        
        if (!FTaskGraphInterface::Get().IsThreadProcessingTasks(ENamedThreads::RHIThread))
        {
            // Obtain the FRHIThread singleton and start it (only when the RHIThread slot is not already processing tasks).
            FRHIThread::Get().Start();
        }
        DECLARE_CYCLE_STAT(TEXT("Wait For RHIThread"), STAT_WaitForRHIThread, STATGROUP_TaskGraphTasks);
        
        // Dispatch the RHI-thread ownership task and make the game thread wait for it.
        FGraphEventRef CompletionEvent = TGraphTask<FOwnershipOfRHIThreadTask>::CreateTask(NULL, ENamedThreads::GameThread).ConstructAndDispatchWhenReady(true, GET_STATID(STAT_WaitForRHIThread));
        QUICK_SCOPE_CYCLE_COUNTER(STAT_StartRenderingThread);
        // Wait (on the GameThread_Local queue) until the RHI thread has finished the ownership task
        // (per the article, acquiring ownership is a no-op for most graphics APIs).
        FTaskGraphInterface::Get().WaitUntilTaskCompletes(CompletionEvent, ENamedThreads::GameThread_Local);
        // Store the RHI thread object, the "RHI in its own thread" flags and the RHI thread id.
        GRHIThread_InternalUseOnly = FRHIThread::Get().Thread;
        check(GRHIThread_InternalUseOnly);
        GIsRunningRHIInDedicatedThread_InternalUseOnly = true;
        GIsRunningRHIInSeparateThread_InternalUseOnly = true;
        GRHIThreadId = GRHIThread_InternalUseOnly->GetThreadID();
        
        GRHICommandList.LatchBypass();
    }
    
    (......)
}

以Fortnite(堡壘之夜)移動端為例,在開啟RHI線程之前,渲染線程急劇地上下波動,而加了RHI線程之后,波動平緩許多,和游戲線程基本保持一致,幀率也提升不少:

 

GameThread、RenderThread、RHIThread之間的同步機制

 

這3個線程處理的數據通常是不同幀的,譬如GameThread處理N幀數據,RenderThread和RHIThread處理N-1幀數據。

但也存在例外,比如RenderThread和RHIThread運行很快,幾乎不存在延遲,這種情況下,GameThread處理N幀,而RenderThread可能處理N或N-1幀,RHIThread也可能在轉換N或N-1幀。

但是,RenderThread不能落后游戲線程超過一幀,否則GameThread會卡住,直到RenderThread處理完所有指令。

 

 

 

游戲線程和渲染線程的同步

游戲線程不可能領先於渲染線程超過一幀(最多快一幀),否則游戲線程會等待渲染線程處理完。它們的同步機制涉及兩個關鍵的概念:

// Engine\Source\Runtime\RenderCore\Public\RenderCommandFence.h

// Render command fence: lets the caller wait until the rendering thread (and optionally the
// RHI thread / GPU) has caught up to the point at which the fence was begun.
class RENDERCORE_API FRenderCommandFence
{
public:
    // Adds a fence to the rendering command queue. When bSyncToRHIAndGPU is true the fence also
    // syncs with the RHI and the GPU buffer swap; otherwise it only waits for the rendering thread.
    void BeginFence(bool bSyncToRHIAndGPU = false); 

    // Waits for the fence to be reached. bProcessGameThreadTasks is forwarded to
    // GameThreadWaitForTask by the Wait definition shown further down in this article.
    void Wait(bool bProcessGameThreadTasks = false) const;

    // Returns whether the fence has completed.
    bool IsFenceComplete() const;

private:
    mutable FGraphEventRef CompletionEvent; // event that signals the fence has completed
    ENamedThreads::Type TriggerThreadIndex; // named thread expected to trigger CompletionEvent (render thread, or RHI thread)
};

// Engine\Source\Runtime\Engine\Public\UnrealEngine.h
// End-of-frame synchronisation between the game thread and the rendering thread
// (excerpt; elided parts are marked "(......)").
class FFrameEndSync
{
    FRenderCommandFence Fence[2]; // pair of render fences
    int32 EventIndex; // index of the fence currently in use
public:
    // Syncs the game thread with the rendering thread.
    // bAllowOneFrameThreadLag: whether the rendering thread may lag one frame behind.
    void Sync( bool bAllowOneFrameThreadLag )
    {
        Fence[EventIndex].BeginFence(true); // begin the fence, requesting sync with the RHI and the GPU swap chain

        bool bEmptyGameThreadTasks = !FTaskGraphInterface::Get().IsThreadProcessingTasks(ENamedThreads::GameThread);
        
        // Make sure game-thread tasks get processed at least once.
        if (bEmptyGameThreadTasks)
        {
            FTaskGraphInterface::Get().ProcessThreadUntilIdle(ENamedThreads::GameThread);
        }

        // If a one-frame lag is allowed, switch to the other fence, so the Wait below waits on the
        // fence begun in the previous call rather than the one begun just above.
        if( bAllowOneFrameThreadLag )
        {
            EventIndex = (EventIndex + 1) % 2;
        }

        (......)
        
        // Wait on the selected fence.
        Fence[EventIndex].Wait(bEmptyGameThreadTasks);
    }
};

 

在FRenderCommandFence的BeginFence函數中

當GameThread與RHI線程及GPU同步時,GameThread會使用宏ENQUEUE_RENDER_COMMAND向RenderThread的TaskGraph隊列中投遞一個FSyncFrameCommand任務,以便將Command List同步投遞到RHI線程

當GameThread與RenderThread同步時,GameThread會創建一個FNullGraphTask空任務,放到RenderThread的TaskGraph隊列中讓其執行

在FRenderCommandFence的Wait函數中,會檢查投遞給RenderThread的CompletionEvent是否被執行,如果沒有執行則調用GameThreadWaitForTask函數來阻塞等待(通過Event實現)

// Begins the fence by setting up CompletionEvent.
// - No threaded rendering: nothing to do.
// - A bundled completion event exists and the caller is the game thread: reuse it.
// - bSyncToRHIAndGPU (and neither r.GTSyncType nor r.VSync is 0): enqueue an FSyncFrameCommand render
//   command carrying a fresh graph event.
// - Otherwise: dispatch an FNullGraphTask to the render thread and use its event.
void FRenderCommandFence::BeginFence(bool bSyncToRHIAndGPU)
{
    if (!GIsThreadedRendering)
    {
        return;
    }
    else
    {
        // Render thread is a default trigger for the CompletionEvent
        TriggerThreadIndex = ENamedThreads::ActualRenderingThread;
                
        if (BundledCompletionEvent.GetReference() && IsInGameThread())
        {
            CompletionEvent = BundledCompletionEvent;
            return;
        }

        int32 GTSyncType = CVarGTSyncType.GetValueOnAnyThread();
        if (bSyncToRHIAndGPU)
        {
            // Don't sync to the RHI and GPU if GtSyncType is disabled, or we're not vsyncing
            //@TODO: do this logic in the caller?
            static auto CVarVsync = IConsoleManager::Get().FindConsoleVariable(TEXT("r.VSync")); // console variable telling whether VSync is on
            check(CVarVsync != nullptr);

            if ( GTSyncType == 0 || CVarVsync->GetInt() == 0 ) // when r.GTSyncType or r.VSync is 0, the game thread does not sync with the RHI thread and GPU
        {
                bSyncToRHIAndGPU = false;
            }
        }


        if (bSyncToRHIAndGPU) // game thread syncs with the RHI thread and the GPU
        {
            if (IsRHIThreadRunning())
            {
                // Change trigger thread to RHI
                TriggerThreadIndex = ENamedThreads::RHIThread;
            }
            
            // Create a task graph event which we can pass to the render or RHI threads.
            CompletionEvent = FGraphEvent::CreateGraphEvent();

            FGraphEventRef InCompletionEvent = CompletionEvent;
            /* ---------------------------------------------- ENQUEUE_RENDER_COMMAND after macro expansion ------------------------------------------------
            struct FSyncFrameCommandName
            {
            static const char* CStr() { return "FSyncFrameCommand"; }
            static const TCHAR* TStr() { return L"FSyncFrameCommand"; }
            };
            EnqueueUniqueRenderCommand<FSyncFrameCommandName>( */ ENQUEUE_RENDER_COMMAND(FSyncFrameCommand)(
                [InCompletionEvent, GTSyncType](FRHICommandListImmediate& RHICmdList)
                {
                    if (IsRHIThreadRunning()) // an RHI thread is running
                    {
                        ALLOC_COMMAND_CL(RHICmdList, FRHISyncFrameCommand)(InCompletionEvent, GTSyncType); // record an FRHISyncFrameCommand carrying the completion event on the command list
                        RHICmdList.ImmediateFlush(EImmediateFlushType::DispatchToRHIThread);
                    }
                    else  // no RHI thread: the render thread executes the command directly
                    {
                        FRHISyncFrameCommand Command(InCompletionEvent, GTSyncType);
                        Command.Execute(RHICmdList);
                    }
                });
        }
        else  // game thread syncs with the render thread only
        {
            // Sync Game Thread with Render Thread only
            DECLARE_CYCLE_STAT(TEXT("FNullGraphTask.FenceRenderCommand"),
            STAT_FNullGraphTask_FenceRenderCommand,
                STATGROUP_TaskGraphTasks);

            CompletionEvent = TGraphTask<FNullGraphTask>::CreateTask(NULL, ENamedThreads::GameThread).ConstructAndDispatchWhenReady(
                GET_STATID(STAT_FNullGraphTask_FenceRenderCommand), ENamedThreads::GetRenderThread());
        }
    }
}


/**
 * Waits for pending fence commands to retire.
 * Returns immediately when the fence is already complete; otherwise stops the render command
 * fence bundler and waits on CompletionEvent through GameThreadWaitForTask.
 * @param bProcessGameThreadTasks forwarded unchanged to GameThreadWaitForTask.
 */
void FRenderCommandFence::Wait(bool bProcessGameThreadTasks) const
{
    if (!IsFenceComplete())
    {
        StopRenderCommandFenceBundler();

        GameThreadWaitForTask(CompletionEvent, TriggerThreadIndex, bProcessGameThreadTasks);
    }
}

// Returns true when the fence has completed: always true without threaded rendering, otherwise
// true when CompletionEvent is null or reports complete (the event is then released).
// Must be called from the game thread or the async loading thread; also checks render thread health.
bool FRenderCommandFence::IsFenceComplete() const
{
    if (!GIsThreadedRendering)
    {
        return true;
    }
    check(IsInGameThread() || IsInAsyncLoadingThread());
    CheckRenderingThreadHealth();
    if (!CompletionEvent.GetReference() || CompletionEvent->IsComplete())
    {
        CompletionEvent = NULL; // this frees the handle for other uses, the NULL state is considered completed
        return true;
    }
    return false;
}

 

FFrameEndSync的使用是在FEngineLoop::Tick中:

// Engine\Source\Runtime\Launch\Private\LaunchEngineLoop.cpp

// Excerpt of the per-frame engine tick (elided parts are marked "(......)"):
// the frame-end sync between the game thread and the rendering thread.
void FEngineLoop::Tick()
{
    (......)
    
    // At the end of the engine loop's frame, sync the game thread with the rendering thread.
    {
        static FFrameEndSync FrameEndSync; // function-local static: C++11 guarantees its initialisation is thread-safe
        static auto CVarAllowOneFrameThreadLag = IConsoleManager::Get().FindTConsoleVariableDataInt(TEXT("r.OneFrameThreadLag"));
        // Sync game and render threads; whether a one-frame lag is allowed is controlled by the
        // r.OneFrameThreadLag console variable (the article states it is enabled by default).
        FrameEndSync.Sync( CVarAllowOneFrameThreadLag->GetValueOnGameThread() != 0 );
    }
    
    (......)
}

 

FlushRenderingCommands

在游戲線程中調用,會阻塞游戲線程,強行等待所有的渲染線程pending render command以及RHI線程中的指令執行完,相當於一次完整地對渲染線程的同步

/**
 * Waits for the rendering thread to finish executing all pending rendering commands.  Should only be used from the game thread.
 */
// Blocks the caller until the rendering thread has executed all pending rendering commands:
// enqueues a flush command (optionally also flushing deferred deletes), issues a render command
// fence and waits for it, then deletes the cleanup objects collected before the wait.
// Does nothing when the RHI has not been initialised.
void FlushRenderingCommands(bool bFlushDeferredDeletes)
{
    if (!GIsRHIInitialized)
    {
        return;
    }
    FSuspendRenderingTickables SuspendRenderingTickables;

    // Need to flush GT because render commands from threads other than GT are sent to
    // the main queue of GT when RT is disabled
    if (!GIsThreadedRendering
        && !FTaskGraphInterface::Get().IsThreadProcessingTasks(ENamedThreads::GameThread)
        && !FTaskGraphInterface::Get().IsThreadProcessingTasks(ENamedThreads::GameThread_Local))
    {
        FTaskGraphInterface::Get().ProcessThreadUntilIdle(ENamedThreads::GameThread);
        FTaskGraphInterface::Get().ProcessThreadUntilIdle(ENamedThreads::GameThread_Local);
    }

    // Enqueue an immediate flush of the RHI command list; the flush type depends on bFlushDeferredDeletes.
    ENQUEUE_RENDER_COMMAND(FlushPendingDeleteRHIResourcesCmd)(
        [bFlushDeferredDeletes](FRHICommandListImmediate& RHICmdList)
    {
        RHICmdList.ImmediateFlush(
            bFlushDeferredDeletes ?
            EImmediateFlushType::FlushRHIThreadFlushResourcesFlushDeferredDeletes :
            EImmediateFlushType::FlushRHIThreadFlushResources);
    });

    // Find the objects which may be cleaned up once the rendering thread command queue has been flushed.
    FPendingCleanupObjects* PendingCleanupObjects = GetPendingCleanupObjects();

    // Issue a fence command to the rendering thread and wait for it to complete.
    FRenderCommandFence Fence;
    Fence.BeginFence(); // default argument (no RHI/GPU sync): with threaded rendering on, an empty FNullGraphTask is dispatched to the render thread
    Fence.Wait(); // blocks until the CompletionEvent handed to the render thread has completed
    // Delete the objects which were enqueued for deferred cleanup before the command queue flush.
    delete PendingCleanupObjects;
}

 

渲染線程和RHI線程的同步

RenderThread每次在調用RenderViewFamily_RenderThread的起始處,會阻塞等待所有RHI指令處理完成,然后才開始當前幀的渲染邏輯。

FMobileSceneRender渲染管線下,RenderThread每一幀都會執行ImmediateFlush,阻塞等待RHI處理完FGraphEventRef RHIThreadTask任務,代碼如下:

 

FDeferredShadingSceneRender渲染管線下,RenderThread每一幀都會執行ImmediateFlush,阻塞等待RHI處理完FGraphEventRef RHIThreadTask任務,代碼如下:

 

RHI線程與GPU的同步

 

UE4Editor-D3D11RHI-Win64-Debug.dll!FD3D11Viewport::PresentChecked(int SyncInterval=0) Line 374    C++
UE4Editor-D3D11RHI-Win64-Debug.dll!FD3D11Viewport::Present(bool bLockToVsync=false) Line 622    C++
UE4Editor-D3D11RHI-Win64-Debug.dll!FD3D11DynamicRHI::RHIEndDrawingViewport(FRHIViewport * ViewportRHI=0x00000273786c9180, bool bPresent=true, bool bLockToVsync=false) Line 770    C++
UE4Editor-RHI-Win64-Debug.dll!FRHICommandEndDrawingViewport::Execute(FRHICommandListBase & CmdList={...}) Line 704    C++
UE4Editor-RHI-Win64-Debug.dll!FRHICommand<FRHICommandEndDrawingViewport,FRHICommandEndDrawingViewportString1847>::ExecuteAndDestruct(FRHICommandListBase & CmdList={...}, FRHICommandListDebugContext & Context={...}) Line 763    C++
UE4Editor-RHI-Win64-Debug.dll!FRHICommandListExecutor::ExecuteInner_DoExecute(FRHICommandListBase & CmdList={...}) Line 374    C++
UE4Editor-RHI-Win64-Debug.dll!FExecuteRHIThreadTask::DoTask(ENamedThreads::Type CurrentThread=RHIThread, const TRefCountPtr<FGraphEvent> & MyCompletionGraphEvent={...}) Line 429    C++
UE4Editor-RHI-Win64-Debug.dll!TGraphTask<FExecuteRHIThreadTask>::ExecuteTask(TArray<FBaseGraphTask *,TSizedDefaultAllocator<32>> & NewTasks={...}, ENamedThreads::Type CurrentThread=RHIThread) Line 888    C++
UE4Editor-Core-Win64-Debug.dll!FNamedTaskThread::ProcessTasksNamedThread(int QueueIndex=0, bool bAllowStall=true) Line 709    C++
UE4Editor-Core-Win64-Debug.dll!FNamedTaskThread::ProcessTasksUntilQuit(int QueueIndex=0) Line 601    C++
UE4Editor-Core-Win64-Debug.dll!FTaskGraphImplementation::ProcessThreadUntilRequestReturn(ENamedThreads::Type CurrentThread=RHIThread) Line 1474    C++
UE4Editor-RenderCore-Win64-Debug.dll!FRHIThread::Run() Line 320    C++
UE4Editor-Core-Win64-Debug.dll!FRunnableThreadWin::Run() Line 84    C++
UE4Editor-Core-Win64-Debug.dll!FRunnableThreadWin::GuardedRun() Line 27    C++
UE4Editor-Core-Win64-Debug.dll!FRunnableThreadWin::_ThreadProc(void * pThis=0x000002730781fc90) Line 38    C++
kernel32.dll!BaseThreadInitThunk()    Unknown
ntdll.dll!RtlUserThreadStart()    Unknown

 

阻塞時的stats棧(移動端)

 

RHI等待eglSwapBuffers(Android移動端)

調用堆棧為:

1 /system/vendor/lib64/egl/libGLESv2_adreno.so pc 0000000000128c48 (EsxContext::WriteBinScissor(EsxRenderMode, EsxBinData*, EsxRenderBucket*, int, int)+232) [arm64-v8a]
2 /system/vendor/lib64/egl/libGLESv2_adreno.so pc 0000000000131020 (EsxContext::ProcessAndSubmitRendering(EsxFlushReason)+3168) [arm64-v8a]
3 /system/vendor/lib64/egl/libGLESv2_adreno.so pc 0000000000253814 (EsxCmdMgr::Flush(EsxFlushReason)+596) [arm64-v8a]
4 /system/vendor/lib64/egl/libGLESv2_adreno.so pc 000000000012d6fc (EsxContext::SwapBuffers(EsxRect const*, unsigned int)+124) [arm64-v8a]
5 /system/vendor/lib64/egl/libGLESv2_adreno.so pc 00000000001b4d9c (EglContext::SwapBuffers(EsxRect const*, unsigned int)+92) [arm64-v8a]
6 /system/vendor/lib64/egl/libGLESv2_adreno.so pc 00000000001c4578 (EglWindowSurface::PerformContextSwapOperation(EglContext*, EsxRect const*, unsigned int, EglResourceAccessInfo*)+56) [arm64-v8a]
7 /system/vendor/lib64/egl/libGLESv2_adreno.so pc 00000000001c4678 (EglWindowSurface::SwapBuffers(EglContext*, EsxRect const*, unsigned int)+56) [arm64-v8a]
8 /system/vendor/lib64/egl/libGLESv2_adreno.so pc 00000000001b029c (EglApi::SwapBuffers(void*, void*)+156) [arm64-v8a]
9 /system/vendor/lib64/egl/libEGL_adreno.so pc 0000000000009348 (eglSwapBuffers+40) [arm64-v8a]
10 /system/lib64/libEGL.so pc 0000000000027af8 (eglSwapBuffers+168) [arm64-v8a]
11 libUE4.so pc 00000000057c01dc SwapBuffers (D:/Code\UnrealEngine\Engine\Source\Runtime\OpenGLDrv\Private\Android/AndroidOpenGLFramePacer.cpp:408) [arm64-v8a]
12 libUE4.so pc 00000000057ba094 PlatformBlitToViewport (D:/Code\UnrealEngine\Engine\Source\Runtime\OpenGLDrv\Private\Android/AndroidOpenGL.cpp:184 [Inline: SwapBuffers]) [arm64-v8a]
13 libUE4.so pc 000000000583a310 RHIEndDrawingViewport (D:/Code\UnrealEngine\Engine\Source\Runtime\OpenGLDrv\Private/OpenGLViewport.cpp:143) [arm64-v8a]
14 libUE4.so pc 0000000003246198 ExecuteInner_DoExecute (D:/Code\UnrealEngine\Engine\Source\Runtime\RHI\Private/RHICommandList.cpp:400) [arm64-v8a]
15 libUE4.so pc 000000000326934c DoTask (D:/Code\UnrealEngine\Engine\Source\Runtime\RHI\Private/RHICommandList.cpp:461) [arm64-v8a]
16 libUE4.so pc 0000000003269114 ExecuteTask (D:/Code\UnrealEngine\Engine\Source\Runtime\Core\Public\Async/TaskGraphInterfaces.h:886) [arm64-v8a]
17 libUE4.so pc 00000000029ebf2c ProcessTasksNamedThread (D:/Code\UnrealEngine\Engine\Source\Runtime\Core\Public\Async/TaskGraphInterfaces.h:524) [arm64-v8a]
18 libUE4.so pc 00000000029ebac0 ProcessTasksUntilQuit (D:/Code\UnrealEngine\Engine\Source\Runtime\Core\Private\Async/TaskGraph.cpp:600) [arm64-v8a]
19 libUE4.so pc 00000000032a7754 Run (D:/Code\UnrealEngine\Engine\Source\Runtime\RenderCore\Private/RenderingThread.cpp:319) [arm64-v8a]
20 libUE4.so pc 0000000002a575f8 Run (D:/Code\UnrealEngine\Engine\Source\Runtime\Core\Private\HAL/PThreadRunnableThread.cpp:25) [arm64-v8a]
21 libUE4.so pc 00000000029e9eb4 _ThreadProc (D:/Code\UnrealEngine\Engine\Source\Runtime\Core\Private\HAL/PThreadRunnableThread.h:185) [arm64-v8a]
22 /system/lib64/libc.so pc 0000000000018ff0 (__pthread_start(void*)+52) [arm64-v8a]
23 /system/lib64/libc.so pc 0000000000015170 (__start_thread+16) [arm64-v8a]

注:eglSwapBuffers耗時高,一般是在等待GPU完成

 

iOS下RenderThread等待Present    注:iOS默認不開啟RHI線程

MyGame 0x0000000105ea8fd0 FMetalViewport::Present(FMetalCommandQueue&, bool) (Runtime/Apple/MetalRHI/Private/MetalViewport.cpp:573)
MyGame 0x0000000105e39424 FMetalDeviceContext::EndDrawingViewport(FMetalViewport*, bool, bool) (Runtime/Apple/MetalRHI/Private/MetalContext.cpp:724)
MyGame 0x0000000105ea9b6c FMetalRHIImmediateCommandContext::RHIEndDrawingViewport(FRHIViewport*, bool, bool) (Runtime/Apple/MetalRHI/Private/MetalViewport.cpp:668)
MyGame 0x00000001066e0be8 FRHICommandList::EndDrawingViewport(FRHIViewport*, bool, bool) (Runtime/RHI/Public/RHICommandList.h:102)
MyGame 0x00000001074e92bc FSlateRHIRenderer::DrawWindow_RenderThread(FRHICommandListImmediate&, FViewportInfo&, FSlateWindowElementList&, FSlateDrawWindowCommandParams const&) (Runtime/Core/Public/Apple/ApplePlatformTime.h:41)
MyGame 0x0000000107502bdc _ZN10TGraphTaskI31TEnqueueUniqueRenderCommandTypeIZN17FSlateRHIRenderer19DrawWindows_PrivateER16FSlateDrawBufferE27SlateDrawWindowsCommandNameZNS1_19DrawWindows_PrivateES3_E4$_15EE11ExecuteTaskER6TArrayIP14FBaseGraphTask22TSizedDefaultAllocatorILi32EEEN13ENamedThreads4TypeE (Runtime/Core/Public/Async/TaskGraphInterfaces.h:891)
MyGame 0x0000000105ece890 FNamedTaskThread::ProcessTasksNamedThread(int, bool) (Runtime/Core/Private/Async/TaskGraph.cpp:710)
MyGame 0x0000000105ece404 FNamedTaskThread::ProcessTasksUntilQuit(int) (Runtime/Core/Private/Async/TaskGraph.cpp:601)
MyGame 0x000000010671e610 RenderingThreadMain(FEvent*) (Runtime/Core/Public/Delegates/MulticastDelegateBase.h:248)
MyGame 0x0000000106729d90 FRenderingThread::Run() (Runtime/RenderCore/Private/RenderingThread.cpp:532)
MyGame 0x0000000105f32bec FRunnableThreadPThread::Run() (Runtime/Core/Private/HAL/PThreadRunnableThread.cpp:25)
MyGame 0x0000000105f03a80 FRunnableThreadPThread::_ThreadProc(void*) (Runtime/Core/Private/HAL/PThreadRunnableThread.h:186)
libsystem_pthread.dylib 0x00000001d6ca4bfc _pthread_start + 320

 

參考

剖析虛幻渲染體系(02)- 多線程渲染

UE4主線程與渲染線程同步

UE的render線程和RHI


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM