游戲線程(GameThread)
GameThread是引擎運行的心臟,承載游戲邏輯、運行流程的工作,也是其它線程的數據發起者。在FEngineLoop::Tick函數執行每幀邏輯的更新。
在引擎啟動時會把GameThread的線程id存儲到全局變量GGameThreadId中,且稍后會設置到TaskGraph系統中。
/**
 * Excerpt of FEngineLoop::PreInitPreStartupScreen: records the game thread id,
 * starts the TaskGraph and attaches the current thread to it, then (when threaded
 * rendering is requested) selects the RHI-thread mode and starts the rendering thread.
 *
 * @param CmdLine  Engine command line (not referenced by the lines shown in this excerpt).
 */
int32 FEngineLoop::PreInitPreStartupScreen(const TCHAR* CmdLine)
{
    // ... ...

    // Create this thread's own TLS data (FPerThreadFreeBlockLists).
    // Note: required by the Binned2 / Binned3 memory allocators.
    FMemory::SetupTLSCachesOnCurrentThread();

    // remember thread id of the main thread
    GGameThreadId = FPlatformTLS::GetCurrentThreadId();   // game thread id
    GIsGameThreadIdInitialized = true;                    // the game thread id is now initialized

    // Set the CPU affinity of the current thread.
    // Note: avoids hopping between CPU cores, which causes performance problems.
    FPlatformProcess::SetThreadAffinityMask(FPlatformAffinity::GetMainGameMask());

    // Set up game-thread data (an empty implementation on many platforms).
    FPlatformProcess::SetupGameThread();

    // ... ...

    // Initialize the TaskGraph; worker threads are created according to the machine's CPU core count.
    FTaskGraphInterface::Startup(FPlatformMisc::NumberOfCores());

    // Attach to the TaskGraph's GameThread named slot, linking the game thread with the TaskGraph.
    FTaskGraphInterface::Get().AttachToThread(ENamedThreads::GameThread);

    if (GUseThreadedRendering) // a rendering thread is used
    {
        if (GRHISupportsRHIThread) // the current platform supports an RHI thread
        {
            const bool DefaultUseRHIThread = true;
            GUseRHIThread_InternalUseOnly = DefaultUseRHIThread;
            if (FParse::Param(FCommandLine::Get(), TEXT("rhithread")))
            {
                // Create a dedicated RHIThread and add it to the TaskGraph; RHI work runs on the TaskGraph's RHIThread.
                GUseRHIThread_InternalUseOnly = true;
            }
            else if (FParse::Param(FCommandLine::Get(), TEXT("norhithread")))
            {
                // Article note: RHI work runs on the TaskGraph's AnyThread.
                GUseRHIThread_InternalUseOnly = false;
            }
        }

        SCOPED_BOOT_TIMING("StartRenderingThread");
        StartRenderingThread(); // create and start the rendering thread
    }

    // ... ...
}
游戲線程和TaskGraph系統的ENamedThreads::GameThread其實是一回事,都是同一個線程!
經過上面的初始化和設置后,其它地方就可以通過TaskGraph系統並行地處理任務了,也可以訪問全局變量,以便判斷游戲線程是否初始化完,當前線程是否游戲線程:
/**
 * Reports whether the calling thread is the game thread.
 *
 * @return true only when the game thread id has already been recorded and the
 *         calling thread's id equals it; false otherwise.
 */
bool IsInGameThread()
{
    // Until the id has been recorded there is nothing to compare against.
    if (!GIsGameThreadIdInitialized)
    {
        return false;
    }

    const bool bCallerIsGameThread = (FPlatformTLS::GetCurrentThreadId() == GGameThreadId);
    return bCallerIsGameThread;
}
渲染線程(RenderThread)
RenderThread在TaskGraph系統中有一個任務隊列,其他線程(主要是GameThread)通過宏ENQUEUE_RENDER_COMMAND(Type)向該隊列中填充任務 注:Type字符串要保持唯一,否則ENQUEUE_RENDER_COMMAND(Type)會失效
RenderThread則不斷從這個隊列中取出任務來執行,從而生成與平台無關的Command List(渲染指令列表)。注:整個過程是異步的
RenderThread是其他線程(主要是GameThread)的奴隸,只是簡單地作為工作線程不斷執行它們賦予的工作。
RenderingThread.h聲明了全部對外的接口,部分如下:
// Engine\Source\Runtime\RenderCore\Public\RenderingThread.h // 是否啟用了獨立的渲染線程, 如果為false, 則所有渲染命令會被立即執行, 而不是放入渲染命令隊列. extern RENDERCORE_API bool GIsThreadedRendering; // 渲染線程是否應該被創建. 通常被命令行參數或ToggleRenderingThread控制台參數設置. extern RENDERCORE_API bool GUseThreadedRendering; // 是否開啟RHI線程 extern RENDERCORE_API void SetRHIThreadEnabled(bool bEnableDedicatedThread, bool bEnableRHIOnTaskThreads); (......) // 開啟渲染線程. extern RENDERCORE_API void StartRenderingThread(); // 停止渲染線程. extern RENDERCORE_API void StopRenderingThread(); // 檢查渲染線程是否健康(是否Crash), 如果crash, 則會用UE_Log輸出日志. extern RENDERCORE_API void CheckRenderingThreadHealth(); // 檢查渲染線程是否健康(是否Crash) extern RENDERCORE_API bool IsRenderingThreadHealthy(); // 增加一個必須在下一個場景繪制前或flush渲染命令前完成的任務. extern RENDERCORE_API void AddFrameRenderPrerequisite(const FGraphEventRef& TaskToAdd); // 手機幀渲染前序任務, 保證所有渲染命令被入隊. extern RENDERCORE_API void AdvanceFrameRenderPrerequisite(); // 等待所有渲染線程的渲染命令被執行完畢. 會卡住游戲線程, 只能被游戲線程調用. extern RENDERCORE_API void FlushRenderingCommands(bool bFlushDeferredDeletes = false); extern RENDERCORE_API void FlushPendingDeleteRHIResources_GameThread(); extern RENDERCORE_API void FlushPendingDeleteRHIResources_RenderThread(); extern RENDERCORE_API void TickRenderingTickables(); extern RENDERCORE_API void StartRenderCommandFenceBundler(); extern RENDERCORE_API void StopRenderCommandFenceBundler(); (......)
RenderingThread.h還有一個非常重要的宏ENQUEUE_RENDER_COMMAND,它的作用是向渲染線程入隊渲染指令。下面是它的聲明和實現:
/**
 * Enqueues a render command for the rendering thread. Type names the render operation.
 *
 * Expands to a local struct `Type##Name` exposing the name as both a narrow (CStr) and a
 * TCHAR (TStr) string, followed by `EnqueueUniqueRenderCommand<Type##Name>`, so the macro
 * invocation must be followed directly by the call arguments, i.e. `( lambda );`.
 */
#define ENQUEUE_RENDER_COMMAND(Type) \
    struct Type##Name \
    { \
        static const char* CStr() { return #Type; } \
        static const TCHAR* TStr() { return TEXT(#Type); } \
    }; \
    EnqueueUniqueRenderCommand<Type##Name>
上面最后一句使用了EnqueueUniqueRenderCommand命令,繼續追蹤之:
/* UnrealEngine\Engine\Source\Runtime\RenderCore\Public\RenderingThread.h */ /** The parent class of commands stored in the rendering command queue. */ class RENDERCORE_API FRenderCommand { public: // All render commands run on the render thread static ENamedThreads::Type GetDesiredThread() // 所有渲染指令都必須在渲染線程執行. { check(!GIsThreadedRendering || ENamedThreads::GetRenderThread() != ENamedThreads::GameThread); return ENamedThreads::GetRenderThread(); // 開啟渲染多線程時,返回渲染線程。不開啟渲染多線程時,返回GameThread } static ESubsequentsMode::Type GetSubsequentsMode() { // Don't support tasks having dependencies on us, reduces task graph overhead tracking and dealing with subsequents return ESubsequentsMode::FireAndForget; } }; template<typename TSTR, typename LAMBDA> class TEnqueueUniqueRenderCommandType : public FRenderCommand { public: TEnqueueUniqueRenderCommandType(LAMBDA&& InLambda) : Lambda(Forward<LAMBDA>(InLambda)) {} void DoTask(ENamedThreads::Type CurrentThread, const FGraphEventRef& MyCompletionGraphEvent) { TRACE_CPUPROFILER_EVENT_SCOPE_ON_CHANNEL_STR(TSTR::TStr(), RenderCommandsChannel); FRHICommandListImmediate& RHICmdList = GetImmediateCommandList_ForRenderCommand(); Lambda(RHICmdList); } FORCEINLINE_DEBUGGABLE TStatId GetStatId() const { #if STATS static struct FThreadSafeStaticStat<FStat_EnqueueUniqueRenderCommandType> StatPtr_EnqueueUniqueRenderCommandType; return StatPtr_EnqueueUniqueRenderCommandType.GetStatId(); #else return TStatId(); #endif } private: LAMBDA Lambda; // 緩存渲染回調函數. 
}; /*************************************************************************************************************/ template<typename TSTR, typename LAMBDA> // 傳入的TSTR為結構體類型,里面包含CStr和TStr的靜態方法,為渲染命令名字。 LAMBDA是回調函數 FORCEINLINE_DEBUGGABLE void EnqueueUniqueRenderCommand(LAMBDA&& Lambda) { QUICK_SCOPE_CYCLE_COUNTER(STAT_EnqueueUniqueRenderCommand); typedef TEnqueueUniqueRenderCommandType<TSTR, LAMBDA> EURCType; // EURCType類型即為TEnqueueUniqueRenderCommandType<TSTR, LAMBDA>類型 #if 0 // UE_SERVER && UE_BUILD_DEBUG UE_LOG(LogRHI, Warning, TEXT("Render command '%s' is being executed on a dedicated server."), TSTR::TStr()) #endif if (IsInRenderingThread()) // 如果在渲染線程內直接執行回調而不入隊渲染命令. { FRHICommandListImmediate& RHICmdList = GetImmediateCommandList_ForRenderCommand(); Lambda(RHICmdList); // 在渲染線程中,直接執行傳入的lamda匿名函數 } else { if (ShouldExecuteOnRenderThread()) // if ((GIsThreadedRendering || !IsInGameThread())) // 使用渲染線程 or 當前不為GameThread // 需要在獨立的渲染線程執行 { CheckNotBlockedOnRenderThread(); TGraphTask<EURCType>::CreateTask().ConstructAndDispatchWhenReady(Forward<LAMBDA>(Lambda)); // 向渲染線程的TaskGraph隊列里面投遞類型名為EURCType類型的任務,並將lamda匿名函數作為參數傳入該任務的構造函數 } else { // 不在獨立的渲染線程執行,則構建EURCType類型的對象,然后直接執行 EURCType TempCommand(Forward<LAMBDA>(Lambda)); FScopeCycleCounter EURCMacro_Scope(TempCommand.GetStatId()); TempCommand.DoTask(ENamedThreads::GameThread, FGraphEventRef()); } } }
EnqueueUniqueRenderCommand函數只負責向渲染線程的TaskGraph隊列里面投遞任務,它耗時統計到Quick的EnqueueUniqueRenderCommand分類中
注:執行EnqueueUniqueRenderCommand函數的線程可能為:GameThread、RenderThread、RTHeartBeat、TaskGraph的AnyThread、PoolThread的FQueuedThread
ENQUEUE_RENDER_COMMAND(xxx),在RenderThread上執行的xxx的耗時統計在RenderThreadCommands分類中
為了更好理解入隊渲染命令操作,舉2個具體的例子:
例1:在GameThread執行LoadMap切地圖,在卸載掉Old World之后,會在TrimMemory()函數中使用宏ENQUEUE_RENDER_COMMAND向RenderThread的TaskGraph隊列中投遞一個FlushCommand任務
/* ENQUEUE_RENDER_COMMAND(FlushCommand) below expands to:
 *
 *     struct FlushCommandName
 *     {
 *         static const char* CStr() { return "FlushCommand"; }
 *         static const TCHAR* TStr() { return L"FlushCommand"; }
 *     };
 *     EnqueueUniqueRenderCommand<FlushCommandName>(
 *
 * followed by the lambda argument and the closing parenthesis.
 */
ENQUEUE_RENDER_COMMAND(FlushCommand)(
    [](FRHICommandList& RHICmdList)
    {
        // Flush the immediate command list, release RHI resources, then flush once more.
        const auto FlushMode = EImmediateFlushType::FlushRHIThreadFlushResources;
        auto& ImmediateCmdList = GRHICommandList.GetImmediateCommandList();

        ImmediateCmdList.ImmediateFlush(FlushMode);
        RHIFlushResources();
        ImmediateCmdList.ImmediateFlush(FlushMode);
    });
例2:在GameThread中執行控制台變量命令,會使用宏ENQUEUE_RENDER_COMMAND向RenderThread的TaskGraph隊列中投遞一個OnCVarChange1任務,以便將新的數值傳遞到RenderThread的邏輯中使用
/**
 * Enqueues a render command that writes NewValue into Dest.
 *
 * @param Dest      Variable to overwrite; its address is captured by the lambda, so it must
 *                  still be alive when the command runs.
 * @param NewValue  Value to store, captured by copy.
 */
virtual void OnCVarChange(int32& Dest, int32 NewValue)
{
    /* ENQUEUE_RENDER_COMMAND(OnCVarChange1) below expands to:
     *
     *     struct OnCVarChange1Name
     *     {
     *         static const char* CStr() { return "OnCVarChange1"; }
     *         static const TCHAR* TStr() { return L"OnCVarChange1"; }
     *     };
     *     EnqueueUniqueRenderCommand<OnCVarChange1Name>(
     */
    ENQUEUE_RENDER_COMMAND(OnCVarChange1)(
        [Target = &Dest, NewValue](FRHICommandListImmediate& RHICmdList)
        {
            *Target = NewValue;
        });
}
FRenderingThread承載了渲染線程的主要工作,它的部分接口和實現代碼如下:
// Engine\Source\Runtime\RenderCore\Private\RenderingThread.cpp class FRenderingThread : public FRunnable { private: bool bAcquiredThreadOwnership; // 當沒有獨立的RHI線程時, 渲染線程將被其它線程捕獲. public: FEvent* TaskGraphBoundSyncEvent; // TaskGraph同步事件, 以便在主線程使用渲染線程之前就將渲染線程綁定到TaskGraph體系中. FRenderingThread() { bAcquiredThreadOwnership = false; // 獲取同步事件. TaskGraphBoundSyncEvent = FPlatformProcess::GetSynchEventFromPool(true); RHIFlushResources(); } // FRunnable interface. virtual bool Init(void) override { // 獲取當前線程ID到全局變量GRenderThreadId, 以便其它地方引用. GRenderThreadId = FPlatformTLS::GetCurrentThreadId(); // 處理線程捕獲關系. if (!IsRunningRHIInSeparateThread()) { bAcquiredThreadOwnership = true; RHIAcquireThreadOwnership(); } return true; } (......) virtual uint32 Run(void) override { // 設置TLS. FMemory::SetupTLSCachesOnCurrentThread(); // 設置渲染線程平台相關的數據. FPlatformProcess::SetupRenderThread(); (......) { // 進入渲染線程主循環. RenderingThreadMain( TaskGraphBoundSyncEvent ); } FMemory::ClearAndDisableTLSCachesOnCurrentThread(); return 0; } };
可見它在運行之后會進入渲染線程邏輯,這里再進入RenderingThreadMain代碼一探究竟:
/**
 * Main function of the rendering thread (excerpt): registers the current thread as the
 * TaskGraph render thread, signals the creator, processes tasks until asked to return,
 * then points the render-thread slots back at the game thread.
 *
 * @param TaskGraphBoundSyncEvent  Optional event triggered once the thread is attached to the TaskGraph.
 */
void RenderingThreadMain( FEvent* TaskGraphBoundSyncEvent )
{
    LLM_SCOPE(ELLMTag::RenderingThreadMemory);

    // Point the render-thread and local render-thread slots at ActualRenderingThread and ActualRenderingThread_Local.
    ENamedThreads::Type RenderThread = ENamedThreads::Type(ENamedThreads::ActualRenderingThread);
    ENamedThreads::SetRenderThread(RenderThread);
    ENamedThreads::SetRenderThread_Local(ENamedThreads::Type(ENamedThreads::ActualRenderingThread_Local));

    // Attach the current thread to the TaskGraph's RenderThread slot.
    FTaskGraphInterface::Get().AttachToThread(RenderThread);
    FPlatformMisc::MemoryBarrier();

    // Trigger the sync event to tell the main thread that the rendering thread is attached
    // to the TaskGraph and ready to receive tasks.
    if( TaskGraphBoundSyncEvent != NULL )
    {
        TaskGraphBoundSyncEvent->Trigger();
    }

    // (......)

    // The successive phases of the rendering thread's life.
    FCoreDelegates::PostRenderingThreadCreated.Broadcast();
    check(GIsThreadedRendering);
    FTaskGraphInterface::Get().ProcessThreadUntilRequestReturn(RenderThread);
    FPlatformMisc::MemoryBarrier();
    check(!GIsThreadedRendering);
    FCoreDelegates::PreRenderingThreadDestroyed.Broadcast();

    // (......)

    // Restore the render-thread slots to the game thread.
    ENamedThreads::SetRenderThread(ENamedThreads::GameThread);
    ENamedThreads::SetRenderThread_Local(ENamedThreads::GameThread_Local);
    FPlatformMisc::MemoryBarrier();
}
不過這里還留有一個很大的疑問,那就是FRenderingThread只是獲取當前線程作為渲染線程並附加到TaskGraph中,並沒有創建線程。
那么是哪里創建的渲染線程呢?繼續追蹤,結果發現是在StartRenderingThread()接口中創建了FRenderingThread實例,它的實現代碼如下(節選):
// Engine\Source\Runtime\RenderCore\Private\RenderingThread.cpp void StartRenderingThread() { (......) // Turn on the threaded rendering flag. GIsThreadedRendering = true; // 創建FRenderingThread實例. GRenderingThreadRunnable = new FRenderingThread(); // 創建渲染線程!! GRenderingThread = FRunnableThread::Create(GRenderingThreadRunnable, *BuildRenderingThreadName(ThreadCount), 0, FPlatformAffinity::GetRenderingThreadPriority(), FPlatformAffinity::GetRenderingThreadMask(), FPlatformAffinity::GetRenderingThreadFlags()); (......) // 開啟渲染命令的柵欄. FRenderCommandFence Fence; Fence.BeginFence(); Fence.Wait(); (......) }
如果繼續追蹤,會發現StartRenderingThread()是在FEngineLoop::PreInitPostStartupScreen中調用的。
至此,渲染線程的創建、初始化以及主要接口的實現都剖析完了。
RHI線程(RHIThread)
RenderThread作為前端(frontend)產生的Command List是平台無關的,是抽象的圖形API調用;
而RHIThread作為后端(backend)會執行和轉換渲染線程的Command List成為指定圖形API的調用(稱為Graphical Command),並提交到GPU執行。
RHI線程的工作是轉換渲染指令到指定圖形API,創建、上傳渲染資源到GPU。實現代碼如下:
// Engine\Source\Runtime\RenderCore\Private\RenderingThread.cpp class FRHIThread : public FRunnable { public: FRunnableThread* Thread; // 所在的RHI線程. FRHIThread() : Thread(nullptr) { check(IsInGameThread()); } void Start() { // 開始時創建RHI線程. Thread = FRunnableThread::Create(this, TEXT("RHIThread"), 512 * 1024, FPlatformAffinity::GetRHIThreadPriority(), FPlatformAffinity::GetRHIThreadMask(), FPlatformAffinity::GetRHIThreadFlags() ); check(Thread); } virtual uint32 Run() override { LLM_SCOPE(ELLMTag::RHIMisc); // 初始化TLS FMemory::SetupTLSCachesOnCurrentThread(); // 將FRHIThread所在的RHI線程附加到askGraph體系中,並指定到ENamedThreads::RHIThread。 FTaskGraphInterface::Get().AttachToThread(ENamedThreads::RHIThread); // 啟動RHI線程,直到線程返回。 FTaskGraphInterface::Get().ProcessThreadUntilRequestReturn(ENamedThreads::RHIThread); // 清理TLS. FMemory::ClearAndDisableTLSCachesOnCurrentThread(); return 0; } // 單例接口。 static FRHIThread& Get() { static FRHIThread Singleton; // 使用了局部靜態變量,可以保證線程安全。 return Singleton; } };
可見RHI線程不同於渲染線程,是直接在FRHIThread對象內創建實際的線程。而FRHIThread的創建也是在StartRenderingThread()中:
void StartRenderingThread() { (......) if (GUseRHIThread_InternalUseOnly) { FRHICommandListExecutor::GetImmediateCommandList().ImmediateFlush(EImmediateFlushType::DispatchToRHIThread); if (!FTaskGraphInterface::Get().IsThreadProcessingTasks(ENamedThreads::RHIThread)) { // 創建FRHIThread實例並啟動它. FRHIThread::Get().Start(); } DECLARE_CYCLE_STAT(TEXT("Wait For RHIThread"), STAT_WaitForRHIThread, STATGROUP_TaskGraphTasks); // 創建RHI線程擁有者捕獲任務, 讓游戲線程等待. FGraphEventRef CompletionEvent = TGraphTask<FOwnershipOfRHIThreadTask>::CreateTask(NULL, ENamedThreads::GameThread).ConstructAndDispatchWhenReady(true, GET_STATID(STAT_WaitForRHIThread)); QUICK_SCOPE_CYCLE_COUNTER(STAT_StartRenderingThread); // 讓游戲線程或局部線程等待RHI線程處理(捕獲了線程擁有者, 大多數圖形API為空)完畢. FTaskGraphInterface::Get().WaitUntilTaskCompletes(CompletionEvent, ENamedThreads::GameThread_Local); // 存儲RHI線程id. GRHIThread_InternalUseOnly = FRHIThread::Get().Thread; check(GRHIThread_InternalUseOnly); GIsRunningRHIInDedicatedThread_InternalUseOnly = true; GIsRunningRHIInSeparateThread_InternalUseOnly = true; GRHIThreadId = GRHIThread_InternalUseOnly->GetThreadID(); GRHICommandList.LatchBypass(); } (......) }
以Fortnite(堡壘之夜)移動端為例,在開啟RHI線程之前,渲染線程急劇地上下波動,而加了RHI線程之后,波動平緩許多,和游戲線程基本保持一致,幀率也提升不少:
GameThread、RenderThread、RHIThread之間的同步機制
這3個線程處理的數據通常是不同幀的,譬如GameThread處理N幀數據,RenderThread和RHIThread處理N-1幀數據。
但也存在例外,比如RenderThread和RHIThread運行很快,幾乎不存在延遲,這種情況下,GameThread處理N幀,而RenderThread可能處理N或N-1幀,RHIThread也可能在轉換N或N-1幀。
但是,RenderThread不能落后游戲線程超過一幀,否則GameThread會卡住,直到RenderThread處理完所有指令。
游戲線程和渲染線程的同步
游戲線程不可能領先於渲染線程超過一幀(最多快一幀),否則游戲線程會等待渲染線程處理完。它們的同步機制涉及兩個關鍵的概念:
// Engine\Source\Runtime\RenderCore\Public\RenderCommandFence.h // 渲染命令柵欄 class RENDERCORE_API FRenderCommandFence { public: // 向渲染命令隊列增加一個柵欄. bSyncToRHIAndGPU是否同步RHI和GPU交換Buffer, 否則只等待渲染線程. void BeginFence(bool bSyncToRHIAndGPU = false); // 等待柵欄被執行. bProcessGameThreadTasks沒有作用. void Wait(bool bProcessGameThreadTasks = false) const; // 是否完成了柵欄. bool IsFenceComplete() const; private: mutable FGraphEventRef CompletionEvent; // 處理完成同步的事件 ENamedThreads::Type TriggerThreadIndex; // 處理完之后需要觸發的線程類型. }; // Engine\Source\Runtime\Engine\Public\UnrealEngine.h class FFrameEndSync { FRenderCommandFence Fence[2]; // 渲染柵欄對. int32 EventIndex; // 當前事件索引 public: // 同步游戲線程和渲染線程. bAllowOneFrameThreadLag是否允許渲染線程一幀的延遲. void Sync( bool bAllowOneFrameThreadLag ) { Fence[EventIndex].BeginFence(true); // 開啟柵欄, 強制同步RHI和GPU交換鏈的. bool bEmptyGameThreadTasks = !FTaskGraphInterface::Get().IsThreadProcessingTasks(ENamedThreads::GameThread); // 保證游戲線程至少跑過一次任務. if (bEmptyGameThreadTasks) { FTaskGraphInterface::Get().ProcessThreadUntilIdle(ENamedThreads::GameThread); } // 如果允許延遲, 交換事件索引. if( bAllowOneFrameThreadLag ) { EventIndex = (EventIndex + 1) % 2; } (......) // 開啟柵欄等待. Fence[EventIndex].Wait(bEmptyGameThreadTasks); } };
在FRenderCommandFence的BeginFence函數中
當GameThread與RHI線程及GPU同步時,GameThread會使用宏ENQUEUE_RENDER_COMMAND向RenderThread的TaskGraph隊列中投遞一個FSyncFrameCommand任務,以便將Command List同步投遞到RHI線程
當GameThread與RenderThread同步時,GameThread會創建一個FNullGraphTask空任務,放到RenderThread的TaskGraph隊列中讓其執行
在FRenderCommandFence的Wait函數中,會檢查投遞給RenderThread的CompletionEvent是否被執行,如果沒有執行則調用GameThreadWaitForTask函數來阻塞等待(通過Event實現)
void FRenderCommandFence::BeginFence(bool bSyncToRHIAndGPU) { if (!GIsThreadedRendering) { return; } else { // Render thread is a default trigger for the CompletionEvent TriggerThreadIndex = ENamedThreads::ActualRenderingThread; if (BundledCompletionEvent.GetReference() && IsInGameThread()) { CompletionEvent = BundledCompletionEvent; return; } int32 GTSyncType = CVarGTSyncType.GetValueOnAnyThread(); if (bSyncToRHIAndGPU) { // Don't sync to the RHI and GPU if GtSyncType is disabled, or we're not vsyncing //@TODO: do this logic in the caller? static auto CVarVsync = IConsoleManager::Get().FindConsoleVariable(TEXT("r.VSync")); // 是否開了VSync check(CVarVsync != nullptr); if ( GTSyncType == 0 || CVarVsync->GetInt() == 0 ) // r.GTSyncType為0或r.VSync為0時,GameThread不與RHI線程及GPU同步 { bSyncToRHIAndGPU = false; } } if (bSyncToRHIAndGPU) // GameThread與RHI線程及GPU同步時 { if (IsRHIThreadRunning()) { // Change trigger thread to RHI TriggerThreadIndex = ENamedThreads::RHIThread; } // Create a task graph event which we can pass to the render or RHI threads. 
CompletionEvent = FGraphEvent::CreateGraphEvent(); FGraphEventRef InCompletionEvent = CompletionEvent; /* ---------------------------------------------- ENQUEUE_RENDER_COMMAND宏展開后 ------------------------------------------------ struct FSyncFrameCommandName { static const char* CStr() { return "FSyncFrameCommand"; } static const TCHAR* TStr() { return L"FSyncFrameCommand"; } }; EnqueueUniqueRenderCommand<FSyncFrameCommandName>( */ ENQUEUE_RENDER_COMMAND(FSyncFrameCommand)( [InCompletionEvent, GTSyncType](FRHICommandListImmediate& RHICmdList) { if (IsRHIThreadRunning()) // 如果開啟了RHI線程 { ALLOC_COMMAND_CL(RHICmdList, FRHISyncFrameCommand)(InCompletionEvent, GTSyncType); // 將創建的CompletionEvent投遞到RHI線程的TaskGraph的任務隊列中 RHICmdList.ImmediateFlush(EImmediateFlushType::DispatchToRHIThread); } else // 渲染線程直接執行 { FRHISyncFrameCommand Command(InCompletionEvent, GTSyncType); Command.Execute(RHICmdList); } }); } else // GameThead與RenderThread同步 { // Sync Game Thread with Render Thread only DECLARE_CYCLE_STAT(TEXT("FNullGraphTask.FenceRenderCommand"), STAT_FNullGraphTask_FenceRenderCommand, STATGROUP_TaskGraphTasks); CompletionEvent = TGraphTask<FNullGraphTask>::CreateTask(NULL, ENamedThreads::GameThread).ConstructAndDispatchWhenReady( GET_STATID(STAT_FNullGraphTask_FenceRenderCommand), ENamedThreads::GetRenderThread()); } } } /** * Waits for pending fence commands to retire. */ void FRenderCommandFence::Wait(bool bProcessGameThreadTasks) const { if (!IsFenceComplete()) { StopRenderCommandFenceBundler(); GameThreadWaitForTask(CompletionEvent, TriggerThreadIndex, bProcessGameThreadTasks); } } bool FRenderCommandFence::IsFenceComplete() const { if (!GIsThreadedRendering) { return true; } check(IsInGameThread() || IsInAsyncLoadingThread()); CheckRenderingThreadHealth(); if (!CompletionEvent.GetReference() || CompletionEvent->IsComplete()) { CompletionEvent = NULL; // this frees the handle for other uses, the NULL state is considered completed return true; } return false; }
而FFrameEndSync的使用是在FEngineLoop::Tick中:
// Engine\Source\Runtime\Launch\Private\LaunchEngineLoop.cpp void FEngineLoop::Tick() { (......) // 在引擎循環的幀末尾添加游戲線程和渲染線程的同步事件. { static FFrameEndSync FrameEndSync; // 局部靜態變量, 線程安全. static auto CVarAllowOneFrameThreadLag = IConsoleManager::Get().FindTConsoleVariableDataInt(TEXT("r.OneFrameThreadLag")); // 同步游戲和渲染線程, 是否允許一幀的延遲可由控制台命令控制. 默認是開啟的. FrameEndSync.Sync( CVarAllowOneFrameThreadLag->GetValueOnGameThread() != 0 ); } (......) }
FlushRenderingCommands
在游戲線程中調用,會阻塞游戲線程,強行等待所有的渲染線程pending render command以及RHI線程中的指令執行完,相當於一次完整地對渲染線程的同步
/ * Waits for the rendering thread to finish executing all pending rendering commands. Should only be used from the game thread. */ void FlushRenderingCommands(bool bFlushDeferredDeletes) { if (!GIsRHIInitialized) { return; } FSuspendRenderingTickables SuspendRenderingTickables; // Need to flush GT because render commands from threads other than GT are sent to // the main queue of GT when RT is disabled if (!GIsThreadedRendering && !FTaskGraphInterface::Get().IsThreadProcessingTasks(ENamedThreads::GameThread) && !FTaskGraphInterface::Get().IsThreadProcessingTasks(ENamedThreads::GameThread_Local)) { FTaskGraphInterface::Get().ProcessThreadUntilIdle(ENamedThreads::GameThread); FTaskGraphInterface::Get().ProcessThreadUntilIdle(ENamedThreads::GameThread_Local); } ENQUEUE_RENDER_COMMAND(FlushPendingDeleteRHIResourcesCmd)( [bFlushDeferredDeletes](FRHICommandListImmediate& RHICmdList) { RHICmdList.ImmediateFlush( bFlushDeferredDeletes ? EImmediateFlushType::FlushRHIThreadFlushResourcesFlushDeferredDeletes : EImmediateFlushType::FlushRHIThreadFlushResources); }); // Find the objects which may be cleaned up once the rendering thread command queue has been flushed. FPendingCleanupObjects* PendingCleanupObjects = GetPendingCleanupObjects(); // Issue a fence command to the rendering thread and wait for it to complete. FRenderCommandFence Fence; Fence.BeginFence(); // 創建一個FNullGraphTask空任務,放到RenderThread的TaskGraph隊列中讓其執行 Fence.Wait(); // 檢查投遞給RenderThread的CompletionEvent是否被執行,如果沒有執行就會阻塞等待 // Delete the objects which were enqueued for deferred cleanup before the command queue flush. delete PendingCleanupObjects; }
渲染線程和RHI線程的同步
RenderThread每次在調用RenderViewFamily_RenderThread的起始處,會阻塞等待所有RHI指令處理完成,然后才開始當前幀的渲染邏輯。
FMobileSceneRender渲染管線下,RenderThread每一幀都會執行ImmediateFlush,阻塞等待RHI處理完FGraphEventRef RHIThreadTask任務,代碼如下:
FDeferredShadingSceneRender渲染管線下,RenderThread每一幀都會執行ImmediateFlush,阻塞等待RHI處理完FGraphEventRef RHIThreadTask任務,代碼如下:
RHI線程與GPU的同步
UE4Editor-D3D11RHI-Win64-Debug.dll!FD3D11Viewport::PresentChecked(int SyncInterval=0) Line 374 C++ UE4Editor-D3D11RHI-Win64-Debug.dll!FD3D11Viewport::Present(bool bLockToVsync=false) Line 622 C++ UE4Editor-D3D11RHI-Win64-Debug.dll!FD3D11DynamicRHI::RHIEndDrawingViewport(FRHIViewport * ViewportRHI=0x00000273786c9180, bool bPresent=true, bool bLockToVsync=false) Line 770 C++ UE4Editor-RHI-Win64-Debug.dll!FRHICommandEndDrawingViewport::Execute(FRHICommandListBase & CmdList={...}) Line 704 C++ UE4Editor-RHI-Win64-Debug.dll!FRHICommand<FRHICommandEndDrawingViewport,FRHICommandEndDrawingViewportString1847>::ExecuteAndDestruct(FRHICommandListBase & CmdList={...}, FRHICommandListDebugContext & Context={...}) Line 763 C++ UE4Editor-RHI-Win64-Debug.dll!FRHICommandListExecutor::ExecuteInner_DoExecute(FRHICommandListBase & CmdList={...}) Line 374 C++ UE4Editor-RHI-Win64-Debug.dll!FExecuteRHIThreadTask::DoTask(ENamedThreads::Type CurrentThread=RHIThread, const TRefCountPtr<FGraphEvent> & MyCompletionGraphEvent={...}) Line 429 C++ UE4Editor-RHI-Win64-Debug.dll!TGraphTask<FExecuteRHIThreadTask>::ExecuteTask(TArray<FBaseGraphTask *,TSizedDefaultAllocator<32>> & NewTasks={...}, ENamedThreads::Type CurrentThread=RHIThread) Line 888 C++ UE4Editor-Core-Win64-Debug.dll!FNamedTaskThread::ProcessTasksNamedThread(int QueueIndex=0, bool bAllowStall=true) Line 709 C++ UE4Editor-Core-Win64-Debug.dll!FNamedTaskThread::ProcessTasksUntilQuit(int QueueIndex=0) Line 601 C++ UE4Editor-Core-Win64-Debug.dll!FTaskGraphImplementation::ProcessThreadUntilRequestReturn(ENamedThreads::Type CurrentThread=RHIThread) Line 1474 C++ UE4Editor-RenderCore-Win64-Debug.dll!FRHIThread::Run() Line 320 C++ UE4Editor-Core-Win64-Debug.dll!FRunnableThreadWin::Run() Line 84 C++ UE4Editor-Core-Win64-Debug.dll!FRunnableThreadWin::GuardedRun() Line 27 C++ UE4Editor-Core-Win64-Debug.dll!FRunnableThreadWin::_ThreadProc(void * pThis=0x000002730781fc90) Line 38 C++ kernel32.dll!BaseThreadInitThunk() Unknown 
ntdll.dll!RtlUserThreadStart() Unknown
阻塞時的stats棧(移動端)
RHI等待eglSwapBuffers(Android移動端)
調用堆棧為:
1 /system/vendor/lib64/egl/libGLESv2_adreno.so pc 0000000000128c48 (EsxContext::WriteBinScissor(EsxRenderMode, EsxBinData*, EsxRenderBucket*, int, int)+232) [arm64-v8a] 2 /system/vendor/lib64/egl/libGLESv2_adreno.so pc 0000000000131020 (EsxContext::ProcessAndSubmitRendering(EsxFlushReason)+3168) [arm64-v8a] 3 /system/vendor/lib64/egl/libGLESv2_adreno.so pc 0000000000253814 (EsxCmdMgr::Flush(EsxFlushReason)+596) [arm64-v8a] 4 /system/vendor/lib64/egl/libGLESv2_adreno.so pc 000000000012d6fc (EsxContext::SwapBuffers(EsxRect const*, unsigned int)+124) [arm64-v8a] 5 /system/vendor/lib64/egl/libGLESv2_adreno.so pc 00000000001b4d9c (EglContext::SwapBuffers(EsxRect const*, unsigned int)+92) [arm64-v8a] 6 /system/vendor/lib64/egl/libGLESv2_adreno.so pc 00000000001c4578 (EglWindowSurface::PerformContextSwapOperation(EglContext*, EsxRect const*, unsigned int, EglResourceAccessInfo*)+56) [arm64-v8a] 7 /system/vendor/lib64/egl/libGLESv2_adreno.so pc 00000000001c4678 (EglWindowSurface::SwapBuffers(EglContext*, EsxRect const*, unsigned int)+56) [arm64-v8a] 8 /system/vendor/lib64/egl/libGLESv2_adreno.so pc 00000000001b029c (EglApi::SwapBuffers(void*, void*)+156) [arm64-v8a] 9 /system/vendor/lib64/egl/libEGL_adreno.so pc 0000000000009348 (eglSwapBuffers+40) [arm64-v8a] 10 /system/lib64/libEGL.so pc 0000000000027af8 (eglSwapBuffers+168) [arm64-v8a] 11 libUE4.so pc 00000000057c01dc SwapBuffers (D:/Code\UnrealEngine\Engine\Source\Runtime\OpenGLDrv\Private\Android/AndroidOpenGLFramePacer.cpp:408) [arm64-v8a] 12 libUE4.so pc 00000000057ba094 PlatformBlitToViewport (D:/Code\UnrealEngine\Engine\Source\Runtime\OpenGLDrv\Private\Android/AndroidOpenGL.cpp:184 [Inline: SwapBuffers]) [arm64-v8a] 13 libUE4.so pc 000000000583a310 RHIEndDrawingViewport (D:/Code\UnrealEngine\Engine\Source\Runtime\OpenGLDrv\Private/OpenGLViewport.cpp:143) [arm64-v8a] 14 libUE4.so pc 0000000003246198 ExecuteInner_DoExecute (D:/Code\UnrealEngine\Engine\Source\Runtime\RHI\Private/RHICommandList.cpp:400) [arm64-v8a] 15 
libUE4.so pc 000000000326934c DoTask (D:/Code\UnrealEngine\Engine\Source\Runtime\RHI\Private/RHICommandList.cpp:461) [arm64-v8a] 16 libUE4.so pc 0000000003269114 ExecuteTask (D:/Code\UnrealEngine\Engine\Source\Runtime\Core\Public\Async/TaskGraphInterfaces.h:886) [arm64-v8a] 17 libUE4.so pc 00000000029ebf2c ProcessTasksNamedThread (D:/Code\UnrealEngine\Engine\Source\Runtime\Core\Public\Async/TaskGraphInterfaces.h:524) [arm64-v8a] 18 libUE4.so pc 00000000029ebac0 ProcessTasksUntilQuit (D:/Code\UnrealEngine\Engine\Source\Runtime\Core\Private\Async/TaskGraph.cpp:600) [arm64-v8a] 19 libUE4.so pc 00000000032a7754 Run (D:/Code\UnrealEngine\Engine\Source\Runtime\RenderCore\Private/RenderingThread.cpp:319) [arm64-v8a] 20 libUE4.so pc 0000000002a575f8 Run (D:/Code\UnrealEngine\Engine\Source\Runtime\Core\Private\HAL/PThreadRunnableThread.cpp:25) [arm64-v8a] 21 libUE4.so pc 00000000029e9eb4 _ThreadProc (D:/Code\UnrealEngine\Engine\Source\Runtime\Core\Private\HAL/PThreadRunnableThread.h:185) [arm64-v8a] 22 /system/lib64/libc.so pc 0000000000018ff0 (__pthread_start(void*)+52) [arm64-v8a] 23 /system/lib64/libc.so pc 0000000000015170 (__start_thread+16) [arm64-v8a]
注:eglSwapBuffers耗時高,一般是在等待GPU完成
iOS下RenderThread等待Present 注:iOS默認不開啟RHI線程
MyGame 0x0000000105ea8fd0 FMetalViewport::Present(FMetalCommandQueue&, bool) (Runtime/Apple/MetalRHI/Private/MetalViewport.cpp:573) MyGame 0x0000000105e39424 FMetalDeviceContext::EndDrawingViewport(FMetalViewport*, bool, bool) (Runtime/Apple/MetalRHI/Private/MetalContext.cpp:724) MyGame 0x0000000105ea9b6c FMetalRHIImmediateCommandContext::RHIEndDrawingViewport(FRHIViewport*, bool, bool) (Runtime/Apple/MetalRHI/Private/MetalViewport.cpp:668) MyGame 0x00000001066e0be8 FRHICommandList::EndDrawingViewport(FRHIViewport*, bool, bool) (Runtime/RHI/Public/RHICommandList.h:102) MyGame 0x00000001074e92bc FSlateRHIRenderer::DrawWindow_RenderThread(FRHICommandListImmediate&, FViewportInfo&, FSlateWindowElementList&, FSlateDrawWindowCommandParams const&) (Runtime/Core/Public/Apple/ApplePlatformTime.h:41) MyGame 0x0000000107502bdc _ZN10TGraphTaskI31TEnqueueUniqueRenderCommandTypeIZN17FSlateRHIRenderer19DrawWindows_PrivateER16FSlateDrawBufferE27SlateDrawWindowsCommandNameZNS1_19DrawWindows_PrivateES3_E4$_15EE11ExecuteTaskER6TArrayIP14FBaseGraphTask22TSizedDefaultAllocatorILi32EEEN13ENamedThreads4TypeE (Runtime/Core/Public/Async/TaskGraphInterfaces.h:891) MyGame 0x0000000105ece890 FNamedTaskThread::ProcessTasksNamedThread(int, bool) (Runtime/Core/Private/Async/TaskGraph.cpp:710) MyGame 0x0000000105ece404 FNamedTaskThread::ProcessTasksUntilQuit(int) (Runtime/Core/Private/Async/TaskGraph.cpp:601) MyGame 0x000000010671e610 RenderingThreadMain(FEvent*) (Runtime/Core/Public/Delegates/MulticastDelegateBase.h:248) MyGame 0x0000000106729d90 FRenderingThread::Run() (Runtime/RenderCore/Private/RenderingThread.cpp:532) MyGame 0x0000000105f32bec FRunnableThreadPThread::Run() (Runtime/Core/Private/HAL/PThreadRunnableThread.cpp:25) MyGame 0x0000000105f03a80 FRunnableThreadPThread::_ThreadProc(void*) (Runtime/Core/Private/HAL/PThreadRunnableThread.h:186) libsystem_pthread.dylib 0x00000001d6ca4bfc _pthread_start + 320