UE4之RHI命令執行


多線程架構下(GameThread --  RenderThread -- RHIThread)渲染時,不會直接調用圖形API的上下文的接口

而是創建一個個FRHICommand對象(構成一個鏈表),並賦值給FExecuteRHIThreadTask對象的FRHICommandListBase* RHICmdList

接着FExecuteRHIThreadTask對象會被壓入到TaskGraph[ENamedThreads::RHIThread]的Queue隊列中

RHIThread依次處理TaskGraph[ENamedThreads::RHIThread]的Queue隊列中任務,執行FExecuteRHIThreadTask時

將該對象中FRHICommandListBase* RHICmdList鏈表中的FRHICommand轉換成圖形API的上下文的接口並調用執行

 

FRHICommand

FRHICommand是RHI模塊的渲染指令基類,這些指令通常由渲染線程通過命令隊列Push到RHI線程,在合適的時機由RHI線程執行。

FRHICommand繼承自FRHICommandBase,它們的定義如下:

// Engine\Source\Runtime\RHI\Public\RHICommandList.h

// Base class of all RHI commands.
struct FRHICommandBase
{
    // Next command; this pointer makes every command a node of a singly linked command list.
    FRHICommandBase* Next = nullptr;
    
    // Executes the command, then destroys it.
    virtual void ExecuteAndDestruct(FRHICommandListBase& CmdList, FRHICommandListDebugContext& DebugContext) = 0;
};

// CRTP layer between FRHICommandBase and a concrete command: TCmd is the concrete command type
// that derives from FRHICommand<TCmd, ...>.
// NameType is not referenced in this excerpt (presumably it supplies a debug name via TStr() - confirm).
template<typename TCmd, typename NameType = FUnnamedRhiCommand>
struct FRHICommand : public FRHICommandBase
{
    // Executes the command, then destroys it.
    void ExecuteAndDestruct(FRHICommandListBase& CmdList, FRHICommandListDebugContext& Context) override final
    {
        // Downcast to the concrete command type so its own Execute() is called.
        TCmd *ThisCmd = static_cast<TCmd*>(this);
        ThisCmd->Execute(CmdList);
        // Explicit destructor call only; no memory is released here.
        ThisCmd->~TCmd();
    }
};

注:FRHICommandBase有指向下一個節點的Next變量,意味着FRHICommandBase是命令鏈表的節點。

 

FRHICommand擁有數量眾多的子類,是通過特殊的宏來快速聲明:

// Macro that declares an RHI command subclass derived from FRHICommand.
// It first emits a helper struct named <CommandName>String<__LINE__> whose TStr() returns the
// command name as text, then opens `struct CommandName final : public FRHICommand<CommandName, helper>`.
// The struct body (braces and trailing `;`) is written by the user right after the macro invocation.
#define FRHICOMMAND_MACRO(CommandName)                                \
struct PREPROCESSOR_JOIN(CommandName##String, __LINE__)                \
{                                                                    \
    static const TCHAR* TStr() { return TEXT(#CommandName); }        \
};                                                                    \
struct CommandName final : public FRHICommand<CommandName, PREPROCESSOR_JOIN(CommandName##String, __LINE__)>

// PREPROCESSOR_JOIN宏定義如下 詳見: UnrealEngine\Engine\Source\Runtime\Core\Public\HAL\PreprocessorHelpers.h
// Concatenates two preprocessor tokens, performing macro expansion on them first
#define PREPROCESSOR_JOIN(x, y) PREPROCESSOR_JOIN_INNER(x, y)
#define PREPROCESSOR_JOIN_INNER(x, y) x##y

 

有了以上的宏,就可以快速定義FRHICommand的子類(亦即具體的RHI命令),以FRHICommandSetStencilRef為例:

// Declares the FRHICommandSetStencilRef command via FRHICOMMAND_MACRO; the braces below are the struct body.
FRHICOMMAND_MACRO(FRHICommandSetStencilRef)
{
    // Stencil reference value captured when the command is constructed.
    uint32 StencilRef;
    FORCEINLINE_DEBUGGABLE FRHICommandSetStencilRef(uint32 InStencilRef)
        : StencilRef(InStencilRef)
    {
    }
    // Called from FRHICommand::ExecuteAndDestruct; declared here, defined elsewhere.
    RHI_API void Execute(FRHICommandListBase& CmdList);
};

展開宏定義之後(結構體名的後綴853來自__LINE__,即宏調用所在的行號),代碼如下:

// Helper struct produced by the macro expansion; TStr() returns the command name as text.
// It is passed as the NameType template argument of FRHICommand.
struct FRHICommandSetStencilRefString853
{
    static const TCHAR* TStr() { return TEXT("FRHICommandSetStencilRef"); }
};

// FRHICommandSetStencilRef derives from FRHICommand.
struct FRHICommandSetStencilRef final : public FRHICommand<FRHICommandSetStencilRef, FRHICommandSetStencilRefString853>
{
    // Stencil reference value captured when the command is constructed.
    uint32 StencilRef;
    FRHICommandSetStencilRef(uint32 InStencilRef)
        : StencilRef(InStencilRef)
    {
    }
    // Called from FRHICommand::ExecuteAndDestruct; declared here, defined elsewhere.
    RHI_API void Execute(FRHICommandListBase& CmdList);
};

利用FRHICOMMAND_MACRO聲明的RHI命令數量眾多,下面列舉其中一部分:

FRHICOMMAND_MACRO(FRHISyncFrameCommand)
FRHICOMMAND_MACRO(FRHICommandStat)
FRHICOMMAND_MACRO(FRHICommandRHIThreadFence)
FRHICOMMAND_MACRO(FRHIAsyncComputeSubmitList)
FRHICOMMAND_MACRO(FRHICommandSubmitSubList)

FRHICOMMAND_MACRO(FRHICommandWaitForAndSubmitSubListParallel)
FRHICOMMAND_MACRO(FRHICommandWaitForAndSubmitSubList)
FRHICOMMAND_MACRO(FRHICommandWaitForAndSubmitRTSubList)
FRHICOMMAND_MACRO(FRHICommandWaitForTemporalEffect)
FRHICOMMAND_MACRO(FRHICommandBroadcastTemporalEffect)
    
FRHICOMMAND_MACRO(FRHICommandBeginUpdateMultiFrameResource)
FRHICOMMAND_MACRO(FRHICommandEndUpdateMultiFrameResource)
FRHICOMMAND_MACRO(FRHICommandBeginUpdateMultiFrameUAV)
FRHICOMMAND_MACRO(FRHICommandEndUpdateMultiFrameUAV)
FRHICOMMAND_MACRO(FRHICommandSetGPUMask)

FRHICOMMAND_MACRO(FRHICommandSetStencilRef)
FRHICOMMAND_MACRO(FRHICommandSetBlendFactor)
FRHICOMMAND_MACRO(FRHICommandSetStreamSource)
FRHICOMMAND_MACRO(FRHICommandSetViewport)
FRHICOMMAND_MACRO(FRHICommandSetScissorRect)
    
FRHICOMMAND_MACRO(FRHICommandBeginRenderPass)
FRHICOMMAND_MACRO(FRHICommandEndRenderPass)
FRHICOMMAND_MACRO(FRHICommandNextSubpass)
FRHICOMMAND_MACRO(FRHICommandBeginParallelRenderPass)
FRHICOMMAND_MACRO(FRHICommandEndParallelRenderPass)
FRHICOMMAND_MACRO(FRHICommandBeginRenderSubPass)
FRHICOMMAND_MACRO(FRHICommandEndRenderSubPass)
    
FRHICOMMAND_MACRO(FRHICommandDrawPrimitive)
FRHICOMMAND_MACRO(FRHICommandDrawIndexedPrimitive)
FRHICOMMAND_MACRO(FRHICommandDrawPrimitiveIndirect)
FRHICOMMAND_MACRO(FRHICommandDrawIndexedIndirect)
FRHICOMMAND_MACRO(FRHICommandDrawIndexedPrimitiveIndirect)
    
FRHICOMMAND_MACRO(FRHICommandSetGraphicsPipelineState)
FRHICOMMAND_MACRO(FRHICommandBeginUAVOverlap)
FRHICOMMAND_MACRO(FRHICommandEndUAVOverlap)

FRHICOMMAND_MACRO(FRHICommandSetDepthBounds)
FRHICOMMAND_MACRO(FRHICommandSetShadingRate)
FRHICOMMAND_MACRO(FRHICommandSetShadingRateImage)
FRHICOMMAND_MACRO(FRHICommandClearUAVFloat)
FRHICOMMAND_MACRO(FRHICommandCopyToResolveTarget)
FRHICOMMAND_MACRO(FRHICommandCopyTexture)
FRHICOMMAND_MACRO(FRHICommandBeginTransitions)
FRHICOMMAND_MACRO(FRHICommandEndTransitions)
FRHICOMMAND_MACRO(FRHICommandResourceTransition)
FRHICOMMAND_MACRO(FRHICommandClearColorTexture)
FRHICOMMAND_MACRO(FRHICommandClearDepthStencilTexture)
FRHICOMMAND_MACRO(FRHICommandClearColorTextures)

FRHICOMMAND_MACRO(FRHICommandSetGlobalUniformBuffers)
FRHICOMMAND_MACRO(FRHICommandBuildLocalUniformBuffer)

FRHICOMMAND_MACRO(FRHICommandBeginRenderQuery)
FRHICOMMAND_MACRO(FRHICommandEndRenderQuery)
FRHICOMMAND_MACRO(FRHICommandPollOcclusionQueries)

FRHICOMMAND_MACRO(FRHICommandBeginScene)
FRHICOMMAND_MACRO(FRHICommandEndScene)
FRHICOMMAND_MACRO(FRHICommandBeginFrame)
FRHICOMMAND_MACRO(FRHICommandEndFrame)
FRHICOMMAND_MACRO(FRHICommandBeginDrawingViewport)
FRHICOMMAND_MACRO(FRHICommandEndDrawingViewport)

FRHICOMMAND_MACRO(FRHICommandInvalidateCachedState)
FRHICOMMAND_MACRO(FRHICommandDiscardRenderTargets)

FRHICOMMAND_MACRO(FRHICommandUpdateTextureReference)
FRHICOMMAND_MACRO(FRHICommandUpdateRHIResources)
FRHICOMMAND_MACRO(FRHICommandBackBufferWaitTrackingBeginFrame)
FRHICOMMAND_MACRO(FRHICommandFlushTextureCacheBOP)
FRHICOMMAND_MACRO(FRHICommandCopyBufferRegion)
FRHICOMMAND_MACRO(FRHICommandCopyBufferRegions)

FRHICOMMAND_MACRO(FClearCachedRenderingDataCommand)
FRHICOMMAND_MACRO(FClearCachedElementDataCommand)

FRHICOMMAND_MACRO(FRHICommandRayTraceOcclusion)
FRHICOMMAND_MACRO(FRHICommandRayTraceIntersection)
FRHICOMMAND_MACRO(FRHICommandRayTraceDispatch)
FRHICOMMAND_MACRO(FRHICommandSetRayTracingBindings)
FRHICOMMAND_MACRO(FRHICommandClearRayTracingBindings)

這些RHI命令的void Execute(FRHICommandListBase& CmdList)函數大多實現在UnrealEngine\Engine\Source\Runtime\RHI\Public\RHICommandListCommandExecutes.inl文件中

void FRHICommandDrawPrimitive::Execute(FRHICommandListBase& CmdList)的實現體如下:

// Executes the recorded draw by forwarding the stored arguments to the command list's context.
void FRHICommandDrawPrimitive::Execute(FRHICommandListBase& CmdList)
{
    RHISTAT(DrawPrimitive);  // Stats are only collected when the RHI_STATS macro is enabled.
    INTERNAL_DECORATOR(RHIDrawPrimitive)(BaseVertexIndex, NumPrimitives, NumInstances); // Expands to: CmdList.GetContext().RHIDrawPrimitive(BaseVertexIndex, NumPrimitives, NumInstances);
}

 

FRHICommand的子類除了以上用FRHICOMMAND_MACRO聲明的,還擁有以下直接派生的:

FRHICommandSetShaderParameter
FRHICommandSetShaderUniformBuffer
FRHICommandSetShaderTexture
FRHICommandSetShaderResourceViewParameter
FRHICommandSetUAVParameter
FRHICommandSetShaderSampler
FRHICommandSetComputeShader
FRHICommandSetComputePipelineState
FRHICommandDispatchComputeShader
FRHICommandDispatchIndirectComputeShader
FRHICommandSetAsyncComputeBudget
FRHICommandCopyToStagingBuffer
FRHICommandWriteGPUFence
FRHICommandSetLocalUniformBuffer
FRHICommandSubmitCommandsHint
FRHICommandPushEvent
FRHICommandPopEvent
FRHICommandBuildAccelerationStructure
FRHICommandBuildAccelerationStructures
......

無論是直接派生還是用FRHICOMMAND_MACRO宏聲明,它們都是FRHICommand的子類,都是可以提供給渲染線程操作的RHI層渲染命令。只是用FRHICOMMAND_MACRO會更簡便,可以少寫一些重複的代碼罷了。

因此可知,RHI命令種類繁多,主要包含以下幾大類:

  • 數據和資源的設置、更新、清理、轉換、拷貝、回讀。
  • 圖元繪制。
  • Pass、SubPass、場景、ViewPort等的開始和結束事件。
  • 柵欄、等待、廣播接口。
  • 光線追蹤。
  • Slate、調試相關的命令。

下面繪制出FRHICommand的核心繼承體系:

 

FRHICommandList

FRHICommandList是RHI的指令隊列,用來管理、執行一組FRHICommand的對象。它和父類的定義如下:

// Engine\Source\Runtime\RHI\Public\RHICommandList.h

// Base class of RHI command lists.
// Only declarations are shown in this excerpt; "(......)" marks members omitted by the article.
class FRHICommandListBase : public FNoncopyable  // Copy construction is not allowed.
{
public:
    ~FRHICommandListBase();

    // Custom new/delete operators that recycle allocations.
    void* operator new(size_t Size);
    void operator delete(void *RawMemory);

    // Flushes the command list.
    inline void Flush();
    // Whether this is an immediate-mode command list.
    inline bool IsImmediate();
    // Whether this is the immediate async-compute command list.
    inline bool IsImmediateAsyncCompute();

    // Returns the amount of memory in use.
    const int32 GetUsedMemory() const;
    
    // Queues the submission of an async command list.
    void QueueAsyncCommandListSubmit(FGraphEventRef& AnyThreadCompletionEvent, class FRHICommandList* CmdList);
    // Queues the submission of parallel async command lists.
    void QueueParallelAsyncCommandListSubmit(FGraphEventRef* AnyThreadCompletionEvents, bool bIsPrepass, class FRHICommandList** CmdLists, int32* NumDrawsIfKnown, int32 Num, int32 MinDrawsPerTranslate, bool bSpewMerge);
    // Queues the submission of a render-thread command list.
    void QueueRenderThreadCommandListSubmit(FGraphEventRef& RenderThreadCompletionEvent, class FRHICommandList* CmdList);
    // Queues the submission of a command list.
    void QueueCommandListSubmit(class FRHICommandList* CmdList);
    // Adds a prerequisite task for dispatch.
    void AddDispatchPrerequisite(const FGraphEventRef& Prereq);
    
    // Wait interfaces.
    void WaitForTasks(bool bKnownToBeComplete = false);
    void WaitForDispatch();
    void WaitForRHIThreadTasks();
    void HandleRTThreadTaskCompletion(const FGraphEventRef& MyCompletionGraphEvent);

    // Allocation interfaces.
    void* Alloc(int32 AllocSize, int32 Alignment);
    template <typename T>
    void* Alloc();
    template <typename T>
    const TArrayView<T> AllocArray(const TArrayView<T> InArray);
    TCHAR* AllocString(const TCHAR* Name);
    // Command allocation.
    void* AllocCommand(int32 AllocSize, int32 Alignment);
    template <typename TCmd>
    void* AllocCommand();

    bool HasCommands() const;
    bool IsExecuting() const;
    bool IsBottomOfPipe() const;
    bool IsTopOfPipe() const;
    bool IsGraphics() const;
    bool IsAsyncCompute() const;
    // RHI pipeline: ERHIPipeline::Graphics or ERHIPipeline::AsyncCompute.
    ERHIPipeline GetPipeline() const;

    // Whether to bypass the RHI thread and execute synchronously.
    bool Bypass() const;

    // Exchanges command lists.
    void ExchangeCmdList(FRHICommandListBase& Other);
    // Context setters/getters.
    void SetContext(IRHICommandContext* InContext);
    IRHICommandContext& GetContext();
    void SetComputeContext(IRHIComputeContext* InComputeContext);
    IRHIComputeContext& GetComputeContext();
    void CopyContext(FRHICommandListBase& ParentCommandList);
    
    void MaybeDispatchToRHIThread();
    void MaybeDispatchToRHIThreadInner();
    
    (......)

private:
    // Head of the command linked list.
    FRHICommandBase* Root;
    // Pointer to Root (NOTE(review): presumably the link slot where the next command gets appended - confirm).
    FRHICommandBase** CommandLink;
    
    bool bExecuting;
    uint32 NumCommands;
    uint32 UID;
    
    // Device context.
    IRHICommandContext* Context;
    // Compute context.
    IRHIComputeContext* ComputeContext;
    
    FMemStackBase MemManager; 
    FGraphEventArray RTTasks;

    // Resets the command list.
    void Reset();

public:
    enum class ERenderThreadContext
    {
        SceneRenderTargets,
        Num
    };
    
    // Render-thread contexts, one slot per ERenderThreadContext value.
    void *RenderThreadContexts[(int32)ERenderThreadContext::Num];

protected:
    //the values of this struct must be copied when the commandlist is split 
    struct FPSOContext
    {
        uint32 CachedNumSimultanousRenderTargets = 0;
        TStaticArray<FRHIRenderTargetView, MaxSimultaneousRenderTargets> CachedRenderTargets;
        FRHIDepthRenderTargetView CachedDepthStencilTarget;
        
        ESubpassHint SubpassHint = ESubpassHint::None;
        uint8 SubpassIndex = 0;
        uint8 MultiViewCount = 0;
        bool HasFragmentDensityAttachment = false;
    } PSOContext;

    // Bound shader state input.
 FBoundShaderStateInput BoundShaderInput;
    // RHI resource of the bound compute shader.
    FRHIComputeShader* BoundComputeShaderRHI;

    // Bound-shader validation.
    void ValidateBoundShader(FRHIVertexShader* ShaderRHI);
    void ValidateBoundShader(FRHIPixelShader* ShaderRHI);
    (......)

    void CacheActiveRenderTargets(...);
    void CacheActiveRenderTargets(const FRHIRenderPassInfo& Info);
    void IncrementSubpass();
    void ResetSubpass(ESubpassHint SubpassHint);
    
public:
    void CopyRenderThreadContexts(const FRHICommandListBase& ParentCommandList);
    void SetRenderThreadContext(void* InContext, ERenderThreadContext Slot);
    void* GetRenderThreadContext(ERenderThreadContext Slot);

    // Common data.
    struct FCommonData
    {
        class FRHICommandListBase* Parent = nullptr;

        enum class ECmdListType
        {
            Immediate = 1,
            Regular,
        };
        ECmdListType Type = ECmdListType::Regular;
        bool bInsideRenderPass = false;
        bool bInsideComputePass = false;
    };

    bool DoValidation() const;
    inline bool IsOutsideRenderPass() const;
    inline bool IsInsideRenderPass() const;
    inline bool IsInsideComputePass() const;

    FCommonData Data;
};

// Compute command list.
// Only declarations are shown in this excerpt; "(......)" marks members omitted by the article.
class FRHIComputeCommandList : public FRHICommandListBase
{
public:
    FRHIComputeCommandList(FRHIGPUMask GPUMask) : FRHICommandListBase(GPUMask) {}
    
    void* operator new(size_t Size);
    void operator delete(void *RawMemory);

    // Shader parameter setters and getters.
    inline FRHIComputeShader* GetBoundComputeShader() const;
    void SetGlobalUniformBuffers(const FUniformBufferStaticBindings& UniformBuffers);
    void SetShaderUniformBuffer(FRHIComputeShader* Shader, uint32 BaseIndex, FRHIUniformBuffer* UniformBuffer);
    void SetShaderUniformBuffer(const FComputeShaderRHIRef& Shader, uint32 BaseIndex, FRHIUniformBuffer* UniformBuffer);
    void SetShaderParameter(FRHIComputeShader* Shader, uint32 BufferIndex, uint32 BaseIndex, uint32 NumBytes, const void* NewValue);
    void SetShaderParameter(FComputeShaderRHIRef& Shader, uint32 BufferIndex, uint32 BaseIndex, uint32 NumBytes, const void* NewValue);
    void SetShaderTexture(FRHIComputeShader* Shader, uint32 TextureIndex, FRHITexture* Texture);
    void SetShaderResourceViewParameter(FRHIComputeShader* Shader, uint32 SamplerIndex, FRHIShaderResourceView* SRV);
    void SetShaderSampler(FRHIComputeShader* Shader, uint32 SamplerIndex, FRHISamplerState* State);
    void SetUAVParameter(FRHIComputeShader* Shader, uint32 UAVIndex, FRHIUnorderedAccessView* UAV);
    void SetUAVParameter(FRHIComputeShader* Shader, uint32 UAVIndex, FRHIUnorderedAccessView* UAV, uint32 InitialCount);
    void SetComputeShader(FRHIComputeShader* ComputeShader);
    void SetComputePipelineState(FComputePipelineState* ComputePipelineState, FRHIComputeShader* ComputeShader);

    void SetAsyncComputeBudget(EAsyncComputeBudget Budget);
    // Compute shader dispatch.
    void DispatchComputeShader(uint32 ThreadGroupCountX, uint32 ThreadGroupCountY, uint32 ThreadGroupCountZ);
    void DispatchIndirectComputeShader(FRHIVertexBuffer* ArgumentBuffer, uint32 ArgumentOffset);

    // Clears.
    void ClearUAVFloat(FRHIUnorderedAccessView* UnorderedAccessViewRHI, const FVector4& Values);
    void ClearUAVUint(FRHIUnorderedAccessView* UnorderedAccessViewRHI, const FUintVector4& Values);
    
    // Resource transitions.
    void BeginTransitions(TArrayView<const FRHITransition*> Transitions);
    void EndTransitions(TArrayView<const FRHITransition*> Transitions);
    inline void Transition(TArrayView<const FRHITransitionInfo> Infos);
    void BeginTransition(const FRHITransition* Transition);
    void EndTransition(const FRHITransition* Transition);
    void Transition(const FRHITransitionInfo& Info);

    // ---- Legacy API ----

    void TransitionResource(ERHIAccess TransitionType, const FTextureRHIRef& InTexture);
    void TransitionResource(ERHIAccess TransitionType, FRHITexture* InTexture);
    inline void TransitionResources(ERHIAccess TransitionType, FRHITexture* const* InTextures, int32 NumTextures);
    void TransitionResourceArrayNoCopy(ERHIAccess TransitionType, TArray<FRHITexture*>& InTextures);
    inline void TransitionResources(ERHIAccess TransitionType, EResourceTransitionPipeline /* ignored TransitionPipeline */, FRHIUnorderedAccessView* const* InUAVs, int32 NumUAVs, FRHIComputeFence* WriteFence);
    void TransitionResource(ERHIAccess TransitionType, EResourceTransitionPipeline TransitionPipeline, FRHIUnorderedAccessView* InUAV, FRHIComputeFence* WriteFence);
    void TransitionResource(ERHIAccess TransitionType, EResourceTransitionPipeline TransitionPipeline, FRHIUnorderedAccessView* InUAV);
    void TransitionResources(ERHIAccess TransitionType, EResourceTransitionPipeline TransitionPipeline, FRHIUnorderedAccessView* const* InUAVs, int32 NumUAVs);
    void WaitComputeFence(FRHIComputeFence* WaitFence);

    void BeginUAVOverlap();
    void EndUAVOverlap();
    void BeginUAVOverlap(FRHIUnorderedAccessView* UAV);
    void EndUAVOverlap(FRHIUnorderedAccessView* UAV);
    void BeginUAVOverlap(TArrayView<FRHIUnorderedAccessView* const> UAVs);
    void EndUAVOverlap(TArrayView<FRHIUnorderedAccessView* const> UAVs);

    void PushEvent(const TCHAR* Name, FColor Color);
    void PopEvent();
    void BreakPoint();

    void SubmitCommandsHint();
    void CopyToStagingBuffer(FRHIVertexBuffer* SourceBuffer, FRHIStagingBuffer* DestinationStagingBuffer, uint32 Offset, uint32 NumBytes);

    void WriteGPUFence(FRHIGPUFence* Fence);
    void SetGPUMask(FRHIGPUMask InGPUMask);

    (......)
};

// RHI command list (graphics pipeline).
// Only declarations are shown in this excerpt; "(......)" marks members omitted by the article.
class FRHICommandList : public FRHIComputeCommandList
{
public:
    FRHICommandList(FRHIGPUMask GPUMask) : FRHIComputeCommandList(GPUMask) {}

    bool AsyncPSOCompileAllowed() const;

    void* operator new(size_t Size);
    void operator delete(void *RawMemory);
    
    // Getters for the bound shaders.
    inline FRHIVertexShader* GetBoundVertexShader() const;
    inline FRHIHullShader* GetBoundHullShader() const;
    inline FRHIDomainShader* GetBoundDomainShader() const;
    inline FRHIPixelShader* GetBoundPixelShader() const;
    inline FRHIGeometryShader* GetBoundGeometryShader() const;

    // Multi-frame resource updates.
    void BeginUpdateMultiFrameResource(FRHITexture* Texture);
    void EndUpdateMultiFrameResource(FRHITexture* Texture);
    void BeginUpdateMultiFrameResource(FRHIUnorderedAccessView* UAV);
    void EndUpdateMultiFrameResource(FRHIUnorderedAccessView* UAV);

    // Uniform buffer interfaces.
    FLocalUniformBuffer BuildLocalUniformBuffer(const void* Contents, uint32 ContentsSize, const FRHIUniformBufferLayout& Layout);
    template <typename TRHIShader>
    void SetLocalShaderUniformBuffer(TRHIShader* Shader, uint32 BaseIndex, const FLocalUniformBuffer& UniformBuffer);
    template <typename TShaderRHI>
    void SetLocalShaderUniformBuffer(const TRefCountPtr<TShaderRHI>& Shader, uint32 BaseIndex, const FLocalUniformBuffer& UniformBuffer);
    void SetShaderUniformBuffer(FRHIGraphicsShader* Shader, uint32 BaseIndex, FRHIUniformBuffer* UniformBuffer);
    template <typename TShaderRHI>
    FORCEINLINE void SetShaderUniformBuffer(const TRefCountPtr<TShaderRHI>& Shader, uint32 BaseIndex, FRHIUniformBuffer* UniformBuffer);
    
    // Shader parameters.
    void SetShaderParameter(FRHIGraphicsShader* Shader, uint32 BufferIndex, uint32 BaseIndex, uint32 NumBytes, const void* NewValue);
    template <typename TShaderRHI>
    void SetShaderParameter(const TRefCountPtr<TShaderRHI>& Shader, uint32 BufferIndex, uint32 BaseIndex, uint32 NumBytes, const void* NewValue);
    void SetShaderTexture(FRHIGraphicsShader* Shader, uint32 TextureIndex, FRHITexture* Texture);
    template <typename TShaderRHI>
    void SetShaderTexture(const TRefCountPtr<TShaderRHI>& Shader, uint32 TextureIndex, FRHITexture* Texture);
    void SetShaderResourceViewParameter(FRHIGraphicsShader* Shader, uint32 SamplerIndex, FRHIShaderResourceView* SRV);
    template <typename TShaderRHI>
    void SetShaderResourceViewParameter(const TRefCountPtr<TShaderRHI>& Shader, uint32 SamplerIndex, FRHIShaderResourceView* SRV);
    void SetShaderSampler(FRHIGraphicsShader* Shader, uint32 SamplerIndex, FRHISamplerState* State);
    template <typename TShaderRHI>
    void SetShaderSampler(const TRefCountPtr<TShaderRHI>& Shader, uint32 SamplerIndex, FRHISamplerState* State);
    void SetUAVParameter(FRHIPixelShader* Shader, uint32 UAVIndex, FRHIUnorderedAccessView* UAV);
    void SetUAVParameter(const TRefCountPtr<FRHIPixelShader>& Shader, uint32 UAVIndex, FRHIUnorderedAccessView* UAV);
    void SetBlendFactor(const FLinearColor& BlendFactor = FLinearColor::White);
    
    // Primitive drawing.
    void DrawPrimitive(uint32 BaseVertexIndex, uint32 NumPrimitives, uint32 NumInstances);
    void DrawIndexedPrimitive(FRHIIndexBuffer* IndexBuffer, int32 BaseVertexIndex, uint32 FirstInstance, uint32 NumVertices, uint32 StartIndex, uint32 NumPrimitives, uint32 NumInstances);
    void DrawPrimitiveIndirect(FRHIVertexBuffer* ArgumentBuffer, uint32 ArgumentOffset);
    void DrawIndexedIndirect(FRHIIndexBuffer* IndexBufferRHI, FRHIStructuredBuffer* ArgumentsBufferRHI, uint32 DrawArgumentsIndex, uint32 NumInstances);
    void DrawIndexedPrimitiveIndirect(FRHIIndexBuffer* IndexBuffer, FRHIVertexBuffer* ArgumentsBuffer, uint32 ArgumentOffset);
    
    // State setters.
    void SetStreamSource(uint32 StreamIndex, FRHIVertexBuffer* VertexBuffer, uint32 Offset);
    void SetStencilRef(uint32 StencilRef);
    void SetViewport(float MinX, float MinY, float MinZ, float MaxX, float MaxY, float MaxZ);
    void SetStereoViewport(float LeftMinX, float RightMinX, float LeftMinY, float RightMinY, float MinZ, float LeftMaxX, float RightMaxX, float LeftMaxY, float RightMaxY, float MaxZ);
    void SetScissorRect(bool bEnable, uint32 MinX, uint32 MinY, uint32 MaxX, uint32 MaxY);
    void ApplyCachedRenderTargets(FGraphicsPipelineStateInitializer& GraphicsPSOInit);
    void SetGraphicsPipelineState(class FGraphicsPipelineState* GraphicsPipelineState, const FBoundShaderStateInput& ShaderInput, bool bApplyAdditionalState);
    void SetDepthBounds(float MinDepth, float MaxDepth);
    void SetShadingRate(EVRSShadingRate ShadingRate, EVRSRateCombiner Combiner);
    void SetShadingRateImage(FRHITexture* RateImageTexture, EVRSRateCombiner Combiner);
    
    // Texture copies.
    void CopyToResolveTarget(FRHITexture* SourceTextureRHI, FRHITexture* DestTextureRHI, const FResolveParams& ResolveParams);
    void CopyTexture(FRHITexture* SourceTextureRHI, FRHITexture* DestTextureRHI, const FRHICopyTextureInfo& CopyInfo);
    
    void ResummarizeHTile(FRHITexture2D* DepthTexture);
    
    // Render queries.
    void BeginRenderQuery(FRHIRenderQuery* RenderQuery);
    void EndRenderQuery(FRHIRenderQuery* RenderQuery);
    void CalibrateTimers(FRHITimestampCalibrationQuery* CalibrationQuery);
    void PollOcclusionQueries();

    /* LEGACY API */
    void TransitionResource(FExclusiveDepthStencil DepthStencilMode, FRHITexture* DepthTexture);
    void BeginRenderPass(const FRHIRenderPassInfo& InInfo, const TCHAR* Name);
    void EndRenderPass();
    void NextSubpass();

    // The interfaces below must be called on the immediate-mode command list.
    void BeginScene();
    void EndScene();
    void BeginDrawingViewport(FRHIViewport* Viewport, FRHITexture* RenderTargetRHI);
    void EndDrawingViewport(FRHIViewport* Viewport, bool bPresent, bool bLockToVsync);
    void BeginFrame();
    void EndFrame();

    void RHIInvalidateCachedState();
    void DiscardRenderTargets(bool Depth, bool Stencil, uint32 ColorBitMask);
    
    void CopyBufferRegion(FRHIVertexBuffer* DestBuffer, uint64 DstOffset, FRHIVertexBuffer* SourceBuffer, uint64 SrcOffset, uint64 NumBytes);

    (......)
};

 

FRHICommandListBase定義了命令隊列所需的基本數據(命令列表、設備上下文)和接口(命令的刷新、等待、入隊、派發等,內存分配)。

FRHIComputeCommandList定義了計算着色器相關的接口、GPU資源狀態轉換和着色器部分參數的設置。

FRHICommandList定義了普通渲染管線的接口,與IRHICommandContext的接口高度相似且重疊。包含VS、PS、GS的綁定,圖元繪制,更多着色器參數的設置和資源狀態轉換,資源創建、更新和等待等等。

 

FRHICommandList還有數個子類,定義如下:

// 立即模式的命令隊列.
class FRHICommandListImmediate : public FRHICommandList
{
    // 命令匿名函數.
    template <typename LAMBDA>
    struct TRHILambdaCommand final : public FRHICommandBase
    {
        LAMBDA Lambda;

        void ExecuteAndDestruct(FRHICommandListBase& CmdList, FRHICommandListDebugContext&) override final;
    };

    FRHICommandListImmediate();
    ~FRHICommandListImmediate();
    
public:
    // 立即刷新命令.
    void ImmediateFlush(EImmediateFlushType::Type FlushType);
    // 阻塞RHI線程.
    bool StallRHIThread();
    // 取消阻塞RHI線程.
    void UnStallRHIThread();
    // 是否阻塞中.
    static bool IsStalled();

    void SetCurrentStat(TStatId Stat);

    static FGraphEventRef RenderThreadTaskFence();
    static FGraphEventArray& GetRenderThreadTaskArray();
    static void WaitOnRenderThreadTaskFence(FGraphEventRef& Fence);
    static bool AnyRenderThreadTasksOutstanding();
    FGraphEventRef RHIThreadFence(bool bSetLockFence = false);

    // 將給定的異步計算命令列表按當前立即命令列表的順序排列.
    void QueueAsyncCompute(FRHIComputeCommandList& RHIComputeCmdList);

    bool IsBottomOfPipe();
    bool IsTopOfPipe();
    template <typename LAMBDA>
    void EnqueueLambda(LAMBDA&& Lambda);

    // 資源創建.
    FSamplerStateRHIRef CreateSamplerState(const FSamplerStateInitializerRHI& Initializer)
    FRasterizerStateRHIRef CreateRasterizerState(const FRasterizerStateInitializerRHI& Initializer)
    FDepthStencilStateRHIRef CreateDepthStencilState(const FDepthStencilStateInitializerRHI& Initializer)
    FBlendStateRHIRef CreateBlendState(const FBlendStateInitializerRHI& Initializer)
    FPixelShaderRHIRef CreatePixelShader(TArrayView<const uint8> Code, const FSHAHash& Hash)
    FVertexShaderRHIRef CreateVertexShader(TArrayView<const uint8> Code, const FSHAHash& Hash)
    FHullShaderRHIRef CreateHullShader(TArrayView<const uint8> Code, const FSHAHash& Hash)
    FDomainShaderRHIRef CreateDomainShader(TArrayView<const uint8> Code, const FSHAHash& Hash)
    FGeometryShaderRHIRef CreateGeometryShader(TArrayView<const uint8> Code, const FSHAHash& Hash)
    FComputeShaderRHIRef CreateComputeShader(TArrayView<const uint8> Code, const FSHAHash& Hash)
    FComputeFenceRHIRef CreateComputeFence(const FName& Name)
    FGPUFenceRHIRef CreateGPUFence(const FName& Name)
    FStagingBufferRHIRef CreateStagingBuffer()
    FBoundShaderStateRHIRef CreateBoundShaderState(...)
    FGraphicsPipelineStateRHIRef CreateGraphicsPipelineState(const FGraphicsPipelineStateInitializer& Initializer)
    TRefCountPtr<FRHIComputePipelineState> CreateComputePipelineState(FRHIComputeShader* ComputeShader)
    FUniformBufferRHIRef CreateUniformBuffer(...)
    FIndexBufferRHIRef CreateAndLockIndexBuffer(uint32 Stride, uint32 Size, EBufferUsageFlags InUsage, ERHIAccess InResourceState, FRHIResourceCreateInfo& CreateInfo, void*& OutDataBuffer)
    FIndexBufferRHIRef CreateAndLockIndexBuffer(uint32 Stride, uint32 Size, uint32 InUsage, FRHIResourceCreateInfo& CreateInfo, void*& OutDataBuffer)
    
    // 頂點/索引接口.
    void* LockIndexBuffer(FRHIIndexBuffer* IndexBuffer, uint32 Offset, uint32 SizeRHI, EResourceLockMode LockMode);
    void UnlockIndexBuffer(FRHIIndexBuffer* IndexBuffer);
    void* LockStagingBuffer(FRHIStagingBuffer* StagingBuffer, FRHIGPUFence* Fence, uint32 Offset, uint32 SizeRHI);
    void UnlockStagingBuffer(FRHIStagingBuffer* StagingBuffer);
    FVertexBufferRHIRef CreateAndLockVertexBuffer(uint32 Size, EBufferUsageFlags InUsage, ...);
    FVertexBufferRHIRef CreateAndLockVertexBuffer(uint32 Size, uint32 InUsage, FRHIResourceCreateInfo& CreateInfo, void*& OutDataBuffer);
    void* LockVertexBuffer(FRHIVertexBuffer* VertexBuffer, uint32 Offset, uint32 SizeRHI, EResourceLockMode LockMode);
    void UnlockVertexBuffer(FRHIVertexBuffer* VertexBuffer);
    void CopyVertexBuffer(FRHIVertexBuffer* SourceBuffer, FRHIVertexBuffer* DestBuffer);
    void* LockStructuredBuffer(FRHIStructuredBuffer* StructuredBuffer, uint32 Offset, uint32 SizeRHI, EResourceLockMode LockMode);
    void UnlockStructuredBuffer(FRHIStructuredBuffer* StructuredBuffer);
    
    // UAV/SRV創建.
    FUnorderedAccessViewRHIRef CreateUnorderedAccessView(FRHIStructuredBuffer* StructuredBuffer, bool bUseUAVCounter, bool bAppendBuffer)
    FUnorderedAccessViewRHIRef CreateUnorderedAccessView(FRHITexture* Texture, uint32 MipLevel)
    FUnorderedAccessViewRHIRef CreateUnorderedAccessView(FRHITexture* Texture, uint32 MipLevel, uint8 Format)
    FUnorderedAccessViewRHIRef CreateUnorderedAccessView(FRHIVertexBuffer* VertexBuffer, uint8 Format)
    FUnorderedAccessViewRHIRef CreateUnorderedAccessView(FRHIIndexBuffer* IndexBuffer, uint8 Format)
    FShaderResourceViewRHIRef CreateShaderResourceView(FRHIStructuredBuffer* StructuredBuffer)
    FShaderResourceViewRHIRef CreateShaderResourceView(FRHIVertexBuffer* VertexBuffer, uint32 Stride, uint8 Format)
    FShaderResourceViewRHIRef CreateShaderResourceView(const FShaderResourceViewInitializer& Initializer)
    FShaderResourceViewRHIRef CreateShaderResourceView(FRHIIndexBuffer* Buffer)
        
    uint64 CalcTexture2DPlatformSize(...);
    uint64 CalcTexture3DPlatformSize(...);
    uint64 CalcTextureCubePlatformSize(...);
    
    // 紋理操作.
    void GetTextureMemoryStats(FTextureMemoryStats& OutStats);
    bool GetTextureMemoryVisualizeData(...);
    void CopySharedMips(FRHITexture2D* DestTexture2D, FRHITexture2D* SrcTexture2D);
    void TransferTexture(FRHITexture2D* Texture, FIntRect Rect, uint32 SrcGPUIndex, uint32 DestGPUIndex, bool PullData);
    void TransferTextures(const TArrayView<const FTransferTextureParams> Params);
    void GetResourceInfo(FRHITexture* Ref, FRHIResourceInfo& OutInfo);
    FShaderResourceViewRHIRef CreateShaderResourceView(FRHITexture* Texture, const FRHITextureSRVCreateInfo& CreateInfo);
    FShaderResourceViewRHIRef CreateShaderResourceView(FRHITexture* Texture, uint8 MipLevel);
    FShaderResourceViewRHIRef CreateShaderResourceView(FRHITexture* Texture, uint8 MipLevel, uint8 NumMipLevels, uint8 Format);
    FShaderResourceViewRHIRef CreateShaderResourceViewWriteMask(FRHITexture2D* Texture2DRHI);
    FShaderResourceViewRHIRef CreateShaderResourceViewFMask(FRHITexture2D* Texture2DRHI);
    uint32 ComputeMemorySize(FRHITexture* TextureRHI);
    FTexture2DRHIRef AsyncReallocateTexture2D(...);
    ETextureReallocationStatus FinalizeAsyncReallocateTexture2D(FRHITexture2D* Texture2D, bool bBlockUntilCompleted);
    ETextureReallocationStatus CancelAsyncReallocateTexture2D(FRHITexture2D* Texture2D, bool bBlockUntilCompleted);
    void* LockTexture2D(...);
    void UnlockTexture2D(FRHITexture2D* Texture, uint32 MipIndex, bool bLockWithinMiptail, bool bFlushRHIThread = true);
    void* LockTexture2DArray(...);
    void UnlockTexture2DArray(FRHITexture2DArray* Texture, uint32 TextureIndex, uint32 MipIndex, bool bLockWithinMiptail);
    void UpdateTexture2D(...);
    void UpdateFromBufferTexture2D(...);
    FUpdateTexture3DData BeginUpdateTexture3D(...);
    void EndUpdateTexture3D(FUpdateTexture3DData& UpdateData);
    void EndMultiUpdateTexture3D(TArray<FUpdateTexture3DData>& UpdateDataArray);
    void UpdateTexture3D(...);
    void* LockTextureCubeFace(...);
    void UnlockTextureCubeFace(FRHITextureCube* Texture, ...);

    // 讀取紋理表面數據.
    void ReadSurfaceData(FRHITexture* Texture, ...);
    void ReadSurfaceData(FRHITexture* Texture, ...);
    void MapStagingSurface(FRHITexture* Texture, void*& OutData, int32& OutWidth, int32& OutHeight);
    void MapStagingSurface(FRHITexture* Texture, ...);
    void UnmapStagingSurface(FRHITexture* Texture);
    void ReadSurfaceFloatData(FRHITexture* Texture, ...);
    void ReadSurfaceFloatData(FRHITexture* Texture, ...);
    void Read3DSurfaceFloatData(FRHITexture* Texture,...);
    
    // 渲染線程的資源狀態轉換.
    void AcquireTransientResource_RenderThread(FRHITexture* Texture);
    void DiscardTransientResource_RenderThread(FRHITexture* Texture);
    void AcquireTransientResource_RenderThread(FRHIVertexBuffer* Buffer);
    void DiscardTransientResource_RenderThread(FRHIVertexBuffer* Buffer);
    void AcquireTransientResource_RenderThread(FRHIStructuredBuffer* Buffer);
    void DiscardTransientResource_RenderThread(FRHIStructuredBuffer* Buffer);
   
    // 獲取渲染查詢結果.
    bool GetRenderQueryResult(FRHIRenderQuery* RenderQuery, ...);
    void PollRenderQueryResults();
    
    // 視口
    FViewportRHIRef CreateViewport(void* WindowHandle, ...);
    uint32 GetViewportNextPresentGPUIndex(FRHIViewport* Viewport);
    FTexture2DRHIRef GetViewportBackBuffer(FRHIViewport* Viewport);
    void AdvanceFrameForGetViewportBackBuffer(FRHIViewport* Viewport);
    void ResizeViewport(FRHIViewport* Viewport, ...);
    
    void AcquireThreadOwnership();
    void ReleaseThreadOwnership();
    
    // 提交命令並刷新到GPU.
    void SubmitCommandsAndFlushGPU();
    // 執行命令隊列.
    void ExecuteCommandList(FRHICommandList* CmdList);
    
    // 更新資源.
    void UpdateTextureReference(FRHITextureReference* TextureRef, FRHITexture* NewTexture);
    void UpdateRHIResources(FRHIResourceUpdateInfo* UpdateInfos, int32 Num, bool bNeedReleaseRefs);
    // 刷新資源.
    void FlushResources();
    
    // 幀更新.
    void Tick(float DeltaTime);
    // 阻塞直到GPU空閑. // 強制把當前的所有rhi中指令執行完畢,並且把commandbuffer發送給gpu,並且等待gpu執行完成,相當於一個強制同步到GPU的過程.
    void BlockUntilGPUIdle();
    
    // 暫停/開啟渲染.
    void SuspendRendering();
    void ResumeRendering();
    bool IsRenderingSuspended();
    
    // 壓縮/解壓數據.
    bool EnqueueDecompress(uint8_t* SrcBuffer, uint8_t* DestBuffer, int CompressedSize, void* ErrorCodeBuffer);
    bool EnqueueCompress(uint8_t* SrcBuffer, uint8_t* DestBuffer, int UnCompressedSize, void* ErrorCodeBuffer);
    
    // 其它接口.
    bool GetAvailableResolutions(FScreenResolutionArray& Resolutions, bool bIgnoreRefreshRate);
    void GetSupportedResolution(uint32& Width, uint32& Height);
    void VirtualTextureSetFirstMipInMemory(FRHITexture2D* Texture, uint32 FirstMip);
    void VirtualTextureSetFirstMipVisible(FRHITexture2D* Texture, uint32 FirstMip);

    // 獲取原生的數據.
    void* GetNativeDevice();
    void* GetNativeInstance();
    // 獲取立即模式的命令上下文.
    IRHICommandContext* GetDefaultContext();
    // 獲取命令上下文容器.
    IRHICommandContextContainer* GetCommandContextContainer(int32 Index, int32 Num);
    
    uint32 GetGPUFrameCycles();
};

// Type used inside RHI implementations to mark recursive use of a command list.
class FRHICommandList_RecursiveHazardous : public FRHICommandList
{
public:
    // Built from a command context; the GPU mask defaults to FRHIGPUMask::All().
    FRHICommandList_RecursiveHazardous(IRHICommandContext *Context, FRHIGPUMask InGPUMask = FRHIGPUMask::All());
};

// Utility class used internally by the RHI to make FRHICommandList_RecursiveHazardous safer to use.
// ContextType is the context type accepted by the constructor.
template <typename ContextType>
class TRHICommandList_RecursiveHazardous : public FRHICommandList_RecursiveHazardous
{
    // Command node that stores a callable of type LAMBDA.
    template <typename LAMBDA>
    struct TRHILambdaCommand final : public FRHICommandBase
    {
        // The stored callable.
        LAMBDA Lambda;

        TRHILambdaCommand(LAMBDA&& InLambda);
        // Overrides the FRHICommandBase entry point (declaration only; body not shown here).
        void ExecuteAndDestruct(FRHICommandListBase& CmdList, FRHICommandListDebugContext&) override final;
    };

public:
    // Built from a typed context; the GPU mask defaults to FRHIGPUMask::All().
    TRHICommandList_RecursiveHazardous(ContextType *Context, FRHIGPUMask GPUMask = FRHIGPUMask::All());

    // Accepts a callable to run against the context (declaration only; body not shown here).
    template <typename LAMBDA>
    void RunOnContext(LAMBDA&& Lambda);
};

 

FRHICommandListImmediate封裝了立即模式的圖形API接口,在UE渲染體系中被應用得非常廣泛。它額外定義了資源的操作、創建、更新、讀取和狀態轉換接口,也增加了線程同步和GPU同步的接口。

 

下面對FRHICommandList核心繼承體系來個UML圖總結一下:

 

RHI體系總覽

若拋開圖形API的具體實現細節和眾多的RHI具體子類,將RHI Context/CommandList/Command/Resource等的頂層概念匯總成UML關系圖,則是如下模樣:

 

下圖是在上面的基礎上細化了子類的UML:

 

RHI命令執行

FRHICommandListExecutor

FRHICommandListExecutor負責將Renderer層的RHI中間指令轉譯(或直接調用)到目標平台的圖形API,它在RHI體系中起着舉足輕重的作用,定義如下:

// Engine\Source\Runtime\RHI\Public\RHICommandList.h

// Owns the immediate command lists and drives execution of recorded RHI command lists.
class RHI_API FRHICommandListExecutor
{
public:
    enum
    {
        // Compile-time default for bypass mode, taken from the platform define.
        DefaultBypass = PLATFORM_RHITHREAD_DEFAULT_BYPASS
    };
    // Latches bypass from DefaultBypass and starts with parallel algorithms disabled.
    FRHICommandListExecutor()
        : bLatchedBypass(!!DefaultBypass)
        , bLatchedUseParallelAlgorithms(false)
    {
    }
    
    // Static accessor: returns the immediate command list.
    static inline FRHICommandListImmediate& GetImmediateCommandList();
    // Static accessor: returns the immediate async-compute command list.
    static inline FRHIAsyncComputeCommandListImmediate& GetImmediateAsyncComputeCommandList();

    // Execute a command list.
    void ExecuteList(FRHICommandListBase& CmdList);
    void ExecuteList(FRHICommandListImmediate& CmdList);
    void LatchBypass();

    // Wait on an RHI thread fence.
    static void WaitOnRHIThreadFence(FGraphEventRef& Fence);

    // Whether command generation is bypassed; if so, the target platform's graphics API is called directly.
    FORCEINLINE_DEBUGGABLE bool Bypass()
    {
#if CAN_TOGGLE_COMMAND_LIST_BYPASS
        return bLatchedBypass;
#else
        return !!DefaultBypass;
#endif
    }
    // Whether parallel algorithms should be used.
    FORCEINLINE_DEBUGGABLE bool UseParallelAlgorithms()
    {
#if CAN_TOGGLE_COMMAND_LIST_BYPASS
        return bLatchedUseParallelAlgorithms;
#else
        return  FApp::ShouldUseThreadingForPerformance() && !Bypass() && (GSupportsParallelRenderingTasksWithSeparateRHIThread || !IsRunningRHIInSeparateThread());
#endif
    }
    static void CheckNoOutstandingCmdLists();
    static bool IsRHIThreadActive();
    static bool IsRHIThreadCompletelyFlushed();

private:
    // Internal execution: decides where and how the list is executed.
    void ExecuteInner(FRHICommandListBase& CmdList);
    // Internal execution: actually walks the list and executes each command.
    static void ExecuteInner_DoExecute(FRHICommandListBase& CmdList);

    // Latched values returned by Bypass() / UseParallelAlgorithms() when toggling is compiled in.
    bool bLatchedBypass;
    bool bLatchedUseParallelAlgorithms;
    
    // Thread-safe counters.
    FThreadSafeCounter UIDCounter;
    FThreadSafeCounter OutstandingCmdListCount;
    
    // The immediate-mode command list.
    FRHICommandListImmediate CommandListImmediate;
    // The immediate-mode async-compute command list.
    FRHIAsyncComputeCommandListImmediate AsyncComputeCmdListImmediate;
};

下面是FRHICommandListExecutor部分重要接口的實現代碼:

// Engine\Source\Runtime\RHI\Private\RHICommandList.cpp

// Reports whether the RHI thread still has (or may soon have) work in flight.
// Finished task references are released as a side effect.
bool FRHICommandListExecutor::IsRHIThreadActive()
{
    // r.RHICmdAsyncRHIThreadDispatch
    const bool bDispatchIsAsync = CVarRHICmdAsyncRHIThreadDispatch.GetValueOnRenderThread() > 0;

    // Step 1: with async dispatch, an unfinished sub-list dispatch task counts as activity.
    if (bDispatchIsAsync && RenderThreadSublistDispatchTask.GetReference())
    {
        if (!RenderThreadSublistDispatchTask->IsComplete())
        {
            return true; // it might become active at any time
        }
        // The dispatch task is done: drop it; RHIThreadTask can now be inspected safely.
        RenderThreadSublistDispatchTask = nullptr;
    }

    // Step 2: release the RHI thread task if it has finished, then report what is left.
    const bool bRHITaskFinished = RHIThreadTask.GetReference() && RHIThreadTask->IsComplete();
    if (bRHITaskFinished)
    {
        RHIThreadTask = nullptr;
        PrevRHIThreadTask = nullptr;
    }
    return RHIThreadTask.GetReference() != nullptr;
}

// Reports whether the RHI thread is idle, the immediate command list is empty,
// and no sub-list dispatch task remains.
bool FRHICommandListExecutor::IsRHIThreadCompletelyFlushed()
{
    // Any live RHI work or any still-recorded immediate command means "not flushed".
    const bool bWorkPending = IsRHIThreadActive() || GetImmediateCommandList().HasCommands();
    if (bWorkPending)
    {
        return false;
    }

    // Release the dispatch task once it has completed.
    if (RenderThreadSublistDispatchTask.GetReference())
    {
        if (RenderThreadSublistDispatchTask->IsComplete())
        {
#if NEEDS_DEBUG_INFO_ON_PRESENT_HANG
            bRenderThreadSublistDispatchTaskClearedOnRT = IsInActualRenderingThread();
            bRenderThreadSublistDispatchTaskClearedOnGT = IsInGameThread();
#endif
            RenderThreadSublistDispatchTask = nullptr;
        }
    }

    // Flushed only if nothing is left to dispatch.
    return !RenderThreadSublistDispatchTask;
}

// Executes the immediate command list, timing the call under STAT_ImmedCmdListExecuteTime.
void FRHICommandListExecutor::ExecuteList(FRHICommandListImmediate& CmdList)
{
    // The cycle counter covers the whole body; nothing runs after ExecuteInner.
    SCOPE_CYCLE_COUNTER(STAT_ImmedCmdListExecuteTime);
    ExecuteInner(CmdList);
}

// Executes an arbitrary command list. On the rendering thread, commands already
// recorded on the immediate list are dispatched first (unless it is currently executing).
void FRHICommandListExecutor::ExecuteList(FRHICommandListBase& CmdList)
{
    const bool bFlushImmediateFirst = IsInRenderingThread() && !GetImmediateCommandList().IsExecuting();
    if (bFlushImmediateFirst)
    {
        // Dispatch the existing immediate commands before this list.
        GetImmediateCommandList().ImmediateFlush(EImmediateFlushType::DispatchToRHIThread);
    }

    ExecuteInner(CmdList);
}

// Executes CmdList, or hands it off to a task for execution.
//
// With a separate RHI thread and r.RHICmdUseThread enabled (called from the rendering
// thread, not the game thread), the recorded commands are swapped into a heap-allocated
// FRHICommandList and submitted through FDispatchRHIThreadTask or FExecuteRHIThreadTask;
// the function then returns without executing anything itself.
// In every other case it waits for the relevant outstanding tasks and executes the
// commands inline through ExecuteInner_DoExecute.
void FRHICommandListExecutor::ExecuteInner(FRHICommandListBase& CmdList)
{
    // Whether we are on the rendering thread.
    bool bIsInRenderingThread = IsInRenderingThread();
    // Whether we are on the game thread.
    bool bIsInGameThread = IsInGameThread();
    
    // A separate RHI thread is enabled.
    if (IsRunningRHIInSeparateThread())
    {
        bool bAsyncSubmit = false;
        ENamedThreads::Type RenderThread_Local = ENamedThreads::GetRenderThread_Local();
        if (bIsInRenderingThread)
        {
            if (!bIsInGameThread && !FTaskGraphInterface::Get().IsThreadProcessingTasks(RenderThread_Local))
            {
                // Drain everything queued on the local render-thread queue first.
                FTaskGraphInterface::Get().ProcessThreadUntilIdle(RenderThread_Local);
            }
            // r.RHICmdAsyncRHIThreadDispatch
            bAsyncSubmit = CVarRHICmdAsyncRHIThreadDispatch.GetValueOnRenderThread() > 0;
            // Release the sub-list dispatch task only once it has completed.
            if (RenderThreadSublistDispatchTask.GetReference() && RenderThreadSublistDispatchTask->IsComplete())
            {
                RenderThreadSublistDispatchTask = nullptr;
                if (bAsyncSubmit && RHIThreadTask.GetReference() && RHIThreadTask->IsComplete())
                {
                    RHIThreadTask = nullptr;
                    PrevRHIThreadTask = nullptr;
                }
            }
            // Release the RHI thread task if it has completed (non-async path).
            if (!bAsyncSubmit && RHIThreadTask.GetReference() && RHIThreadTask->IsComplete())
            {
                RHIThreadTask = nullptr;
                PrevRHIThreadTask = nullptr;
            }
        }
        
        if (CVarRHICmdUseThread.GetValueOnRenderThread() > 0 && bIsInRenderingThread && !bIsInGameThread) // r.RHICmdUseThread
        {
            // Take over the list's render-thread tasks as prerequisites, then swap the
            // recorded commands into a freshly allocated list that the task will own.
            FRHICommandList* SwapCmdList;
            FGraphEventArray Prereq;
            Exchange(Prereq, CmdList.RTTasks); 
            {
                QUICK_SCOPE_CYCLE_COUNTER(STAT_FRHICommandListExecutor_SwapCmdLists);
                SwapCmdList = new FRHICommandList(CmdList.GetGPUMask());

                static_assert(sizeof(FRHICommandList) == sizeof(FRHICommandListImmediate), "We are memswapping FRHICommandList and FRHICommandListImmediate; they need to be swappable.");
                SwapCmdList->ExchangeCmdList(CmdList);
                // Copy context and pass state back so CmdList can keep recording.
                CmdList.CopyContext(*SwapCmdList);
                CmdList.GPUMask = SwapCmdList->GPUMask;
                CmdList.InitialGPUMask = SwapCmdList->GPUMask;
                CmdList.PSOContext = SwapCmdList->PSOContext;
                CmdList.Data.bInsideRenderPass = SwapCmdList->Data.bInsideRenderPass;
                CmdList.Data.bInsideComputePass = SwapCmdList->Data.bInsideComputePass;
            }
            
            // Submit the task.
            QUICK_SCOPE_CYCLE_COUNTER(STAT_FRHICommandListExecutor_SubmitTasks);

            // Create an FDispatchRHIThreadTask with AllOutstandingTasks and
            // RenderThreadSublistDispatchTask as its prerequisites.
            if (AllOutstandingTasks.Num() || RenderThreadSublistDispatchTask.GetReference())
            {
                Prereq.Append(AllOutstandingTasks);
                AllOutstandingTasks.Reset();
                if (RenderThreadSublistDispatchTask.GetReference())
                {
                    Prereq.Add(RenderThreadSublistDispatchTask);
                }
                RenderThreadSublistDispatchTask = TGraphTask<FDispatchRHIThreadTask>::CreateTask(&Prereq, ENamedThreads::GetRenderThread()).ConstructAndDispatchWhenReady(SwapCmdList, bAsyncSubmit);
            }
            // Otherwise create an FExecuteRHIThreadTask with RHIThreadTask as its prerequisite.
            else
            {
                if (RHIThreadTask.GetReference())
                {
                    Prereq.Add(RHIThreadTask);
                }
                PrevRHIThreadTask = RHIThreadTask;
                RHIThreadTask = TGraphTask<FExecuteRHIThreadTask>::CreateTask(&Prereq, ENamedThreads::GetRenderThread()).ConstructAndDispatchWhenReady(SwapCmdList);
            }
            
            if (CVarRHICmdForceRHIFlush.GetValueOnRenderThread() > 0 ) // r.RHICmdForceRHIFlush
            {
                // Waiting here while the local render-thread queue is being processed would deadlock.
                if (FTaskGraphInterface::Get().IsThreadProcessingTasks(RenderThread_Local))
                {
                    // this is a deadlock. RT tasks must be done by now or they won't be done. We could add a third queue...
                    UE_LOG(LogRHI, Fatal, TEXT("Deadlock in FRHICommandListExecutor::ExecuteInner 2."));
                }
                
                // Wait for RenderThreadSublistDispatchTask to complete.
                if (RenderThreadSublistDispatchTask.GetReference())
                {
                    FTaskGraphInterface::Get().WaitUntilTaskCompletes(RenderThreadSublistDispatchTask, RenderThread_Local);
                    RenderThreadSublistDispatchTask = nullptr;
                }
                
                // Wait for RHIThreadTask to complete.
                while (RHIThreadTask.GetReference())
                {
                    FTaskGraphInterface::Get().WaitUntilTaskCompletes(RHIThreadTask, RenderThread_Local);
                    if (RHIThreadTask.GetReference() && RHIThreadTask->IsComplete())
                    {
                        RHIThreadTask = nullptr;
                        PrevRHIThreadTask = nullptr;
                    }
                }
            }
            
            return;
        }
        
        // Inline path: wait for RTTasks / RenderThreadSublistDispatchTask / RHIThreadTask to finish.
        if (bIsInRenderingThread)
        {
            if (CmdList.RTTasks.Num())
            {
                if (FTaskGraphInterface::Get().IsThreadProcessingTasks(RenderThread_Local))
                {
                    UE_LOG(LogRHI, Fatal, TEXT("Deadlock in FRHICommandListExecutor::ExecuteInner (RTTasks)."));
                }
                FTaskGraphInterface::Get().WaitUntilTasksComplete(CmdList.RTTasks, RenderThread_Local);
                CmdList.RTTasks.Reset();

            }
            if (RenderThreadSublistDispatchTask.GetReference())
            {
                if (FTaskGraphInterface::Get().IsThreadProcessingTasks(RenderThread_Local))
                {
                    // this is a deadlock. RT tasks must be done by now or they won't be done. We could add a third queue...
                    UE_LOG(LogRHI, Fatal, TEXT("Deadlock in FRHICommandListExecutor::ExecuteInner (RenderThreadSublistDispatchTask)."));
                }
                FTaskGraphInterface::Get().WaitUntilTaskCompletes(RenderThreadSublistDispatchTask, RenderThread_Local);
#if NEEDS_DEBUG_INFO_ON_PRESENT_HANG
                bRenderThreadSublistDispatchTaskClearedOnRT = IsInActualRenderingThread();
                bRenderThreadSublistDispatchTaskClearedOnGT = bIsInGameThread;
#endif
                RenderThreadSublistDispatchTask = nullptr;
            }
            while (RHIThreadTask.GetReference())
            {
                if (FTaskGraphInterface::Get().IsThreadProcessingTasks(RenderThread_Local))
                {
                    // this is a deadlock. RT tasks must be done by now or they won't be done. We could add a third queue...
                    UE_LOG(LogRHI, Fatal, TEXT("Deadlock in FRHICommandListExecutor::ExecuteInner (RHIThreadTask)."));
                }
                FTaskGraphInterface::Get().WaitUntilTaskCompletes(RHIThreadTask, RenderThread_Local);
                if (RHIThreadTask.GetReference() && RHIThreadTask->IsComplete())
                {
                    RHIThreadTask = nullptr;
                    PrevRHIThreadTask = nullptr;
                }
            }
        }
    }
    // No separate RHI thread.
    else
    {
        if (bIsInRenderingThread && CmdList.RTTasks.Num())
        {
            ENamedThreads::Type RenderThread_Local = ENamedThreads::GetRenderThread_Local();
            if (FTaskGraphInterface::Get().IsThreadProcessingTasks(RenderThread_Local))
            {
                // this is a deadlock. RT tasks must be done by now or they won't be done. We could add a third queue...
                UE_LOG(LogRHI, Fatal, TEXT("Deadlock in FRHICommandListExecutor::ExecuteInner (RTTasks)."));
            }
            FTaskGraphInterface::Get().WaitUntilTasksComplete(CmdList.RTTasks, RenderThread_Local);
            CmdList.RTTasks.Reset();
        }
    }

    // Execute the commands inline.
    ExecuteInner_DoExecute(CmdList);
}

// Walks every command recorded in CmdList, executing and destructing each one in
// order, then resets the list. The STATS and ENABLE_STATNAMEDEVENTS variants group
// consecutive commands that share the same GCurrentExecuteStat under one cycle counter.
void FRHICommandListExecutor::ExecuteInner_DoExecute(FRHICommandListBase& CmdList)
{
    FScopeCycleCounter ScopeOuter(CmdList.ExecuteStat);

    CmdList.bExecuting = true;
    check(CmdList.Context || CmdList.ComputeContext);

    FMemMark Mark(FMemStack::Get());

    // Apply the multi-GPU mask to the contexts.
#if WITH_MGPU
    if (CmdList.Context != nullptr)
    {
        CmdList.Context->RHISetGPUMask(CmdList.InitialGPUMask);
    }
    // Skip the compute context when it is the same object as the graphics context.
    if (CmdList.ComputeContext != nullptr && CmdList.ComputeContext != CmdList.Context)
    {
        CmdList.ComputeContext->RHISetGPUMask(CmdList.InitialGPUMask);
    }
#endif

    FRHICommandListDebugContext DebugContext;
    FRHICommandListIterator Iter(CmdList);
    // Variant that collects stats.
#if STATS
    // r.RHICmdCollectRHIThreadStatsFromHighLevel
    bool bDoStats =  CVarRHICmdCollectRHIThreadStatsFromHighLevel.GetValueOnRenderThread() > 0 && FThreadStats::IsCollectingData() && (IsInRenderingThread() || IsInRHIThread());
    if (bDoStats)
    {
        while (Iter.HasCommandsLeft())
        {
            // Open one scope per run of commands that share the current stat.
            TStatIdData const* Stat = GCurrentExecuteStat.GetRawPointer();
            FScopeCycleCounter Scope(GCurrentExecuteStat);
            while (Iter.HasCommandsLeft() && Stat == GCurrentExecuteStat.GetRawPointer())
            {
                FRHICommandBase* Cmd = Iter.NextCommand();
                Cmd->ExecuteAndDestruct(CmdList, DebugContext);
            }
        }
    }
    else
    // Variant that emits named events.
#elif ENABLE_STATNAMEDEVENTS
    // r.RHICmdCollectRHIThreadStatsFromHighLevel
    bool bDoStats = CVarRHICmdCollectRHIThreadStatsFromHighLevel.GetValueOnRenderThread() > 0 && GCycleStatsShouldEmitNamedEvents && (IsInRenderingThread() || IsInRHIThread());
    if (bDoStats)
    {
        while (Iter.HasCommandsLeft())
        {
            // Open one scope per run of commands that share the current stat string.
            PROFILER_CHAR const* Stat = GCurrentExecuteStat.StatString;
            FScopeCycleCounter Scope(GCurrentExecuteStat);
            while (Iter.HasCommandsLeft() && Stat == GCurrentExecuteStat.StatString)
            {
                FRHICommandBase* Cmd = Iter.NextCommand();
                Cmd->ExecuteAndDestruct(CmdList, DebugContext);
            }
        }
    }
    else
#endif
    // Variant without stats or named events.
    {
        // Loop over all commands, executing and destructing each.
        while (Iter.HasCommandsLeft())
        {
            FRHICommandBase* Cmd = Iter.NextCommand();
            GCurrentCommand = Cmd;
            Cmd->ExecuteAndDestruct(CmdList, DebugContext);
        }
    }
    // Reset the command list.
    CmdList.Reset();
}

由此可知,FRHICommandListExecutor處理了復雜的各類任務,並且要判定任務的前序、等待、依賴關系,還有各個線程之間的依賴和等待關系。上述代碼中涉及到了兩個重要的任務類型:

// Task that dispatches a command list to the RHI thread by creating an FExecuteRHIThreadTask for it.
class FDispatchRHIThreadTask
{
    FRHICommandListBase* RHICmdList; // The command list to dispatch.
    bool bRHIThread; // Whether to run this dispatch on the RHI thread (see GetDesiredThread).

public:
    FDispatchRHIThreadTask(FRHICommandListBase* InRHICmdList, bool bInRHIThread)
        : RHICmdList(InRHICmdList)
        , bRHIThread(bInRHIThread)
    {        
    }
    FORCEINLINE TStatId GetStatId() const;
    static ESubsequentsMode::Type GetSubsequentsMode() { return ESubsequentsMode::TrackSubsequents; }

    // With bRHIThread set: ENamedThreads::RHIThread when the RHI runs in a dedicated thread,
    // otherwise CPrio_RHIThreadOnTaskThreads.Get(). Without it: the local render-thread queue.
    ENamedThreads::Type GetDesiredThread()
    {
        return bRHIThread ? (IsRunningRHIInDedicatedThread() ? ENamedThreads::RHIThread : CPrio_RHIThreadOnTaskThreads.Get()) : ENamedThreads::GetRenderThread_Local();
    }
    
    void DoTask(ENamedThreads::Type CurrentThread, const FGraphEventRef& MyCompletionGraphEvent)
    {
        // The current RHIThreadTask (if any) is the prerequisite of the new task.
        FGraphEventArray Prereq;
        if (RHIThreadTask.GetReference())
        {
            Prereq.Add(RHIThreadTask);
        }
        // Remember the current RHIThreadTask as PrevRHIThreadTask.
        PrevRHIThreadTask = RHIThreadTask;
        // Create the FExecuteRHIThreadTask and store it in RHIThreadTask.
        RHIThreadTask = TGraphTask<FExecuteRHIThreadTask>::CreateTask(&Prereq, CurrentThread).ConstructAndDispatchWhenReady(RHICmdList);
    }
};

// Task that executes a command list and then deletes it.
class FExecuteRHIThreadTask
{
    // The command list to execute; deleted in DoTask.
    FRHICommandListBase* RHICmdList;

public:
    FExecuteRHIThreadTask(FRHICommandListBase* InRHICmdList)
        : RHICmdList(InRHICmdList)
    {
    }

    FORCEINLINE TStatId GetStatId() const;
    static ESubsequentsMode::Type GetSubsequentsMode() { return ESubsequentsMode::TrackSubsequents; }

    // ENamedThreads::RHIThread when the RHI runs in a dedicated thread,
    // otherwise the thread given by CPrio_RHIThreadOnTaskThreads.Get().
    ENamedThreads::Type GetDesiredThread()
    {
        return IsRunningRHIInDedicatedThread() ? ENamedThreads::RHIThread : CPrio_RHIThreadOnTaskThreads.Get();
    }
    
    void DoTask(ENamedThreads::Type CurrentThread, const FGraphEventRef& MyCompletionGraphEvent)
    {
        // When the RHI runs on a task thread, publish the current thread id in the global GRHIThreadId.
        if (IsRunningRHIInTaskThread())
        {
            GRHIThreadId = FPlatformTLS::GetCurrentThreadId();
        }
        
        // Execute the RHI command list.
        {
            // Hold the GRHIThreadOnTasksCritical lock for the whole execution.
            FScopeLock Lock(&GRHIThreadOnTasksCritical);
            
            FRHICommandListExecutor::ExecuteInner_DoExecute(*RHICmdList);
            delete RHICmdList;
        }
        
        // Clear the global GRHIThreadId again.
        if (IsRunningRHIInTaskThread())
        {
            GRHIThreadId = 0;
        }
    }
};

由上可知,在派發和轉譯命令隊列時,可能在專用的RHI線程執行,也可能在渲染線程或工作線程執行。

 

GRHICommandList

GRHICommandList乍一看以為是FRHICommandListBase的實例,但實際類型是FRHICommandListExecutor。它的聲明和實現如下:

// Engine\Source\Runtime\RHI\Public\RHICommandList.h
// Declaration of the global executor instance (note: an executor, not a command list).
extern RHI_API FRHICommandListExecutor GRHICommandList;

// Engine\Source\Runtime\RHI\Private\RHICommandList.cpp
// Definition of the global executor instance.
RHI_API FRHICommandListExecutor GRHICommandList;

有關GRHICommandList的全局或靜態接口如下:

// Returns the immediate command list owned by the global GRHICommandList executor.
FRHICommandListImmediate& FRHICommandListExecutor::GetImmediateCommandList()
{
    return GRHICommandList.CommandListImmediate;
}

// Returns the immediate async-compute command list owned by the global GRHICommandList executor.
FRHIAsyncComputeCommandListImmediate& FRHICommandListExecutor::GetImmediateAsyncComputeCommandList()
{
    return GRHICommandList.AsyncComputeCmdListImmediate;
}

 

在UE的渲染模塊和RHI模塊中擁有大量的GRHICommandList使用案例,取其中之一:

// Engine\Source\Runtime\Renderer\Private\DeferredShadingRenderer.cpp

void ServiceLocalQueue()
{
    FTaskGraphInterface::Get().ProcessThreadUntilIdle(ENamedThreads::GetRenderThread_Local());

    if (IsRunningRHIInSeparateThread())
    {
        FRHICommandListExecutor::GetImmediateCommandList().ImmediateFlush(EImmediateFlushType::DispatchToRHIThread);
    }
}

在RHI命令隊列模塊,除了涉及GRHICommandList,還涉及諸多全局的任務變量:

// Engine\Source\Runtime\RHI\Private\RHICommandList.cpp

// Events appended as prerequisites of the next FDispatchRHIThreadTask (see ExecuteInner).
static FGraphEventArray AllOutstandingTasks;
// Events that FRHICommandListBase::WaitForTasks waits on and then resets.
static FGraphEventArray WaitOutstandingTasks;
// The most recently created FExecuteRHIThreadTask.
static FGraphEventRef RHIThreadTask;
// The RHIThreadTask value that was current before the latest one was created.
static FGraphEventRef PrevRHIThreadTask;
// The most recently created FDispatchRHIThreadTask.
static FGraphEventRef RenderThreadSublistDispatchTask;

它們的創建或添加任務的代碼如下:

// Abridged excerpt: only the places that add events to AllOutstandingTasks /
// WaitOutstandingTasks are shown; "(......)" marks elided code.
void FRHICommandListBase::QueueParallelAsyncCommandListSubmit(FGraphEventRef* AnyThreadCompletionEvents, ...)
{
    (......)
    
    if (Num && IsRunningRHIInSeparateThread())
    {
        (......)
            
        // Create the FParallelTranslateSetupCommandList task.
        FGraphEventRef TranslateSetupCompletionEvent = TGraphTask<FParallelTranslateSetupCommandList>::CreateTask(&Prereq, ENamedThreads::GetRenderThread()).ConstructAndDispatchWhenReady(CmdList, &RHICmdLists[0], Num, bIsPrepass);
        QueueCommandListSubmit(CmdList);
        // Track it in AllOutstandingTasks.
        AllOutstandingTasks.Add(TranslateSetupCompletionEvent);
        
        (......)
        
        FGraphEventArray Prereq;
        FRHICommandListBase** RHICmdLists = (FRHICommandListBase**)Alloc(sizeof(FRHICommandListBase*) * (1 + Last - Start), alignof(FRHICommandListBase*));
        // Register every valid external event in AnyThreadCompletionEvents with the matching lists.
        for (int32 Index = Start; Index <= Last; Index++)
        {
            FGraphEventRef& AnyThreadCompletionEvent = AnyThreadCompletionEvents[Index];
            FRHICommandList* CmdList = CmdLists[Index];
            RHICmdLists[Index - Start] = CmdList;
            if (AnyThreadCompletionEvent.GetReference())
            {
                Prereq.Add(AnyThreadCompletionEvent);
                AllOutstandingTasks.Add(AnyThreadCompletionEvent);
                WaitOutstandingTasks.Add(AnyThreadCompletionEvent);
            }
        }
        
        (......)
        
        // Create the parallel translate task FParallelTranslateCommandList.
        FGraphEventRef TranslateCompletionEvent = TGraphTask<FParallelTranslateCommandList>::CreateTask(&Prereq, ENamedThreads::GetRenderThread()).ConstructAndDispatchWhenReady(&RHICmdLists[0], 1 + Last - Start, ContextContainer, bIsPrepass);
        AllOutstandingTasks.Add(TranslateCompletionEvent);
        
        (......)
    }
}
    
// Abridged excerpt ("(......)" marks elided code): shows how the external completion event is tracked.
void FRHICommandListBase::QueueAsyncCommandListSubmit(FGraphEventRef& AnyThreadCompletionEvent, class FRHICommandList* CmdList)
{
    (......)
    
    // Track the external event AnyThreadCompletionEvent, if one was supplied.
    if (AnyThreadCompletionEvent.GetReference())
    {
        // Added to AllOutstandingTasks only when the RHI runs in a separate thread.
        if (IsRunningRHIInSeparateThread())
        {
            AllOutstandingTasks.Add(AnyThreadCompletionEvent);
        }
        // Always added to WaitOutstandingTasks.
        WaitOutstandingTasks.Add(AnyThreadCompletionEvent);
    }
    
    (......)
}
    
// Abridged excerpt ("(......)" marks elided code): where RHIThreadTask is created.
class FDispatchRHIThreadTask
{
    void DoTask(ENamedThreads::Type CurrentThread, const FGraphEventRef& MyCompletionGraphEvent)
    {
        (......)
        
        // Create the RHI thread task FExecuteRHIThreadTask, keeping the previous one in PrevRHIThreadTask.
        PrevRHIThreadTask = RHIThreadTask;
        RHIThreadTask = TGraphTask<FExecuteRHIThreadTask>::CreateTask(&Prereq, CurrentThread).ConstructAndDispatchWhenReady(RHICmdList);
    }
};
    
// Abridged excerpt ("(......)" marks elided code): where the parallel translate task is created.
class FParallelTranslateSetupCommandList
{
    void DoTask(ENamedThreads::Type CurrentThread, const FGraphEventRef& MyCompletionGraphEvent)
    {
        (......)

        // Create the parallel translate task FParallelTranslateCommandList.
        FGraphEventRef TranslateCompletionEvent = TGraphTask<FParallelTranslateCommandList>::CreateTask(nullptr, ENamedThreads::GetRenderThread()).ConstructAndDispatchWhenReady(&RHICmdLists[Start], 1 + Last - Start, ContextContainer, bIsPrepass);
        // This task does not count as complete until the translate task has finished.
        MyCompletionGraphEvent->DontCompleteUntil(TranslateCompletionEvent);
        // Record an FRHICommandWaitForAndSubmitSubListParallel command on RHICmdList to submit the work;
        // per the original note, the event eventually ends up in AllOutstandingTasks and WaitOutstandingTasks.
        ALLOC_COMMAND_CL(*RHICmdList, FRHICommandWaitForAndSubmitSubListParallel)(TranslateCompletionEvent, ContextContainer, EffectiveThreads, ThreadIndex++);
    }
};
    
// Abridged excerpt ("(......)" marks elided code): where the dispatch and execute tasks are created.
void FRHICommandListExecutor::ExecuteInner(FRHICommandListBase& CmdList)
{
    (......)
    
    if (IsRunningRHIInSeparateThread())
    {
        (......)
        
        if (AllOutstandingTasks.Num() || RenderThreadSublistDispatchTask.GetReference())
        {
            (......)
            // Create the render-thread sub-list dispatch (submit) task FDispatchRHIThreadTask.
            RenderThreadSublistDispatchTask = TGraphTask<FDispatchRHIThreadTask>::CreateTask(&Prereq, ENamedThreads::GetRenderThread()).ConstructAndDispatchWhenReady(SwapCmdList, bAsyncSubmit);
        }
        else
        {
            (......)
            PrevRHIThreadTask = RHIThreadTask;
            // Create the FExecuteRHIThreadTask that executes (translates) the swapped command list.
            RHIThreadTask = TGraphTask<FExecuteRHIThreadTask>::CreateTask(&Prereq, ENamedThreads::GetRenderThread()).ConstructAndDispatchWhenReady(SwapCmdList);
        }
        
        (......)
    }
}

總結一下這些任務變量的作用:

任務變量 執行線程 描述
AllOutstandingTasks 渲染、RHI、工作 所有在處理或待處理的任務列表。類型是FParallelTranslateSetupCommandList、FParallelTranslateCommandList。
WaitOutstandingTasks 渲染、RHI、工作 待處理的任務列表。類型是FParallelTranslateSetupCommandList、FParallelTranslateCommandList。
RHIThreadTask RHI、工作 正在處理的RHI線程任務。類型是FExecuteRHIThreadTask。
PrevRHIThreadTask RHI、工作 上一次處理的RHIThreadTask。類型是FExecuteRHIThreadTask。
RenderThreadSublistDispatchTask 渲染、RHI、工作 正在派發(提交)的任務。類型是FDispatchRHIThreadTask。注:與並行渲染AllOutstandingTasks、WaitOutstandingTasks有關

 

D3D11命令執行

本節將研究UE4.26在PC平台的通用RHI及D3D11命令運行過程和機制。由於UE4.26在PC平台默認的RHI是D3D11,並且關鍵的幾個控制台變量的默認值如下:

 

也就是說開啟了命令跳過模式,並且禁用了RHI線程。在此情況下,FRHICommandList的某個接口被調用時,不會生成單獨的FRHICommand,而是直接調用Context的方法。以FRHICommandList::DrawPrimitive為例:

// Abridged excerpt of FRHICommandList showing how DrawPrimitive behaves in bypass mode.
class RHI_API FRHICommandList : public FRHIComputeCommandList
{
    // Issues a draw: directly on the context in bypass mode, otherwise as a recorded command.
    void DrawPrimitive(uint32 BaseVertexIndex, uint32 NumPrimitives, uint32 NumInstances)
    {
        // Bypass is 1 by default, so this branch is taken.
        if (Bypass())
        {
            // Call the matching method of the graphics-API context directly.
            GetContext().RHIDrawPrimitive(BaseVertexIndex, NumPrimitives, NumInstances);
            return;
        }
        
        // Allocate a standalone FRHICommandDrawPrimitive command.
        ALLOC_COMMAND(FRHICommandDrawPrimitive)(BaseVertexIndex, NumPrimitives, NumInstances);

        // The macro above expands to:
        // new(AllocCommand(sizeof(FRHICommandDrawPrimitive), alignof(FRHICommandDrawPrimitive))) FRHICommandDrawPrimitive(
        //        BaseVertexIndex, NumPrimitives, NumInstances);
    }
};

因此,在PC的默認圖形API(D3D11)下,r.RHICmdBypass 1且r.RHIThread.Enable 0,FRHICommandList將直接調用圖形API的上下文的接口,相當於同步調用圖形API,此時的圖形API運行於渲染線程(如果開啟)。

接着將r.RHICmdBypass設為0,但保持r.RHIThread.Enable為0,此時不再直接調用Context的方法,而是生成一條條單獨的FRHICommand,然后由FRHICommandList相關的對象執行。還是以FRHICommandList::DrawPrimitive為例,相關代碼如下所示:

// Abridged excerpt of FRHICommandList showing how a command is recorded when bypass is off.
class RHI_API FRHICommandList : public FRHIComputeCommandList
{
    // Issues a draw: directly on the context in bypass mode, otherwise as a recorded command.
    void DrawPrimitive(uint32 BaseVertexIndex, uint32 NumPrimitives, uint32 NumInstances)
    {
        // Taken only in bypass mode; with r.RHICmdBypass set to 0 execution falls through.
        if (Bypass())
        {
            // Call the matching method of the graphics-API context directly.
            GetContext().RHIDrawPrimitive(BaseVertexIndex, NumPrimitives, NumInstances);
            return;
        }
        
        // Allocate a standalone FRHICommandDrawPrimitive command.
        // The ALLOC_COMMAND macro calls AllocCommand.
        ALLOC_COMMAND(FRHICommandDrawPrimitive)(BaseVertexIndex, NumPrimitives, NumInstances);
    }
    
    // Typed convenience overload: allocates storage sized and aligned for TCmd.
    template <typename TCmd>
    void* AllocCommand()
    {
        return AllocCommand(sizeof(TCmd), alignof(TCmd));
    }
    
    // Allocates AllocSize bytes from MemManager and appends the block to the command linked list.
    void* AllocCommand(int32 AllocSize, int32 Alignment)
    {
        FRHICommandBase* Result = (FRHICommandBase*) MemManager.Alloc(AllocSize, Alignment);
        ++NumCommands;
        // CommandLink points at the previous node's Next slot; link the new node there.
        *CommandLink = Result;
        // Advance CommandLink to the new node's Next slot.
        CommandLink = &Result->Next;
        return Result;
    }
};

利用ALLOC_COMMAND宏分配的命令實例會進入FRHICommandListBase的命令鏈表,但此時並未執行,而是等待其它合適的時機執行,例如在FRHICommandListImmediate::ImmediateFlush。下面是執行FRHICommandList的調用堆棧:

 

由調用堆棧可以得知,在此情況下,命令執行的過程變得復雜起來,多了很多中間執行步驟。還是以FRHICommandList::DrawPrimitive為例,調用流程示意圖如下:

 

上圖使用了宏INTERNAL_DECORATOR,它和相關宏的定義如下:

// Engine\Source\Runtime\RHI\Public\RHICommandListCommandExecutes.inl

// Expands to a call of Method on the command list's context.
#define INTERNAL_DECORATOR(Method) CmdList.GetContext().Method
// Expands to a call of Method on the command list's compute context.
#define INTERNAL_DECORATOR_COMPUTE(Method) CmdList.GetComputeContext().Method

相當於通過宏來調用CommandList的Context接口。

在RHI線程禁用(r.RHIThread.Enable 0)的情況下,以上的調用在渲染線程執行:

 

接下來將r.RHIThread.Enable設為1,以開啟RHI線程。此時運行命令的線程變成了RHI線程:

 

並且調用堆棧是從TaskGraph的RHI線程發起任務:

 

此時,命令執行的流程圖如下:

上面流程圖中,方角表示在渲染線程執行,而圓角在RHI線程執行(綠框中)。

 

開啟RHI線程后,將出現它的統計數據:

左:未開啟RHI線程的統計數據;右:開啟RHI線程后的統計數據。

 

下面繪制出開啟或關閉Bypass和RHI線程的流程圖(以調用D3D11的DrawPrimitive為例):

上面流程圖中,方角表示在渲染線程中執行,圓角表示在RHI線程中執行(綠框中)。

 

ImmediateFlush

在FDynamicRHI中,提及了刷新類型(FlushType),是指EImmediateFlushType定義的類型:

// Engine\Source\Runtime\RHI\Public\RHICommandList.h

// Flush levels accepted by FRHICommandListImmediate::ImmediateFlush.
namespace EImmediateFlushType
{
    enum Type
    { 
        WaitForOutstandingTasksOnly = 0, // Only wait for the outstanding tasks to complete.
        DispatchToRHIThread,             // Dispatch to the RHI thread. Note (original author): creates an FExecuteRHIThreadTask and posts it to the RHIThread task-graph queue.
        WaitForDispatchToRHIThread,      // Dispatch and wait until the dispatch to the RHI thread is done.
        FlushRHIThread,                  // Flush the RHI thread.
        FlushRHIThreadFlushResources,    // Flush the RHI thread and resources.
        FlushRHIThreadFlushResourcesFlushDeferredDeletes // Flush the RHI thread, resources and deferred deletes.
    };
};

EImmediateFlushType中各個值的區別在FRHICommandListImmediate::ImmediateFlush的實現代碼中體現出來:

// Engine\Source\Runtime\RHI\Public\RHICommandList.inl

// Flushes the immediate command list to the level requested by FlushType.
// Each level does the work of the previous one plus additional waiting or flushing.
void FRHICommandListImmediate::ImmediateFlush(EImmediateFlushType::Type FlushType)
{
    switch (FlushType)
    {
    // Only wait for outstanding tasks.
    case EImmediateFlushType::WaitForOutstandingTasksOnly:
        {
            WaitForTasks();
        }
        break;
    // Dispatch to the RHI thread (execute the command list) if anything was recorded.
    case EImmediateFlushType::DispatchToRHIThread:
        {
            if (HasCommands())
            {
                GRHICommandList.ExecuteList(*this);
            }
        }
        break;
    // Dispatch, then wait for the dispatch to finish.
    case EImmediateFlushType::WaitForDispatchToRHIThread:
        {
            if (HasCommands())
            {
                GRHICommandList.ExecuteList(*this);
            }
            WaitForDispatch();
        }
        break;
    // Flush the RHI thread.
    case EImmediateFlushType::FlushRHIThread:
        {
            // Dispatch and wait for the dispatch to finish.
            if (HasCommands())
            {
                GRHICommandList.ExecuteList(*this);
            }
            WaitForDispatch();
            
            // Wait for the RHI thread tasks (only when the RHI runs in a separate thread).
            if (IsRunningRHIInSeparateThread())
            {
                WaitForRHIThreadTasks();
            }
            
            // Wait on, then reset, the outstanding task list.
            WaitForTasks(true);
        }
        break;
    // Flush the RHI thread, then flush resources (and optionally deferred deletes).
    case EImmediateFlushType::FlushRHIThreadFlushResources:
    case EImmediateFlushType::FlushRHIThreadFlushResourcesFlushDeferredDeletes:
        {
            if (HasCommands())
            {
                GRHICommandList.ExecuteList(*this);
            }
            WaitForDispatch();
            WaitForRHIThreadTasks();
            WaitForTasks(true);
            
            // Flush the resources held by the pipeline state cache.
            PipelineStateCache::FlushResources();
            // Flush resources pending deletion; the argument is true only for the FlushDeferredDeletes level.
            FRHIResource::FlushPendingDeletes(FlushType == EImmediateFlushType::FlushRHIThreadFlushResourcesFlushDeferredDeletes);
        }
        break;
    }
}

上面代碼中涉及到了若干種處理和等待任務的接口,它們的實現如下:

// 等待任務完成.
void FRHICommandListBase::WaitForTasks(bool bKnownToBeComplete)
{
    if (WaitOutstandingTasks.Num())
    {
        // 檢測是否存在未完成的等待任務.
        bool bAny = false;
        for (int32 Index = 0; Index < WaitOutstandingTasks.Num(); Index++)
        {
            if (!WaitOutstandingTasks[Index]->IsComplete())
            {
                bAny = true;
                break;
            }
        }
        // 存在就利用TaskGraph的接口開啟線程等待.
        if (bAny)
        {
            ENamedThreads::Type RenderThread_Local = ENamedThreads::GetRenderThread_Local();
            FTaskGraphInterface::Get().WaitUntilTasksComplete(WaitOutstandingTasks, RenderThread_Local);
        }
        // 重置等待任務列表.
        WaitOutstandingTasks.Reset();
    }
}

// 等待渲染線程派發完成.
void FRHICommandListBase::WaitForDispatch()
{
    // 如果RenderThreadSublistDispatchTask已完成, 則置空.
    if (RenderThreadSublistDispatchTask.GetReference() && RenderThreadSublistDispatchTask->IsComplete())
    {
        RenderThreadSublistDispatchTask = nullptr;
    }
    
    // RenderThreadSublistDispatchTask有未完成的任務.
    while (RenderThreadSublistDispatchTask.GetReference())
    {
        ENamedThreads::Type RenderThread_Local = ENamedThreads::GetRenderThread_Local();
        FTaskGraphInterface::Get().WaitUntilTaskCompletes(RenderThreadSublistDispatchTask, RenderThread_Local);
        if (RenderThreadSublistDispatchTask.GetReference() && RenderThreadSublistDispatchTask->IsComplete())
        {
            RenderThreadSublistDispatchTask = nullptr;
        }
    }
}

// 等待RHI線程任務完成.
void FRHICommandListBase::WaitForRHIThreadTasks()
{
    bool bAsyncSubmit = CVarRHICmdAsyncRHIThreadDispatch.GetValueOnRenderThread() > 0; //r.RHICmdAsyncRHIThreadDispatch
    ENamedThreads::Type RenderThread_Local = ENamedThreads::GetRenderThread_Local();
    
    // 相當於執行FRHICommandListBase::WaitForDispatch()
    if (bAsyncSubmit)
    {
        if (RenderThreadSublistDispatchTask.GetReference() && RenderThreadSublistDispatchTask->IsComplete())
        {
            RenderThreadSublistDispatchTask = nullptr;
        }
        while (RenderThreadSublistDispatchTask.GetReference())
        {
            if (FTaskGraphInterface::Get().IsThreadProcessingTasks(RenderThread_Local))
            {
                while (!RenderThreadSublistDispatchTask->IsComplete())
                {
                    FPlatformProcess::SleepNoStats(0);
                }
            }
            else
            {
                FTaskGraphInterface::Get().WaitUntilTaskCompletes(RenderThreadSublistDispatchTask, RenderThread_Local);
            }
            
            if (RenderThreadSublistDispatchTask.GetReference() && RenderThreadSublistDispatchTask->IsComplete())
            {
                RenderThreadSublistDispatchTask = nullptr;
            }
        }
        // now we can safely look at RHIThreadTask
    }
    
    // 如果RHI線程任務已完成, 則置空任務.
    if (RHIThreadTask.GetReference() && RHIThreadTask->IsComplete())
    {
        RHIThreadTask = nullptr;
        PrevRHIThreadTask = nullptr;
    }
    
    // 如果RHI線程有任務未完成, 則執行並等待.
    while (RHIThreadTask.GetReference())
    {
        // 如果已在處理, 則用sleep(0)跳過此時間片.
        if (FTaskGraphInterface::Get().IsThreadProcessingTasks(RenderThread_Local))
        {
            while (!RHIThreadTask->IsComplete())
            {
                FPlatformProcess::SleepNoStats(0);
            }
        }
        // 任務尚未處理, 開始並等待之.
        else
        {
            FTaskGraphInterface::Get().WaitUntilTaskCompletes(RHIThreadTask, RenderThread_Local);
        }
        
        // 如果RHI線程任務已完成, 則置空任務.
        if (RHIThreadTask.GetReference() && RHIThreadTask->IsComplete())
        {
            RHIThreadTask = nullptr;
            PrevRHIThreadTask = nullptr;
        }
    }
}

 

RHI控制台變量

前面章節的代碼也顯示RHI體系涉及的控制台變量非常多,下面列出部分控制台變量,以便調試、優化RHI渲染效果或效率:

名稱 描述
r.RHI.Name 顯示當前RHI的名字,如D3D11。
r.RHICmdAsyncRHIThreadDispatch 實驗選項,是否異步執行RHI調度。可使數據更快地刷新到RHI線程,避免幀末尾出現卡頓。
r.RHICmdBalanceParallelLists 允許啟用DrawList的預處理,以嘗試在命令列表之間均衡負載。0:關閉,1:開啟,2:實驗選項,使用上一幀的結果(在分屏等不做任何事情)。
r.RHICmdBalanceTranslatesAfterTasks 實驗選項,在渲染任務完成后再對並行轉譯進行均衡。可最小化延遲上下文的數量,但會增加啟動轉譯的延遲。
r.RHICmdBufferWriteLocks 僅與RHI線程相關。用於診斷緩衝鎖問題的調試選項。
r.RHICmdBypass 是否繞過RHI命令列表,立即發送RHI命令。0:禁用(需開啟多線程渲染),1:開啟。
r.RHICmdCollectRHIThreadStatsFromHighLevel 這將在執行的RHI線程上推送統計信息,這樣就可以確定它們來自哪個高層級的Pass。對幀速率有不利影響。默認開啟。
r.RHICmdFlushOnQueueParallelSubmit 在提交后立即等待並行命令列表的完成。問題診斷。只適用於部分RHI。
r.RHICmdFlushRenderThreadTasks 如果為真,則每次調用時都刷新渲染線程任務。問題診斷。這是一個更細粒度cvars的主開關。
r.RHICmdForceRHIFlush 對每個任務強制刷新發送給RHI線程。問題診斷。
r.RHICmdMergeSmallDeferredContexts 合併小的並行轉譯任務,基於r.RHICmdMinDrawsPerParallelCmdList。
r.RHICmdUseDeferredContexts 使用延遲上下文並行執行命令列表。只適用於部分RHI。
r.RHICmdUseParallelAlgorithms True使用並行算法。如果r.RHICmdBypass為1則忽略。
r.RHICmdUseThread 使用RHI線程。問題診斷。
r.RHICmdWidth 控制並行渲染器中大量事物的任務粒度。
r.RHIThread.Enable 啟用/禁用RHI線程,並確定RHI工作是否在專用線程上運行。
RHI.GPUHitchThreshold GPU上檢測卡頓的閾值(毫秒)。
RHI.MaximumFrameLatency 可以排隊進行渲染的幀數。
RHI.SyncThreshold 在垂直同步功能啟用前的連續“快速”幀數。
RHI.TargetRefreshRate 如果非零,則顯示的更新頻率永遠不會超過目標刷新率(以Hz為單位)。

注:以上只列出部分RHI相關的變量,還有很多未列出。

 

參考

剖析虛幻渲染體系(10)- RHI

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM