This part walks through the Predictor creation process. All of the code below lives under the paddle/fluid/inference/api directory.
1. The externally exposed interfaces are all declared in paddle_inference_api.h.
namespace paddle_infer {

using Config = paddle::AnalysisConfig;

///
/// \brief A factory to help create predictors.
///
/// Usage:
///
/// \code{.cpp}
/// Config config;
/// ... // change the configs.
/// auto predictor = CreatePredictor(config);
/// \endcode
///
PD_INFER_DECL std::shared_ptr<Predictor> CreatePredictor(
    const Config& config);  // NOLINT
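Before diving into the implementation, a minimal usage sketch of this factory may help; the model paths are hypothetical and the calls follow the public paddle_infer C++ API:

#include "paddle_inference_api.h"

int main() {
  paddle_infer::Config config;
  config.SetModel("model.pdmodel", "model.pdiparams");  // hypothetical paths
  config.EnableUseGpu(100 /* memory pool size in MB */, 0 /* device_id */);

  auto predictor = paddle_infer::CreatePredictor(config);
  return predictor != nullptr ? 0 : 1;
}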
2. The concrete implementation of Config is in analysis_config.cc. Enabling GPU, XPU (Baidu Kunlun), or NPU (Huawei Ascend), turning on MKL, and all other options are set through Config. Take enabling the GPU as an example:
void AnalysisConfig::EnableUseGpu(uint64_t memory_pool_init_size_mb,
                                  int device_id) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  use_gpu_ = true;
  memory_pool_init_size_mb_ = memory_pool_init_size_mb;
  FLAGS_initial_gpu_memory_in_mb = memory_pool_init_size_mb_;
  gpu_device_id_ = device_id;
#else
  LOG(ERROR) << "Please compile with gpu to EnableGpu()";
  use_gpu_ = false;
#endif

  Update();
}
Every time the configuration is modified, the Update function is called.
2.1 Update propagates the modified settings into the pass_builder_ held by Config.
mutable std::unique_ptr<PassStrategy> pass_builder_;
If the GPU is enabled, pass_builder_ is reset to a GpuPassStrategy, which contains the preset GPU passes:
// Transfer pass_builder and copy the existing compatible passes.
if (!pass_builder_ || ((use_gpu() ^ pass_builder_->use_gpu())) ||
    ((use_xpu() ^ pass_builder_->use_xpu())) ||
    ((use_npu() ^ pass_builder_->use_npu()))) {
  if (use_gpu()) {
    pass_builder_.reset(new GpuPassStrategy);

    if (use_tensorrt_) {
      // Append after the Affine_channel_conv_fuse pass.
      pass_builder()->InsertPass(3, "tensorrt_subgraph_pass");
    }
  } else if (use_xpu()) {
    PADDLE_ENFORCE_EQ(
        use_gpu(), false,
        platform::errors::InvalidArgument(
            "Only one choice can be made between CPU and XPU."));
    pass_builder_.reset(new XpuPassStrategy);
  } else if (use_npu()) {
    PADDLE_ENFORCE_EQ(
        use_gpu(), false,
        platform::errors::InvalidArgument(
            "Only one choice can be made between GPU and NPU."));
    pass_builder_.reset(new NpuPassStrategy);
  } else {
    pass_builder_.reset(new CpuPassStrategy);
  }
}
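On the user side, the preset pass list chosen here can still be inspected and adjusted through Config. A small sketch; the pass names are illustrative, and pass_builder()->AllPasses() shows what a given build actually contains:

paddle_infer::Config config;
config.EnableUseGpu(100, 0);  // Update() switches to GpuPassStrategy

auto *builder = config.pass_builder();
builder->DeletePass("conv_bn_fuse_pass");          // drop one preset pass
builder->InsertPass(3, "tensorrt_subgraph_pass");  // same call Update() makes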
3. Once the Config is set up, CreatePredictor can be called. The concrete implementation is in analysis_predictor.cc.
namespace paddle_infer {

std::shared_ptr<Predictor> CreatePredictor(const Config &config) {  // NOLINT
  std::shared_ptr<Predictor> predictor(new Predictor(config));
  return predictor;
}

Predictor::Predictor(const Config &config) {
  const_cast<Config *>(&config)->SwitchUseFeedFetchOps(false);
  // The second parameter indicates that the discard log is not printed
  predictor_ = paddle::CreatePaddlePredictor<
      Config, paddle::PaddleEngineKind::kAnalysis>(config);
}

}  // namespace paddle_infer
4. CreatePaddlePredictor is declared in paddle_api.h and has two specializations; the kAnalysis one is used here.
enum class PaddleEngineKind {
  kNative = 0,         ///< Use the native Fluid facility.
  kAutoMixedTensorRT,  ///< Automatically mix Fluid with TensorRT.
  kAnalysis,           ///< More optimization.
};

template <typename ConfigT, PaddleEngineKind engine>
PD_INFER_DECL std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(
    const ConfigT& config);

template <>
PD_INFER_DECL std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
    NativeConfig, PaddleEngineKind::kNative>(const NativeConfig& config);

template <>
PD_INFER_DECL std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
    AnalysisConfig, PaddleEngineKind::kAnalysis>(const AnalysisConfig& config);
The kNative implementation is in api_impl.cc, while the kAnalysis implementation is again in analysis_predictor.cc.
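Since Predictor's constructor (shown above) simply forwards to this older API, calling it directly is equivalent; a sketch with a hypothetical config:

paddle::AnalysisConfig config;
config.SetModel("model.pdmodel", "model.pdiparams");  // hypothetical paths
auto legacy_predictor =
    paddle::CreatePaddlePredictor<paddle::AnalysisConfig,
                                  paddle::PaddleEngineKind::kAnalysis>(config);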
5. The concrete implementation of CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis> follows; part of the code is omitted here to focus on the logic.
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
    AnalysisConfig, PaddleEngineKind::kAnalysis>(const AnalysisConfig &config) {
  ...
  // After creation the Config is marked invalid, guaranteeing that one
  // Config corresponds to exactly one Predictor.
  VLOG(3) << "create AnalysisConfig";
  PADDLE_ENFORCE_EQ(
      config.is_valid(), true,
      platform::errors::InvalidArgument(
          "Note: Each config can only be used for one predictor."));

  // Register the OPs; this runs only once.
  // Register custom operators compiled by the user.
  // This function can only be executed once per process.
  static std::once_flag custom_operators_registered;
  std::call_once(custom_operators_registered,
                 []() { inference::RegisterAllCustomOperator(); });

  // Set the GPU parameters.
  if (config.use_gpu()) {
    ...
    if (config.thread_local_stream_enabled() &&
        process_level_allocator_enabled) {
      PADDLE_THROW(platform::errors::Fatal(
          "When binding threads and streams, the use of "
          "process-level allocators will result in undefined result "
          "errors due to memory asynchronous operations."
          "The thread and stream binding configuration of all "
          "predictors should be the same in a single process."));
    }
  }

  std::unique_ptr<PaddlePredictor> predictor(new AnalysisPredictor(config));
  // Each config can only be used for one predictor.
  config.SetInValid();
  auto predictor_p = dynamic_cast<AnalysisPredictor *>(predictor.get());

  if (!predictor_p->Init(nullptr)) {
    return nullptr;
  }

  if (config.mkldnn_quantizer_enabled() && !predictor_p->MkldnnQuantize()) {
    return nullptr;
  }

  return predictor;
}
6. Each AnalysisPredictor has its own id.
explicit AnalysisPredictor(const AnalysisConfig &config) : config_(config) {
  if (config_.shape_range_info_collected()) {
    config_.SwitchIrOptim(false);
    config_.EnableMemoryOptim(false);
  }
  predictor_id_ = inference::GetUniqueId();
}
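inference::GetUniqueId is not quoted here; conceptually it is just a process-wide counter. A sketch under that assumption, not the actual Paddle implementation:

#include <atomic>

// Sketch only: hands out a monotonically increasing per-process id.
static int GetUniqueId() {
  static std::atomic<int> next_id{0};
  return next_id.fetch_add(1);
}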
7. AnalysisPredictor::Init holds the core initialization logic and is where the predictor's resources are acquired.
bool AnalysisPredictor::Init(
    const std::shared_ptr<framework::Scope> &parent_scope,
    const std::shared_ptr<framework::ProgramDesc> &program) {
  VLOG(3) << "Predictor::init()";
  ...
  // no matter with or without MKLDNN
  paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());

  if (!PrepareScope(parent_scope)) {
    return false;
  }
  if (!CreateExecutor()) {
    return false;
  }
  if (!PrepareProgram(program)) {
    return false;
  }

  // Prepare executor, create local variables.
  if (!PrepareExecutor()) {
    return true;
  }

  // Get the feed_target_names and fetch_target_names
  PrepareFeedFetch();

  return true;
}
8. First comes scope initialization. A Scope is a variable container that holds the input and output variables. PrepareScope reads the information of all devices and creates the Scope object.
// parent_scope = nullptr
bool AnalysisPredictor::PrepareScope(
    const std::shared_ptr<framework::Scope> &parent_scope) {
  if (parent_scope) {
    PADDLE_ENFORCE_NOT_NULL(
        parent_scope,
        platform::errors::PreconditionNotMet(
            "Both program and parent_scope should be set in Clone mode."));
    scope_ = parent_scope;
    status_is_cloned_ = true;
  } else {
    // Query the devices; for GPU this calls the CUDA API. Everything
    // device-related lives under the platform directory. This reads all
    // devices and caches their information.
    paddle::framework::InitDevices();
    // TODO(wilber): we need to release memory occupied by weights.
    scope_.reset(new paddle::framework::Scope());
    status_is_cloned_ = false;
  }
  sub_scope_ = &scope_->NewScope();
  return true;
}
Scope keeps all of its variables in an unordered_map:
mutable std::unordered_map<std::string, std::unique_ptr<Variable>, KeyHasher> vars_;
Scope is a linked structure: a Scope can spawn sub_scopes, and sub_scope->parent points back to the parent node. When parameters are stored, all persistable parameters go into the parent Scope, while non-persistable ones go into the sub_scope. Each predictor holds its own Scope.
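The lookup rules this structure enables can be sketched as follows; the variable names are hypothetical, and FindVar is assumed to search the current scope and then its ancestors:

#include <cassert>

#include "paddle/fluid/framework/scope.h"

void ScopeSketch() {
  paddle::framework::Scope root;                    // persistable weights
  paddle::framework::Scope &sub = root.NewScope();  // per-predictor scratch

  root.Var("conv1.weight");  // persistable: created in the parent
  sub.Var("tmp_0");          // non-persistable: created in the child

  // FindVar walks up the parent chain.
  assert(sub.FindVar("conv1.weight") != nullptr);  // visible via the parent
  assert(root.FindVar("tmp_0") == nullptr);        // parent cannot see child
}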
9. Create the Executor. The matching Place (e.g. CPUPlace, CUDAPlace) is created from the configuration, and a NaiveExecutor is then built on top of place_. NaiveExecutor is used for inference only.
  ...
  } else {
    place_ = paddle::platform::CPUPlace();
  }
  executor_.reset(new paddle::framework::NaiveExecutor(place_));
10. PrepareProgram(program), with program = nullptr here. This is a fairly heavy step: it reads the model file, loads the parameters, runs the pass optimizations, and so on, which is why it is covered in detail.
10.1 LoadProgramDesc reads the contents of the model file. The proto here is defined as ProgramDesc in framework/framework.proto; the framework::ProgramDesc object is then initialized from the proto object.
message OpDesc {
  message Attr {
    required string name = 1;
    required AttrType type = 2;
    optional int32 i = 3;
    optional float f = 4;
    optional string s = 5;
    repeated int32 ints = 6;
    repeated float floats = 7;
    repeated string strings = 8;
    optional bool b = 10;
    repeated bool bools = 11;
    optional int32 block_idx = 12;
    optional int64 l = 13;
    repeated int32 blocks_idx = 14;
    repeated int64 longs = 15;
    repeated double float64s = 16;
  };

  message Var {
    required string parameter = 1;
    repeated string arguments = 2;
  };

  required string type = 3;
  repeated Var inputs = 1;
  repeated Var outputs = 2;
  repeated Attr attrs = 4;
  optional bool is_target = 5 [ default = false ];
};

message VarDesc {
  message Attr {
    required string name = 1;
    required AttrType type = 2;
    optional int32 i = 3;
    optional string s = 4;
    repeated int32 ints = 5;
  };

  required string name = 1;
  required VarType type = 2;
  optional bool persistable = 3 [ default = false ];
  // True if the variable is an input data and
  // have to check the feed data shape and dtype
  optional bool need_check_feed = 4 [ default = false ];
  optional bool is_parameter = 5 [ default = false ];
  optional bool stop_gradient = 6 [ default = false ];
  repeated Attr attrs = 7;
}

message BlockDesc {
  required int32 idx = 1;
  required int32 parent_idx = 2;
  repeated VarDesc vars = 3;
  repeated OpDesc ops = 4;
  optional int32 forward_block_idx = 5 [ default = -1 ];
}

// In some cases, Paddle may perform operator definition iterations,
// and the operator uses OpVersionMap for compatibility testing.
message OpVersion {
  required int32 version = 1;
}

message OpVersionMap {
  message OpVersionPair {
    required string op_name = 1;
    required OpVersion op_version = 2;
  }
  repeated OpVersionPair pair = 1;
}

// Please refer to
// https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/program.md
// for more details.
// TODO(panyx0718): A model can have multiple programs. Need a
// way to distinguish them. Maybe ID or name?
message ProgramDesc {
  reserved 2, 3;  // For backward compatibility.
  repeated BlockDesc blocks = 1;
  optional Version version = 4;
  optional OpVersionMap op_version_map = 5;
}
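In C++ terms, the loading step in 10.1 amounts to reading these serialized proto bytes and constructing a framework::ProgramDesc from them. A sketch, assuming the ProgramDesc(const std::string &) constructor that parses the binary buffer (file name hypothetical):

#include <fstream>
#include <sstream>
#include <string>

#include "paddle/fluid/framework/program_desc.h"

paddle::framework::ProgramDesc LoadProgram(const std::string &path) {
  std::ifstream fin(path, std::ios::binary);
  std::ostringstream buf;
  buf << fin.rdbuf();  // read the whole model file into memory
  // The constructor parses the framework.proto::ProgramDesc bytes.
  return paddle::framework::ProgramDesc(buf.str());
}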
10.2 NaiveExecutor->CreateVariables stores the parameter information read from the model file into the Scope's vars_. It is invoked twice: once to store the persistable parameters in the parent Scope, and a second time to store the non-persistable parameters in the sub_scope.
// block_id = 0; persistable is true on the first call and false on the
// second; scope = sub_scope
void NaiveExecutor::CreateVariables(const ProgramDesc &desc, int block_id,
                                    bool persistable, Scope *scope) {
  PADDLE_ENFORCE_NOT_NULL(scope,
                          platform::errors::InvalidArgument(
                              "The Scope to hold variables is nullptr."));

  auto &global_block = desc.Block(block_id);

  const auto *anc = scope;
  PADDLE_ENFORCE_NE(
      anc->parent(), anc,
      platform::errors::InvalidArgument("Input scope should be child scope."));
  while (anc->parent()) {
    anc = anc->parent();
  }

  int num_vars = 0;
  for (auto &var : global_block.AllVars()) {
    if (var->Name() == framework::kEmptyVarName) {
      continue;
    }
    num_vars++;

    if (persistable == var->Persistable()) {
      if (persistable) {
        if (!anc->FindVar(var->Name())) {
          auto *ptr = const_cast<Scope *>(anc)->Var(var->Name());
          VLOG(3) << scope << " Create persistable variable " << var->Name()
                  << ", which pointer is " << ptr;
          InitializeVariable(ptr, var->GetType());
        }
      } else {
        auto *ptr = const_cast<Scope *>(scope)->Var(var->Name());
        VLOG(3) << scope << " Create variable " << var->Name()
                << ", which pointer is " << ptr;
        InitializeVariable(ptr, var->GetType());
      }
    }
  }
  VLOG(4) << "naive executor create " << num_vars << " vars";
}
10.3 OptimizeInferenceProgram. The Analyzer runs every pass selected by the configuration, produces an optimized ProgramDesc, and inference_program_ is then reset to the optimized argument_.ir_analyzed_program().
// NOTE All the members in AnalysisConfig should be copied to Argument.
void AnalysisPredictor::OptimizeInferenceProgram() {
  // Copy the settings from config into argument.
  PrepareArgument();
  // Walk through analysis_passes and process argument with each pass.
  Analyzer().Run(&argument_);
  PADDLE_ENFORCE_EQ(
      argument_.scope_valid(), true,
      platform::errors::InvalidArgument("The argument scope should be valid."));
  VLOG(5) << "to prepare executor";
  ARGUMENT_CHECK_FIELD((&argument_), ir_analyzed_program);
  inference_program_.reset(
      new framework::ProgramDesc(argument_.ir_analyzed_program()),
      [](framework::ProgramDesc *prog) {
// Note, please do NOT use any member variables, because member variables may
// have been destructed in multiple threads.
#if PADDLE_WITH_TENSORRT
        ...
#endif
        delete prog;
      });
  // The config and argument take a lot of storage,
  // when the predictor settings are complete, we release these stores.
  argument_.PartiallyRelease();
  config_.PartiallyRelease();
  LOG(INFO) << "======= optimize end =======";
}
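Whether this optimization step runs at all is a user-facing switch; both calls below are part of the AnalysisConfig API:

paddle_infer::Config config;
config.SwitchIrOptim(true);  // run the IR pass pipeline described above
config.SwitchIrDebug(true);  // dump the graph after each pass for inspection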
11. PrepareExecutor.
DisablePrepareDataOpt is executed first: it walks the ops in inference_program_ and, if it finds an op that is unfriendly to the optimization, disables the prepare-data shortcut. NaiveExecutor->Prepare then runs: the sub_scope is handed to the Executor, and CreateOps creates each OP from the optimized ProgramDesc and stores it in the Executor.
bool AnalysisPredictor::PrepareExecutor() {
  DisablePrepareDataOpt(inference_program_, 0, false);

  executor_->Prepare(sub_scope_, *inference_program_, 0,
                     config_.use_feed_fetch_ops_);

  PADDLE_ENFORCE_NOT_NULL(sub_scope_,
                          platform::errors::PreconditionNotMet(
                              "The sub_scope should not be nullptr."));

  return true;
}
12. PrepareFeedFetch creates the feed and fetch variables in the sub_scope and binds them to the model's input and output ops.
void AnalysisPredictor::PrepareFeedFetch() {
  PADDLE_ENFORCE_NOT_NULL(sub_scope_,
                          platform::errors::InvalidArgument(
                              "The sub_scope should not be nullptr."));
  CreateFeedFetchVar(sub_scope_);
  for (auto *op : inference_program_->Block(0).AllOps()) {
    if (op->Type() == "feed") {
      int idx = BOOST_GET_CONST(int, op->GetAttr("col"));
      if (feeds_.size() <= static_cast<size_t>(idx)) {
        feeds_.resize(idx + 1);
      }
      feeds_[idx] = op;
      feed_names_[op->Output("Out")[0]] = idx;
      idx2feeds_[idx] = op->Output("Out")[0];
    } else if (op->Type() == "fetch") {
      int idx = BOOST_GET_CONST(int, op->GetAttr("col"));
      if (fetches_.size() <= static_cast<size_t>(idx)) {
        fetches_.resize(idx + 1);
      }
      fetches_[idx] = op;
      idx2fetches_[idx] = op->Input("X")[0];
    }
  }
}
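The names collected into feed_names_ and idx2fetches_ here are exactly what the user-facing API exposes. A fragment continuing the sketch from section 1, with an assumed input shape:

#include <vector>

#include "paddle_inference_api.h"

void RunOnce(paddle_infer::Predictor *predictor) {
  auto input_names = predictor->GetInputNames();  // backed by feed_names_
  auto input = predictor->GetInputHandle(input_names[0]);

  std::vector<float> data(1 * 3 * 224 * 224, 0.f);  // assumed input shape
  input->Reshape({1, 3, 224, 224});
  input->CopyFromCpu(data.data());

  predictor->Run();

  auto output_names = predictor->GetOutputNames();  // backed by idx2fetches_
  auto output = predictor->GetOutputHandle(output_names[0]);
  (void)output;  // copy out with output->CopyToCpu(...) as needed
}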