data_transformer: detailed annotations, covering the header file and the implementation.
Header:
- ///////////////// Caffe message definition for TransformationParameter
- /*
- // Message that stores parameters used to apply transformation
- // to the data layer's data
- message TransformationParameter {
- // For data pre-processing, we can do simple scaling and subtracting the
- // data mean, if provided. Note that the mean subtraction is always carried
- // out before scaling.
- optional float scale = 1 [default = 1];
- // Specify if we want to randomly mirror data.
- optional bool mirror = 2 [default = false];
- // Specify if we would like to randomly crop an image.
- optional uint32 crop_size = 3 [default = 0];
- // mean_file and mean_value cannot be specified at the same time
- optional string mean_file = 4;
- // if specified can be repeated once (would substract it from all the channels)
- // or can be repeated the same number of times as channels
- // (would subtract them from the corresponding channel)
- repeated float mean_value = 5;
- // Force the decoded image to have 3 color channels.
- optional bool force_color = 6 [default = false];
- // Force the decoded image to have 1 color channels.
- optional bool force_gray = 7 [default = false];
- }
- */
- /*
- The DataTransformer class is responsible for data preprocessing: subtracting the mean, cropping, mirroring, forcing decoded images to color or grayscale, and scaling pixel values. It also transforms data of type Datum, const vector<Datum>, cv::Mat&, vector<cv::Mat>, and Blob<Dtype>* into a destination blob of the target size, and can infer the blob shape for each of these input types. (A minimal usage sketch follows the header listing.)
- */
- #ifndef CAFFE_DATA_TRANSFORMER_HPP_
- #define CAFFE_DATA_TRANSFORMER_HPP_
- #include <vector>
- #include "caffe/blob.hpp"
- #include "caffe/common.hpp"
- #include "caffe/proto/caffe.pb.h"
- namespace caffe {
- /**
- * @brief Applies common transformations to the input data, such as
- * scaling, mirroring, substracting the image mean...
- */
- template <typename Dtype>
- class DataTransformer {
- public:
- explicit DataTransformer(const TransformationParameter& param, Phase phase);
- virtual ~DataTransformer() {}
- /**
- * @brief Initialize the Random number generations if needed by the
- * transformation.
- */
- // Initialize the random number generator; the transformation may need it, e.g. for random mirroring and random crop offsets
- void InitRand();
- /**
- * @brief Applies the transformation defined in the data layer's
- * transform_param block to the data.
- *
- * @param datum
- * Datum containing the data to be transformed.
- * @param transformed_blob
- * This is destination blob. It can be part of top blob's data if
- * set_cpu_data() is used. See data_layer.cpp for an example.
- */
- // Transform the data in a Datum and write it into transformed_blob
- void Transform(const Datum& datum, Blob<Dtype>* transformed_blob);
- /**
- * @brief Applies the transformation defined in the data layer's
- * transform_param block to a vector of Datum.
- *
- * @param datum_vector
- * A vector of Datum containing the data to be transformed.
- * @param transformed_blob
- * This is destination blob. It can be part of top blob's data if
- * set_cpu_data() is used. See memory_layer.cpp for an example.
- */
- // Transform a vector of Datum and write the results into transformed_blob
- void Transform(const vector<Datum> & datum_vector,
- Blob<Dtype>* transformed_blob);
- #ifdef USE_OPENCV
- /**
- * @brief Applies the transformation defined in the data layer's
- * transform_param block to a vector of Mat.
- *
- * @param mat_vector
- * A vector of Mat containing the data to be transformed.
- * @param transformed_blob
- * This is destination blob. It can be part of top blob's data if
- * set_cpu_data() is used. See memory_layer.cpp for an example.
- */
- // When built with OpenCV, a vector of cv::Mat can also be transformed
- void Transform(const vector<cv::Mat> & mat_vector,
- Blob<Dtype>* transformed_blob);
- /**
- * @brief Applies the transformation defined in the data layer's
- * transform_param block to a cv::Mat
- *
- * @param cv_img
- * cv::Mat containing the data to be transformed.
- * @param transformed_blob
- * This is destination blob. It can be part of top blob's data if
- * set_cpu_data() is used. See image_data_layer.cpp for an example.
- */
- // Transform a single image read by OpenCV into a blob
- void Transform(const cv::Mat& cv_img, Blob<Dtype>* transformed_blob);
- #endif // USE_OPENCV
- /**
- * @brief Applies the same transformation defined in the data layer's
- * transform_param block to all the num images in a input_blob.
- *
- * @param input_blob
- * A Blob containing the data to be transformed. It applies the same
- * transformation to all the num images in the blob.
- * @param transformed_blob
- * This is destination blob, it will contain as many images as the
- * input blob. It can be part of top blob's data.
- */
- // Transform an input blob, typically copying a (possibly cropped and mirrored) portion of its data into a new blob
- void Transform(Blob<Dtype>* input_blob, Blob<Dtype>* transformed_blob);
- /**
- * @brief Infers the shape of transformed_blob will have when
- * the transformation is applied to the data.
- *
- * @param datum
- * Datum containing the data to be transformed.
- */
- // Infer the blob shape from a Datum
- vector<int> InferBlobShape(const Datum& datum);
- /**
- * @brief Infers the shape of transformed_blob will have when
- * the transformation is applied to the data.
- * It uses the first element to infer the shape of the blob.
- *
- * @param datum_vector
- * A vector of Datum containing the data to be transformed.
- */
- // Infer the blob shape from a vector of Datum
- vector<int> InferBlobShape(const vector<Datum> & datum_vector);
- /**
- * @brief Infers the shape of transformed_blob will have when
- * the transformation is applied to the data.
- * It uses the first element to infer the shape of the blob.
- *
- * @param mat_vector
- * A vector of Mat containing the data to be transformed.
- */
- #ifdef USE_OPENCV
- // Infer the blob shape from a vector of cv::Mat
- vector<int> InferBlobShape(const vector<cv::Mat> & mat_vector);
- /**
- * @brief Infers the shape of transformed_blob will have when
- * the transformation is applied to the data.
- *
- * @param cv_img
- * cv::Mat containing the data to be transformed.
- */
- // Infer the blob shape from a cv::Mat
- vector<int> InferBlobShape(const cv::Mat& cv_img);
- #endif // USE_OPENCV
- protected:
- /**
- * @brief Generates a random integer from Uniform({0, 1, ..., n-1}).
- *
- * @param n
- * The upperbound (exclusive) value of the random number.
- * @return
- * A uniformly random integer value from ({0, 1, ..., n-1}).
- */
- // Generate a uniformly distributed random integer in [0, n-1]; declared virtual so that subclasses can override how the random numbers are produced
- virtual int Rand(int n);
- // Transform the given Datum into a raw output buffer
- void Transform(const Datum& datum, Dtype* transformed_data);
- // Parameters used by the transformation
- TransformationParameter param_;
- // Random number generator
- shared_ptr<Caffe::RNG> rng_;
- // Training or testing phase?
- Phase phase_;
- // Data mean blob (loaded from mean_file)
- Blob<Dtype> data_mean_;
- // Per-channel mean values (from mean_value)
- vector<Dtype> mean_values_;
- };
- } // namespace caffe
- #endif // CAFFE_DATA_TRANSFORMER_HPP_
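The class is typically driven as in the following minimal sketch (a hedged example; the function name and parameter values are illustrative and not taken from any specific Caffe layer):
- #include <vector>
- #include "caffe/blob.hpp"
- #include "caffe/data_transformer.hpp"
- #include "caffe/proto/caffe.pb.h"
- using namespace caffe;
- void TransformExample(const Datum& datum) {
-   TransformationParameter param;   // normally parsed from the layer's transform_param block
-   param.set_scale(0.00390625f);    // 1/255
-   param.set_mirror(true);
-   param.set_crop_size(227);        // assumes the datum is at least 227x227
-   DataTransformer<float> transformer(param, TRAIN);
-   transformer.InitRand();          // needed because mirror/crop use the RNG
-   // Infer the output shape from the input, size the destination blob, then transform.
-   std::vector<int> shape = transformer.InferBlobShape(datum);
-   Blob<float> transformed;
-   transformed.Reshape(shape);
-   transformer.Transform(datum, &transformed);
- }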
Implementation:
- // DataTransformer consumes and produces blobs, so it helps to know the blob parameters as well; the relevant proto definitions (from the newer version of Caffe) are quoted here:
- /*
- // Specifies the shape (dimensions) of a Blob.
- message BlobShape {
- repeated int64 dim = 1 [packed = true];
- }
- message BlobProto {
- optional BlobShape shape = 7;
- repeated float data = 5 [packed = true];
- repeated float diff = 6 [packed = true];
- repeated double double_data = 8 [packed = true];
- repeated double double_diff = 9 [packed = true];
- // 4D dimensions -- deprecated. Use "shape" instead.
- optional int32 num = 1 [default = 0];
- optional int32 channels = 2 [default = 0];
- optional int32 height = 3 [default = 0];
- optional int32 width = 4 [default = 0];
- }
- */
- ///////////////// Caffe message definition for TransformationParameter
- /*
- // Message that stores parameters used to apply transformation
- // to the data layer's data
- message TransformationParameter {
- // For data pre-processing, we can do simple scaling and subtracting the
- // data mean, if provided. Note that the mean subtraction is always carried
- // out before scaling.
- optional float scale = 1 [default = 1];
- // Specify if we want to randomly mirror data.
- optional bool mirror = 2 [default = false];
- // Specify if we would like to randomly crop an image.
- optional uint32 crop_size = 3 [default = 0];
- // mean_file and mean_value cannot be specified at the same time
- optional string mean_file = 4;
- // if specified can be repeated once (would substract it from all the channels)
- // or can be repeated the same number of times as channels
- // (would subtract them from the corresponding channel)
- repeated float mean_value = 5;
- // Force the decoded image to have 3 color channels.
- optional bool force_color = 6 [default = false];
- // Force the decoded image to have 1 color channels.
- optional bool force_gray = 7 [default = false];
- }
- */
- #ifdef USE_OPENCV
- #include <opencv2/core/core.hpp>
- #endif // USE_OPENCV
- #include <string>
- #include <vector>
- #include "caffe/data_transformer.hpp"
- #include "caffe/util/io.hpp"
- #include "caffe/util/math_functions.hpp"
- #include "caffe/util/rng.hpp"
- namespace caffe {
- // Constructor
- template<typename Dtype>
- DataTransformer<Dtype>::DataTransformer(const TransformationParameter& param,
- Phase phase)
- : param_(param), phase_(phase) {
- // check if we want to use mean_file
- // Check whether a mean file was provided
- if (param_.has_mean_file()) {
- CHECK_EQ(param_.mean_value_size(), 0) <<
- "Cannot specify mean_file and mean_value at the same time";
- // Path to the mean file
- const string& mean_file = param.mean_file();
- if (Caffe::root_solver()) {
- LOG(INFO) << "Loading mean file from: " << mean_file;
- }
- BlobProto blob_proto;  // protobuf message that will hold the serialized mean blob
- // ReadProtoFromBinaryFileOrDie (declared in caffe/util/io.hpp and built on google/protobuf) parses the binary file into blob_proto
- ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto);
- data_mean_.FromProto(blob_proto);  // Blob::FromProto loads the data from the BlobProto into data_mean_
- }
- // check if we want to use mean_value
- if (param_.mean_value_size() > 0) {
- CHECK(param_.has_mean_file() == false) <<
- "Cannot specify mean_file and mean_value at the same time";
- for (int c = 0; c < param_.mean_value_size(); ++c) {
- mean_values_.push_back(param_.mean_value(c));  // append param_.mean_value(c) to the end of mean_values_
- }
- }
- }
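- The two mean mechanisms checked above can be exercised directly through the generated protobuf setters; a hedged sketch (the mean values and file path below are illustrative only):
- #include "caffe/data_transformer.hpp"
- #include "caffe/proto/caffe.pb.h"
- using namespace caffe;
- void MeanConfigExamples() {
-   // Per-channel mean values: either a single value for all channels, or one per channel.
-   TransformationParameter param;
-   param.add_mean_value(104.0f);  // B
-   param.add_mean_value(117.0f);  // G
-   param.add_mean_value(123.0f);  // R
-   DataTransformer<float> tf_values(param, TEST);  // constructor copies the three values into mean_values_
-   // Alternatively a mean file; setting both mean_file and mean_value would trip the CHECK above.
-   TransformationParameter param_file;
-   param_file.set_mean_file("imagenet_mean.binaryproto");  // hypothetical path; the constructor aborts if it cannot be read
-   DataTransformer<float> tf_file(param_file, TEST);  // loads data_mean_ via ReadProtoFromBinaryFileOrDie + FromProto
- }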
- /* A short digression on the data layer's Datum first.
- Caffe does not put vectors and matrices into the database directly; each sample is wrapped in a Datum message defined in caffe.proto, and what the database stores are the strings obtained by serializing these Datum objects. The definition of Datum is excerpted below:
- message Datum {
- optional int32 channels = 1;
- optional int32 height = 2;
- optional int32 width = 3;
- // the actual image data, in bytes
- optional bytes data = 4;
- optional int32 label = 5;
- // Optionally, the datum could also hold float data.
- repeated float float_data = 6;
- // If true data contains an encoded image that need to be decoded
- optional bool encoded = 7 [default = false];
- }
- A Datum has three dimensions, channels, height, and width, and can be thought of as a Blob without the num dimension. There are two fields for the payload: data holds integer (byte) values and float_data holds floating-point values. Image data is usually integral and goes into data; feature vectors are usually floating point and go into float_data. label stores the integer class label, and encoded indicates whether the payload still needs to be decoded (it may contain JPEG- or PNG-encoded bytes). Datum therefore bundles data and label together and supports both integer and floating-point payloads; once compiled by Protobuf it can be accessed efficiently from both Python and C++, and Protobuf also provides serialization and deserialization. What is stored in LMDB is the string obtained by serializing a Datum.
- The LMDB-related code in Caffe falls into three categories: creating a dataset, reading a dataset, and generating feature vectors. Each is discussed in turn below.
- Creating a dataset:
- The dataset-creation code lives under examples/ and ships with each dataset, e.g. MNIST.
- First, declare the variables needed to access LMDB:
- MDB_env *mdb_env;
- MDB_dbi mdb_dbi;
- MDB_val mdb_key, mdb_data;
- MDB_txn *mdb_txn;
- ...
- mdb_env is the handle of the whole database environment, mdb_dbi is the handle of one database within that environment, and mdb_key and mdb_data hold the key and value being written into the database. mdb_txn is the handle of a database transaction ("txn" is short for "transaction").
- Next, create the database environment, then create and open the database:
- if (db_backend == "lmdb") { // lmdb
- LOG(INFO) << "Opening lmdb " << db_path;
- CHECK_EQ(mkdir(db_path, 0744), 0)
- << "mkdir " << db_path << "failed";
- CHECK_EQ(mdb_env_create(&mdb_env), MDB_SUCCESS) << "mdb_env_create failed";
- CHECK_EQ(mdb_env_set_mapsize(mdb_env, 1099511627776), MDB_SUCCESS) // 1TB
- << "mdb_env_set_mapsize failed";
- CHECK_EQ(mdb_env_open(mdb_env, db_path, 0, 0664), MDB_SUCCESS)
- << "mdb_env_open failed";
- CHECK_EQ(mdb_txn_begin(mdb_env, NULL, 0, &mdb_txn), MDB_SUCCESS)
- << "mdb_txn_begin failed";
- CHECK_EQ(mdb_open(mdb_txn, NULL, 0, &mdb_dbi), MDB_SUCCESS)
- << "mdb_open failed. Does the lmdb already exist? ";
- } else {
- LOG(FATAL) << "Unknown db backend " << db_backend;
- }
- mkdir(db_path, 0744) creates a directory for the database; if the directory already exists the program exits with an error. In other words, the program never overwrites an existing database: an old database that is no longer needed must be deleted by hand. Note that one LMDB environment may contain several databases, distinguished by name; the second argument of mdb_open() is in fact the database name (char *), and may be NULL when the environment holds only a single database. Finally, a Datum object is created for every image, filled with data, serialized into a string, and that string is written into the database:
- Datum datum;
- datum.set_channels(1);
- datum.set_height(rows);
- datum.set_width(cols);
- for (int item_id = 0; item_id < num_items; ++item_id) {
- image_file.read(pixels, rows * cols);
- label_file.read(&label, 1);
- datum.set_data(pixels, rows*cols);
- datum.set_label(label);
- snprintf(key_cstr, kMaxKeyLength, "%08d", item_id);
- datum.SerializeToString(&value);
- string keystr(key_cstr);
- // Put in db
- if (db_backend == "lmdb") { // lmdb
- mdb_data.mv_size = value.size();
- mdb_data.mv_data = reinterpret_cast<void*>(&value[0]);
- mdb_key.mv_size = keystr.size();
- mdb_key.mv_data = reinterpret_cast<void*>(&keystr[0]);
- CHECK_EQ(mdb_put(mdb_txn, mdb_dbi, &mdb_key, &mdb_data, 0), MDB_SUCCESS)
- << "mdb_put failed";
- } else {
- LOG(FATAL) << "Unknown db backend " << db_backend;
- }
- if (++count % 1000 == 0) {
- // Commit txn
- if (db_backend == "lmdb") { // lmdb
- CHECK_EQ(mdb_txn_commit(mdb_txn), MDB_SUCCESS)
- << "mdb_txn_commit failed";
- CHECK_EQ(mdb_txn_begin(mdb_env, NULL, 0, &mdb_txn), MDB_SUCCESS)
- << "mdb_txn_begin failed";
- } else {
- LOG(FATAL) << "Unknown db backend " << db_backend;
- }
- }
- }
- The key of each record is the image index, zero-padded to 8 digits. The MDB_val structures mdb_data and mdb_key store a pointer to the source data together with its length, and mdb_put() writes the record into the database. The transaction is committed every 1000 images; only after a commit is the data actually written to disk.
- Reading a dataset:
- The code that reads an LMDB dataset in Caffe is DataLayer, used at the very bottom of a network to supply data. DataLayer traverses the data sequentially; it cannot shuffle the samples and can only randomly skip some records at the start.
- First, in DataLayer's DataLayerSetUp method, the database is opened and the iterator cursor_ is obtained:
- db_.reset(db::GetDB(this->layer_param_.data_param().backend()));
- db_->Open(this->layer_param_.data_param().source(), db::READ);
- cursor_.reset(db_->NewCursor());
- Then, during every data prefetch, the InternalThreadEntry() method reads a string from the database, deserializes it into a Datum object, and extracts the data from that Datum:
- Datum datum;
- datum.ParseFromString(cursor_->value());
- Here cursor_->value() returns the serialized string and datum.ParseFromString() deserializes it.
- Finally, cursor_ must be advanced:
- cursor_->Next();
- if (!cursor_->valid()) {
- DLOG(INFO) << "Restarting data prefetching from start.";
- cursor_->SeekToFirst();
- }
- If cursor_->valid() returns false, the database has been traversed to the end, and cursor_ must be reset to the beginning. The lack of sample shuffling is arguably DataLayer's biggest weakness; if the database keys followed a uniform scheme, shuffling could in principle be implemented by enumerating the keys in random order. This is also why Caffe defines a random number generator wrapper, RNG.
- */
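- To tie the digression back to the code: a hedged sketch of turning one LMDB value string back into a Datum (the accessors are those generated by protoc for the Datum message; the function name is made up):
- #include <cstdint>
- #include <string>
- #include "caffe/common.hpp"
- #include "caffe/proto/caffe.pb.h"
- using namespace caffe;
- // 'value' would come from cursor_->value() in DataLayer.
- void InspectDatum(const std::string& value) {
-   Datum datum;
-   CHECK(datum.ParseFromString(value)) << "Failed to parse Datum";
-   LOG(INFO) << "shape: " << datum.channels() << "x" << datum.height() << "x" << datum.width()
-             << ", label: " << datum.label() << ", encoded: " << datum.encoded();
-   if (!datum.encoded() && datum.data().size() > 0) {
-     // Raw uint8 pixels live in data(); float features would live in float_data().
-     LOG(INFO) << "first pixel: " << static_cast<int>(static_cast<uint8_t>(datum.data()[0]));
-   }
- }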
- template<typename Dtype>
- void DataTransformer<Dtype>::Transform(const Datum& datum,
- Dtype* transformed_data) {
- // See the definition of TransformationParameter above
- const string& data = datum.data();
- const int datum_channels = datum.channels();  // number of channels
- const int datum_height = datum.height();  // number of rows (height)
- const int datum_width = datum.width();  // number of columns (width)
- const int crop_size = param_.crop_size();  // crop size
- const Dtype scale = param_.scale();  // scaling factor
- const bool do_mirror = param_.mirror() && Rand(2);  // mirror this sample only if mirroring is enabled and the coin flip is 1
- const bool has_mean_file = param_.has_mean_file();  // whether a mean file was provided
- const bool has_uint8 = data.size() > 0;  // pixels stored as uint8 bytes (otherwise float_data holds floats)
- const bool has_mean_values = mean_values_.size() > 0;  // whether per-channel mean values were provided
- // Sanity checks
- CHECK_GT(datum_channels, 0);
- CHECK_GE(datum_height, crop_size);
- CHECK_GE(datum_width, crop_size);
- Dtype* mean = NULL;
- /*
- As introduced earlier for the CHECK macros, glog provides several convenience macros for asserting specific relations:
- 1. Comparisons:
- CHECK_EQ, CHECK_NE, CHECK_LE, CHECK_LT, CHECK_GE, CHECK_GT. Both operands should have the same type; if they do not, convert one with static_cast.
- 2. Null-pointer checks:
- CHECK_NOTNULL(some_ptr), useful when initializing objects.
- 3. String equality:
- CHECK_STREQ, CHECK_STRNE, CHECK_STRCASEEQ, CHECK_STRCASENE, for case-sensitive and case-insensitive comparisons respectively.
- 4. Floating-point equality or closeness:
- CHECK_DOUBLE_EQ checks equality up to a small fixed tolerance, while CHECK_NEAR takes an explicit error margin.
- */
- if (has_mean_file) {  // check that the mean blob matches the datum dimensions
- CHECK_EQ(datum_channels, data_mean_.channels());
- CHECK_EQ(datum_height, data_mean_.height());
- CHECK_EQ(datum_width, data_mean_.width());
- mean = data_mean_.mutable_cpu_data();
- }
- if (has_mean_values) {
- CHECK(mean_values_.size() == 1 || mean_values_.size() == datum_channels) <<
- "Specify either 1 mean_value or as many as channels: " << datum_channels;
- if (datum_channels > 1 && mean_values_.size() == 1) {
- // Replicate the mean_value for simplicity
- for (int c = 1; c < datum_channels; ++c) {
- mean_values_.push_back(mean_values_[0]);
- }
- }
- }
- int height = datum_height;
- int width = datum_width;
- // Compute h_off and w_off depending on whether cropping is requested
- int h_off = 0;
- int w_off = 0;
- if (crop_size) {  // crop_size is non-zero
- height = crop_size;
- width = crop_size;
- // We only do random crop when we do training.
- // During training the crop offset is random; Rand() (defined below) supplies the randomness
- if (phase_ == TRAIN) {
- h_off = Rand(datum_height - crop_size + 1);  // a random integer in [0, datum_height - crop_size]
- w_off = Rand(datum_width - crop_size + 1);
- } else {  // at test time there is no randomness: take the center crop
- h_off = (datum_height - crop_size) / 2;
- w_off = (datum_width - crop_size) / 2;
- }
- }
- // Apply the transformation: subtract the mean from each pixel value, then multiply by scale.
- // If cropping is requested, the resulting blob has spatial size crop_size x crop_size;
- // otherwise it has size datum_height x datum_width.
- Dtype datum_element;
- int top_index, data_index;
- for (int c = 0; c < datum_channels; ++c) {
- for (int h = 0; h < height; ++h) {
- for (int w = 0; w < width; ++w) {
- data_index = (c * datum_height + h_off + h) * datum_width + w_off + w;  // row-major source index of pixel (c, h_off + h, w_off + w) in the full datum (see the sketch after this function)
- if (do_mirror) {  // write to the horizontally mirrored position
- top_index = (c * height + h) * width + (width - 1 - w);  // only the width coordinate is flipped to realize the mirror
- } else {
- top_index = (c * height + h) * width + w;
- }
- if (has_uint8) {  // pixels stored as uint8 bytes: convert to Dtype
- datum_element =
- static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
- } else {  // otherwise the values come from float_data
- datum_element = datum.float_data(data_index);
- }
- if (has_mean_file) {  // with a mean file: subtract the per-pixel mean, then multiply by scale
- transformed_data[top_index] =
- (datum_element - mean[data_index]) * scale;
- } else {
- if (has_mean_values) {  // otherwise subtract the per-channel mean (one value per channel), then multiply by scale
- transformed_data[top_index] =
- (datum_element - mean_values_[c]) * scale;
- } else {  // no mean at all: just multiply by scale
- transformed_data[top_index] = datum_element * scale;
- }
- }
- }
- }
- }
- }
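- The index arithmetic in the inner loop can be checked with a small standalone sketch (not part of Caffe; all names are illustrative). It shows how a pixel (c, h, w) of a CHW row-major buffer is addressed, how the crop offsets shift the source index, and how mirroring flips only the width coordinate:
- #include <cassert>
- // Offset of pixel (c, h, w) in a channels x height x width row-major buffer.
- int chw_offset(int c, int h, int w, int height, int width) {
-   return (c * height + h) * width + w;
- }
- int main() {
-   const int datum_height = 4, datum_width = 5;  // source size
-   const int crop = 3, h_off = 1, w_off = 2;     // crop window
-   const int c = 0, h = 0, w = 0;                // first pixel of the crop
-   // Source index: the crop window starts at (h_off, w_off) inside the datum.
-   int data_index = chw_offset(c, h_off + h, w_off + w, datum_height, datum_width);
-   assert(data_index == 7);  // (0 * 4 + 1) * 5 + 2
-   // Destination index without mirroring, and with mirroring (width flipped).
-   assert(chw_offset(c, h, w, crop, crop) == 0);
-   assert(chw_offset(c, h, crop - 1 - w, crop, crop) == 2);
-   return 0;
- }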
- template<typename Dtype>
- void DataTransformer<Dtype>::Transform(const Datum& datum,
- Blob<Dtype>* transformed_blob) {
- // If datum is encoded, decoded and transform the cv::image.
- if (datum.encoded()) {  // if the datum holds an encoded image, decode it first
- #ifdef USE_OPENCV
- // force_color and force_gray must not both be set; that would be a configuration error
- CHECK(!(param_.force_color() && param_.force_gray()))
- << "cannot set both force_color and force_gray";
- cv::Mat cv_img;
- if (param_.force_color() || param_.force_gray()) {
- // If either force_color or force_gray is set, decode with DecodeDatumToCVMat
- // If force_color then decode in color otherwise decode in gray.
- cv_img = DecodeDatumToCVMat(datum, param_.force_color());
- } else {  // otherwise decode with DecodeDatumToCVMatNative
- cv_img = DecodeDatumToCVMatNative(datum);
- }
- // Transform the cv::image into blob.
- // Apply the transformation and return
- return Transform(cv_img, transformed_blob);
- #else
- LOG(FATAL) << "Encoded datum requires OpenCV; compile with USE_OPENCV.";
- #endif // USE_OPENCV
- } else {  // not encoded: force_color and force_gray only apply to encoded data, so warn if either is set
- if (param_.force_color() || param_.force_gray()) {
- LOG(ERROR) << "force_color and force_gray only for encoded datum";
- }
- }
- const int crop_size = param_.crop_size();
- const int datum_channels = datum.channels();
- const int datum_height = datum.height();
- const int datum_width = datum.width();
- // Check dimensions.
- const int channels = transformed_blob->channels();
- const int height = transformed_blob->height();
- const int width = transformed_blob->width();
- const int num = transformed_blob->num();
- CHECK_EQ(channels, datum_channels);
- CHECK_LE(height, datum_height);
- CHECK_LE(width, datum_width);
- CHECK_GE(num, 1);
- if (crop_size) {
- CHECK_EQ(crop_size, height);
- CHECK_EQ(crop_size, width);
- } else {
- CHECK_EQ(datum_height, height);
- CHECK_EQ(datum_width, width);
- }
- // Delegate to the Dtype* overload above to do the actual transformation
- Dtype* transformed_data = transformed_blob->mutable_cpu_data();
- Transform(datum, transformed_data);
- }
- template<typename Dtype>
- void DataTransformer<Dtype>::Transform(const vector<Datum> & datum_vector,
- Blob<Dtype>* transformed_blob) {
- const int datum_num = datum_vector.size();
- // Shape of the destination blob
- const int num = transformed_blob->num();
- const int channels = transformed_blob->channels();
- const int height = transformed_blob->height();
- const int width = transformed_blob->width();
- CHECK_GT(datum_num, 0) << "There is no datum to add";
- CHECK_LE(datum_num, num) <<
- "The size of datum_vector must be no greater than transformed_blob->num()";
- // Create a uni_blob with num == 1 that shares the memory of item item_id inside transformed_blob
- Blob<Dtype> uni_blob(1, channels, height, width);
- for (int item_id = 0; item_id < datum_num; ++item_id) {
- int offset = transformed_blob->offset(item_id);
- uni_blob.set_cpu_data(transformed_blob->mutable_cpu_data() + offset);
- Transform(datum_vector[item_id], &uni_blob);
- }
- }
- #ifdef USE_OPENCV
- template<typename Dtype>
- void DataTransformer<Dtype>::Transform(const vector<cv::Mat> & mat_vector,
- Blob<Dtype>* transformed_blob) {
- // Dimensions of the cv::Mat vector and of the destination blob
- const int mat_num = mat_vector.size();
- const int num = transformed_blob->num();
- const int channels = transformed_blob->channels();
- const int height = transformed_blob->height();
- const int width = transformed_blob->width();
- CHECK_GT(mat_num, 0) << "There is no MAT to add";
- CHECK_EQ(mat_num, num) <<
- "The size of mat_vector must be equals to transformed_blob->num()";
- // Same pattern as the Datum vector overload above
- Blob<Dtype> uni_blob(1, channels, height, width);
- for (int item_id = 0; item_id < mat_num; ++item_id) {
- int offset = transformed_blob->offset(item_id);
- uni_blob.set_cpu_data(transformed_blob->mutable_cpu_data() + offset);
- Transform(mat_vector[item_id], &uni_blob);
- }
- }
- // For an image: subtract the mean, multiply by scale, and decide whether to mirror.
- // The logic parallels the Datum path above.
- template<typename Dtype>
- void DataTransformer<Dtype>::Transform(const cv::Mat& cv_img,
- Blob<Dtype>* transformed_blob) {
- const int crop_size = param_.crop_size();
- const int img_channels = cv_img.channels();
- const int img_height = cv_img.rows;
- const int img_width = cv_img.cols;
- // Check dimensions.
- const int channels = transformed_blob->channels();
- const int height = transformed_blob->height();
- const int width = transformed_blob->width();
- const int num = transformed_blob->num();
- CHECK_EQ(channels, img_channels);
- CHECK_LE(height, img_height);
- CHECK_LE(width, img_width);
- CHECK_GE(num, 1);
- CHECK(cv_img.depth() == CV_8U) << "Image data type must be unsigned byte";
- const Dtype scale = param_.scale();
- const bool do_mirror = param_.mirror() && Rand(2);
- const bool has_mean_file = param_.has_mean_file();
- const bool has_mean_values = mean_values_.size() > 0;
- CHECK_GT(img_channels, 0);
- CHECK_GE(img_height, crop_size);
- CHECK_GE(img_width, crop_size);
- Dtype* mean = NULL;
- if (has_mean_file) {
- CHECK_EQ(img_channels, data_mean_.channels());
- CHECK_EQ(img_height, data_mean_.height());
- CHECK_EQ(img_width, data_mean_.width());
- mean = data_mean_.mutable_cpu_data();
- }
- if (has_mean_values) {
- CHECK(mean_values_.size() == 1 || mean_values_.size() == img_channels) <<
- "Specify either 1 mean_value or as many as channels: " << img_channels;
- if (img_channels > 1 && mean_values_.size() == 1) {
- // Replicate the mean_value for simplicity
- for (int c = 1; c < img_channels; ++c) {
- mean_values_.push_back(mean_values_[0]);
- }
- }
- }
- int h_off = 0;
- int w_off = 0;
- cv::Mat cv_cropped_img = cv_img;
- if (crop_size) {
- CHECK_EQ(crop_size, height);
- CHECK_EQ(crop_size, width);
- // We only do random crop when we do training.
- if (phase_ == TRAIN) {
- h_off = Rand(img_height - crop_size + 1);
- w_off = Rand(img_width - crop_size + 1);
- } else {
- h_off = (img_height - crop_size) / 2;
- w_off = (img_width - crop_size) / 2;
- }
- cv::Rect roi(w_off, h_off, crop_size, crop_size);
- cv_cropped_img = cv_img(roi);
- } else {
- CHECK_EQ(img_height, height);
- CHECK_EQ(img_width, width);
- }
- CHECK(cv_cropped_img.data);
- Dtype* transformed_data = transformed_blob->mutable_cpu_data();
- int top_index;
- for (int h = 0; h < height; ++h) {
- const uchar* ptr = cv_cropped_img.ptr<uchar>(h);
- int img_index = 0;
- for (int w = 0; w < width; ++w) {
- for (int c = 0; c < img_channels; ++c) {
- if (do_mirror) {
- top_index = (c * height + h) * width + (width - 1 - w);
- } else {
- top_index = (c * height + h) * width + w;
- }
- // int top_index = (c * height + h) * width + w;
- Dtype pixel = static_cast<Dtype>(ptr[img_index++]);
- if (has_mean_file) {
- int mean_index = (c * img_height + h_off + h) * img_width + w_off + w;
- transformed_data[top_index] =
- (pixel - mean[mean_index]) * scale;
- } else {
- if (has_mean_values) {
- transformed_data[top_index] =
- (pixel - mean_values_[c]) * scale;
- } else {
- transformed_data[top_index] = pixel * scale;
- }
- }
- }
- }
- }
- }
- #endif // USE_OPENCV
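- A hedged sketch of the cv::Mat path (it assumes Caffe was built with USE_OPENCV; the file name is illustrative):
- #include <opencv2/core/core.hpp>
- #include <opencv2/highgui/highgui.hpp>
- #include <vector>
- #include "caffe/blob.hpp"
- #include "caffe/data_transformer.hpp"
- using namespace caffe;
- void TransformImageFile(DataTransformer<float>* transformer, Blob<float>* top) {
-   cv::Mat cv_img = cv::imread("example.jpg", CV_LOAD_IMAGE_COLOR);  // 8-bit BGR, as required by the CV_8U check above
-   CHECK(cv_img.data) << "Could not load example.jpg";
-   // Output shape is [1, channels, crop_or_height, crop_or_width].
-   std::vector<int> shape = transformer->InferBlobShape(cv_img);
-   top->Reshape(shape);
-   transformer->Transform(cv_img, top);
- }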
- template<typename Dtype>
- void DataTransformer<Dtype>::Transform(Blob<Dtype>* input_blob,
- Blob<Dtype>* transformed_blob) {
- const int crop_size = param_.crop_size();
- const int input_num = input_blob->num();
- const int input_channels = input_blob->channels();
- const int input_height = input_blob->height();
- const int input_width = input_blob->width();
- if (transformed_blob->count() == 0) {
- // Initialize transformed_blob with the right shape.
- if (crop_size) {
- transformed_blob->Reshape(input_num, input_channels,
- crop_size, crop_size);
- } else {
- transformed_blob->Reshape(input_num, input_channels,
- input_height, input_width);
- }
- }
- const int num = transformed_blob->num();
- const int channels = transformed_blob->channels();
- const int height = transformed_blob->height();
- const int width = transformed_blob->width();
- const int size = transformed_blob->count();
- CHECK_LE(input_num, num);
- CHECK_EQ(input_channels, channels);
- CHECK_GE(input_height, height);
- CHECK_GE(input_width, width);
- const Dtype scale = param_.scale();
- const bool do_mirror = param_.mirror() && Rand(2);
- const bool has_mean_file = param_.has_mean_file();
- const bool has_mean_values = mean_values_.size() > 0;
- int h_off = 0;
- int w_off = 0;
- if (crop_size) {
- CHECK_EQ(crop_size, height);
- CHECK_EQ(crop_size, width);
- // We only do random crop when we do training.
- if (phase_ == TRAIN) {
- h_off = Rand(input_height - crop_size + 1);
- w_off = Rand(input_width - crop_size + 1);
- } else {
- h_off = (input_height - crop_size) / 2;
- w_off = (input_width - crop_size) / 2;
- }
- } else {
- CHECK_EQ(input_height, height);
- CHECK_EQ(input_width, width);
- }
- // If a mean file was provided, subtract the mean blob from every image in the input
- Dtype* input_data = input_blob->mutable_cpu_data();
- if (has_mean_file) {
- CHECK_EQ(input_channels, data_mean_.channels());
- CHECK_EQ(input_height, data_mean_.height());
- CHECK_EQ(input_width, data_mean_.width());
- for (int n = 0; n < input_num; ++n) {
- int offset = input_blob->offset(n);
- /*
- template <typename Dtype>
- void caffe_sub(const int N, const Dtype* a, const Dtype* b, Dtype* y);
- caffe_sub, defined in math_functions, performs element-wise subtraction; here input_data (starting at offset) = input_data (starting at offset) - data_mean_
- */
- caffe_sub(data_mean_.count(), input_data + offset,
- data_mean_.cpu_data(), input_data + offset);
- }
- }
- // If per-channel mean values were provided
- if (has_mean_values) {
- CHECK(mean_values_.size() == 1 || mean_values_.size() == input_channels) <<
- "Specify either 1 mean_value or as many as channels: " << input_channels;
- if (mean_values_.size() == 1) {
- caffe_add_scalar(input_blob->count(), -(mean_values_[0]), input_data);
- } else {
- for (int n = 0; n < input_num; ++n) {
- for (int c = 0; c < input_channels; ++c) {
- int offset = input_blob->offset(n, c);
- // add -mean_values_[c] to every element starting at input_data + offset
- caffe_add_scalar(input_height * input_width, -(mean_values_[c]),
- input_data + offset);
- }
- }
- }
- }
- // Now copy the (already mean-subtracted, if any) data into the destination blob, applying crop and mirror
- Dtype* transformed_data = transformed_blob->mutable_cpu_data();
- for (int n = 0; n < input_num; ++n) {
- int top_index_n = n * channels;
- int data_index_n = n * channels;
- for (int c = 0; c < channels; ++c) {
- int top_index_c = (top_index_n + c) * height;
- int data_index_c = (data_index_n + c) * input_height + h_off;
- for (int h = 0; h < height; ++h) {
- int top_index_h = (top_index_c + h) * width;
- int data_index_h = (data_index_c + h) * input_width + w_off;
- if (do_mirror) {
- int top_index_w = top_index_h + width - 1;
- for (int w = 0; w < width; ++w) {
- transformed_data[top_index_w-w] = input_data[data_index_h + w];
- }
- } else {
- for (int w = 0; w < width; ++w) {
- transformed_data[top_index_h + w] = input_data[data_index_h + w];
- }
- }
- }
- }
- }
- if (scale != Dtype(1)) {
- DLOG(INFO) << "Scale: " << scale;
- caffe_scal(size, scale, transformed_data);
- }
- }
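- A hedged sketch of the Blob-to-Blob overload: the destination blob may be left with count() == 0, in which case Transform() reshapes it itself (to crop_size x crop_size spatially when cropping is enabled). Names are illustrative only:
- #include "caffe/blob.hpp"
- #include "caffe/data_transformer.hpp"
- using namespace caffe;
- void TransformBatch(DataTransformer<float>* transformer, Blob<float>* input_batch) {
-   Blob<float> output;  // left empty: Transform() picks the shape
-   // Subtracts the mean, crops (randomly in TRAIN, centered otherwise),
-   // mirrors if requested, and scales every image in input_batch.
-   transformer->Transform(input_batch, &output);
- }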
- template<typename Dtype>
- vector<int> DataTransformer<Dtype>::InferBlobShape(const Datum& datum) {
- if (datum.encoded()) {
- #ifdef USE_OPENCV // with OpenCV an encoded datum is first decoded into a cv::Mat and the shape is inferred from that
- CHECK(!(param_.force_color() && param_.force_gray()))
- << "cannot set both force_color and force_gray";
- cv::Mat cv_img;
- if (param_.force_color() || param_.force_gray()) {
- // If force_color then decode in color otherwise decode in gray.
- cv_img = DecodeDatumToCVMat(datum, param_.force_color());
- } else {
- cv_img = DecodeDatumToCVMatNative(datum);
- }
- // InferBlobShape using the cv::image.
- return InferBlobShape(cv_img);
- #else
- LOG(FATAL) << "Encoded datum requires OpenCV; compile with USE_OPENCV.";
- #endif // USE_OPENCV
- }
- // Otherwise read the shape directly from the datum's fields
- const int crop_size = param_.crop_size();
- const int datum_channels = datum.channels();
- const int datum_height = datum.height();
- const int datum_width = datum.width();
- // Check dimensions.
- CHECK_GT(datum_channels, 0);
- CHECK_GE(datum_height, crop_size);
- CHECK_GE(datum_width, crop_size);
- // Build BlobShape.
- vector<int> shape(4);
- shape[0] = 1;
- shape[1] = datum_channels;
- shape[2] = (crop_size)? crop_size: datum_height;
- shape[3] = (crop_size)? crop_size: datum_width;
- return shape;
- }
- template<typename Dtype>
- vector<int> DataTransformer<Dtype>::InferBlobShape(
- const vector<Datum> & datum_vector) {
- const int num = datum_vector.size();
- CHECK_GT(num, 0) << "There is no datum to in the vector";
- // Use first datum in the vector to InferBlobShape.
- // The first datum is used for the inference
- vector<int> shape = InferBlobShape(datum_vector[0]);
- // Adjust num to the size of the vector.
- shape[0] = num;
- return shape;
- }
- #ifdef USE_OPENCV
- // With OpenCV:
- // infer the shape from the cv::Mat itself
- template<typename Dtype>
- vector<int> DataTransformer<Dtype>::InferBlobShape(const cv::Mat& cv_img) {
- const int crop_size = param_.crop_size();
- const int img_channels = cv_img.channels();
- const int img_height = cv_img.rows;
- const int img_width = cv_img.cols;
- // Check dimensions.
- CHECK_GT(img_channels, 0);
- CHECK_GE(img_height, crop_size);
- CHECK_GE(img_width, crop_size);
- // Build BlobShape.
- vector<int> shape(4);
- shape[0] = 1;
- shape[1] = img_channels;
- shape[2] = (crop_size)? crop_size: img_height;
- shape[3] = (crop_size)? crop_size: img_width;
- return shape;
- }
- template<typename Dtype>
- vector<int> DataTransformer<Dtype>::InferBlobShape(
- const vector<cv::Mat> & mat_vector) {
- const int num = mat_vector.size();
- CHECK_GT(num, 0) << "There is no cv_img to in the vector";
- // Use first cv_img in the vector to InferBlobShape.
- // The first cv::Mat is used for the inference
- vector<int> shape = InferBlobShape(mat_vector[0]);
- // Adjust num to the size of the vector.
- shape[0] = num;
- return shape;
- }
- #endif // USE_OPENCV
- // Initialize the random number generator
- template <typename Dtype>
- void DataTransformer<Dtype>::InitRand() {
- // The RNG is needed only when mirroring is enabled, or when we are training and cropping is enabled
- const bool needs_rand = param_.mirror() ||
- (phase_ == TRAIN && param_.crop_size());
- if (needs_rand) {
- const unsigned int rng_seed = caffe_rng_rand();  // obtain a random seed (generated from the entropy pool or from the time)
- rng_.reset(new Caffe::RNG(rng_seed));  // instantiate the random number generator with that seed
- } else {
- rng_.reset();  // otherwise the generator is left null
- }
- }
- // Produce a uniformly distributed random integer in [0, n-1]
- template <typename Dtype>
- int DataTransformer<Dtype>::Rand(int n) {
- CHECK(rng_);
- CHECK_GT(n, 0);
- caffe::rng_t* rng =
- static_cast<caffe::rng_t*>(rng_->generator());
- return ((*rng)() % n);
- }
- INSTANTIATE_CLASS(DataTransformer);
- /*
- The class-instantiation macro, introduced earlier, is reproduced here:
- #define INSTANTIATE_CLASS(classname) \
- char gInstantiationGuard##classname; \
- template class classname<float>; \
- template class classname<double>
- */
- } // namespace caffe