均值削減是數據預處理中常見的處理方式。之前學習ufldl教程PCA一章時,教程針對圖像介紹了兩種做法:第一種叫做dimension_mean(個人命名),是依據輸入數據的維度,對每個維度在整個數據集上求均值,再從該維度中減去這個均值,這也是常見的做法;第二種叫做per_image_mean,即每張圖像各自減去自身所有像素的均值。ufldl教程上說,在natural images上訓練網絡時,給每個像素(這里指每個dimension)計算一個獨立的均值和方差是make little sense的,這是因為圖像本身具有統計不變性(stationarity),即圖像一部分的統計特性和另一部分相同。作者最后建議,如果你在非natural images(如mnist,或者白背景上存在單個獨立物體的圖像)上訓練算法,其他類型的規則化是值得考慮的;但是當在natural images上訓練時,per_image_mean是一個合理的默認選擇。
本文中在imagenet數據集上采用的是dimension_mean的方法。
一:程序開始
make_imagenet_mean.sh文件調用代碼:
- EXAMPLE=examples/imagenet
- DATA=data/ilsvrc12
- TOOLS=build/tools
- $TOOLS/compute_image_mean $EXAMPLE/ilsvrc12_train_lmdb \
- $DATA/imagenet_mean.binaryproto
二:compute_image_mean.cpp函數分析
輸入參數:lmdb文件(輸入數據庫)、均值文件imagenet_mean.binaryproto(輸出文件,可選)
2.1 頭文件分析
- #include<stdint.h>//定義了幾種擴展的整數類型和宏
- #include<algorithm>//輸出數組的內容、對數組進行排序、反轉數組內容、復制數組內容等操作,
- #include<string>
- #include<utility>//utility頭文件定義了一個pair類型,pair類型用於存儲一對數據;它也提供一些常用的便利函數、或類、或模板。大小求值、值交換:min、max和swap。
- #include<vector>//可以自動擴展容量的數組
- #include"boost/scoped_ptr.hpp"
- #include"gflags/gflags.h"
- #include"glog/logging.h"
- #include"caffe/proto/caffe.pb.h"
- #include"caffe/util/db.hpp"//引入包裝好的lmdb操作函數
- #include"caffe/util/io.hpp"//引入caffe的IO輔助函數(本文用到的WriteProtoToBinaryFile、DecodeDatumNative等),其中圖像的讀取與解碼部分基於opencv
- using namespace caffe; //引入caffe命名空間
- using std::max;
- using std::pair;
- using boost::scoped_ptr;
2.2 gflags宏定義string變量
DEFINE_string(backend, "lmdb", "The backend {leveldb, lmdb} containing the images");
2.3 main函數分析
2.3.1 lmdb數據操作
- scoped_ptr<db::DB>db(db::GetDB(FLAGS_backend));
- db->Open(argv[1], db::READ);//只讀的方式打開lmdb文件
- scoped_ptr<db::Cursor> cursor(db->NewCursor());
- //lmdb數據庫的“光標”,一個光標保存一條從數據庫根節點到數據庫中某個位置的路徑;A cursor holds a path of (page pointer, key index) from the DB root to a position in the DB, plus other state.
- BlobProto sum_blob;//聲明blob變量;BlobProto是在caffe.proto中聲明的protobuf消息,由protoc生成的caffe.pb.h提供其C++類定義,所以在db.cpp和io.cpp中找不到
- int count = 0;
- // load first datum
- Datum datum;
- datum.ParseFromString(cursor->value());//cursor創建后指向數據庫中的第一條記錄,cursor->value()返回該記錄的值(序列化后的Datum字符串)
每個blob對象,為一個4維的數組,分別為image_num*channels*height*width
- sum_blob.set_num(1);//設置圖片的個數
- sum_blob.set_channels(datum.channels());
- sum_blob.set_height(datum.height());
- sum_blob.set_width(datum.width());
- const int data_size = datum.channels() * datum.height() * datum.width();//每張圖片的元素個數(channels*height*width)
- int size_in_datum = std::max<int>(datum.data().size(), datum.float_data_size());
- for (int i = 0; i < size_in_datum; ++i) {
- sum_blob.add_data(0.);//設置初值為float型的0.0
- }
- while (cursor->valid()) {//如果cursor是有效的
- Datum datum;
- datum.ParseFromString(cursor->value());//把cursor->value()返回的序列化字符串解析到datum中
- DecodeDatumNative(&datum);//如果datum中保存的是編碼后的圖像(如jpg/png),就把它解碼成原始像素數據;否則不做處理
- const std::string& data = datum.data();//利用data來引用datum.data
- size_in_datum = std::max<int>(datum.data().size(), datum.float_data_size());
- CHECK_EQ(size_in_datum, data_size) << "Incorrect data field size " << size_in_datum;
- if (data.size() != 0) {//datum.data().size()!=0
- CHECK_EQ(data.size(), size_in_datum);//判斷是否相等
- for (int i = 0; i < size_in_datum; ++i) {
- sum_blob.set_data(i, sum_blob.data(i) + (uint8_t)data[i]);//對應位置的像素值相加(data[i]先轉成uint8_t類型),相加的結果放在sum_blob中
- }
- } else{
- CHECK_EQ(datum.float_data_size(), size_in_datum);
- for (int i = 0; i < size_in_datum; ++i) {
- sum_blob.set_data(i, sum_blob.data(i) +
- static_cast<float>(datum.float_data(i)));//對應位置的像素值相加(float類型相加)
- }
- }
- ++count;
- if (count % 10000 == 0) {
- LOG(INFO) <<"Processed "<<count <<" files.";
- }
- cursor->Next();//光標下移(指針),指向下一個存儲在lmdb中的數據
- }
- for (int i = 0; i < sum_blob.data_size(); ++i) {
- sum_blob.set_data(i, sum_blob.data(i) / count);
- }
- // Write to disk
- if (argc == 3) {
- LOG(INFO) <<"Write to "<<argv[2];
- WriteProtoToBinaryFile(sum_blob, argv[2]);
- }
- const int channels = sum_blob.channels();
- const int dim = sum_blob.height() * sum_blob.width();
- std::vector<float> mean_values(channels, 0.0);//長度為channels的數組(RGB圖像時為3),初始值為0.0
- LOG(INFO) << "Number of channels: " << channels;
- for (int c = 0; c < channels; ++c) {
- for (int i = 0; i < dim; ++i) {
- mean_values[c] += sum_blob.data(dim * c + i);
- }
- LOG(INFO) << "mean_value channel [" << c << "]:" << mean_values[c] / dim;
- }
compute_image_mean.cpp
- #include <stdint.h>
- #include <algorithm>
- #include <string>
- #include <utility>
- #include <vector>
- #include "boost/scoped_ptr.hpp"
- #include "gflags/gflags.h"
- #include "glog/logging.h"
- #include "caffe/proto/caffe.pb.h"
- #include "caffe/util/db.hpp"
- #include "caffe/util/io.hpp"
- using namespace caffe; // NOLINT(build/namespaces)
- using std::max;
- using std::pair;
- using boost::scoped_ptr;
- // Command-line flag "backend": its value (FLAGS_backend) is passed to db::GetDB() in main() to pick the database implementation; defaults to "lmdb".
- DEFINE_string(backend, "lmdb",
- "The backend {leveldb, lmdb} containing the images");
- // Computes the element-wise mean of every Datum stored in a leveldb/lmdb and
- // optionally writes it to disk as a binary BlobProto.
- //
- // Usage: compute_image_mean [FLAGS] INPUT_DB [OUTPUT_FILE]
- //   argv[1]  path of the input database (opened read-only).
- //   argv[2]  optional path the mean BlobProto is written to.
- // Returns 0 on success and 1 when the number of positional arguments is wrong.
- // An empty database, an unparsable record, or a record whose size differs from
- // the first record's channels*height*width fails a CHECK.
- int main(int argc, char** argv) {
-   ::google::InitGoogleLogging(argv[0]);
- #ifndef GFLAGS_GFLAGS_H_
-   namespace gflags = google;
- #endif
-   gflags::SetUsageMessage("Compute the mean_image of a set of images given by"
-       " a leveldb/lmdb\n"
-       "Usage:\n"
-       " compute_image_mean [FLAGS] INPUT_DB [OUTPUT_FILE]\n");
-   gflags::ParseCommandLineFlags(&argc, &argv, true);
-   if (argc < 2 || argc > 3) {
-     gflags::ShowUsageWithFlagsRestrict(argv[0], "tools/compute_image_mean");
-     return 1;
-   }
-   scoped_ptr<db::DB> db(db::GetDB(FLAGS_backend));
-   db->Open(argv[1], db::READ);
-   scoped_ptr<db::Cursor> cursor(db->NewCursor());
-   // Without at least one record there is nothing to size the mean from, and
-   // the final normalisation would divide by a zero count.
-   CHECK(cursor->valid()) << "No records found in " << argv[1];
-   BlobProto sum_blob;
-   int count = 0;
-   // Load the first datum only to learn the shape shared by all records.
-   Datum datum;
-   CHECK(datum.ParseFromString(cursor->value()))
-       << "Failed to parse the first Datum in " << argv[1];
-   if (DecodeDatumNative(&datum)) {
-     LOG(INFO) << "Decoding Datum";
-   }
-   sum_blob.set_num(1);
-   sum_blob.set_channels(datum.channels());
-   sum_blob.set_height(datum.height());
-   sum_blob.set_width(datum.width());
-   const int data_size = datum.channels() * datum.height() * datum.width();
-   int size_in_datum = std::max<int>(datum.data().size(), datum.float_data_size());
-   // One zero-initialised accumulator slot per element of the first datum.
-   // NOTE(review): the running sums live in sum_blob's data field (presumably
-   // float); on very large datasets they may lose precision -- confirm whether
-   // a wider accumulator is needed.
-   for (int i = 0; i < size_in_datum; ++i) {
-     sum_blob.add_data(0.);
-   }
-   LOG(INFO) << "Starting Iteration";
-   // The cursor still points at the first record, so it is counted here too.
-   while (cursor->valid()) {
-     Datum datum;
-     CHECK(datum.ParseFromString(cursor->value()))
-         << "Failed to parse Datum at index " << count;
-     DecodeDatumNative(&datum);
-     const std::string& data = datum.data();
-     size_in_datum = std::max<int>(datum.data().size(), datum.float_data_size());
-     CHECK_EQ(size_in_datum, data_size) << "Incorrect data field size " << size_in_datum;
-     if (data.size() != 0) {
-       // Byte data: read each char as an unsigned 8-bit value before adding,
-       // so values above 127 are not treated as negative.
-       CHECK_EQ(data.size(), size_in_datum);
-       for (int i = 0; i < size_in_datum; ++i) {
-         sum_blob.set_data(i, sum_blob.data(i) + static_cast<uint8_t>(data[i]));
-       }
-     } else {
-       // No byte data: accumulate from the float_data field instead.
-       CHECK_EQ(datum.float_data_size(), size_in_datum);
-       for (int i = 0; i < size_in_datum; ++i) {
-         sum_blob.set_data(i, sum_blob.data(i) +
-             static_cast<float>(datum.float_data(i)));
-       }
-     }
-     ++count;
-     if (count % 10000 == 0) {
-       LOG(INFO) << "Processed " << count << " files.";
-     }
-     cursor->Next();
-   }
-   // Report the final total unless the in-loop progress message just did.
-   if (count % 10000 != 0) {
-     LOG(INFO) << "Processed " << count << " files.";
-   }
-   // Turn the per-element sums into per-element means.
-   for (int i = 0; i < sum_blob.data_size(); ++i) {
-     sum_blob.set_data(i, sum_blob.data(i) / count);
-   }
-   // Write to disk
-   if (argc == 3) {
-     LOG(INFO) << "Write to " << argv[2];
-     WriteProtoToBinaryFile(sum_blob, argv[2]);
-   }
-   // Additionally log one scalar mean per channel: the average of that
-   // channel's height*width entries of the mean blob.
-   const int channels = sum_blob.channels();
-   const int dim = sum_blob.height() * sum_blob.width();
-   std::vector<float> mean_values(channels, 0.0);
-   LOG(INFO) << "Number of channels: " << channels;
-   for (int c = 0; c < channels; ++c) {
-     for (int i = 0; i < dim; ++i) {
-       mean_values[c] += sum_blob.data(dim * c + i);
-     }
-     LOG(INFO) << "mean_value channel [" << c << "]:" << mean_values[c] / dim;
-   }
-   return 0;
- }