1、In src\caffe\proto\caffe.proto, search for message LayerParameter and, after the line optional ReLUParameter relu_param = 123;, add optional ReLU6Parameter relu6_param = 208; (don't forget the trailing semicolon).
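For reference, the relevant part of LayerParameter should then look roughly like this (surrounding fields elided; 208 is assumed to be an unused field ID in your copy of caffe.proto — pick another free ID if it is already taken):

message LayerParameter {
  // ... other fields ...
  optional ReLUParameter relu_param = 123;
  optional ReLU6Parameter relu6_param = 208;
  // ... other fields ...
}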
2、Search for message ReLUParameter and add the following right after that message definition:
// Message that stores parameters used by ReLU6Layer
message ReLU6Parameter {
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }
  optional Engine engine = 2 [default = DEFAULT];
}
That completes the proto changes; next, add the required header and implementation files.
1. In the blob/ssd/include/caffe/layers folder, create relu6_layer.hpp with the following contents:
#ifndef CAFFE_RELU6_LAYER_HPP_
#define CAFFE_RELU6_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

#include "caffe/layers/neuron_layer.hpp"

namespace caffe {

/**
 * @brief Rectified Linear Unit 6 non-linearity @f$ y = \min(6, \max(0, x)) @f$.
 *        Like ReLU, the simple clamp is fast to compute; the upper bound of 6
 *        keeps activations in a fixed, quantization-friendly range.
 */
template <typename Dtype>
class ReLU6Layer : public NeuronLayer<Dtype> {
 public:
  /**
   * @param param provides ReLU6Parameter relu6_param,
   *     with ReLU6Layer options (currently only the compute engine).
   */
  explicit ReLU6Layer(const LayerParameter& param)
      : NeuronLayer<Dtype>(param) {}

  virtual inline const char* type() const { return "ReLU6"; }

 protected:
  /**
   * @param bottom input Blob vector (length 1)
   *   -# @f$ (N \times C \times H \times W) @f$
   *      the inputs @f$ x @f$
   * @param top output Blob vector (length 1)
   *   -# @f$ (N \times C \times H \times W) @f$
   *      the computed outputs @f$ y = \min(6, \max(0, x)) @f$
   */
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  /**
   * @brief Computes the error gradient w.r.t. the ReLU6 inputs.
   *
   * @param top output Blob vector (length 1), providing the error gradient
   *      @f$ \frac{\partial E}{\partial y} @f$ with respect to the computed
   *      outputs @f$ y @f$
   * @param propagate_down see Layer::Backward.
   * @param bottom input Blob vector (length 1); if propagate_down[0],
   *      Backward fills their diff with gradients @f$
   *        \frac{\partial E}{\partial x} = \left\{
   *        \begin{array}{lr}
   *            \frac{\partial E}{\partial y} & \mathrm{if} \; 0 < x < 6 \\
   *            0 & \mathrm{otherwise}
   *        \end{array} \right.
   *      @f$.
   */
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
};

}  // namespace caffe

#endif  // CAFFE_RELU6_LAYER_HPP_
2. In the blob/ssd/src/caffe/layers folder, create relu6_layer.cpp with the following contents:
#include <algorithm> #include <vector> #include "caffe/layers/relu6_layer.hpp" namespace caffe { template <typename Dtype> void ReLU6Layer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { const Dtype* bottom_data = bottom[0]->cpu_data(); Dtype* top_data = top[0]->mutable_cpu_data(); const int count = bottom[0]->count(); for (int i = 0; i < count; ++i) { top_data[i] = std::min(std::max(bottom_data[i], Dtype(0)), Dtype(6)); } } template <typename Dtype> void ReLU6Layer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) { if (propagate_down[0]) { const Dtype* bottom_data = bottom[0]->cpu_data(); const Dtype* top_diff = top[0]->cpu_diff(); Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); const int count = bottom[0]->count(); for (int i = 0; i < count; ++i) { bottom_diff[i] = top_diff[i] * ((bottom_data[i] > 0 && bottom_data[i] < 6)); } } } #ifdef CPU_ONLY STUB_GPU(ReLU6Layer); #endif INSTANTIATE_CLASS(ReLU6Layer); REGISTER_LAYER_CLASS(ReLU6); } // namespace caffe
3. In the blob/ssd/src/caffe/layers folder, create relu6_layer.cu with the following contents:
#include <algorithm> #include <vector> #include "caffe/layers/relu6_layer.hpp" namespace caffe { template <typename Dtype> __global__ void ReLU6Forward(const int n, const Dtype* in, Dtype* out) { CUDA_KERNEL_LOOP(index, n) { out[index] = in[index] < 0 ? 0: (in[index] > 6 ? 6 : in[index]); } } template <typename Dtype> void ReLU6Layer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = top[0]->mutable_gpu_data(); const int count = bottom[0]->count(); // NOLINT_NEXT_LINE(whitespace/operators) ReLU6Forward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>( count, bottom_data, top_data); CUDA_POST_KERNEL_CHECK; // << " count: " << count << " bottom_data: " // << (unsigned long)bottom_data // << " top_data: " << (unsigned long)top_data // << " blocks: " << CAFFE_GET_BLOCKS(count) // << " threads: " << CAFFE_CUDA_NUM_THREADS; } template <typename Dtype> __global__ void ReLU6Backward(const int n, const Dtype* in_diff, const Dtype* in_data, Dtype* out_diff) { CUDA_KERNEL_LOOP(index, n) { out_diff[index] = in_diff[index] * ((in_data[index] > 0) && (in_data[index] < 6)); } } template <typename Dtype> void ReLU6Layer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) { if (propagate_down[0]) { const Dtype* bottom_data = bottom[0]->gpu_data(); const Dtype* top_diff = top[0]->gpu_diff(); Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); const int count = bottom[0]->count(); // NOLINT_NEXT_LINE(whitespace/operators) ReLU6Backward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>( count, top_diff, bottom_data, bottom_diff); CUDA_POST_KERNEL_CHECK; } } INSTANTIATE_LAYER_GPU_FUNCS(ReLU6Layer); } // namespace caffe
Finally, recompile SSD so that the new layer gets built and registered.
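Once the rebuild succeeds, the layer can be used from a prototxt like any other activation. A minimal sketch (the layer and blob names relu6_1 and conv1 are placeholders for your own network):

layer {
  name: "relu6_1"
  type: "ReLU6"
  bottom: "conv1"
  top: "conv1"
}

Because REGISTER_LAYER_CLASS(ReLU6) registers the layer under the type string "ReLU6", no further registration code is needed; applying it in-place (bottom and top sharing one blob), as ReLU layers usually are, saves memory.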