(1) focal loss
<1> The focal loss function takes the form:
$$\mathrm{FL} = -\alpha\,\bigl(1 - f(z_k)\bigr)^{\gamma}\,\log\bigl(f(z_k)\bigr),\qquad f(z_k) = \frac{e^{z_k}}{\sum_i e^{z_i}} \tag{1}$$
where z_k is the softmax input for the ground-truth class k, f(z_k) is the corresponding softmax output, -log(f(z_k)) is the softmax loss, and α and γ are the focal loss hyperparameters.
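To make (1) concrete, here is a minimal standalone sketch that evaluates it for a single sample (the logits, label, and α, γ values below are illustrative, not taken from the original layer):

```cpp
#include <cmath>
#include <cstdio>

// Minimal sketch: focal loss of one sample per equation (1).
// Logits, label, alpha, and gamma are illustrative values only.
int main() {
  const double z[3] = {2.0, 0.5, -1.0};  // softmax inputs z_i
  const int k = 0;                       // ground-truth class
  const double alpha = 0.25, gamma = 2.0;

  // f(z_k) = exp(z_k) / sum_i exp(z_i)
  double denom = 0.0;
  for (double zi : z) denom += std::exp(zi);
  const double p = std::exp(z[k]) / denom;

  // FL = -alpha * (1 - p)^gamma * log(p)
  const double fl = -alpha * std::pow(1.0 - p, gamma) * std::log(p);
  std::printf("p = %.4f, FL = %.6f\n", p, fl);
  return 0;
}
```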
<2> Differentiating the focal loss with respect to its input z_j:
By the chain rule:

$$\frac{\partial\,\mathrm{FL}}{\partial z_j} = -\alpha\left[\frac{\partial\,(1-f(z_k))^{\gamma}}{\partial z_j}\,\log f(z_k) + (1-f(z_k))^{\gamma}\,\frac{\partial \log f(z_k)}{\partial z_j}\right] \tag{2}$$
We now differentiate the two terms in (2) separately. Both use the softmax derivative $\frac{\partial f(z_k)}{\partial z_j} = f(z_k)\,\bigl(\mathbb{1}\{j{=}k\} - f(z_j)\bigr)$.
<a> First term:
$$\frac{\partial\,(1-f(z_k))^{\gamma}}{\partial z_j}\,\log f(z_k) = -\gamma\,(1-f(z_k))^{\gamma-1}\,f(z_k)\,\bigl(\mathbb{1}\{j{=}k\} - f(z_j)\bigr)\,\log f(z_k) \tag{3}$$
<b> Second term:
$$(1-f(z_k))^{\gamma}\,\frac{\partial \log f(z_k)}{\partial z_j} = (1-f(z_k))^{\gamma}\,\bigl(\mathbb{1}\{j{=}k\} - f(z_j)\bigr) \tag{4}$$
<c> Combining (3) and (4), and restoring the leading -α from (1):
$$\frac{\partial\,\mathrm{FL}}{\partial z_j} = \alpha\left[-\gamma\,(1-f(z_k))^{\gamma-1}\,f(z_k)\,\log f(z_k) + (1-f(z_k))^{\gamma}\right]\bigl(f(z_j) - \mathbb{1}\{j{=}k\}\bigr) \tag{5}$$
The trailing factor $f(z_j) - \mathbb{1}\{j{=}k\}$ in (5) (the boxed term in the original figure) is exactly the softmax loss backpropagation formula; focal loss only rescales it by the bracketed factor.
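Equation (5) can be sanity-checked against a finite-difference approximation. The following standalone sketch (all numeric values illustrative) compares the analytic gradient from (5) with central differences of (1):

```cpp
#include <cmath>
#include <cstdio>

// Softmax over 3 logits.
static void softmax(const double z[3], double p[3]) {
  double denom = 0.0;
  for (int i = 0; i < 3; ++i) denom += std::exp(z[i]);
  for (int i = 0; i < 3; ++i) p[i] = std::exp(z[i]) / denom;
}

// FL = -alpha * (1 - p_k)^gamma * log(p_k), equation (1).
static double focal_loss(const double z[3], int k, double alpha, double gamma) {
  double p[3];
  softmax(z, p);
  return -alpha * std::pow(1.0 - p[k], gamma) * std::log(p[k]);
}

int main() {
  const double z[3] = {1.2, -0.3, 0.7};
  const int k = 1;
  const double alpha = 0.25, gamma = 2.0, h = 1e-6;

  double p[3];
  softmax(z, p);
  // Bracketed factor in (5).
  const double grad = alpha * (-gamma * std::pow(1.0 - p[k], gamma - 1.0)
                      * p[k] * std::log(p[k]) + std::pow(1.0 - p[k], gamma));
  for (int j = 0; j < 3; ++j) {
    // Analytic gradient per (5): bracket * (f(z_j) - 1{j==k}).
    const double analytic = grad * (p[j] - (j == k ? 1.0 : 0.0));
    double zp[3] = {z[0], z[1], z[2]}, zm[3] = {z[0], z[1], z[2]};
    zp[j] += h; zm[j] -= h;
    const double numeric =
        (focal_loss(zp, k, alpha, gamma) - focal_loss(zm, k, alpha, gamma)) / (2 * h);
    std::printf("dFL/dz_%d: analytic %.8f, numeric %.8f\n", j, analytic, numeric);
  }
  return 0;
}
```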
(2) The Forward_cpu function in focal_loss_layer.cpp:
```cpp
Dtype loss = 0;
for (int i = 0; i < outer_num_; ++i) {
  for (int j = 0; j < inner_num_; j++) {
    const int label_value = static_cast<int>(label[i * inner_num_ + j]);
    if (has_ignore_label_ && label_value == ignore_label_) {
      continue;
    }
    DCHECK_GE(label_value, 0);
    DCHECK_LT(label_value, channels);
    const int index = i * dim + label_value * inner_num_ + j;
    // the power_prob_ blob is the first factor in (1), alpha * (1 - p)^gamma;
    // log_prob_ is the second factor, log(p)
    loss -= power_prob_data[index] * log_prob_data[index];
    ++count;
  }
}
```
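The snippet relies on two cached blobs. As an assumption about their contents (the layer's own setup code is not shown here), the relationship implied by (1) and by the comment above can be sketched as follows; fill_focal_caches is a hypothetical helper name:

```cpp
#include <algorithm>
#include <cmath>

// Sketch (an assumption, not the layer's verbatim code): how the cached
// blobs used above could be derived from the softmax output. With these
// definitions, loss -= power_prob_data[index] * log_prob_data[index]
// accumulates exactly -alpha * (1 - p)^gamma * log(p) from (1).
template <typename Dtype>
void fill_focal_caches(const Dtype* prob_data, Dtype* log_prob_data,
                       Dtype* power_prob_data, int n,
                       Dtype alpha, Dtype gamma, Dtype eps) {
  for (int i = 0; i < n; ++i) {
    log_prob_data[i] = std::log(std::max(prob_data[i], eps));  // log(p), clamped by eps
    power_prob_data[i] = alpha * std::pow(Dtype(1) - prob_data[i], gamma);  // alpha*(1-p)^gamma
  }
}
```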
(3) The Backward_cpu function in focal_loss_layer.cpp:
```cpp
for (int i = 0; i < outer_num_; ++i) {
  for (int j = 0; j < inner_num_; ++j) {
    // label
    const int label_value = static_cast<int>(label[i * inner_num_ + j]);

    // ignore label: zero the gradient over all channels for this sample
    if (has_ignore_label_ && label_value == ignore_label_) {
      for (int c = 0; c < channels; ++c) {
        bottom_diff[i * dim + c * inner_num_ + j] = 0;
      }
      continue;
    }

    // ind_i indexes the element of the ground-truth channel for this sample
    int ind_i = i * dim + label_value * inner_num_ + j;

    // grad is the bracketed factor in (5); the prob_ blob is the softmax
    // output, log_prob_ its log, power_prob_ the alpha * (1 - p)^gamma term
    Dtype grad = 0 - gamma_ * (power_prob_data[ind_i] / std::max(1 - prob_data[ind_i], eps))
                   * log_prob_data[ind_i] * prob_data[ind_i]
                 + power_prob_data[ind_i];
    for (int c = 0; c < channels; ++c) {
      int ind_j = i * dim + c * inner_num_ + j;
      if (c == label_value) {
        CHECK_EQ(ind_i, ind_j);
        // equation (5) with j == k: grad * (f(z_k) - 1)
        bottom_diff[ind_j] = grad * (prob_data[ind_i] - 1);
      } else {
        // equation (5) with j != k: grad * f(z_j)
        bottom_diff[ind_j] = grad * prob_data[ind_j];
      }
    }
    ++count;
  }
}
```
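As a final sanity check (illustrative values, not part of the layer): with γ = 0 and α = 1, the first term of grad vanishes and power_prob becomes 1, so bottom_diff reduces to the plain softmax loss gradient f(z_j) - 1{j=k}:

```cpp
#include <cassert>
#include <cmath>

int main() {
  // Illustrative values: at gamma = 0, alpha = 1 the bracketed factor in (5)
  // is exactly 1, so bottom_diff reduces to the softmax gradient p_j - 1{j==k}.
  const double p_k = 0.7;
  const double gamma = 0.0, alpha = 1.0;
  const double power_prob = alpha * std::pow(1.0 - p_k, gamma);  // == 1
  const double grad = -gamma * (power_prob / (1.0 - p_k))
                      * std::log(p_k) * p_k + power_prob;        // == 1
  assert(std::abs(grad - 1.0) < 1e-12);
  return 0;
}
```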