《SIFT原理與源碼分析》系列文章索引:http://www.cnblogs.com/tianyalu/p/5467813.html
由前一篇《
方向賦值》,為找到的關鍵點即SIFT特征點賦了值,包含位置、尺度和方向的信息。接下來的步驟是關鍵點描述,即用用一組向量將這個關鍵點描述出來,這個描述子不但包括關鍵點,也包括關鍵點周圍對其有貢獻的像素點。用來作為
目標匹配的依據(所以描述子應該有較高的獨特性,以保證匹配率),也可使關鍵點具有更多的不變特性,如光照變化、3D視點變化等。
SIFT描述子h(x,y,θ)是對關鍵點附近鄰域內高斯圖像梯度統計的結果,是一個三維矩陣,但通常用一個矢量來表示。矢量通過對三維矩陣按一定規律排列得到。
描述子采樣區域
特征描述子與關鍵點所在尺度有關,因此對梯度的求取應在特征點對應的高斯圖像上進行。將關鍵點附近划分成d×d個子區域,每個子區域尺寸為mσ個像元(d=4,m=3,σ為尺特征點的尺度值)。考慮到實際計算時需要雙線性插值,故計算的圖像區域為mσ(d+1),再考慮旋轉,則實際計算的圖像區域為
,如下圖所示:


源碼
Point pt(cvRound(ptf.x), cvRound(ptf.y)); //計算余弦,正弦,CV_PI/180:將角度值轉化為幅度值 float cos_t = cosf(ori*(float)(CV_PI/180)); float sin_t = sinf(ori*(float)(CV_PI/180)); float bins_per_rad = n / 360.f; float exp_scale = -1.f/(d * d * 0.5f); //d:SIFT_DESCR_WIDTH 4 float hist_width = SIFT_DESCR_SCL_FCTR * scl; // SIFT_DESCR_SCL_FCTR: 3 // scl: size*0.5f // 計算圖像區域半徑mσ(d+1)/2*sqrt(2) // 1.4142135623730951f 為根號2 int radius = cvRound(hist_width * 1.4142135623730951f * (d + 1) * 0.5f); cos_t /= hist_width; sin_t /= hist_width;
區域坐標軸旋轉
為了保證特征矢量具有旋轉不變性,要以特征點為中心,在附近鄰域內旋轉θ角,即旋轉為特征點的方向。
旋轉后區域內采樣點新的坐標為:

源碼
//計算采樣區域點坐標旋轉 for( i = -radius, k = 0; i <= radius; i++ ) for( j = -radius; j <= radius; j++ ) { /* Calculate sample's histogram array coords rotated relative to ori. Subtract 0.5 so samples that fall e.g. in the center of row 1 (i.e. r_rot = 1.5) have full weight placed in row 1 after interpolation. */ float c_rot = j * cos_t - i * sin_t; float r_rot = j * sin_t + i * cos_t; float rbin = r_rot + d/2 - 0.5f; float cbin = c_rot + d/2 - 0.5f; int r = pt.y + i, c = pt.x + j; if( rbin > -1 && rbin < d && cbin > -1 && cbin < d && r > 0 && r < rows - 1 && c > 0 && c < cols - 1 ) { float dx = (float)(img.at<short>(r, c+1) - img.at<short>(r, c-1)); float dy = (float)(img.at<short>(r-1, c) - img.at<short>(r+1, c)); X[k] = dx; Y[k] = dy; RBin[k] = rbin; CBin[k] = cbin; W[k] = (c_rot * c_rot + r_rot * r_rot)*exp_scale; k++; } }
計算采樣區域梯度直方圖
將旋轉后區域划分為d×d個子區域(每個區域間隔為mσ像元),在子區域內計算8個方向的梯度直方圖,繪制每個方向梯度方向的累加值,形成一個種子點。
與求主方向不同的是,此時,每個子區域梯度方向直方圖將0°~360°划分為8個方向區間,每個區間為45°。即每個種子點有8個方向區間的梯度強度信息。由於存在d×d,即4×4個子區域,所以最終共有4×4×8=128個數據,形成128維SIFT特征矢量。

對特征矢量需要加權處理,加權采用mσd/2的標准高斯函數。為了除去光照變化影響,還有一步歸一化處理。
源碼
//計算梯度直方圖 for( k = 0; k < len; k++ ) { float rbin = RBin[k], cbin = CBin[k]; float obin = (Ori[k] - ori)*bins_per_rad; float mag = Mag[k]*W[k]; int r0 = cvFloor( rbin ); int c0 = cvFloor( cbin ); int o0 = cvFloor( obin ); rbin -= r0; cbin -= c0; obin -= o0; //n為SIFT_DESCR_HIST_BINS:8,即將360°分為8個區間 if( o0 < 0 ) o0 += n; if( o0 >= n ) o0 -= n; // histogram update using tri-linear interpolation // 雙線性插值 float v_r1 = mag*rbin, v_r0 = mag - v_r1; float v_rc11 = v_r1*cbin, v_rc10 = v_r1 - v_rc11; float v_rc01 = v_r0*cbin, v_rc00 = v_r0 - v_rc01; float v_rco111 = v_rc11*obin, v_rco110 = v_rc11 - v_rco111; float v_rco101 = v_rc10*obin, v_rco100 = v_rc10 - v_rco101; float v_rco011 = v_rc01*obin, v_rco010 = v_rc01 - v_rco011; float v_rco001 = v_rc00*obin, v_rco000 = v_rc00 - v_rco001; int idx = ((r0+1)*(d+2) + c0+1)*(n+2) + o0; hist[idx] += v_rco000; hist[idx+1] += v_rco001; hist[idx+(n+2)] += v_rco010; hist[idx+(n+3)] += v_rco011; hist[idx+(d+2)*(n+2)] += v_rco100; hist[idx+(d+2)*(n+2)+1] += v_rco101; hist[idx+(d+3)*(n+2)] += v_rco110; hist[idx+(d+3)*(n+2)+1] += v_rco111; }
關鍵點描述源碼
// SIFT關鍵點特征描述 // SIFT描述子是關鍵點領域高斯圖像提取統計結果的一種表示 static void calcSIFTDescriptor( const Mat& img, Point2f ptf, float ori, float scl, int d, int n, float* dst ) { Point pt(cvRound(ptf.x), cvRound(ptf.y)); //計算余弦,正弦,CV_PI/180:將角度值轉化為幅度值 float cos_t = cosf(ori*(float)(CV_PI/180)); float sin_t = sinf(ori*(float)(CV_PI/180)); float bins_per_rad = n / 360.f; float exp_scale = -1.f/(d * d * 0.5f); //d:SIFT_DESCR_WIDTH 4 float hist_width = SIFT_DESCR_SCL_FCTR * scl; // SIFT_DESCR_SCL_FCTR: 3 // scl: size*0.5f // 計算圖像區域半徑mσ(d+1)/2*sqrt(2) // 1.4142135623730951f 為根號2 int radius = cvRound(hist_width * 1.4142135623730951f * (d + 1) * 0.5f); cos_t /= hist_width; sin_t /= hist_width; int i, j, k, len = (radius*2+1)*(radius*2+1), histlen = (d+2)*(d+2)*(n+2); int rows = img.rows, cols = img.cols; AutoBuffer<float> buf(len*6 + histlen); float *X = buf, *Y = X + len, *Mag = Y, *Ori = Mag + len, *W = Ori + len; float *RBin = W + len, *CBin = RBin + len, *hist = CBin + len; //初始化直方圖 for( i = 0; i < d+2; i++ ) { for( j = 0; j < d+2; j++ ) for( k = 0; k < n+2; k++ ) hist[(i*(d+2) + j)*(n+2) + k] = 0.; } //計算采樣區域點坐標旋轉 for( i = -radius, k = 0; i <= radius; i++ ) for( j = -radius; j <= radius; j++ ) { /* Calculate sample's histogram array coords rotated relative to ori. Subtract 0.5 so samples that fall e.g. in the center of row 1 (i.e. r_rot = 1.5) have full weight placed in row 1 after interpolation. */ float c_rot = j * cos_t - i * sin_t; float r_rot = j * sin_t + i * cos_t; float rbin = r_rot + d/2 - 0.5f; float cbin = c_rot + d/2 - 0.5f; int r = pt.y + i, c = pt.x + j; if( rbin > -1 && rbin < d && cbin > -1 && cbin < d && r > 0 && r < rows - 1 && c > 0 && c < cols - 1 ) { float dx = (float)(img.at<short>(r, c+1) - img.at<short>(r, c-1)); float dy = (float)(img.at<short>(r-1, c) - img.at<short>(r+1, c)); X[k] = dx; Y[k] = dy; RBin[k] = rbin; CBin[k] = cbin; W[k] = (c_rot * c_rot + r_rot * r_rot)*exp_scale; k++; } } len = k; fastAtan2(Y, X, Ori, len, true); magnitude(X, Y, Mag, len); exp(W, W, len); //計算梯度直方圖 for( k = 0; k < len; k++ ) { float rbin = RBin[k], cbin = CBin[k]; float obin = (Ori[k] - ori)*bins_per_rad; float mag = Mag[k]*W[k]; int r0 = cvFloor( rbin ); int c0 = cvFloor( cbin ); int o0 = cvFloor( obin ); rbin -= r0; cbin -= c0; obin -= o0; //n為SIFT_DESCR_HIST_BINS:8,即將360°分為8個區間 if( o0 < 0 ) o0 += n; if( o0 >= n ) o0 -= n; // histogram update using tri-linear interpolation // 雙線性插值 float v_r1 = mag*rbin, v_r0 = mag - v_r1; float v_rc11 = v_r1*cbin, v_rc10 = v_r1 - v_rc11; float v_rc01 = v_r0*cbin, v_rc00 = v_r0 - v_rc01; float v_rco111 = v_rc11*obin, v_rco110 = v_rc11 - v_rco111; float v_rco101 = v_rc10*obin, v_rco100 = v_rc10 - v_rco101; float v_rco011 = v_rc01*obin, v_rco010 = v_rc01 - v_rco011; float v_rco001 = v_rc00*obin, v_rco000 = v_rc00 - v_rco001; int idx = ((r0+1)*(d+2) + c0+1)*(n+2) + o0; hist[idx] += v_rco000; hist[idx+1] += v_rco001; hist[idx+(n+2)] += v_rco010; hist[idx+(n+3)] += v_rco011; hist[idx+(d+2)*(n+2)] += v_rco100; hist[idx+(d+2)*(n+2)+1] += v_rco101; hist[idx+(d+3)*(n+2)] += v_rco110; hist[idx+(d+3)*(n+2)+1] += v_rco111; } // finalize histogram, since the orientation histograms are circular // 最后確定直方圖,目標方向直方圖是圓的 for( i = 0; i < d; i++ ) for( j = 0; j < d; j++ ) { int idx = ((i+1)*(d+2) + (j+1))*(n+2); hist[idx] += hist[idx+n]; hist[idx+1] += hist[idx+n+1]; for( k = 0; k < n; k++ ) dst[(i*d + j)*n + k] = hist[idx+k]; } // copy histogram to the descriptor, // apply hysteresis thresholding // and scale the result, so that it can be easily converted // to byte array float nrm2 = 0; len = d*d*n; for( k = 0; k < len; k++ ) nrm2 += dst[k]*dst[k]; float thr = std::sqrt(nrm2)*SIFT_DESCR_MAG_THR; for( i = 0, nrm2 = 0; i < k; i++ ) { float val = std::min(dst[i], thr); dst[i] = val; nrm2 += val*val; } nrm2 = SIFT_INT_DESCR_FCTR/std::max(std::sqrt(nrm2), FLT_EPSILON); for( k = 0; k < len; k++ ) { dst[k] = saturate_cast<uchar>(dst[k]*nrm2); } }
至此SIFT描述子生成,SIFT算法也基本完成了~參見《SIFT原理與源碼分析》