先貼上我對Opencv3.1中sift源碼的注釋吧,雖然還有很多沒看懂。先從detectAndCompute看起
void SIFT_Impl::detectAndCompute(InputArray _image, InputArray _mask, std::vector<KeyPoint>& keypoints, OutputArray _descriptors, bool useProvidedKeypoints) { int firstOctave = -1, actualNOctaves = 0, actualNLayers = 0; Mat image = _image.getMat(), mask = _mask.getMat(); if( image.empty() || image.depth() != CV_8U ) CV_Error( Error::StsBadArg, "image is empty or has incorrect depth (!=CV_8U)" ); if( !mask.empty() && mask.type() != CV_8UC1 ) CV_Error( Error::StsBadArg, "mask has incorrect type (!=CV_8UC1)" ); if( useProvidedKeypoints ) { firstOctave = 0; int maxOctave = INT_MIN; for( size_t i = 0; i < keypoints.size(); i++ ) { int octave, layer; float scale; unpackOctave(keypoints[i], octave, layer, scale); firstOctave = std::min(firstOctave, octave); maxOctave = std::max(maxOctave, octave); actualNLayers = std::max(actualNLayers, layer-2); } firstOctave = std::min(firstOctave, 0); CV_Assert( firstOctave >= -1 && actualNLayers <= nOctaveLayers ); actualNOctaves = maxOctave - firstOctave + 1; } Mat base = createInitialImage(image, firstOctave < 0, (float)sigma); //是否double size再模糊,返回模糊后的float型Mat std::vector<Mat> gpyr, dogpyr; // nOcatves = actualNOctaves 或者 cvRound[log(min(cols, rows)) / log(2) - 2] + 1 -- 如果是500 則大概返回8 int nOctaves = actualNOctaves > 0 ? actualNOctaves : cvRound(std::log( (double)std::min( base.cols, base.rows ) ) / std::log(2.) - 2) - firstOctave; //double t, tf = getTickFrequency(); //t = (double)getTickCount(); buildGaussianPyramid(base, gpyr, nOctaves); //構建尺度金字塔 buildDoGPyramid(gpyr, dogpyr); //構建DOG金字塔 //t = (double)getTickCount() - t; //printf("pyramid construction time: %g\n", t*1000./tf); if( !useProvidedKeypoints ) { //t = (double)getTickCount(); findScaleSpaceExtrema(gpyr, dogpyr, keypoints); //找到極值點,過濾極值點,根據極值點主方向計算keypoints的angle KeyPointsFilter::removeDuplicated( keypoints ); //刪去坐標相同,size相同(layer_id),角度相同的點 if( nfeatures > 0 ) KeyPointsFilter::retainBest(keypoints, nfeatures); //takes keypoints and culls them by the response //t = (double)getTickCount() - t; //printf("keypoint detection time: %g\n", t*1000./tf); if( firstOctave < 0 ) for( size_t i = 0; i < keypoints.size(); i++ ) { KeyPoint& kpt = keypoints[i]; float scale = 1.f/(float)(1 << -firstOctave); //這里具體在做什么也不清楚 kpt.octave = (kpt.octave & ~255) | ((kpt.octave + firstOctave) & 255); kpt.pt *= scale; kpt.size *= scale; } if( !mask.empty() ) KeyPointsFilter::runByPixelsMask( keypoints, mask ); } else { // filter keypoints by mask //KeyPointsFilter::runByPixelsMask( keypoints, mask ); } if( _descriptors.needed() ) //如果要計算描述子 { //t = (double)getTickCount(); int dsize = descriptorSize(); _descriptors.create((int)keypoints.size(), dsize, CV_32F); //初始化內存 Mat descriptors = _descriptors.getMat(); calcDescriptors(gpyr, keypoints, descriptors, nOctaveLayers, firstOctave); //傳入 //t = (double)getTickCount() - t; //printf("descriptor extraction time: %g\n", t*1000./tf); } }
該函數分別調用了
createInitialImage
buildGaussianPyramid
buildDoGPyramid
findScaleSpaceExtrema 找到極值點
adjustLocalExtrema 去掉不好的極值點
calcOrientationHist 計算主方向
KeyPointsFilter::removeDuplicated 刪除重復的點
KeyPointsFilter::retainBest 根據響應保留最好的點
calcDescriptors 計算最終的方向描述符
然后各自的源碼注釋: createInitialImage就是將初始圖片進行兩倍放大然后模糊作為高斯金字塔的底。
//如果是要doubleImageSize就resize一下再模糊,否則直接模糊 static Mat createInitialImage( const Mat& img, bool doubleImageSize, float sigma ) //image, firstOctave < 0, (float)sigma { Mat gray, gray_fpt; if( img.channels() == 3 || img.channels() == 4 ) cvtColor(img, gray, COLOR_BGR2GRAY); //轉成灰度圖像 else img.copyTo(gray); //拷貝到gray中 //轉成float型進行高斯模糊,但是為什么不轉到0-1 gray.convertTo(gray_fpt, DataType<sift_wt>::type, SIFT_FIXPT_SCALE, 0); // SIFT_FIXPT_SCALE -> 1 float sig_diff; if( doubleImageSize ) { sig_diff = sqrtf( std::max(sigma * sigma - SIFT_INIT_SIGMA * SIFT_INIT_SIGMA * 4, 0.01f) ); // SIFT_INIT_SIGMA = 0.5f Mat dbl; resize(gray_fpt, dbl, Size(gray.cols*2, gray.rows*2), 0, 0, INTER_LINEAR); //resize成兩倍 GaussianBlur(dbl, dbl, Size(), sig_diff, sig_diff); //sig_diff=sigma^2-1, 0.01f // can differ but they both must be positive and odd. Or, they can be zero’s and then they are computed from sigma* return dbl; } else { sig_diff = sqrtf( std::max(sigma * sigma - SIFT_INIT_SIGMA * SIFT_INIT_SIGMA, 0.01f) ); GaussianBlur(gray_fpt, gray_fpt, Size(), sig_diff, sig_diff); return gray_fpt; } }
buildGaussianPyramid 就是構建高斯金字塔,模擬人眼的尺度空間吧,這幾個還比較好理解。
void SIFT_Impl::buildGaussianPyramid( const Mat& base, std::vector<Mat>& pyr, int nOctaves ) const //base, gpyr, nOctaves { std::vector<double> sig(nOctaveLayers + 3); //nOctaveLayers是輸入, +3是為了保證尺度連續 pyr.resize(nOctaves*(nOctaveLayers + 3)); // precompute Gaussian sigmas using the following formula: // \sigma_{total}^2 = \sigma_{i}^2 + \sigma_{i-1}^2 sig[0] = sigma;//sig每層的基礎sigma double k = std::pow( 2., 1. / nOctaveLayers ); //k因子,== 2^(1/s) s is number of layers for( int i = 1; i < nOctaveLayers + 3; i++ ) { double sig_prev = std::pow(k, (double)(i-1))*sigma; // k^(i-1) * sigma 每層的sigma double sig_total = sig_prev*k; //k^i * sigma sig[i] = std::sqrt(sig_total*sig_total - sig_prev*sig_prev); //sqrt(k^2-1) * sig_prev 這里不知道為什么這樣算,和博客中的計算方法不一樣 } for( int o = 0; o < nOctaves; o++ ) { for( int i = 0; i < nOctaveLayers + 3; i++ ) { Mat& dst = pyr[o*(nOctaveLayers + 3) + i]; //取出第o個金字塔的第i層 if( o == 0 && i == 0 ) //如果是第0個金字塔的第0層就直接賦值就好 dst = base; // base of new octave is halved image from end of previous octave else if( i == 0 ) //如果是下一個金字塔的第0層,需要用第nOctaveLayers層進行縮放 { const Mat& src = pyr[(o-1)*(nOctaveLayers + 3) + nOctaveLayers]; resize(src, dst, Size(src.cols/2, src.rows/2), //縮小一倍 0, 0, INTER_NEAREST); } else { const Mat& src = pyr[o*(nOctaveLayers + 3) + i-1]; //平常的層就由其上一層進行模糊得到 GaussianBlur(src, dst, Size(), sig[i], sig[i]); } } } }
buildDoGPyramid,計算DOG
void SIFT_Impl::buildDoGPyramid( const std::vector<Mat>& gpyr, std::vector<Mat>& dogpyr ) const { int nOctaves = (int)gpyr.size()/(nOctaveLayers + 3); //從高斯尺度金字塔中計算出有多少座金字塔 dogpyr.resize( nOctaves*(nOctaveLayers + 2) ); //DOG金字塔的層數是高斯金字塔層數-1 for( int o = 0; o < nOctaves; o++ ) { for( int i = 0; i < nOctaveLayers + 2; i++ ) { const Mat& src1 = gpyr[o*(nOctaveLayers + 3) + i]; //取該層 const Mat& src2 = gpyr[o*(nOctaveLayers + 3) + i + 1]; //取上面一層 Mat& dst = dogpyr[o*(nOctaveLayers + 2) + i]; subtract(src2, src1, dst, noArray(), DataType<sift_wt>::type); //做差 src2-src1 -> dst 上面一層減去下面一層然后存到dst中 } } }
findScaleSpaceExtrema,就是求尺度空間的極大極小點
// // Detects features at extrema in DoG scale space. Bad features are discarded // based on contrast and ratio of principal curvatures. void SIFT_Impl::findScaleSpaceExtrema( const std::vector<Mat>& gauss_pyr, const std::vector<Mat>& dog_pyr, std::vector<KeyPoint>& keypoints ) const { int nOctaves = (int)gauss_pyr.size()/(nOctaveLayers + 3); //獲得金字塔的個數 int threshold = cvFloor(0.5 * contrastThreshold / nOctaveLayers * 255 * SIFT_FIXPT_SCALE); //contrastThreshold由用戶給出 //threshold其實和contrastThreshold成正比和nOctaveLayers成反比 const int n = SIFT_ORI_HIST_BINS; //SIFT_ORI_HIST_BINS = 36; 方向的bin的個數 float hist[n]; //直方圖 KeyPoint kpt; //關鍵點 keypoints.clear(); //清除原本的keypoints, //用clear方法並沒有釋放內存,詳情見:http://blog.jobbole.com/37700/ //可以考慮vector<KeyPoint>(keypoints).swap(keypoints); for( int o = 0; o < nOctaves; o++ ) //從第0個金字塔開始 for( int i = 1; i <= nOctaveLayers; i++ ) //這個范圍需要舉個例子就明白了5+3層的G層就有5+2的DOG層,然后去頭去尾從第1到第6層,第6層的標號就是nOctaveLayers { int idx = o*(nOctaveLayers+2)+i; const Mat& img = dog_pyr[idx]; //取出中間一層 const Mat& prev = dog_pyr[idx-1]; //取出下面一層 const Mat& next = dog_pyr[idx+1]; //取出上面一層 int step = (int)img.step1(); int rows = img.rows, cols = img.cols; //SIFT_IMG_BORDER = 5; 這個僅僅只是為了防止越界 //從5開始到rows-5,防止越界 for( int r = SIFT_IMG_BORDER; r < rows-SIFT_IMG_BORDER; r++) { const sift_wt* currptr = img.ptr<sift_wt>(r); //獲取img的第r行的float指針 const sift_wt* prevptr = prev.ptr<sift_wt>(r); const sift_wt* nextptr = next.ptr<sift_wt>(r); for( int c = SIFT_IMG_BORDER; c < cols-SIFT_IMG_BORDER; c++) { sift_wt val = currptr[c]; //取得img的 第r行第c列的像素值 // find local extrema with pixel accuracy //極大極小值都需要,是在上層下層和周圍3*3的像素正方體下判斷極值 if( std::abs(val) > threshold && //絕對值大於閾值要響應強烈 ((val > 0 && val >= currptr[c-1] && val >= currptr[c+1] && val >= currptr[c-step-1] && val >= currptr[c-step] && val >= currptr[c-step+1] && val >= currptr[c+step-1] && val >= currptr[c+step] && val >= currptr[c+step+1] && val >= nextptr[c] && val >= nextptr[c-1] && val >= nextptr[c+1] && val >= nextptr[c-step-1] && val >= nextptr[c-step] && val >= nextptr[c-step+1] && val >= nextptr[c+step-1] && val >= nextptr[c+step] && val >= nextptr[c+step+1] && val >= prevptr[c] && val >= prevptr[c-1] && val >= prevptr[c+1] && val >= prevptr[c-step-1] && val >= prevptr[c-step] && val >= prevptr[c-step+1] && val >= prevptr[c+step-1] && val >= prevptr[c+step] && val >= prevptr[c+step+1]) || (val < 0 && val <= currptr[c-1] && val <= currptr[c+1] && val <= currptr[c-step-1] && val <= currptr[c-step] && val <= currptr[c-step+1] && val <= currptr[c+step-1] && val <= currptr[c+step] && val <= currptr[c+step+1] && val <= nextptr[c] && val <= nextptr[c-1] && val <= nextptr[c+1] && val <= nextptr[c-step-1] && val <= nextptr[c-step] && val <= nextptr[c-step+1] && val <= nextptr[c+step-1] && val <= nextptr[c+step] && val <= nextptr[c+step+1] && val <= prevptr[c] && val <= prevptr[c-1] && val <= prevptr[c+1] && val <= prevptr[c-step-1] && val <= prevptr[c-step] && val <= prevptr[c-step+1] && val <= prevptr[c+step-1] && val <= prevptr[c+step] && val <= prevptr[c+step+1]))) { int r1 = r, c1 = c, layer = i; //取得r行號,c列號,layer在DOG的層號 if( !adjustLocalExtrema(dog_pyr, kpt, o, layer, r1, c1, //過濾不好的極值點 nOctaveLayers, (float)contrastThreshold, (float)edgeThreshold, (float)sigma) ) continue; //kpt.size = sigma*powf(2.f, (layer + xi) / nOctaveLayers)*(1 << octv)*2; 后面這兩項(1 << octv)*2不是和0.5f/(1 << o)低效了嗎 float scl_octv = kpt.size*0.5f/(1 << o); //o越大就要縮小1/2^o倍,由於金字塔之間是有resize的 float omax = calcOrientationHist(gauss_pyr[o*(nOctaveLayers+3) + layer], Point(c1, r1), //對於極值點求方向直方圖 cvRound(SIFT_ORI_RADIUS * scl_octv), //SIFT_ORI_RADIUS = 3 * SIFT_ORI_SIG_FCTR; SIFT_ORI_SIG_FCTR * scl_octv, // SIFT_ORI_SIG_FCTR = 1.5f; hist, n); float mag_thr = (float)(omax * SIFT_ORI_PEAK_RATIO); // SIFT_ORI_PEAK_RATIO = 0.8f; for( int j = 0; j < n; j++ ) { int l = j > 0 ? j - 1 : n - 1; int r2 = j < n-1 ? j + 1 : 0; if( hist[j] > hist[l] && hist[j] > hist[r2] && hist[j] >= mag_thr ) //比較該bin和周邊兩個bin的值大小,並且要大於等於最大方向幅值的0.8倍 { //這里的插值求極值的公式,估計是通過Newton等距插值然后求導得到,我算了一早上沒算出來,但是應該是這樣推導沒錯 //Adds features to an array for every orientation in a histogram greater than a specified threshold. float bin = j + 0.5f * (hist[l]-hist[r2]) / (hist[l] - 2*hist[j] + hist[r2]); //Interpolates a histogram peak from left, center, and right values bin = bin < 0 ? n + bin : bin >= n ? bin - n : bin; //控制在[0,n]當中 kpt.angle = 360.f - (float)((360.f/n) * bin); if(std::abs(kpt.angle - 360.f) < FLT_EPSILON) //FLT_EPSILON是浮點數非常小的一個數1.19209290E-07F kpt.angle = 0.f; keypoints.push_back(kpt); } } } } } } }
adjustLocalExtrema,使用泰勒展開然后求導得到極值點的逼近算法,不斷逼近可能的極值點,如果所走的步長小於0.5,在離散的條件下就是該點為極值點。並且計算極值點的幅值並且要求該極值要足夠的大,然后還有主曲率用Hassian矩陣來判斷就像Harris角點檢測一樣,取出邊界響應的極值點。最后還對keypoint的坐標等進行編碼(完全看不懂),可以使用unpackOctave進行解碼。
// // Interpolates a scale-space extremum's location and scale to subpixel // accuracy to form an image feature. Rejects features with low contrast. // Based on Section 4 of Lowe's paper. static bool adjustLocalExtrema( const std::vector<Mat>& dog_pyr, KeyPoint& kpt, int octv, //octv,layer,r,c顯示了關鍵點的位置 int& layer, int& r, int& c, int nOctaveLayers, //還有四組參數nOctaveLayers,contrastThreshold,edgeThreshold,sigma float contrastThreshold, float edgeThreshold, float sigma ) { const float img_scale = 1.f/(255*SIFT_FIXPT_SCALE); //pixel的scale因子 const float deriv_scale = img_scale*0.5f; const float second_deriv_scale = img_scale; const float cross_deriv_scale = img_scale*0.25f; float xi=0, xr=0, xc=0, contr=0; int i = 0; for( ; i < SIFT_MAX_INTERP_STEPS; i++ ) // maximum steps of keypoint interpolation before failure { int idx = octv*(nOctaveLayers+2) + layer; //找到該層 const Mat& img = dog_pyr[idx];//分別獲得該層和上下兩層 const Mat& prev = dog_pyr[idx-1]; const Mat& next = dog_pyr[idx+1]; Vec3f dD((img.at<sift_wt>(r, c+1) - img.at<sift_wt>(r, c-1))*deriv_scale, //列上的差分/2就是一階導數,然后還要歸一化到0-1 (img.at<sift_wt>(r+1, c) - img.at<sift_wt>(r-1, c))*deriv_scale, //行上的差分 (next.at<sift_wt>(r, c) - prev.at<sift_wt>(r, c))*deriv_scale); //尺度上的差分 float v2 = (float)img.at<sift_wt>(r, c)*2; float dxx = (img.at<sift_wt>(r, c+1) + img.at<sift_wt>(r, c-1) - v2)*second_deriv_scale; //求導 float dyy = (img.at<sift_wt>(r+1, c) + img.at<sift_wt>(r-1, c) - v2)*second_deriv_scale; float dss = (next.at<sift_wt>(r, c) + prev.at<sift_wt>(r, c) - v2)*second_deriv_scale; float dxy = (img.at<sift_wt>(r+1, c+1) - img.at<sift_wt>(r+1, c-1) - img.at<sift_wt>(r-1, c+1) + img.at<sift_wt>(r-1, c-1))*cross_deriv_scale; float dxs = (next.at<sift_wt>(r, c+1) - next.at<sift_wt>(r, c-1) - prev.at<sift_wt>(r, c+1) + prev.at<sift_wt>(r, c-1))*cross_deriv_scale; float dys = (next.at<sift_wt>(r+1, c) - next.at<sift_wt>(r-1, c) - prev.at<sift_wt>(r+1, c) + prev.at<sift_wt>(r-1, c))*cross_deriv_scale; Matx33f H(dxx, dxy, dxs, dxy, dyy, dys, dxs, dys, dss); Vec3f X = H.solve(dD, DECOMP_LU); //LU分解 xi = -X[2]; xr = -X[1]; xc = -X[0]; if( std::abs(xi) < 0.5f && std::abs(xr) < 0.5f && std::abs(xc) < 0.5f ) //發現只能移動小於1/2,那其實極值點就是該點,在離散情況下就不需要移動了 break; if( std::abs(xi) > (float)(INT_MAX/3) || //如果太過於大了就直接拋棄 std::abs(xr) > (float)(INT_MAX/3) || //什么情況會發生這個,我也不太清楚 std::abs(xc) > (float)(INT_MAX/3) ) //這個問題需要看數值分析 return false; c += cvRound(xc); r += cvRound(xr); layer += cvRound(xi); if( layer < 1 || layer > nOctaveLayers || c < SIFT_IMG_BORDER || c >= img.cols - SIFT_IMG_BORDER || r < SIFT_IMG_BORDER || r >= img.rows - SIFT_IMG_BORDER ) //這個條件根本不可能發生。是為了魯棒吧 return false; } // ensure convergence of interpolation if( i >= SIFT_MAX_INTERP_STEPS ) //如果移動次數太多也拋棄,代表其實可能是局部像素抖動使得恰好其滿足了極值條件 return false; { int idx = octv*(nOctaveLayers+2) + layer; const Mat& img = dog_pyr[idx]; const Mat& prev = dog_pyr[idx-1]; const Mat& next = dog_pyr[idx+1]; Matx31f dD((img.at<sift_wt>(r, c+1) - img.at<sift_wt>(r, c-1))*deriv_scale, (img.at<sift_wt>(r+1, c) - img.at<sift_wt>(r-1, c))*deriv_scale, (next.at<sift_wt>(r, c) - prev.at<sift_wt>(r, c))*deriv_scale); float t = dD.dot(Matx31f(xc, xr, xi)); contr = img.at<sift_wt>(r, c)*img_scale + t * 0.5f; if( std::abs( contr ) * nOctaveLayers < contrastThreshold ) //判斷新的極值點的極值要大 return false; // principal curvatures are computed using the trace and det of Hessian float v2 = img.at<sift_wt>(r, c)*2.f; //類似Harris角點檢測去除線條的極值點和 float dxx = (img.at<sift_wt>(r, c+1) + img.at<sift_wt>(r, c-1) - v2)*second_deriv_scale; float dyy = (img.at<sift_wt>(r+1, c) + img.at<sift_wt>(r-1, c) - v2)*second_deriv_scale; float dxy = (img.at<sift_wt>(r+1, c+1) - img.at<sift_wt>(r+1, c-1) - img.at<sift_wt>(r-1, c+1) + img.at<sift_wt>(r-1, c-1)) * cross_deriv_scale; float tr = dxx + dyy; float det = dxx * dyy - dxy * dxy; if( det <= 0 || tr*tr*edgeThreshold >= (edgeThreshold + 1)*(edgeThreshold + 1)*det ) return false; } kpt.pt.x = (c + xc) * (1 << octv); //不明白, (c+xc) * (2^octv) 這里編碼了,估計是使用unpackOctave進行解碼 kpt.pt.y = (r + xr) * (1 << octv); kpt.octave = octv + (layer << 8) + (cvRound((xi + 0.5)*255) << 16); kpt.size = sigma*powf(2.f, (layer + xi) / nOctaveLayers)*(1 << octv)*2; kpt.response = std::abs(contr); //響應就是泰勒展開后求導后的帶入的極值 return true; }
calcOrientationHist 計算極值點的主方向,在findScaleSpaceExtrema中最后有個細節就是它還保存一些方向較好的極值點,結果就是同一個極值點坐標可能有兩個angle
/* @img: 極值點所處的高斯金字塔的第layer層的圖像 @pt: 極值點的列號和行號 @radius: radius -> SIFT_ORI_RADIUS*scl_octv -> SIFT_ORI_RADIUS*pt.size -> SIFT_ORI_RADIUS*sigma*powf(2.f, (layer + xi) / nOctaveLayers) @sigma: SIFT_ORI_SIG_FCTR * scl_octv, // SIFT_ORI_SIG_FCTR = 1.5f; @hist: 直方圖 float hist[n]; @n: 直方圖bin的個數 */ // Computes a gradient orientation histogram at a specified pixel static float calcOrientationHist( const Mat& img, Point pt, int radius, float sigma, float* hist, int n ) { int i, j, k, len = (radius*2+1)*(radius*2+1); //len直徑+1變奇數 float expf_scale = -1.f/(2.f * sigma * sigma); //與sigma^2成反比的一個東西 AutoBuffer<float> buf(len*4 + n+4); //n個bin, float *X = buf, *Y = X + len, *Mag = X, *Ori = Y + len, *W = Ori + len; //X指向0, Y指向len, Mag指向X, Ori指向2len, W指向3len float* temphist = W + len + 2; //temphist指向4len+2, 后面4len 和 4len+1有用處, 以及4len+n 和 4len+n+1也有用處, 用來平滑 for( i = 0; i < n; i++ ) //初始化為0 temphist[i] = 0.f; for( i = -radius, k = 0; i <= radius; i++ ) //在特征點pt.x,pt.y周圍按radius取一塊正方塊 { int y = pt.y + i; //取該正方塊左上的點 if( y <= 0 || y >= img.rows - 1 ) //如果越界就該塊的下一個像素 continue; for( j = -radius; j <= radius; j++ ) { int x = pt.x + j; if( x <= 0 || x >= img.cols - 1 ) //如果越界就下一個 continue; float dx = (float)(img.at<sift_wt>(y, x+1) - img.at<sift_wt>(y, x-1)); //計算該塊所取出的像素點的x方向梯度 float dy = (float)(img.at<sift_wt>(y-1, x) - img.at<sift_wt>(y+1, x)); //y方向梯度 X[k] = dx; Y[k] = dy; W[k] = (i*i + j*j)*expf_scale; //X存該像素的dx, Y存dy, W存距離與極值點的距離的平方的expf_scale因子倍 k++; //記錄取出了多少塊,其實就是沒有越界的像素個數 } } len = k; // compute gradient values, orientations and the weights over the pixel neighborhood cv::hal::exp32f(W, W, len); //計算exp^W存到W中 cv::hal::fastAtan2(Y, X, Ori, len, true); //計算arctan(y/x)存到Ori中, true是以角度返回, false以弧度返回, 范圍是0-360 cv::hal::magnitude32f(X, Y, Mag, len); //計算幅值 存入到 Mag中 for( k = 0; k < len; k++ ) { int bin = cvRound((n/360.f)*Ori[k]); //計算所屬的bin號 if( bin >= n ) bin -= n; if( bin < 0 ) bin += n; temphist[bin] += W[k]*Mag[k]; //根據距離進行加權 } // smooth the histogram temphist[-1] = temphist[n-1]; temphist[-2] = temphist[n-2]; temphist[n] = temphist[0]; temphist[n+1] = temphist[1]; for( i = 0; i < n; i++ ) { hist[i] = (temphist[i-2] + temphist[i+2])*(1.f/16.f) + //其實就是加權平滑,用左右2個距離的直方圖幅值進行平滑 (temphist[i-1] + temphist[i+1])*(4.f/16.f) + temphist[i]*(6.f/16.f); } float maxval = hist[0]; for( i = 1; i < n; i++ ) maxval = std::max(maxval, hist[i]); //返回最大的方向的直方圖幅值 return maxval; }
KeyPointsFilter::removeDuplicated( keypoints ); //刪去坐標相同,size相同(layer_id),角度相同的點,可以去看源碼
KeyPointsFilter::retainBest(keypoints, nfeatures); //takes keypoints and culls them by the response
https://github.com/opencv/opencv/blob/master/modules/features2d/src/keypoint.cpp
calcDescriptors 就是計算極值點的梯度方向描述符
static void calcDescriptors(const std::vector<Mat>& gpyr, const std::vector<KeyPoint>& keypoints, Mat& descriptors, int nOctaveLayers, int firstOctave ) { int d = SIFT_DESCR_WIDTH, n = SIFT_DESCR_HIST_BINS; for( size_t i = 0; i < keypoints.size(); i++ ) { KeyPoint kpt = keypoints[i]; int octave, layer; float scale; unpackOctave(kpt, octave, layer, scale); CV_Assert(octave >= firstOctave && layer <= nOctaveLayers+2); float size=kpt.size*scale; Point2f ptf(kpt.pt.x*scale, kpt.pt.y*scale); //用unpackOctave進行解碼,恢復了x,y坐標 const Mat& img = gpyr[(octave - firstOctave)*(nOctaveLayers + 3) + layer]; float angle = 360.f - kpt.angle; if(std::abs(angle - 360.f) < FLT_EPSILON) angle = 0.f; calcSIFTDescriptor(img, ptf, angle, size*0.5f, d, n, descriptors.ptr<float>((int)i)); } }
calcSIFTDescriptor 很難懂,尤其是在旋轉到主方向那里,還有插值那里。最后那里有個細節就是這個描述符輸出的是一連串uchar整數,(應該是可以直接使用的吧,畢竟同一個計算算法而且沒有隨機的因素在里面所得到的描述符肯定可以進行比較,待驗證),原因到時候可以參考我前面的一個SIFT博客,主要是為了節省空間。
static void calcSIFTDescriptor( const Mat& img, Point2f ptf, float ori, float scl, int d, int n, float* dst ) { Point pt(cvRound(ptf.x), cvRound(ptf.y)); float cos_t = cosf(ori*(float)(CV_PI/180)); float sin_t = sinf(ori*(float)(CV_PI/180)); float bins_per_rad = n / 360.f; float exp_scale = -1.f/(d * d * 0.5f); float hist_width = SIFT_DESCR_SCL_FCTR * scl; // determines the size of a single descriptor orientation histogram SIFT_DESCR_SCL_FCTR = 3.f; int radius = cvRound(hist_width * 1.4142135623730951f * (d + 1) * 0.5f); // Clip the radius to the diagonal of the image to avoid autobuffer too large exception radius = std::min(radius, (int) sqrt((double) img.cols*img.cols + img.rows*img.rows)); cos_t /= hist_width; sin_t /= hist_width; int i, j, k, len = (radius*2+1)*(radius*2+1), histlen = (d+2)*(d+2)*(n+2); //d==4 , n==8 int rows = img.rows, cols = img.cols; AutoBuffer<float> buf(len*6 + histlen); float *X = buf, *Y = X + len, *Mag = Y, *Ori = Mag + len, *W = Ori + len; float *RBin = W + len, *CBin = RBin + len, *hist = CBin + len; for( i = 0; i < d+2; i++ ) { for( j = 0; j < d+2; j++ ) for( k = 0; k < n+2; k++ ) hist[(i*(d+2) + j)*(n+2) + k] = 0.; } for( i = -radius, k = 0; i <= radius; i++ ) for( j = -radius; j <= radius; j++ ) { // Calculate sample's histogram array coords rotated relative to ori. // Subtract 0.5 so samples that fall e.g. in the center of row 1 (i.e. // r_rot = 1.5) have full weight placed in row 1 after interpolation. float c_rot = j * cos_t - i * sin_t; //乘以旋轉矩陣 並且還放縮了 hist_width?? float r_rot = j * sin_t + i * cos_t; float rbin = r_rot + d/2 - 0.5f; //沒看懂 float cbin = c_rot + d/2 - 0.5f; int r = pt.y + i, c = pt.x + j; //j是x方向,i是y方向 if( rbin > -1 && rbin < d && cbin > -1 && cbin < d && r > 0 && r < rows - 1 && c > 0 && c < cols - 1 ) { float dx = (float)(img.at<sift_wt>(r, c+1) - img.at<sift_wt>(r, c-1)); //就算旋轉后,不影響計算梯度 float dy = (float)(img.at<sift_wt>(r-1, c) - img.at<sift_wt>(r+1, c)); X[k] = dx; Y[k] = dy; RBin[k] = rbin; CBin[k] = cbin; W[k] = (c_rot * c_rot + r_rot * r_rot)*exp_scale; k++; } } len = k; cv::hal::fastAtan2(Y, X, Ori, len, true); //計算梯度方向 cv::hal::magnitude32f(X, Y, Mag, len); //計算幅值 cv::hal::exp32f(W, W, len); //計算權重 for( k = 0; k < len; k++ ) { float rbin = RBin[k], cbin = CBin[k]; float obin = (Ori[k] - ori)*bins_per_rad; float mag = Mag[k]*W[k]; int r0 = cvFloor( rbin ); int c0 = cvFloor( cbin ); int o0 = cvFloor( obin ); rbin -= r0; cbin -= c0; obin -= o0; if( o0 < 0 ) o0 += n; if( o0 >= n ) o0 -= n; // histogram update using tri-linear interpolation float v_r1 = mag*rbin, v_r0 = mag - v_r1; //沒看懂, tri-linear float v_rc11 = v_r1*cbin, v_rc10 = v_r1 - v_rc11; float v_rc01 = v_r0*cbin, v_rc00 = v_r0 - v_rc01; float v_rco111 = v_rc11*obin, v_rco110 = v_rc11 - v_rco111; float v_rco101 = v_rc10*obin, v_rco100 = v_rc10 - v_rco101; float v_rco011 = v_rc01*obin, v_rco010 = v_rc01 - v_rco011; float v_rco001 = v_rc00*obin, v_rco000 = v_rc00 - v_rco001; int idx = ((r0+1)*(d+2) + c0+1)*(n+2) + o0; hist[idx] += v_rco000; hist[idx+1] += v_rco001; hist[idx+(n+2)] += v_rco010; hist[idx+(n+3)] += v_rco011; hist[idx+(d+2)*(n+2)] += v_rco100; hist[idx+(d+2)*(n+2)+1] += v_rco101; hist[idx+(d+3)*(n+2)] += v_rco110; hist[idx+(d+3)*(n+2)+1] += v_rco111; } // finalize histogram, since the orientation histograms are circular for( i = 0; i < d; i++ ) for( j = 0; j < d; j++ ) { int idx = ((i+1)*(d+2) + (j+1))*(n+2); hist[idx] += hist[idx+n]; hist[idx+1] += hist[idx+n+1]; for( k = 0; k < n; k++ ) dst[(i*d + j)*n + k] = hist[idx+k]; } // copy histogram to the descriptor, // apply hysteresis thresholding // and scale the result, so that it can be easily converted // to byte array float nrm2 = 0; len = d*d*n; for( k = 0; k < len; k++ ) nrm2 += dst[k]*dst[k]; float thr = std::sqrt(nrm2)*SIFT_DESCR_MAG_THR; for( i = 0, nrm2 = 0; i < k; i++ ) { float val = std::min(dst[i], thr); dst[i] = val; nrm2 += val*val; } nrm2 = SIFT_INT_DESCR_FCTR/std::max(std::sqrt(nrm2), FLT_EPSILON); #if 1 for( k = 0; k < len; k++ ) { dst[k] = saturate_cast<uchar>(dst[k]*nrm2); } #else float nrm1 = 0; for( k = 0; k < len; k++ ) { dst[k] *= nrm2; nrm1 += dst[k]; } nrm1 = 1.f/std::max(nrm1, FLT_EPSILON); for( k = 0; k < len; k++ ) { dst[k] = std::sqrt(dst[k] * nrm1);//saturate_cast<uchar>(std::sqrt(dst[k] * nrm1)*SIFT_INT_DESCR_FCTR); } #endif }