BoW算法及DBoW2庫簡介(二)


  一、BoW算法

  用OpenCV實現了最簡單的BoW算法,並在UKbench數據庫上完成了一次小規模的圖像檢索任務。算法原理和網上的描述差不多:使用K-means算法進行聚類,使用KDTree進行特征量化,按照自己的理解計算TF-IDF權重,最后使用余弦距離計算圖像之間的相似性。下面給出關鍵函數基於OpenCV的實現:

如TF-IDF權重的計算,這里只是按照自己的理解實現了算法,有的地方傳參不是很合適,不過不影響效果:

/// Computes the term-frequency histogram of a single image.
///
/// @param descriptors  Descriptors of the image, one per row; only the row count is used.
/// @param labels       Visual-word index of every descriptor, read as labels.at<int>(i).
/// @return Vector with Num_clu entries; entry k is the fraction of the image's
///         descriptors assigned to word k. All entries are 0 when the image has
///         no descriptors.
std::vector<double> compute_TF(cv::Mat& descriptors, cv::Mat& labels)
{
    std::vector<double> tf(Num_clu, 0.0);

    // Without this guard an image with no descriptors divides 0.0 by 0 below,
    // filling the histogram (and later the BoW vector) with NaN.
    const int total = descriptors.rows;
    if (total <= 0)
        return tf;

    // Count how many descriptors fall on each visual word.
    for (int i = 0; i < total; i++)
    {
        tf[labels.at<int>(i)] ++;
    }

    // Turn the counts into relative frequencies.
    for (unsigned int i = 0; i < tf.size(); i++)
    {
        tf[i] /= static_cast<double>(total);
    }
    return tf;
}

/// Computes the inverse document frequency of every visual word over a set of images.
///
/// @param descriptors  Per-image descriptor matrices; only each matrix's row count is used.
/// @param labels       Per-image visual-word indices; labels[i].at<int>(j) is the word of
///                     descriptor j in image i.
/// @return Vector with Num_clu entries; entry k is
///         log(Num_img / (1 + number of images in which word k occurs)).
std::vector<double> comput_IDF(std::vector<cv::Mat>& descriptors, std::vector<cv::Mat> &labels)
{
    // Every document count starts at 1, so the divisor in the final loop is never zero.
    std::vector<double> idf(Num_clu, 1.0);

    for (std::size_t img = 0; img < descriptors.size(); ++img)
    {
        // Flag the words that occur at least once in this image ...
        std::vector<char> present(Num_clu, 0);
        const int rowCount = descriptors[img].rows;
        for (int row = 0; row < rowCount; ++row)
            present[labels[img].at<int>(row)] = 1;

        // ... and credit each flagged word with one more document.
        for (std::size_t word = 0; word < present.size(); ++word)
        {
            if (present[word])
                idf[word] += 1.0;
        }
    }

    // Convert the document counts into IDF weights in place.
    for (std::vector<double>::iterator it = idf.begin(); it != idf.end(); ++it)
        *it = log(Num_img / *it);

    return idf;
}

  有一點需要注意,這里的IDF應該是只計算一次,而TF則是對每一幅圖像計算一次。

  有了TF-IDF函數的實現就可以計算BoW向量了,首先是計算訓練圖像的BoW向量:

/// Builds the codebook and the TF-IDF weighted BoW vectors of the image set.
///
/// Steps: extract SURF descriptors from images 1..Num_img, cluster the descriptors
/// of the first Num_tra images into Num_clu centers with k-means, assign every
/// descriptor to its nearest center through a KD-tree, then weight each image's
/// word histogram by TF * IDF and normalize it.
///
/// @param centers  Output: cluster centers written by cv::kmeans (the codebook).
/// @param IDF      Output: overwritten with the result of comput_IDF.
/// @return Matrix to which one normalized BoW row is appended per image.
cv::Mat TrainingBowVector(cv::Mat & centers, std::vector<double>& IDF)
{
    cv::SurfFeatureDetector detector;
    cv::SurfDescriptorExtractor extractor;

    char image_name[50];
    std::vector<cv::Mat> descriptor_all;
    descriptor_all.reserve(Num_img);

    // Detect keypoints and compute descriptors for every image.
    // File names are 1-based, so image i ends up in descriptor_all[i - 1].
    for (int i = 1; i <= Num_img; i++)
    {
        std::cout << "I:" << i << std::endl;
        // NOTE(review): sprintf_s is not available on every toolchain — confirm the target compiler.
        sprintf_s(image_name, "D:\\DataBase\\UKbench\\TestImage\\%d.jpg", i);
        // Flag 0 requests a grayscale image.
        // NOTE(review): the result of imread is not checked before use.
        cv::Mat image = cv::imread(image_name, 0);
        std::vector<cv::KeyPoint> keypoints;
        cv::Mat descriptors;
        detector.detect(image, keypoints);
        std::cout << "Keypoints:" << keypoints.size() << std::endl;
        extractor.compute(image, keypoints, descriptors);
        descriptor_all.push_back(descriptors);
    }

    // Stack the descriptors of the first Num_tra images into one training matrix.
    std::cout << "Get the training descriptors." << std::endl;
    cv::Mat descriptor_train;
    for (int j = 0; j < Num_tra; j++)
        descriptor_train.push_back(descriptor_all[j]);

    // Cluster into Num_clu centers: stop after 100 iterations or at epsilon 0.01,
    // 3 attempts, k-means++ seeding. labels_k receives the k-means assignments
    // but is not read afterwards; all images are re-quantized below.
    cv::Mat labels_k;
    cv::kmeans(descriptor_train, Num_clu, labels_k, cv::TermCriteria(CV_TERMCRIT_EPS + CV_TERMCRIT_ITER, 100, 0.01)
        , 3, cv::KMEANS_PP_CENTERS, centers);

    // Quantize every descriptor of every image to a center index using a KD-tree
    // built over the centers (tk = 1 neighbour requested per query).
    const int tk = 1, Emax = INT_MAX;
    cv::KDTree T(centers, false);
    std::vector<cv::Mat> labels(Num_img);
    for (int i = 0; i < Num_img; i++)
    {
        cv::Mat descriptor_img = descriptor_all[i];
        for (int j = 0; j < descriptor_img.rows; j++)
        {
            std::vector<float> desc_vec(descriptor_img.row(j));
            std::vector<int> idx_tmp(tk);
            T.findNearest(desc_vec, tk, Emax, idx_tmp, cv::noArray(), cv::noArray());
            // One label per descriptor, appended as a new row of labels[i].
            labels[i].push_back(idx_tmp[0]);
        }
    }

    std::cout << "Compute the TF-IDF." << std::endl;
    cv::Mat BowVec;
    // IDF is computed once for the whole set; TF is computed once per image.
    IDF = comput_IDF(descriptor_all, labels);
    for (int i = 0; i < Num_img; i++)
    {
        std::vector<double> TF = compute_TF(descriptor_all[i], labels[i]);
        cv::Mat BowVec_tmp;
        for (unsigned int j = 0; j < IDF.size(); j++)
        {
            BowVec_tmp.push_back(TF[j] * IDF[j]);
            // Alternative kept from the original: plain TF without IDF weighting.
            //BowVec_tmp.push_back(TF[j]);
        }
        // The values were appended as rows; transpose so the image becomes a single row.
        BowVec_tmp = BowVec_tmp.t();
        // Normalized with cv::normalize's default settings.
        cv::normalize(BowVec_tmp, BowVec_tmp);
        BowVec.push_back(BowVec_tmp);
    }
    return BowVec;
}

  計算測試圖片的BoW向量和上面類似。有了訓練圖像和測試圖像的BoW向量就可以根據余弦距離計算相似度了,最后使用堆排序獲得最相似的圖像ID。

  而Vocabulary Tree算法的代碼實現和上面的不同點在於碼書的訓練方式。

二、DBoW2庫的使用

  使用DBoW2庫訓練碼書,根據BoW打分完成圖像檢索,並根據正向索引完成特征匹配;在ORB-SLAM里面沒有注意到使用倒排索引加速圖像檢索的部分。

  首先是碼書的訓練(“盜用”代碼:http://www.cnblogs.com/jian-li/p/5666556.html):

#include <iostream>
#include <vector>
#include "Thirdparty/DBoW2/DBoW2/FORB.h"
#include "Thirdparty/DBoW2/DBoW2/TemplatedVocabulary.h"

// OpenCV
#include <opencv2/opencv.hpp>
#include "opencv2/core/core.hpp"
#include <opencv/cv.h>
#include <opencv/highgui.h>
#include <opencv2/nonfree/features2d.hpp>

// ROS
#include <rosbag/bag.h>
#include <rosbag/view.h>
#include <ros/ros.h>
#include <sensor_msgs/Image.h>
#include <boost/foreach.hpp>
#include <cv_bridge/cv_bridge.h>
#include "ORBextractor.h"
#include <dirent.h>
#include <string.h>

using namespace DBoW2;
using namespace DUtils;
using namespace std;
using namespace ORB_SLAM;
// - - - - - --- - - - -- - - - - -

/// ORB Vocabulary: the DBoW2 templated vocabulary instantiated with the FORB
/// descriptor type and the FORB descriptor-handling class.
typedef DBoW2::TemplatedVocabulary<DBoW2::FORB::TDescriptor, DBoW2::FORB>
ORBVocabulary;
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

// Runs the extractor on `image` and appends one new descriptor list for it to `features`.
void extractORBFeatures(cv::Mat &image, vector<vector<cv::Mat> > &features, ORBextractor* extractor);
// Appends to `out` every row i of `descriptor` for which mask[i] is true.
void changeStructureORB( const cv::Mat &descriptor,vector<bool> &mask, vector<cv::Mat> &out);
// Fills `mask` with one flag per keypoint telling whether it lies inside the accepted image region.
void isInImage(vector<cv::KeyPoint> &keys, float &cx, float &cy, float &rMin, float &rMax, vector<bool> &mask);
// Trains `voc` on `features`, prints it, and saves it to the text file `fileName`.
void createVocabularyFile(ORBVocabulary &voc, std::string &fileName, const vector<vector<cv::Mat> > &features);
// ----------------------------------------------------------------------------

int main()
{
    //Extracting ORB features from image folder
    vector<std::string> filenames;
    std::string folder = "/home/saodiseng/FRONTAL/";
    cv::glob(folder, filenames);

    // initialze     ORBextractor
    int nLevels = 5;//6;
    ORBextractor* extractor = new ORBextractor(1000,1.2,nLevels,1,20);
    int nImages = filenames.size();

    vector<vector<cv::Mat > > features;
    features.clear();
    features.reserve(nImages);

    cv::Mat image;
    cout << "> Extracting Features from " << nImages << " images..." << endl;
    for(int i = 0; i < nImages; ++i)
    {
        std::cout << "Processing the " << i <<" image " << std::endl;
        cv::Mat src = cv::imread(filenames[i]);
        imshow("View", src);
        cv::waitKey(1);
        if (!src.empty())
        {
            cv::cvtColor(src, image, CV_RGB2GRAY);
            extractORBFeatures(image, features, extractor);
        }
    }
    cout << "... Extraction done!" << endl;

    // Creating the Vocabulary
    // define vocabulary
    const int k = 10; // branching factor
    const WeightingType weight = TF_IDF;
    const ScoringType score = L1_NORM;
    ORBVocabulary voc(k, nLevels, weight, score);
    std::string vociName = "vociOmni.txt";
    createVocabularyFile(voc, vociName, features);
    cout << "--- THE END ---" << endl;
    
    return 0;
}
// ----------------------------------------------------------------------------

/// Extracts ORB descriptors from one image and appends them to `features`.
///
/// @param image      Input image handed to the extractor.
/// @param features   Receives one new entry (the descriptor list of this image);
///                   the entry is empty when no keypoint passes the region mask.
/// @param extractor  ORB extractor to run; must not be null.
void extractORBFeatures(cv::Mat &image, vector<vector<cv::Mat> > &features, ORBextractor* extractor) {
    vector<cv::KeyPoint> keypoints;
    cv::Mat descriptorORB;
    (*extractor)(image, cv::Mat(), keypoints, descriptorORB);

    // reject features outside region of interest
    vector<bool> mask;
    // isInImage takes these by reference, so they must exist as variables.
    float cx = 0; float cy = 0;
    float rMin = 0; float rMax = 0;
    isInImage(keypoints, cx, cy, rMin, rMax, mask);

    // create descriptor vector for the vocabulary
    features.push_back(vector<cv::Mat>());
    changeStructureORB(descriptorORB, mask, features.back());

    // Calling back() on an empty vector is undefined behaviour, so only display
    // the last descriptor when at least one survived the mask.
    if (!features.back().empty())
        imshow("ORBFeature", features.back().back());
}

// ----------------------------------------------------------------------------

/// Copies the selected descriptor rows into a list of single-row matrices.
///
/// @param descriptor  Descriptor matrix, one descriptor per row.
/// @param mask        Selection flags, indexed by descriptor row.
/// @param out         Receives descriptor.row(i) for every row i whose flag is set;
///                    existing contents are kept.
void changeStructureORB( const cv::Mat &descriptor,vector<bool> &mask, vector<cv::Mat> &out) {
    const int rowCount = descriptor.rows;
    for (int row = 0; row < rowCount; ++row) {
        // Rows rejected by the mask are skipped.
        if (!mask[row])
            continue;
        out.push_back(descriptor.row(row));
    }
}

// ----------------------------------------------------------------------------

/// Flags the keypoints that lie strictly inside a 20-pixel border of a 320x240 frame.
///
/// @param keys  Keypoints to test.
/// @param cx, cy, rMin, rMax  Not read or written by this function.
/// @param mask  Overwritten with one flag per keypoint: true when
///              20 < x < 320-20 and 20 < y < 240-20.
///
/// Prints the number of accepted keypoints to standard output.
void isInImage(vector<cv::KeyPoint> &keys, float &cx, float &cy, float &rMin, float &rMax, vector<bool> &mask) {
    const int total = keys.size();
    mask.assign(total, false);

    int accepted = 0;
    for (int k = 0; k < total; ++k) {
        const float x = keys[k].pt.x;
        const float y = keys[k].pt.y;

        const bool insideX = x > 20 && x < 320-20;
        const bool insideY = y > 20 && y < 240-20;
        if (insideX && insideY) {
            mask[k] = true;
            ++accepted;
        }
    }
    std::cout << "In image number " << accepted << std::endl;
}

// ----------------------------------------------------------------------------

/// Trains the vocabulary on the given features, prints it, and writes it to disk.
///
/// @param voc       Vocabulary to train; also streamed to standard output.
/// @param fileName  Path of the text file the vocabulary is saved to.
/// @param features  Training descriptors, one inner list per image.
void createVocabularyFile(ORBVocabulary &voc, std::string &fileName, const vector<vector<cv::Mat> > &features)
{
    // Train the vocabulary.
    std::cout << "> Creating vocabulary. May take some time ..." << std::endl;
    voc.create(features);
    std::cout << "... done!" << std::endl;

    // Print the vocabulary through its stream operator, followed by a blank line.
    std::cout << "> Vocabulary information: " << std::endl;
    std::cout << voc << std::endl;
    std::cout << std::endl;

    // Save the vocabulary to disk.
    std::cout << std::endl;
    std::cout << "> Saving vocabulary..." << std::endl;
    voc.saveToTextFile(fileName);
    std::cout << "... saved to file: " << fileName << std::endl;
}

   也可以直接使用ORB-SLAM提供的碼書。

  再下面就是訓練BoW向量並計算打分:

void FrameRecog::ComputeBoW()
{
    //數據類型轉換;
    vector<cv::Mat>vFrDesc = Converter::toDescriptorVector(Descriptors);
    //BowVec為BoW特征向量,FeatVec為正向索引;
    pORBVocabulary->transform(vFrDesc, BowVec, FeatVec, 4);
}



// Similarity score between the current frame's BoW vector and the i-th stored BoW vector.
float score = pORBVocabulary->score(BowVec, vBowVec[i]);

ComputeBoW()函數計算了當前幀的BowVec向量,以及它的正向索引FeatVec(transform的最后一個參數4表示從葉子節點向上回溯4層,以該層的節點作為索引)。下面一句則計算了兩個BoW向量的相似性打分。當打分滿足某個閾值之后,還需要通過正向索引進行特征匹配:

/// Counts feature matches between a stored frame and the current frame using the
/// direct index: only features that fall under the same vocabulary node are compared.
///
/// @param idx  Index of the stored frame; selects vFeatVec[idx] and vDescriptors[idx].
/// @return Number of stored-frame features whose best candidate in the current frame
///         has distance <= TH_LOW and passes the nearest/second-nearest ratio test.
///
/// The original function this was derived from also outputs the matched 3D map
/// points and applies an orientation-consistency check; both are omitted here, so
/// the leftover histogram constants (one of which, an int initialised with
/// 1.0f/30, always truncated to 0) and the unused best-index variable were removed.
int FrameRecog::FeatMatchByBoW( const int idx )
{
    int nmatches = 0;
    const int TH_LOW = 50;          // maximum accepted descriptor distance
    const double NN_RATIO = 0.95;   // best distance must be below this fraction of the second best

    const DBoW2::FeatureVector &vFeatVecTD = vFeatVec[idx];
    const DBoW2::FeatureVector &vFeatVecCD = FeatVec;

    DBoW2::FeatureVector::const_iterator TDit = vFeatVecTD.begin();
    DBoW2::FeatureVector::const_iterator CDit = vFeatVecCD.begin();
    const DBoW2::FeatureVector::const_iterator TDend = vFeatVecTD.end();
    const DBoW2::FeatureVector::const_iterator CDend = vFeatVecCD.end();

    // Walk both ordered maps in lock-step, stopping at nodes present in both.
    while( TDit != TDend && CDit != CDend )
    {
        // first is the node (word) index, second the indices of the ORB features under it.
        if( TDit->first == CDit->first)
        {
            // Bind by reference: copying both index vectors for every shared node is wasted work.
            const vector<unsigned int> &vIndicesTD = TDit->second;
            const vector<unsigned int> &vIndicesCD = CDit->second;

            // Compare every stored-frame feature under this node with every
            // current-frame feature under the same node.
            for ( size_t iTD = 0; iTD < vIndicesTD.size(); ++iTD )
            {
                const unsigned int realIdxTD = vIndicesTD[iTD];
                const cv::Mat &dTD = vDescriptors[idx].row(realIdxTD);

                // Track the smallest and second-smallest distance.
                int bestDist1 = 256;
                int bestDist2 = 256;

                for ( size_t iCD = 0; iCD < vIndicesCD.size(); ++iCD )
                {
                    const unsigned int realIdxCD = vIndicesCD[iCD];
                    const cv::Mat &dCD = Descriptors.row(realIdxCD);
                    const int dist = DescriptorDistance(dTD, dCD);

                    if( dist < bestDist1 )
                    {
                        bestDist2 = bestDist1;
                        bestDist1 = dist;
                    }
                    else if( dist < bestDist2 )
                    {
                        bestDist2 = dist;
                    }
                }

                // Two conditions: the best distance must not exceed TH_LOW, and it
                // must be smaller than NN_RATIO times the second-best distance.
                // A ratio near 1 accepts almost anything; a smaller ratio is stricter,
                // requiring the best distance to be clearly smaller than the second
                // best, which makes a correct match much more likely.
                if( bestDist1 <= TH_LOW && bestDist1 < NN_RATIO * bestDist2 )
                    ++nmatches;
            }
            ++TDit;
            ++CDit;
        }
        else if( TDit->first < CDit->first )
        {
            // Jump the lagging iterator straight to the other side's node.
            TDit = vFeatVecTD.lower_bound(CDit->first);
        }
        else
        {
            CDit = vFeatVecCD.lower_bound(TDit->first);
        }
    }

    return nmatches;
}

 

/// Computes the Hamming distance between two binary descriptors.
///
/// Reads eight 32-bit words (256 bits) from the first row of each matrix and sums
/// the number of differing bits.
///
/// @param a  First descriptor.
/// @param b  Second descriptor.
/// @return Number of bit positions in which the two descriptors differ.
int FrameRecog::DescriptorDistance(const cv::Mat &a, const cv::Mat &b)
{
    const int *wordsA = a.ptr<int32_t>();
    const int *wordsB = b.ptr<int32_t>();

    int total = 0;
    for ( int w = 0; w < 8; ++w )
    {
        // Bits set in `bits` are the positions where the two words differ.
        unsigned int bits = wordsA[w] ^ wordsB[w];

        // Parallel bit count: 2-bit sums, then 4-bit sums, then the byte sums are
        // accumulated into the top byte by the multiplication.
        bits -= (bits >> 1) & 0x55555555;
        const unsigned int lowPairs = bits & 0x33333333;
        const unsigned int highPairs = (bits >> 2) & 0x33333333;
        bits = lowPairs + highPairs;
        total += (((bits + (bits >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
    }
    return total;
}

 

上面的代碼源自ORBmatcher.cc中的

int ORBmatcher::SearchByBoW(KeyFrame* pKF,Frame &F, vector<MapPoint*> &vpMapPointMatches) 函數中。即通過正向索引給出特征匹配數或匹配的特征以及對應的3D點。


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM