OpenCV Kmeans


1 K-均值聚類算法的基本思想

    K-均值聚類算法是著名的划分聚類分割方法。划分方法的基本思想是:給定一個有N個元組或者紀錄的數據集,分裂法將構造K個分組,每一個分組就代表一個聚類,K<N。而且這K個分組滿足下列條件:(1) 每一個分組至少包含一個數據紀錄;(2)每一個數據紀錄屬於且僅屬於一個分組;對於給定的K,算法首先給出一個初始的分組方法,以后通過反復迭代的方法改變分組,使得每一次改進之后的分組方案都較前一次好,而所謂好的標准就是:同一分組中的記錄越近越好,而不同分組中的紀錄越遠越好。

K-means算法的工作原理:算法首先隨機從數據集中選取 K個點作為初始聚類中心,然后計算各個樣本到聚類中心的距離,把樣本歸到離它最近的那個聚類中心所在的類。計算新形成的每一個聚類的數據對象的平均值來得到新的聚類中心,如果相鄰兩次的聚類中心沒有任何變化,說明樣本調整結束,聚類准則函數 已經收斂。本算法的一個特點是在每次迭代中都要考察每個樣本的分類是否正確。若不正確,就要調整,在全部樣本調整完后,再修改聚類中心,進入下一次迭代。這個過程將不斷重復直到滿足某個終止條件,終止條件可以是以下任何一個:

(1)沒有對象被重新分配給不同的聚類。 

(2)聚類中心再發生變化。 

(3)誤差平方和局部最小。

   K-means聚類算法的一般步驟:

(1)從 n個數據對象任意選擇 k 個對象作為初始聚類中心;

(2)循環(3)到(4)直到每個聚類不再發生變化為止;

(3)根據每個聚類對象的均值(中心對象),計算每個對象與這些中心對象的距離;並根據最小距離重新對相應對象進行划分;

(4)重新計算每個(有變化)聚類的均值(中心對象),直到聚類中心不再變化。這種划分使得下式最小

                                   

K-均值聚類法的缺點:

(1)在 K-means 算法中 K 是事先給定的,這個 K 值的選定是非常難以估計的。

(2)在 K-means 算法中,首先需要根據初始聚類中心來確定一個初始划分,然后對初始划分進行優化。

(3) K-means算法需要不斷地進行樣本分類調整不斷地計算調整后的新的聚類中心因此當數據量非常大時算法的時間開銷是非常大的。

(4)K-means算法對一些離散點和初始k值敏感,不同的距離初始值對同樣的數據樣本可能得到不同的結果。


2 OpenCV中K均值函數分析:

CV_IMPL int 

cvKMeans2( const CvArr* _samples, intcluster_count, CvArr* _labels,

          CvTermCriteria termcrit, int attempts, CvRNG*,

           intflags, CvArr* _centers, double* _compactness )

 

_samples:輸入樣本的浮點矩陣,每個樣本一行,如對彩色圖像進行聚類,每個通道一行,CV_32FC3

cluster_count:所給定的聚類數目

_labels:輸出整數向量:每個樣本對應的類別標識,其范圍為0- (cluster_count-1),必須滿足以下條件:

cv::Mat data = cv::cvarrToMat(_samples),labels = cv::cvarrToMat(_labels);

    CV_Assert(labels.isContinuous() && labels.type() == CV_32S &&

            (labels.cols == 1 || labels.rows == 1)&&

             labels.cols + labels.rows - 1 ==data.rows );

termcrit:指定聚類的最大迭代次數和/或精度(兩次迭代引起的聚類中心的移動距離),其執行 k-means 算法搜索 cluster_count 個類別的中心並對樣本進行分類,輸出 labels(i) 為樣本i的類別標識。其中CvTermCriteria為OpenCV中的迭代算法的終止准則,其結構如下:

#define CV_TERMCRIT_ITER    1

#define CV_TERMCRIT_NUMBER  CV_TERMCRIT_ITER

#define CV_TERMCRIT_EPS     2

typedef struct CvTermCriteria

{

    int    type; int    max_iter;  double epsilon;

 } CvTermCriteria;

 max_iter:最大迭代次數。  epsilon:結果的精確性 。

attempts:

flags: 與labels和centers相關

_centers: 輸出聚類中心,可以不用設置輸出聚類中心,但如果想輸出聚類中心必須滿足以下條件:

           CV_Assert(!centers.empty() );      

 CV_Assert( centers.rows == cluster_count );

   CV_Assert( centers.cols ==data.cols );       

CV_Assert( centers.depth() == data.depth() );

聚類中心的獲得方式:(以三類為例)

double cent0 = centers->data.fl[0];           

double cent1 = centers->data.fl[1];           

double cent2 = centers->data.fl[2];

CV_IMPL int

cvKMeans2const CvArr_samples,int cluster_count,CvArr_labels,

           CvTermCriteriatermcritintattemptsCvRNG*,

           int flagsCvArr_centersdouble_compactness )

{

    cv::Mat data = cv::cvarrToMat(_samples), labelscv::cvarrToMat(_labels), centers;

    if_centers )

    {

        centerscv::cvarrToMat(_centers);

               // 將centers和data轉換為行向量

        centerscenters.reshape(1);

        datadata.reshape(1);

               // centers必須滿足的條件

        CV_Assert(!centers.empty());

        CV_Assert(centers.rows== cluster_count );

        CV_Assert(centers.cols== data.cols);

        CV_Assert(centers.depth()== data.depth());

}

// labels必須滿足的條件

    CV_Assert(labels.isContinuous()&& labels.type()== CV_32S &&

        (labels.cols == 1 || labels.rows == 1) &&

        labels.cols + labels.rows - 1 == data.rows );

       // 調用kmeans實現聚類,如果定義了輸出聚類中心矩陣,那么輸出centers

    double compactness = cv::kmeans(datacluster_countlabels,termcritattempts,

                                flags_centerscv::_OutputArray(centers) : cv::_OutputArray() );

    if_compactness )

        *_compactnesscompactness;

    return 1;

}

double cv::kmeansInputArray_dataint K,

                   InputOutputArray_bestLabels,

                   TermCriteriacriteriaintattempts,

                   intflagsOutputArray_centers )

{

    const int SPP_TRIALS =3;

Mat data = _data.getMat();

// 判斷data是否為行向量

    bool isrow = data.rows == 1 && data.channels() > 1;

    int N = !isrow ? data.rows : data.cols;

    int dims = (!isrowdata.cols: 1)*data.channels();

    int type = data.depth();

      

    attemptsstd::max(attempts, 1);

    CV_Assert(data.dims<= 2 && type == CV_32F && K> 0 );

    CV_Assert(N >= K);

 

    _bestLabels.create(N, 1, CV_32S, -1, true);

 

Mat _labelsbest_labels = _bestLabels.getMat();

// 使用已初始化的labels

    ifflags & CV_KMEANS_USE_INITIAL_LABELS)

    {

        CV_Assert((best_labels.cols== 1 || best_labels.rows== 1) &&

                  best_labels.cols*best_labels.rows == N&&

                  best_labels.type() == CV_32S&&

                  best_labels.isContinuous());

        best_labels.copyTo(_labels);

    }

    else

    {

        if( !((best_labels.cols== 1 || best_labels.rows== 1) &&

             best_labels.cols*best_labels.rows == N&&

            best_labels.type() == CV_32S&&

            best_labels.isContinuous()))

            best_labels.create(N, 1, CV_32S);

        _labels.create(best_labels.size(), best_labels.type());

    }

    intlabels = _labels.ptr<int>();

 

    Mat centers(Kdimstype), old_centers(Kdimstype), temp(1, dimstype);

    vector<intcounters(K);

    vector<Vec2f_box(dims);

    Vec2fbox = &_box[0];

    double best_compactness = DBL_MAX,compactness = 0;

    RNG&rng = theRNG();

    int aiterijk;

       // 對終止條件進行修改

    ifcriteria.typeTermCriteria::EPS)

        criteria.epsilon = std::max(criteria.epsilon, 0.);

    else

        criteria.epsilon = FLT_EPSILON;

    criteria.epsilon *= criteria.epsilon;

 

    ifcriteria.typeTermCriteria::COUNT)

        criteria.maxCount = std::min(std::max(criteria.maxCount, 2), 100);

    else

        criteria.maxCount = 100;

       // 聚類數目為1類的時候

    ifK == 1 )

    {

        attempts= 1;

        criteria.maxCount = 2;

    }

 

    const floatsample = data.ptr<float>(0);

    forj = 0; j < dimsj++ )

        box[j] = Vec2f(sample[j], sample[j]);

 

    fori = 1; i < Ni++ )

    {

        sampledata.ptr<float>(i);

        forj = 0; j < dimsj++ )

        {

            floatv = sample[j];

            box[j][0] = std::min(box[j][0], v);

            box[j][1] = std::max(box[j][1], v);

        }

    }

 

    fora = 0; a < attemptsa++ )

    {

        double max_center_shift = DBL_MAX;

        foriter = 0;; )

        {

            swap(centersold_centers);

/*enum

{

   KMEANS_RANDOM_CENTERS=0, // Chooses random centers for k-Meansinitialization

   KMEANS_PP_CENTERS=2,     // Usesk-Means++ algorithm for initialization

   KMEANS_USE_INITIAL_LABELS=1 // Uses the user-provided labels for K-Meansinitialization

};*/

 

            if(iter == 0 && (a > 0 || !(flagsKMEANS_USE_INITIAL_LABELS)) )

            {

                if(flags & KMEANS_PP_CENTERS)

                    generateCentersPP(datacentersKrngSPP_TRIALS);

                else

                {

                    for(k = 0; kKk++)

                        generateRandomCenter(_boxcenters.ptr<float>(k), rng);

                }

            }

            else

            {

                if(iter == 0 && a == 0 && (flagsKMEANS_USE_INITIAL_LABELS) )

                {

                    for(i = 0; iNi++)

                        CV_Assert( (unsigned)labels[i] <(unsigned)K);

                }

 

                //compute centers

                centersScalar(0);

                for(k = 0; kKk++)

                    counters[k] = 0;

 

                for(i = 0; iNi++)

                {

                    sampledata.ptr<float>(i);

                    klabels[i];

                    float*center = centers.ptr<float>(k);

                    j=0;

                    #ifCV_ENABLE_UNROLLED

                    for(;j <= dims- 4; j += 4 )

                    {

                        float t0 = center[j] + sample[j];

                        float t1 = center[j+1] + sample[j+1];

 

                        center[j] = t0;

                        center[j+1] = t1;

 

                        t0 = center[j+2] + sample[j+2];

                        t1 = center[j+3] + sample[j+3];

 

                        center[j+2] = t0;

                        center[j+3] = t1;

                    }

                    #endif

                    for(; j < dims;j++ )

                        center[j] += sample[j];

                    counters[k]++;

                }

 

                if(iter > 0 )

                    max_center_shift= 0;

 

                for(k = 0; kKk++)

                {

                    if(counters[k]!= 0 )

                        continue;

 

     // if somecluster appeared to be empty then:

     //   1. find the biggest cluster

     //   2. find the farthest from the center pointin the biggest cluster

     //   3. exclude the farthest point from thebiggest cluster and form a new 1-point cluster.

                    intmax_k = 0;

                    for(int k1 = 1; k1 < Kk1++ )

                    {

                        ifcounters[max_k] < counters[k1] )

                            max_k = k1;

                    }

 

                    doublemax_dist = 0;

                    intfarthest_i = -1;

                    float*new_center = centers.ptr<float>(k);

                    float*old_center = centers.ptr<float>(max_k);

                    float*_old_center = temp.ptr<float>();// normalized

                    floatscale = 1.f/counters[max_k];

                    for(j = 0; jdimsj++)

                        _old_center[j] = old_center[j]*scale;

 

                    for(i = 0; iNi++)

                    {

                        if(labels[i]!= max_k )

                            continue;

                        sample = data.ptr<float>(i);

                        double dist = normL2Sqr_(sample,_old_centerdims);

 

                        ifmax_dist <= dist )

                        {

                            max_dist = dist;

                            farthest_i = i;

                        }

                    }

 

                    counters[max_k]--;

                    counters[k]++;

                    labels[farthest_i] = k;

                    sampledata.ptr<float>(farthest_i);

 

                    for(j = 0; jdimsj++)

                    {

                        old_center[j] -= sample[j];

                        new_center[j] += sample[j];

                    }

                }

 

                for(k = 0; kKk++)

                {

                    float*center = centers.ptr<float>(k);

                    CV_Assert(counters[k]!= 0 );

 

                    floatscale = 1.f/counters[k];

                    for(j = 0; jdimsj++)

                        center[j] *= scale;

 

                    if(iter > 0 )

                    {

                        double dist = 0;

                        const floatold_center = old_centers.ptr<float>(k);

                        forj = 0; j < dimsj++ )

                        {

                            double t = center[j] - old_center[j];

                            dist += t*t;

                        }

                        max_center_shiftstd::max(max_center_shiftdist);

                    }

                }

            }

 

            if(++iter == MAX(criteria.maxCount,2) || max_center_shift <= criteria.epsilon)

                break;

 

            // assignlabels

            Matdists(1, N,CV_64F);

            double*dist = dists.ptr<double>(0);

            parallel_for_(Range(0, N),

                         KMeansDistanceComputer(dist,labelsdata,centers));

            compactness= 0;

            for(i = 0; iNi++)

            {

                compactness+= dist[i];

            }

        }

 

        ifcompactness < best_compactness)

        {

            best_compactnesscompactness;

            if(_centers.needed())

                centers.copyTo(_centers);

            _labels.copyTo(best_labels);

        }

    }

 

    return best_compactness;

}


3 采用cvKMeans2對灰度圖像進行聚類分析

//灰度圖像聚類分析

BOOL GrayImageSegmentByKMeans2(const IplImagepImgIplImage*pResultintsortFlag)

{

       assert(pImg != NULL&& pImg->nChannels== 1);

       //創建樣本矩陣,CV_32FC1代表位浮點通道(灰度圖像)

       CvMat*samples = cvCreateMat((pImg->width)* (pImg->height),1, CV_32FC1);

       //創建類別標記矩陣,CV_32SF1代表位整型通道

       CvMat*clusters = cvCreateMat((pImg->width)* (pImg->height),1, CV_32SC1);

       //創建類別中心矩陣

       CvMat*centers = cvCreateMat(nClusters, 1, CV_32FC1);

       // 將原始圖像轉換到樣本矩陣

       {

              intk = 0;

              CvScalars;

              for(int i = 0; i < pImg->widthi++)

              {

                     for(int j=0;j < pImg->heightj++)

                     {

                            s.val[0] = (float)cvGet2D(pImgji).val[0];

                            cvSet2D(samples,k++, 0, s);

                     }

              }

       }

       //開始聚類,迭代次,終止誤差.0

      cvKMeans2(samplesnClusters,clusterscvTermCriteria(CV_TERMCRIT_ITER + CV_TERMCRIT_EPS,100, 1.0), 1, 0, 0,centers);

       // 無需排序直接輸出時

       if (sortFlag == 0)

       {

              intk = 0;

              intval = 0;

              floatstep = 255 / ((float)nClusters - 1);

              CvScalars;

              for(int i = 0; i < pImg->widthi++)

              {

                     for(int j = 0;j < pImg->heightj++)

                     {

                            val = (int)clusters->data.i[k++];

                            s.val[0] = 255- val * step;//這個是將不同類別取不同的像素值,

                            cvSet2D(pResult,jis);  //將每個像素點賦值

                     }

              }

              returnTRUE;

       }    

4 利用OpenCV對彩色圖像進行顏色聚類:

BOOL ColorImageSegmentByKMeans2(const IplImageimgIplImagepResultint sortFlag)

{

       assert(img != NULL&& pResult != NULL);

       assert(img->nChannels== 3 && pResult->nChannels == 1);

       int i,j;

       CvMat*samples=cvCreateMat((img->width)*(img->height),1,CV_32FC3);//創建樣本矩陣,CV_32FC3代表位浮點通道(彩色圖像)

       CvMat*clusters=cvCreateMat((img->width)*(img->height),1,CV_32SC1);//創建類別標記矩陣,CV_32SF1代表位整型通道

 

       int k=0;

       for (i=0;i<img->width;i++)

       {

              for(j=0;j<img->height;j++)

              {

                     CvScalars;

                     //獲取圖像各個像素點的三通道值(RGB

                     s.val[0]=(float)cvGet2D(img,j,i).val[0];

                     s.val[1]=(float)cvGet2D(img,j,i).val[1];

                     s.val[2]=(float)cvGet2D(img,j,i).val[2];

                     cvSet2D(samples,k++,0,s);//將像素點三通道的值按順序排入樣本矩陣

              }

       }

       cvKMeans2(samples,nClusters,clusters,cvTermCriteria(CV_TERMCRIT_ITER,100,1.0));//開始聚類,迭代次,終止誤差.0

 

       k=0;

       int val=0;

       float step=255/(nClusters-1);

 

       for (i=0;i<img->width;i++)

       {

              for(j=0;j<img->height;j++)

              {

                     val=(int)clusters->data.i[k++];

                     CvScalars;

                     s.val[0]=255-val*step;//這個是將不同類別取不同的像素值,

                     cvSet2D(pResult,j,i,s);        //將每個像素點賦值      

              }

       }

       cvReleaseMat(&samples);

       cvReleaseMat(&clusters);

       return TRUE;

}

轉載:http://blog.csdn.net/xidianzhimeng/article/details/10391603


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM