KNN算法基本實例


  KNN算法是機器學習領域中一個最基本的經典算法。它屬於監督學習領域的算法(需要帶有類別標簽的訓練數據),並且在模式識別,數據挖掘和特征提取領域有着廣泛的應用。

給定一些已知數據(也稱為訓練數據),這些數據已經按照某個屬性把坐標點分成了不同的組。KNN的思路就是利用這些已分組的數據來判斷新數據點屬於哪個組。

  下面,舉個例子來說明一下。圖中的數據點包含兩個特征:

 

  現在,給出另外一組數據點(也稱為測試數據),通過分析訓練數據來把這些節點分類。沒有分類的節點我們標記為白色,如下所示:

  直觀來講,如果我們把那些節點畫到一個圖上,我們可能就能確定一些簇,或組。現在,給一個沒有分類的點,我們可以通過觀察它距離哪個組最近來確定它屬於哪個組。意思就是,假如一個點距離紅色的組最近,我們就可以把這個點歸為紅色的組。簡而言之,我們可以把第一個點(2.5,7)歸類為綠色,把第二個點(5.5,4.5)歸類為紅色。

  算法流程:

  假設m是訓練樣本的數量,p是一個未知的節點。

  1 把所有訓練的樣本放到一個數組arr[]中。這個意思就是這個數組中每個元素都可以使用元組(x,y)表示。

  2 偽碼

for i=0 to m-1:
  Calculate Euclidean distance d(arr[i], p).

  3 取其中K個最小的距離組成集合S。這里每個距離都對應一個已經分類的數據點。

  4 返回S中出現次數最多的標簽。

  實際程序C++代碼:

 

// C++ program to find groups of unknown
// Points using K nearest neighbour algorithm.
#include <bits/stdc++.h>
using namespace std;
 
// A labelled sample in the 2-D plane, together with scratch storage
// for its distance to the point that is currently being classified.
struct Point
{
    int val;         // Group (class label) of the point
    double x;        // First co-ordinate of the point
    double y;        // Second co-ordinate of the point
    double distance; // Distance from the test point
};
 
// Ordering predicate for sorting points by increasing distance:
// true exactly when `a` has a strictly smaller stored distance than `b`.
bool comparison(Point a, Point b)
{
    const bool aIsNearer = b.distance > a.distance;
    return aIsNearer;
}
 
// Classifies point p with the k-nearest-neighbour rule, assuming only
// two groups (0 and 1).
//
// arr : training points. Each element's `distance` is overwritten with
//       its Euclidean distance to p, and the array is re-ordered by
//       increasing distance, so the caller sees both changes.
// n   : number of elements in arr.
// k   : number of nearest neighbours that vote. Values outside [0, n]
//       are clamped so the vote never reads past the end of arr.
// p   : point to classify; only p.x and p.y are read.
//
// Returns 0 if group 0 holds a strict majority among the voting
// neighbours, else 1. A tie therefore resolves to 1, and so does the
// case where nobody votes (n <= 0 or k <= 0). Neighbours whose val is
// neither 0 nor 1 are ignored.
int classifyAPoint(Point arr[], int n, int k, Point p)
{
    // Without training data there is nothing to measure or sort.
    if (n <= 0)
        return 1;

    // Fill distances of all points from p
    for (int i = 0; i < n; i++)
    {
        const double dx = arr[i].x - p.x;
        const double dy = arr[i].y - p.y;
        arr[i].distance = std::sqrt(dx * dx + dy * dy);
    }

    // Sort the Points by distance from p
    std::sort(arr, arr + n, comparison);

    // Only the first `voters` elements take part in the vote; clamping
    // keeps the loop inside the array when k > n and makes a negative k
    // behave like k == 0.
    const int voters = std::max(0, std::min(k, n));

    int freq1 = 0;     // Frequency of group 0
    int freq2 = 0;     // Frequency of group 1
    for (int i = 0; i < voters; i++)
    {
        if (arr[i].val == 0)
            freq1++;
        else if (arr[i].val == 1)
            freq2++;
    }

    return (freq1 > freq2 ? 0 : 1);
}
 
// Driver code: classifies one test point against a fixed training set
// and prints the predicted group.
int main()
{
    // Training set, one row per point in member order
    // {val (group), x, y, distance}. `distance` is scratch space that
    // classifyAPoint overwrites, so it starts at 0 here.
    // A braced table gives the array a compile-time size; the previous
    // `Point arr[n]` with a non-const n was a variable-length array,
    // which is not standard C++.
    Point arr[] = {
        {0, 1.0, 12.0, 0.0},
        {0, 2.0, 5.0, 0.0},
        {1, 5.0, 3.0, 0.0},
        {1, 3.0, 2.0, 0.0},
        {0, 3.0, 6.0, 0.0},
        {1, 1.5, 9.0, 0.0},
        {1, 7.0, 2.0, 0.0},
        {1, 6.0, 1.0, 0.0},
        // NOTE(review): the Python listing in this article uses
        // (3.8, 1) for this point - confirm which value is intended.
        {1, 3.8, 3.0, 0.0},
        {0, 3.0, 10.0, 0.0},
        {1, 5.6, 4.0, 0.0},
        {1, 4.0, 2.0, 0.0},
        {0, 3.5, 8.0, 0.0},
        {0, 2.0, 11.0, 0.0},
        {1, 2.0, 5.0, 0.0},
        {0, 2.0, 9.0, 0.0},
        {0, 1.0, 7.0, 0.0},
    };

    // Number of data points (17), derived from the table itself.
    const int n = static_cast<int>(sizeof(arr) / sizeof(arr[0]));

    /*Testing Point*/
    // Every member is initialised because p is passed by value; the
    // group is unknown, so val holds the placeholder -1.
    Point p = {-1, 2.5, 7.0, 0.0};

    // Parameter to decide group of the testing point
    int k = 3;
    printf ("The value classified to unknown point"
            " is %d.\n", classifyAPoint(arr, n, k, p));
    return 0;
}
View Code

 

  實際程序python代碼:

  

 1 # Python3 program to find groups of unknown
 2 # Points using K nearest neighbour algorithm.
 3  
 4 import math
 5  
 6 def classifyAPoint(points,p,k=3):
 7     '''
 8      This function finds classification of p using
 9      k nearest neighbour algorithm. It assumes only two
10      groups and returns 0 if p belongs to group 0, else
11       1 (belongs to group 1).
12  
13       Parameters - 
14           points : Dictionary of training points having two keys - 0 and 1
15                    Each key have a list of training data points belong to that 
16  
17           p : A touple ,test data point of form (x,y)
18  
19           k : number of nearest neighbour to consider, default is 3 
20     '''
21  
22     distance=[]
23     for group in points:
24         for feature in points[group]:
25  
26             #calculate the euclidean distance of p from training points 
27             euclidean_distance = math.sqrt((feature[0]-p[0])**2 +(feature[1]-p[1])**2)
28  
29             # Add a touple of form (distance,group) in the distance list
30             distance.append((euclidean_distance,group))
31  
32     # sort the distance list in ascending order
33     # and select first k distances
34     distance = sorted(distance)[:k]
35  
36     freq1 = 0 #frequency of group 0
37     freq2 = 0 #frequency og group 1
38  
39     for d in distance:
40         if d[1] == 0:
41             freq1 += 1
42         elif d[1] == 1:
43             freq2 += 1
44  
45     return 0 if freq1>freq2 else 1
46  
47 # driver function
48 def main():
49  
50     # Dictionary of training points having two keys - 0 and 1
51     # key 0 have points belong to class 0
52     # key 1 have points belong to class 1
53  
54     points = {0:[(1,12),(2,5),(3,6),(3,10),(3.5,8),(2,11),(2,9),(1,7)],
55               1:[(5,3),(3,2),(1.5,9),(7,2),(6,1),(3.8,1),(5.6,4),(4,2),(2,5)]}
56  
57     # testing point p(x,y)
58     p = (2.5,7)
59  
60     # Number of neighbours 
61     k = 3
62  
63     print("The value classified to unknown point is: {}".\
64           format(classifyAPoint(points,p,k)))
65  
66 if __name__ == '__main__':
67     main()
68      
69 # This code is contributed by Atul Kumar (www.fb.com/atul.kr.007)
View Code

 

  

  


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM