關於KNN的python3實現

本文轉載自查看原文 2016-11-06 21:17 3011 python3/ 數據分析

　　關於KNN，有幸看到這篇文章，寫得很好，這裡就不再贅述。直接貼上代碼了，有小的改動。（原來是python2版本的，這裡改為python3的，主要就是print）

　　環境：win7 32bit + spyder + anaconda3.5

　　一、初階

# -*- coding: utf-8 -*-
"""
Created on Sun Nov  6 16:09:00 2016

@author: Administrator
"""

#Input:
#	newInput:待測的數據點(1xM)
#	dataSet:已知的數據(NxM)
#	labels:已知數據的標簽(1xN)
#	k:選取的最鄰近數據點的個數
#
#Output:
#	待測數據點的分類標簽
#	

from numpy import *

# create a dataset which contains 4 samples in 2 classes
def createDataSet():
    """Build the toy training set used by the demo.

    Returns:
        A tuple ``(group, labels)``: ``group`` is a 4x2 array with one sample
        per row, and ``labels`` is the list of class names for those rows.
    """
    # Keep every sample next to its class so the pairing is obvious.
    samples = [
        ([1.0, 0.9], 'A'),
        ([1.0, 1.0], 'A'),
        ([0.1, 0.2], 'B'),
        ([0.0, 0.1], 'B'),
    ]
    group = array([point for point, _ in samples])
    labels = [tag for _, tag in samples]
    return group, labels
	

#classify using KNN
def KNNClassify(newInput, dataSet, labels, k):
    """Classify one sample by majority vote among its k nearest neighbours.

    Args:
        newInput: the sample to classify, M feature values (1-D or 1xM).
        dataSet: NumPy array holding the N known samples, one per row (NxM).
        labels: sequence of N labels; ``labels[i]`` belongs to ``dataSet[i]``.
        k: number of nearest neighbours that vote; must satisfy
            ``1 <= k <= N``.

    Returns:
        The label occurring most often among the k samples closest to
        ``newInput`` by Euclidean distance. When several labels tie, the one
        encountered first while walking the neighbours from nearest to
        farthest is returned.

    Raises:
        ValueError: if ``k`` is smaller than 1 or larger than the number of
            rows in ``dataSet``.
    """
    # Imported locally so the block does not depend on module-level imports.
    from collections import Counter

    numSamples = dataSet.shape[0]  # row count
    # Without this check k <= 0 ended in an unbound local and k > N in a bare
    # IndexError; fail early with a message that names the real problem.
    if not 1 <= k <= numSamples:
        raise ValueError(
            "k must be between 1 and the number of samples (%d), got %r"
            % (numSamples, k)
        )

    # step 1: Euclidean distance from newInput to every row.
    # Broadcasting stretches newInput across the rows, so no tiled copy is
    # needed. Array methods are used instead of bare sum()/argsort() so the
    # result does not depend on which names a star-import has shadowed.
    diff = dataSet - newInput
    distance = (diff ** 2).sum(axis=1) ** 0.5

    # step 2: indices of the k closest samples, nearest first.
    nearest = distance.argsort()[:k]

    # step 3: tally the neighbours' labels and return the most frequent one.
    votes = Counter(labels[i] for i in nearest)
    return votes.most_common(1)[0][0]

		
# test

# Demo: classify two hand-picked points against the toy data set.
dataSet, labels = createDataSet()

# Neighbour count shared by both queries (previously assigned but never
# passed on; a literal 3 was used in the calls instead).
k = 3

for testX in (array([1.2, 1.0]), array([0.1, 0.3])):
    outputLabel = KNNClassify(testX, dataSet, labels, k)
    print("Your input is:", testX, "and classified to class: ", outputLabel)

　　運行結果：

　　二、進階

　　用到的手寫識別數據庫資料在這里下載。關於資料的介紹在上面的博文也已經介紹的很清楚了。

# -*- coding: utf-8 -*-
"""
Created on Sun Nov  6 16:09:00 2016

@author: Administrator
"""

#Input:
#	newInput:待測的數據點(1xM)
#	dataSet:已知的數據(NxM)
#	labels:已知數據的標簽(1xN)
#	k:選取的最鄰近數據點的個數
#
#Output:
#	待測數據點的分類標簽
#	

from numpy import *



#classify using KNN
def KNNClassify(newInput, dataSet, labels, k):
    """Classify one sample by majority vote among its k nearest neighbours.

    Args:
        newInput: the sample to classify, M feature values (1-D or 1xM).
        dataSet: NumPy array holding the N known samples, one per row (NxM).
        labels: sequence of N labels; ``labels[i]`` belongs to ``dataSet[i]``.
        k: number of nearest neighbours that vote; must satisfy
            ``1 <= k <= N``.

    Returns:
        The label occurring most often among the k samples closest to
        ``newInput`` by Euclidean distance. When several labels tie, the one
        encountered first while walking the neighbours from nearest to
        farthest is returned.

    Raises:
        ValueError: if ``k`` is smaller than 1 or larger than the number of
            rows in ``dataSet``.
    """
    # Imported locally so the block does not depend on module-level imports.
    from collections import Counter

    numSamples = dataSet.shape[0]  # row count
    # Without this check k <= 0 ended in an unbound local and k > N in a bare
    # IndexError; fail early with a message that names the real problem.
    if not 1 <= k <= numSamples:
        raise ValueError(
            "k must be between 1 and the number of samples (%d), got %r"
            % (numSamples, k)
        )

    # step 1: Euclidean distance from newInput to every row.
    # Broadcasting stretches newInput across the rows, so no tiled copy is
    # needed. Array methods are used instead of bare sum()/argsort() so the
    # result does not depend on which names a star-import has shadowed.
    diff = dataSet - newInput
    distance = (diff ** 2).sum(axis=1) ** 0.5

    # step 2: indices of the k closest samples, nearest first.
    nearest = distance.argsort()[:k]

    # step 3: tally the neighbours' labels and return the most frequent one.
    votes = Counter(labels[i] for i in nearest)
    return votes.most_common(1)[0][0]

		

# convert image to vector  
def img2vector(filename):
    """Read a 32x32 text bitmap into a single-row feature vector.

    The file is read line by line; the first 32 characters of each of the
    first 32 lines are converted with ``int()`` and stored row-major.

    Args:
        filename: path of the text file to read.

    Returns:
        A 1x1024 array in which element ``[0, row * 32 + col]`` holds the
        digit found at line ``row``, column ``col`` of the file.

    Raises:
        IndexError: if a line has fewer than 32 characters (this includes a
            file with fewer than 32 lines).
        ValueError: if a character is not a decimal digit.
    """
    rows = 32
    cols = 32
    imgVector = zeros((1, rows * cols))
    # The context manager closes the handle even when a malformed line raises;
    # previously every image left its file open until garbage collection.
    with open(filename) as fileIn:
        for row in range(rows):
            lineStr = fileIn.readline()
            for col in range(cols):
                # Row-major offset, derived from cols rather than a literal 32.
                imgVector[0, row * cols + col] = int(lineStr[col])

    return imgVector


# load dataSet  
def loadDataSet(dataSetDir='F:\\Techonolgoy\\算法學習\\KNN\\進階\\'):
    """Load the handwritten-digit training and testing sets from disk.

    Every file inside ``<dataSetDir>/trainingDigits`` and
    ``<dataSetDir>/testDigits`` is converted with ``img2vector``; its label is
    the integer that precedes the first ``'_'`` in the file name
    (for example ``"1_18.txt"`` gives ``1``).

    Args:
        dataSetDir: directory that contains the ``trainingDigits`` and
            ``testDigits`` sub-directories. Defaults to the location that was
            previously hard-coded, so existing no-argument calls behave as
            before.

    Returns:
        A tuple ``(train_x, train_y, test_x, test_y)``: the ``*_x`` values are
        arrays with one 1024-element row per file, and the ``*_y`` values are
        lists of integer labels in the same order.
    """
    # Imported here because the listing only star-imports numpy at module
    # level, which left the os.listdir calls below without a bound `os` name.
    import os

    def loadDigitDir(subDir):
        # Read one sub-directory into (feature matrix, label list).
        dirPath = os.path.join(dataSetDir, subDir)
        fileList = os.listdir(dirPath)
        features = zeros((len(fileList), 1024))
        digitLabels = []
        for i, filename in enumerate(fileList):
            features[i, :] = img2vector(os.path.join(dirPath, filename))
            # File names look like "1_18.txt": the part before '_' is the label.
            digitLabels.append(int(filename.split('_')[0]))
        return features, digitLabels

    ## step 1: Getting training set
    print("---Getting training set...")
    train_x, train_y = loadDigitDir('trainingDigits')

    ## step 2: Getting testing set
    print("---Getting testing set...")
    test_x, test_y = loadDigitDir('testDigits')

    return train_x, train_y, test_x, test_y
  
# test hand writing class  
def testHandWritingClass(k=3):
    """Run the digit-recognition experiment and print the accuracy.

    Loads the data with ``loadDataSet()``, classifies every test sample with
    ``KNNClassify`` against the training set, and prints the share of
    predictions that equal the expected label.

    Args:
        k: number of nearest neighbours passed to ``KNNClassify``. Defaults
            to 3, the value that was previously hard-coded.

    Raises:
        ValueError: if the test set is empty, because no accuracy can be
            computed (this used to surface as a ZeroDivisionError).
    """
    ## step 1: load data
    print("step 1: load data...")
    train_x, train_y, test_x, test_y = loadDataSet()

    ## step 2: training...
    # Nothing is fitted here; the training samples are handed to KNNClassify
    # directly in step 3.
    print("step 2: training...")

    ## step 3: testing
    print("step 3: testing...")
    numTestSamples = test_x.shape[0]
    if numTestSamples == 0:
        raise ValueError("no test samples were loaded; cannot compute accuracy")
    matchCount = 0
    # Explicit loop on purpose: under `from numpy import *` a bare sum() over
    # a generator may resolve to numpy's sum instead of the builtin.
    for sample, expected in zip(test_x, test_y):
        if KNNClassify(sample, train_x, train_y, k) == expected:
            matchCount += 1
    accuracy = matchCount / numTestSamples

    ## step 4: show the result
    print("step 4: show the result...")
    print('The classify accuracy is: %.2f%%' % (accuracy * 100))



# Script entry point: load the digit data, classify the test set and print
# the resulting accuracy.
testHandWritingClass()

　　運行結果：

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 Python實現knn KNN算法——python實現 KNN及其改進算法的python實現利用Python實現kNN算法 KNN識別圖像上的數字及python實現 kNN算法python實現和簡單數字識別 Python3調試實現 kNN的matlab實現 KNN算法和實現基於Python3的12306登錄實現