吳裕雄 python 機器學習——KNN回歸KNeighborsRegressor模型


import numpy as np
import matplotlib.pyplot as plt

from sklearn import neighbors, datasets
from sklearn.model_selection import train_test_split

def create_regression_data(n):
    '''
    創建回歸模型使用的數據集
    '''
    X =5 * np.random.rand(n, 1)
    y = np.sin(X).ravel()
    # 每隔 5 個樣本就在樣本的值上添加噪音
    y[::5] += 1 * (0.5 - np.random.rand(int(n/5))) 
    # 進行簡單拆分,測試集大小占 1/4
    return train_test_split(X, y,test_size=0.25,random_state=0)

#KNN回歸KNeighborsRegressor模型
def test_KNeighborsRegressor(*data):
    X_train,X_test,y_train,y_test=data
    regr=neighbors.KNeighborsRegressor()
    regr.fit(X_train,y_train)
    print("Training Score:%f"%regr.score(X_train,y_train))
    print("Testing Score:%f"%regr.score(X_test,y_test))
    
#獲取回歸模型的數據集
X_train,X_test,y_train,y_test=create_regression_data(1000)
# 調用 test_KNeighborsRegressor
test_KNeighborsRegressor(X_train,X_test,y_train,y_test)

def test_KNeighborsRegressor_k_w(*data):
    '''
    測試 KNeighborsRegressor 中 n_neighbors 和 weights 參數的影響
    '''
    X_train,X_test,y_train,y_test=data
    Ks=np.linspace(1,y_train.size,num=100,endpoint=False,dtype='int')
    weights=['uniform','distance']

    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)
    ### 繪制不同 weights 下, 預測得分隨 n_neighbors 的曲線
    for weight in weights:
        training_scores=[]
        testing_scores=[]
        for K in Ks:
            regr=neighbors.KNeighborsRegressor(weights=weight,n_neighbors=K)
            regr.fit(X_train,y_train)
            testing_scores.append(regr.score(X_test,y_test))
            training_scores.append(regr.score(X_train,y_train))
        ax.plot(Ks,testing_scores,label="testing score:weight=%s"%weight)
        ax.plot(Ks,training_scores,label="training score:weight=%s"%weight)
    ax.legend(loc='best')
    ax.set_xlabel("K")
    ax.set_ylabel("score")
    ax.set_ylim(0,1.05)
    ax.set_title("KNeighborsRegressor")
    plt.show()
    
# 調用 test_KNeighborsRegressor_k_w
test_KNeighborsRegressor_k_w(X_train,X_test,y_train,y_test)

def test_KNeighborsRegressor_k_p(*data):
    '''
    測試 KNeighborsRegressor 中 n_neighbors 和 p 參數的影響
    '''
    X_train,X_test,y_train,y_test=data
    Ks=np.linspace(1,y_train.size,endpoint=False,dtype='int')
    Ps=[1,2,10]

    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)
    ### 繪制不同 p 下, 預測得分隨 n_neighbors 的曲線
    for P in Ps:
        training_scores=[]
        testing_scores=[]
        for K in Ks:
            regr=neighbors.KNeighborsRegressor(p=P,n_neighbors=K)
            regr.fit(X_train,y_train)
            testing_scores.append(regr.score(X_test,y_test))
            training_scores.append(regr.score(X_train,y_train))
        ax.plot(Ks,testing_scores,label="testing score:p=%d"%P)
        ax.plot(Ks,training_scores,label="training score:p=%d"%P)
    ax.legend(loc='best')
    ax.set_xlabel("K")
    ax.set_ylabel("score")
    ax.set_ylim(0,1.05)
    ax.set_title("KNeighborsRegressor")
    plt.show()
    
# 調用 test_KNeighborsRegressor_k_p
test_KNeighborsRegressor_k_p(X_train,X_test,y_train,y_test)

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM