import numpy as np
from sklearn import datasets
# Load the raw handwritten-digits dataset (8x8 grayscale images, labels 0-9)
digits = datasets.load_digits()
X, y = digits.data, digits.target
# Split into train/test sets with the project's own split helper
# (NOTE(review): keyword is `test_train` in this project's helper — presumably
# the held-out fraction; verify against ALG.train_test_split's signature)
from ALG.train_test_split import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_train = 0.2, seed = 666)
# 1) Define the hyperparameter search space for kNN:
#    - uniform weighting: tune only the neighbor count
#    - distance weighting: additionally tune the Minkowski exponent p
param_grid = [
    {'weights': ['uniform'],
     'n_neighbors': list(range(1, 11))},
    {'weights': ['distance'],
     'n_neighbors': list(range(1, 11)),
     'p': list(range(1, 6))},
]
# 2) Build the estimator whose hyperparameters will be searched
from sklearn.neighbors import KNeighborsClassifier
knn_clf = KNeighborsClassifier()
# 3) Wire the estimator and the search space into scikit-learn's grid search.
#    n_jobs=-1 uses every available CPU core; verbose=2 prints per-fit progress.
from sklearn.model_selection import GridSearchCV
grid_search = GridSearchCV(knn_clf, param_grid, n_jobs = -1, verbose = 2)
# 4) Fit the grid search on the training data.
# BUG FIX: the original file contained the IPython cell magic ``%%time`` here,
# which is a SyntaxError in a plain Python script (cell magics only work in
# Jupyter/IPython). Timing is done with ``time.perf_counter`` instead.
import time

start = time.perf_counter()
grid_search.fit(X_train, y_train)
print(f"grid search fit took {time.perf_counter() - start:.2f}s")

# 5) Inspect the results.
# Best estimator, carrying all of its parameters (tuned and default).
# NOTE: bare expressions produce no output in a script (unlike a notebook),
# so the results are printed explicitly.
print(grid_search.best_estimator_)
# Best cross-validated accuracy. This can be lower than a plain
# train/test-split accuracy (e.g. an earlier n_neighbors=3 run) because the
# evaluation criterion changed to cross-validation.
print(grid_search.best_score_)
# The winning combination among the searched parameters,
# e.g. {'n_neighbors': 3, 'p': 3, 'weights': 'distance'}
print(grid_search.best_params_)

# 6) Use the best classifier to score the held-out test set.
knn_clf = grid_search.best_estimator_
print(knn_clf.score(X_test, y_test))