GridSearchCV交叉驗證


代碼實現(基於邏輯回歸算法):

# -*- coding: utf-8 -*-
"""
Created on Sat Sep  1 11:54:48 2018

@author: zhen

    交叉驗證
"""
import numpy as np
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt
14 
# Load the bundled iris dataset and keep only the last feature column
# (index 3) as a 2-D array of shape (n_samples, 1); the slice `3:` rather
# than the scalar index `3` is what preserves the second dimension.
# NOTE(review): column 3 is petal width (cm) per scikit-learn's iris
# documentation -- confirm if the dataset source ever changes.
iris = datasets.load_iris()
x = iris.data[:, 3:]
# Integer class labels; the plot labels further down treat 0/1/2 as
# Setosa/Versicolour/Virginica.
y = iris.target
18 
19 
def report(results, n_top=3):
    """Print a summary of the best-ranked candidates of a parameter search.

    For each rank from 1 to ``n_top`` (inclusive), every candidate holding
    that rank is printed with its mean/std validation score and its
    parameter dict. Ranks that no candidate holds print nothing.

    Args:
        results: Mapping with the keys ``'rank_test_score'``,
            ``'mean_test_score'``, ``'std_test_score'`` and ``'params'``,
            each an index-aligned sequence (e.g. ``GridSearchCV.cv_results_``).
            Plain lists are accepted as well as NumPy arrays.
        n_top: Highest rank to report.

    Returns:
        None. The summary is written to standard output.
    """
    # Coerce once so the element-wise comparison below also works for plain
    # lists; `list == int` would evaluate to a single False and the report
    # would silently be empty.
    ranks = np.asarray(results['rank_test_score'])
    for rank in range(1, n_top + 1):
        for candidate in np.flatnonzero(ranks == rank):
            print("Model with rank: {0}".format(rank))
            print("Mean validation score: {0:.3f} (std: {1:.3f})".format(
                results['mean_test_score'][candidate],
                results['std_test_score'][candidate]))
            print("Parameters: {0}".format(results['params'][candidate]))
            print("")
30 
31 
# Hyper-parameter grid: GridSearchCV evaluates every (tol, C) combination,
# i.e. 3 x 3 = 9 candidates.
param_grid = {"tol": [1e-4, 1e-3, 1e-2], "C": [0.4, 0.6, 0.8]}

# NOTE(review): `multi_class` is deprecated since scikit-learn 1.5; on newer
# releases wrap the estimator in OneVsRestClassifier instead (the grid keys
# would then need an `estimator__` prefix).
log_reg = LogisticRegression(multi_class='ovr', solver='sag')
# Use 3-fold cross-validation to score each candidate.
grid_search = GridSearchCV(log_reg, param_grid=param_grid, cv=3)
grid_search.fit(x, y)

# Print the three best-ranked parameter combinations.
report(grid_search.cv_results_)

# Evaluate the selected model on a dense grid of the single input feature,
# shaped (1000, 1) to match the training matrix.
x_new = np.linspace(0, 3, 1000).reshape(-1, 1)
y_proba = grid_search.predict_proba(x_new)
# Hard class predictions on the same grid; not used below, kept so the
# script exposes the same module-level names as before.
y_hat = grid_search.predict(x_new)

# One probability curve per class; columns of y_proba follow the class order.
plt.plot(x_new, y_proba[:, 2], 'g-', label='Iris-Virginica')
plt.plot(x_new, y_proba[:, 1], 'r-', label='Iris-Versicolour')
plt.plot(x_new, y_proba[:, 0], 'b-', label='Iris-Setosa')
# Without a legend the `label=` arguments above are never displayed.
plt.legend()
plt.show()

print(grid_search.predict([[1.7], [1.5]]))

結果:

總結:使用網格搜索(GridSearchCV)結合交叉驗證,可以讓代碼自動對設定範圍內的每一組參數分別訓練並評估模型,最後選出效果最好的參數組合,並用該參數訓練出的模型進行預測,以求達到最好的預測效果!


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM