Hyperparameter Tuning with GridSearchCV and RandomizedSearchCV


1 GridSearchCV can essentially be viewed as a for loop: it fits the model once for every combination of the candidate parameter values and then compares the cross-validated scores to pick the best combination.
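As a minimal sketch of that equivalence (KNN on the built-in iris data; the candidate values are illustrative assumptions), the nested loop below does by hand what GridSearchCV automates:

from itertools import product
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier

X, y = load_iris(return_X_y=True)
# Every combination of these candidate values will be tried
param_grid = {'n_neighbors': [3, 5, 7], 'weights': ['uniform', 'distance']}

best_score, best_params = -1.0, None
for n, w in product(param_grid['n_neighbors'], param_grid['weights']):
    # Score this combination with 5-fold cross-validation
    score = cross_val_score(KNeighborsClassifier(n_neighbors=n, weights=w),
                            X, y, cv=5).mean()
    if score > best_score:
        best_score, best_params = score, {'n_neighbors': n, 'weights': w}
print(best_score, best_params)

GridSearchCV wraps exactly this loop and adds parallelism (n_jobs), result bookkeeping (cv_results_), and a refit of the best model.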

A GridSearchCV template:

# Use scikit-learn to grid search the optimizer of a Keras model
import numpy
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
import pandas as pd
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"  # limit TensorFlow to GPU 1
# Function to create model, required for KerasClassifier
def create_model(optimizer='adam'):
    # create model
    model = Sequential()
    model.add(Dense(12, input_dim=8, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)
# load dataset
dataset = pd.read_csv('diabetes.csv')
# split into input (X) and output (Y) variables
X = dataset[['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness',
             'Insulin','BMI', 'DiabetesPedigreeFunction', 'Age']]
Y = dataset['Outcome']
# create model
model = KerasClassifier(build_fn=create_model, epochs=100, batch_size=10, verbose=0)
# define the grid search parameters
optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
param_grid = dict(optimizer=optimizer)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
grid_result = grid.fit(X, Y)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
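The same template covers other hyperparameters such as batch size and epochs, since KerasClassifier forwards them to fit. A minimal sketch reusing create_model, X, and Y from above (the candidate values are assumptions for illustration, not recommendations):

# Assumed candidate values -- adjust to your dataset and compute budget
param_grid = dict(batch_size=[10, 20, 40], epochs=[50, 100])
model = KerasClassifier(build_fn=create_model, verbose=0)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X, Y)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))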

 

References:
https://machinelearningmastery.com/grid-search-hyperparameters-deep-learning-models-python-keras/
https://blog.csdn.net/weixin_41988628/article/details/83098130

2 Using randomized search to tune KNN on the iris dataset:

from sklearn.neighbors import KNeighborsClassifier  # we tune KNN's parameters: the value of k and the sample-weighting scheme
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV  # grid search and randomized search
import pandas as pd

iris = pd.read_csv('../data/iris.csv')
print(iris.head())
print(iris.columns)
X = iris[['Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width']]  # 150 samples, 4 features
y = iris['Species']  # 150 class labels

k_range = range(1, 31)  # search range for the parameter k
weight_options = ['uniform', 'distance']  # candidate weighting schemes: 'uniform' weights all neighbors equally, 'distance' weights by inverse distance
# Build the parameter grid: a dict whose keys are parameter names and whose values are lists of candidate values
param_grid = {'n_neighbors': k_range, 'weights': weight_options}  # the keys must match the estimator's parameter names
print(param_grid)

knn = KNeighborsClassifier(n_neighbors=5)  # define the estimator; n_neighbors and weights correspond to the keys in param_grid


# ================================ Grid search =======================================
# GridSearchCV takes arguments much like cross_val_score, with param_grid holding the grid defined above
# Setting n_jobs=-1 runs the search in parallel (if your machine supports it)
grid = GridSearchCV(estimator=knn, param_grid=param_grid, cv=10, scoring='accuracy')  # 10-fold cross-validation for every parameter combination; scoring='accuracy' uses accuracy as the metric (other metrics can be supplied)
grid.fit(X, y)

print('Grid search - CV records:', grid.cv_results_)  # details of every fit
print('Grid search - best score:', grid.best_score_)  # best cross-validated score
print('Grid search - best params:', grid.best_params_)  # dict of the parameter values that achieved the best score
print('Grid search - best model:', grid.best_estimator_)  # the estimator refit with the best parameters


# Rebuild a model with the best parameters and predict new data
knn = KNeighborsClassifier(n_neighbors=grid.best_params_['n_neighbors'], weights=grid.best_params_['weights'])
knn.fit(X, y)  # train the model
print(knn.predict([[3, 5, 4, 2]]))  # predict a new observation
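Rebuilding the model by hand is not strictly required: with the default refit=True, GridSearchCV refits the best configuration on the full dataset and exposes it, so this one-liner is equivalent:

print(grid.best_estimator_.predict([[3, 5, 4, 2]]))  # the fitted grid object also proxies predict: grid.predict(...)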



# ===================================== Randomized search ===========================================
rand = RandomizedSearchCV(knn, param_grid, cv=10, scoring='accuracy', n_iter=10, random_state=5)  # n_iter=10: only 10 randomly sampled parameter combinations are evaluated
rand.fit(X, y)

print('Random search - CV records:', rand.cv_results_)  # details of every fit
print('Random search - best score:', rand.best_score_)  # best cross-validated score
print('Random search - best params:', rand.best_params_)  # dict of the parameter values that achieved the best score
print('Random search - best model:', rand.best_estimator_)  # the estimator refit with the best parameters


# Rebuild a model with the best parameters and predict new data
knn = KNeighborsClassifier(n_neighbors=rand.best_params_['n_neighbors'], weights=rand.best_params_['weights'])
knn.fit(X, y)  # train the model
print(knn.predict([[3, 5, 4, 2]]))  # predict a new observation
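Where RandomizedSearchCV really pays off is that parameters can be sampled from continuous distributions rather than enumerated lists. A minimal sketch using scipy.stats (the distribution choice is an assumption for illustration):

from scipy.stats import randint

# randint(1, 31) draws k uniformly from [1, 30]; plain lists remain valid for discrete choices
param_dist = {'n_neighbors': randint(1, 31), 'weights': ['uniform', 'distance']}
rand = RandomizedSearchCV(KNeighborsClassifier(), param_dist, cv=10,
                          scoring='accuracy', n_iter=10, random_state=5)
rand.fit(X, y)
print(rand.best_score_, rand.best_params_)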


# ===================================== Custom scoring ===========================================
from sklearn import metrics

# Custom scoring function: receives the fitted estimator plus X and y, and returns a score
def scorerfun(estimator, X, y):
    y_pred = estimator.predict(X)
    return metrics.accuracy_score(y, y_pred)

# scoring accepts a callable with signature (estimator, X, y)
rand = RandomizedSearchCV(knn, param_grid, cv=10, scoring=scorerfun, n_iter=10, random_state=5)
rand.fit(X, y)

print('Random search - best score:', rand.best_score_)  # best cross-validated score
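scikit-learn also provides make_scorer, which wraps a plain metric(y_true, y_pred) function into the scorer interface expected by the scoring parameter; a sketch of the same search using it:

from sklearn.metrics import make_scorer, accuracy_score

rand = RandomizedSearchCV(knn, param_grid, cv=10,
                          scoring=make_scorer(accuracy_score), n_iter=10, random_state=5)
rand.fit(X, y)
print(rand.best_score_)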

Reference: https://blog.csdn.net/luanpeng825485697/article/details/79831703

