Xgboost參數調節

本文為轉載文章（查看原文） 2019-07-26 16:56 482

轉自：https://segmentfault.com/a/1190000014040317

整體:

# Overall template: grid-search one group of XGBoost hyper-parameters per step.
# Exactly one cv_params / other_params pair should be active at a time; the
# later steps are kept commented out and are enabled one by one.
# NOTE(review): the fixed values in later steps (e.g. n_estimators=550) are
# presumably the best values found by the preceding step -- confirm on your data.

# Step 1: tune n_estimators
cv_params = {'n_estimators': [550, 575, 600, 650, 675]}
other_params = {
    'learning_rate': 0.1, 'n_estimators': 600, 'max_depth': 5, 'min_child_weight': 1, 'seed': 0,
    'subsample': 0.8, 'colsample_bytree': 0.8, 'gamma': 0, 'reg_alpha': 0, 'reg_lambda': 1,
}
# Step 2: tune max_depth and min_child_weight
# cv_params = {'max_depth': [3, 4, 5, 6, 7, 8, 9, 10], 'min_child_weight': [1, 2, 3, 4, 5, 6]}
# other_params = {'learning_rate': 0.1, 'n_estimators': 550, 'max_depth': 5, 'min_child_weight': 1, 'seed': 0,
# 'subsample': 0.8, 'colsample_bytree': 0.8, 'gamma': 0, 'reg_alpha': 0, 'reg_lambda': 1}
# Step 3: tune gamma
# cv_params = {'gamma': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]}
# other_params = {'learning_rate': 0.1, 'n_estimators': 550, 'max_depth': 4, 'min_child_weight': 5, 'seed': 0,
# 'subsample': 0.8, 'colsample_bytree': 0.8, 'gamma': 0, 'reg_alpha': 0, 'reg_lambda': 1}
# Step 4: tune subsample and colsample_bytree
# cv_params = {'subsample': [0.6, 0.7, 0.8, 0.9], 'colsample_bytree': [0.6, 0.7, 0.8, 0.9]}
# other_params = {'learning_rate': 0.1, 'n_estimators': 550, 'max_depth': 4, 'min_child_weight': 5, 'seed': 0,
# 'subsample': 0.8, 'colsample_bytree': 0.8, 'gamma': 0.1, 'reg_alpha': 0, 'reg_lambda': 1}
# Step 5: tune reg_alpha and reg_lambda
# cv_params = {'reg_alpha': [0.05, 0.1, 1, 2, 3], 'reg_lambda': [0.05, 0.1, 1, 2, 3]}
# other_params = {'learning_rate': 0.1, 'n_estimators': 550, 'max_depth': 4, 'min_child_weight': 5, 'seed': 0,
# 'subsample': 0.7, 'colsample_bytree': 0.7, 'gamma': 0.1, 'reg_alpha': 0, 'reg_lambda': 1}
# Step 6: tune learning_rate
# cv_params = {'learning_rate': [0.01, 0.05, 0.07, 0.1, 0.2]}
# other_params = {'learning_rate': 0.1, 'n_estimators': 550, 'max_depth': 4, 'min_child_weight': 5, 'seed': 0,
# 'subsample': 0.7, 'colsample_bytree': 0.7, 'gamma': 0.1, 'reg_alpha': 1, 'reg_lambda': 1}

# Build the base classifier from the fixed parameters; the grid search
# overrides only the keys listed in cv_params for each candidate.
model = xgb.XGBClassifier(**other_params)
optimized_GBM = GridSearchCV(estimator=model, param_grid=cv_params, cv=5, verbose=1, n_jobs=4)
optimized_GBM.fit(X_train, y_train)
# `grid_scores_` was removed from GridSearchCV in scikit-learn 0.20;
# `cv_results_` is its replacement and holds the per-candidate CV results.
evalute_result = optimized_GBM.cv_results_
print('每輪迭代運行結果:{0}'.format(evalute_result))
print('參數的最佳取值：{0}'.format(optimized_GBM.best_params_))
print('最佳模型得分:{0}'.format(optimized_GBM.best_score_))

1.調節最大迭代次數n_estimators

# Step 1: grid-search the number of boosting rounds (n_estimators).
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV

cv_params = {'n_estimators': [20, 30, 40]}
# Fixed parameters for this step; 'n_estimators' here is overridden by each
# candidate value from cv_params during the search.
other_params = {
    'learning_rate': 0.1, 'n_estimators': 500, 'max_depth': 5, 'min_child_weight': 1, 'seed': 0,
    'subsample': 0.8, 'colsample_bytree': 0.8, 'gamma': 0, 'reg_alpha': 0, 'reg_lambda': 1,
}
model = XGBRegressor(**other_params)
optimized_GBM = GridSearchCV(estimator=model, param_grid=cv_params, scoring='r2', cv=3, verbose=1, n_jobs=-1)
optimized_GBM.fit(x_data, y_data)
# `return_train_score` is only the boolean constructor flag, not the results;
# `cv_results_` holds the per-candidate cross-validation results.
evalute_result = optimized_GBM.cv_results_
print('每輪迭代運行結果:{0}'.format(evalute_result))
print('參數的最佳取值：{0}'.format(optimized_GBM.best_params_))
print('最佳模型得分:{0}'.format(optimized_GBM.best_score_))

2.調試的參數是min_child_weight以及max_depth：

# Step 2: grid-search max_depth together with min_child_weight.
cv_params = {'max_depth': [3, 4, 5, 6, 7, 8, 9, 10], 'min_child_weight': [6, 7, 8]}
# Fixed parameters for this step; the keys listed in cv_params are overridden
# by each candidate combination during the search.
other_params = {
    'learning_rate': 0.1, 'n_estimators': 20, 'max_depth': 5, 'min_child_weight': 1, 'seed': 0,
    'subsample': 0.8, 'colsample_bytree': 0.8, 'gamma': 0, 'reg_alpha': 0, 'reg_lambda': 1,
}
model = XGBRegressor(**other_params)
optimized_GBM = GridSearchCV(estimator=model, param_grid=cv_params, scoring='r2', cv=3, verbose=1, n_jobs=-1)
optimized_GBM.fit(x_data, y_data)
# `return_train_score` is only the boolean constructor flag, not the results;
# `cv_results_` holds the per-candidate cross-validation results.
evalute_result = optimized_GBM.cv_results_
print('每輪迭代運行結果:{0}'.format(evalute_result))
print('參數的最佳取值：{0}'.format(optimized_GBM.best_params_))
print('最佳模型得分:{0}'.format(optimized_GBM.best_score_))

3.調試參數：gamma：

# Step 3: grid-search gamma.
cv_params = {'gamma': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]}
# Fixed parameters for this step; 'gamma' here is overridden by each
# candidate value from cv_params during the search.
other_params = {
    'learning_rate': 0.1, 'n_estimators': 20, 'max_depth': 4, 'min_child_weight': 6, 'seed': 0,
    'subsample': 0.8, 'colsample_bytree': 0.8, 'gamma': 0, 'reg_alpha': 0, 'reg_lambda': 1,
}
model = XGBRegressor(**other_params)
optimized_GBM = GridSearchCV(estimator=model, param_grid=cv_params, scoring='r2', cv=3, verbose=1, n_jobs=-1)
optimized_GBM.fit(x_data, y_data)
# `return_train_score` is only the boolean constructor flag, not the results;
# `cv_results_` holds the per-candidate cross-validation results.
evalute_result = optimized_GBM.cv_results_
print('每輪迭代運行結果:{0}'.format(evalute_result))
print('參數的最佳取值：{0}'.format(optimized_GBM.best_params_))
print('最佳模型得分:{0}'.format(optimized_GBM.best_score_))

4. 調試subsample以及colsample_bytree：

# Step 4: grid-search subsample together with colsample_bytree.
cv_params = {'subsample': [0.6, 0.7, 0.8, 0.9], 'colsample_bytree': [0.6, 0.7, 0.8, 0.9]}
# Fixed parameters for this step; the keys listed in cv_params are overridden
# by each candidate combination during the search.
other_params = {
    'learning_rate': 0.1, 'n_estimators': 20, 'max_depth': 4, 'min_child_weight': 6, 'seed': 0,
    'subsample': 0.8, 'colsample_bytree': 0.8, 'gamma': 0.2, 'reg_alpha': 0, 'reg_lambda': 1,
}
model = XGBRegressor(**other_params)
optimized_GBM = GridSearchCV(estimator=model, param_grid=cv_params, scoring='r2', cv=3, verbose=1, n_jobs=4)
optimized_GBM.fit(x_data, y_data)
# `return_train_score` is only the boolean constructor flag, not the results;
# `cv_results_` holds the per-candidate cross-validation results.
evalute_result = optimized_GBM.cv_results_
print('每輪迭代運行結果:{0}'.format(evalute_result))
print('參數的最佳取值：{0}'.format(optimized_GBM.best_params_))
print('最佳模型得分:{0}'.format(optimized_GBM.best_score_))

5.調試reg_alpha以及reg_lambda：

# Step 5: grid-search the regularisation terms reg_alpha and reg_lambda.
cv_params = {'reg_alpha': [0.05, 0.1, 1, 2, 3], 'reg_lambda': [0.05, 0.1, 1, 2, 3]}
# Fixed parameters for this step; the keys listed in cv_params are overridden
# by each candidate combination during the search.
other_params = {
    'learning_rate': 0.1, 'n_estimators': 20, 'max_depth': 4, 'min_child_weight': 6, 'seed': 0,
    'subsample': 0.8, 'colsample_bytree': 0.9, 'gamma': 0.2, 'reg_alpha': 0, 'reg_lambda': 1,
}
model = XGBRegressor(**other_params)
optimized_GBM = GridSearchCV(estimator=model, param_grid=cv_params, scoring='r2', cv=3, verbose=1, n_jobs=4)
optimized_GBM.fit(x_data, y_data)
# `return_train_score` is only the boolean constructor flag, not the results;
# `cv_results_` holds the per-candidate cross-validation results.
evalute_result = optimized_GBM.cv_results_
print('每輪迭代運行結果:{0}'.format(evalute_result))
print('參數的最佳取值：{0}'.format(optimized_GBM.best_params_))
print('最佳模型得分:{0}'.format(optimized_GBM.best_score_))

6.調試learning_rate：

# Step 6: grid-search learning_rate; at this stage smaller learning rates are
# usually the ones worth testing.
cv_params = {'learning_rate': [0.01, 0.05, 0.07, 0.1, 0.2]}
# Fixed parameters for this step; 'learning_rate' here is overridden by each
# candidate value from cv_params during the search.
other_params = {
    'learning_rate': 0.1, 'n_estimators': 20, 'max_depth': 4, 'min_child_weight': 6, 'seed': 0,
    'subsample': 0.8, 'colsample_bytree': 0.9, 'gamma': 0.2, 'reg_alpha': 0.1, 'reg_lambda': 1,
}
model = XGBRegressor(**other_params)
optimized_GBM = GridSearchCV(estimator=model, param_grid=cv_params, scoring='r2', cv=3, verbose=1, n_jobs=4)
optimized_GBM.fit(x_data, y_data)
# `return_train_score` is only the boolean constructor flag, not the results;
# `cv_results_` holds the per-candidate cross-validation results.
evalute_result = optimized_GBM.cv_results_
print('每輪迭代運行結果:{0}'.format(evalute_result))
print('參數的最佳取值：{0}'.format(optimized_GBM.best_params_))
print('最佳模型得分:{0}'.format(optimized_GBM.best_score_))

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯繫本站郵箱yoyou2525@163.com刪除。

猜您在找 XGBoost 參數說明 XGBoost 參數介紹 xgboost參數及調參 XGBoost參數調優 XGBoost參數解釋及調優 xgboost 參數調優指南 11 ROS 動態參數調節 sklearn中的超參數調節 xgboost中XGBClassifier(）參數詳解 XGBoost、LightGBM參數講解及實戰