import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, ensemble
from sklearn.model_selection import train_test_split


def load_data_classification():
    """Load the dataset used for the classification examples.

    Reads the digits dataset bundled with scikit-learn and performs a
    stratified split that holds out one quarter of the samples for testing.

    Returns:
        The sequence produced by ``train_test_split``, ordered as
        ``X_train, X_test, y_train, y_test``.
    """
    bunch = datasets.load_digits()
    features, labels = bunch.data, bunch.target
    return train_test_split(
        features,
        labels,
        test_size=0.25,
        random_state=0,  # fixed seed so the split is reproducible
        stratify=labels,  # keep class proportions equal in both halves
    )


# Ensemble learning: AdaBoost regression model
def test_AdaBoostRegressor(*data):
    """Plot how a default AdaBoostRegressor scores as base regressors are added.

    Args:
        *data: exactly four positional values, in the order
            ``X_train, X_test, y_train, y_test``.

    The function returns nothing; it opens a matplotlib window showing the
    staged training and testing scores against the number of estimators.
    """
    X_train, X_test, y_train, y_test = data

    model = ensemble.AdaBoostRegressor()
    model.fit(X_train, y_train)

    # One x position per fitted base regressor, counted from 1.
    stages = list(range(1, len(model.estimators_) + 1))

    # Legend text is kept verbatim from the original script.
    curves = (
        ("Traing score", X_train, y_train),
        ("Testing score", X_test, y_test),
    )

    axes = plt.figure().add_subplot(1, 1, 1)
    for caption, features, targets in curves:
        axes.plot(stages, list(model.staged_score(features, targets)), label=caption)

    axes.set_xlabel("estimator num")
    axes.set_ylabel("score")
    axes.legend(loc="best")
    axes.set_title("AdaBoostRegressor")
    plt.show()


# Fetch the classification data
X_train, X_test, y_train, y_test = load_data_classification()
# Run test_AdaBoostRegressor
test_AdaBoostRegressor(X_train, X_test, y_train, y_test)
def _adaboost_with_base(base_regressor):
    """Build an AdaBoostRegressor wrapping *base_regressor* on any scikit-learn version."""
    try:
        # Newer scikit-learn releases name this parameter ``estimator``.
        return ensemble.AdaBoostRegressor(estimator=base_regressor)
    except TypeError:
        # Older releases only know the original ``base_estimator`` keyword.
        return ensemble.AdaBoostRegressor(base_estimator=base_regressor)


def test_AdaBoostRegressor_base_regr(*data):
    """Compare AdaBoostRegressor scores across base regressor types.

    Fits one ensemble with the default base regressor and one built on
    ``LinearSVR``, then draws the staged training and testing scores of each
    in its own subplot (stacked vertically).

    Args:
        *data: exactly four positional values, in the order
            ``X_train, X_test, y_train, y_test``.

    The function returns nothing; it opens a matplotlib window.
    """
    from sklearn.svm import LinearSVR

    X_train, X_test, y_train, y_test = data
    fig = plt.figure()

    candidates = [
        # Default base regressor
        ("Decision Tree Regressor", ensemble.AdaBoostRegressor()),
        # LinearSVR as the base regressor
        ("Linear SVM Regressor", _adaboost_with_base(LinearSVR(epsilon=0.01, C=100))),
    ]

    for position, (label, regr) in enumerate(candidates, start=1):
        ax = fig.add_subplot(2, 1, position)
        regr.fit(X_train, y_train)

        # One x position per fitted base regressor, counted from 1.
        stages = list(range(1, len(regr.estimators_) + 1))
        ax.plot(stages, list(regr.staged_score(X_train, y_train)), label="Traing score")
        ax.plot(stages, list(regr.staged_score(X_test, y_test)), label="Testing score")
        ax.set_xlabel("estimator num")
        ax.set_ylabel("score")
        ax.legend(loc="lower right")
        # Fixed y range so both subplots are directly comparable.
        ax.set_ylim(-1, 1)
        ax.set_title("Base_Estimator:%s" % label)

    plt.suptitle("AdaBoostRegressor")
    plt.show()


# Run test_AdaBoostRegressor_base_regr
test_AdaBoostRegressor_base_regr(X_train, X_test, y_train, y_test)
def test_AdaBoostRegressor_learning_rate(*data):
    """Plot AdaBoostRegressor training and testing scores against learning rate.

    For each learning rate in ``np.linspace(0.01, 1)`` a fresh ensemble with
    ``n_estimators=500`` is fitted on the training data and scored on both
    splits; the two resulting curves are drawn on a single axes.

    Args:
        *data: exactly four positional values, in the order
            ``X_train, X_test, y_train, y_test``.

    The function returns nothing; it opens a matplotlib window.
    """
    X_train, X_test, y_train, y_test = data
    rates = np.linspace(0.01, 1)

    def _scores_for(rate):
        # Fit one ensemble at this rate and report (train score, test score).
        model = ensemble.AdaBoostRegressor(learning_rate=rate, n_estimators=500)
        model.fit(X_train, y_train)
        return model.score(X_train, y_train), model.score(X_test, y_test)

    score_pairs = [_scores_for(rate) for rate in rates]
    train_curve = [pair[0] for pair in score_pairs]
    test_curve = [pair[1] for pair in score_pairs]

    axes = plt.figure().add_subplot(1, 1, 1)
    axes.plot(rates, train_curve, label="Traing score")
    axes.plot(rates, test_curve, label="Testing score")
    axes.set_xlabel("learning rate")
    axes.set_ylabel("score")
    axes.legend(loc="best")
    axes.set_title("AdaBoostRegressor")
    plt.show()


# Run test_AdaBoostRegressor_learning_rate
test_AdaBoostRegressor_learning_rate(X_train, X_test, y_train, y_test)
def test_AdaBoostRegressor_loss(*data):
    """Plot AdaBoostRegressor staged scores for each supported loss function.

    Fits one ensemble (``n_estimators=30``) per loss in ``linear``,
    ``square`` and ``exponential`` and overlays the staged training and
    testing score curves of all three on a single axes.

    Args:
        *data: exactly four positional values, in the order
            ``X_train, X_test, y_train, y_test``.

    The function returns nothing; it opens a matplotlib window.
    """
    X_train, X_test, y_train, y_test = data
    axes = plt.figure().add_subplot(1, 1, 1)

    for loss_name in ("linear", "square", "exponential"):
        model = ensemble.AdaBoostRegressor(loss=loss_name, n_estimators=30)
        model.fit(X_train, y_train)

        # One x position per fitted base regressor, counted from 1.
        stages = list(range(1, len(model.estimators_) + 1))
        train_curve = list(model.staged_score(X_train, y_train))
        axes.plot(stages, train_curve, label="Traing score:loss=%s" % loss_name)
        test_curve = list(model.staged_score(X_test, y_test))
        axes.plot(stages, test_curve, label="Testing score:loss=%s" % loss_name)

    # Decorate once, after every curve exists, so the legend lists them all.
    axes.set_xlabel("estimator num")
    axes.set_ylabel("score")
    axes.legend(loc="lower right")
    axes.set_ylim(-1, 1)
    plt.suptitle("AdaBoostRegressor")
    plt.show()


# Run test_AdaBoostRegressor_loss
test_AdaBoostRegressor_loss(X_train, X_test, y_train, y_test)