python3 學習機器學習api
使用了三種集成回歸模型:隨機森林回歸、極端隨機森林回歸、梯度提升回歸
git: https://github.com/linyi0604/MachineLearning
代碼:
# Compare three ensemble regressors on the Boston housing data:
#   - random forest regression
#   - extremely randomized trees (extra trees) regression
#   - gradient boosting regression
#
# Ensemble models usually achieve very good performance.
#
# NOTE: load_boston was removed in scikit-learn 1.2, so this script needs an
# older scikit-learn release to run as written.

from sklearn.datasets import load_boston

try:
    from sklearn.model_selection import train_test_split
except ImportError:
    # Very old scikit-learn releases only provide the legacy module.
    from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import (
    RandomForestRegressor,
    ExtraTreesRegressor,
    GradientBoostingRegressor,
)
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import numpy as np

# 1. Prepare the data.
# Load the Boston-area housing price records.
boston = load_boston()
# print(boston.DESCR) shows the dataset description: 506 records, each with
# 13 numeric features and a target house price.
# Target spread noted by the original author: max 50, min 5,
# mean 22.532806324110677.

x = boston.data
y = boston.target

# 2. Split into training and test data.
# Randomly sample 25% for testing and 75% for training.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=33
)

# 3. Standardize the training and test data.
# Both scalers are fitted on the training split only and then applied to the
# test split.
ss_x = StandardScaler()
x_train = ss_x.fit_transform(x_train)
x_test = ss_x.transform(x_test)

# StandardScaler works on 2-D input, so the targets are kept as (n, 1) columns.
ss_y = StandardScaler()
y_train = ss_y.fit_transform(y_train.reshape(-1, 1))
y_test = ss_y.transform(y_test.reshape(-1, 1))


def _to_price_scale(values):
    """Undo the target standardization, returning an (n, 1) column.

    ``predict`` returns 1-D arrays; they are reshaped to a column first
    because ``StandardScaler.inverse_transform`` expects 2-D input.
    """
    return ss_y.inverse_transform(np.asarray(values).reshape(-1, 1))


def _report(label, model, y_predict):
    """Print the default score, R-squared, MSE and MAE for one fitted model.

    ``label`` is the prefix of every printed line, ``model`` is the fitted
    estimator and ``y_predict`` must be that same model's predictions for
    ``x_test``.  MSE and MAE are computed on the original target scale.
    """
    true_prices = _to_price_scale(y_test)
    predicted_prices = _to_price_scale(y_predict)
    print(label + "的默認評估值為:", model.score(x_test, y_test))
    print(label + "的R_squared值為:", r2_score(y_test, y_predict))
    print(label + "的均方誤差為:", mean_squared_error(true_prices, predicted_prices))
    print(label + "的平均絕對誤差為:", mean_absolute_error(true_prices, predicted_prices))


# 4. Train the three ensemble regressors and predict on the test split.
# The estimators expect a 1-D target, so the (n, 1) column is flattened for
# fitting; this avoids scikit-learn's column-vector conversion warning.
y_train_flat = y_train.ravel()

# Random forest regression.
rfr = RandomForestRegressor()
rfr.fit(x_train, y_train_flat)
rfr_y_predict = rfr.predict(x_test)

# Extremely randomized trees regression.
etr = ExtraTreesRegressor()
etr.fit(x_train, y_train_flat)
# Fix: predict with etr itself (the original reused rfr here).
etr_y_predict = etr.predict(x_test)

# Gradient boosting regression.
gbr = GradientBoostingRegressor()
gbr.fit(x_train, y_train_flat)
# Fix: predict with gbr itself (the original reused rfr here).
gbr_y_predict = gbr.predict(x_test)

# 5. Evaluate the models.
# Each report pairs a model with its own predictions; the original passed
# gbr_y_predict to the extra-trees report and etr_y_predict to the
# gradient-boosting report.
_report("隨機森林回歸", rfr, rfr_y_predict)
_report("極端隨機森林回歸", etr, etr_y_predict)
_report("梯度提升回歸回歸", gbr, gbr_y_predict)

# Default .score() values recorded by the original author from one run
# (the estimators are created without a random_state, so numbers vary):
#   random forest       0.8391590262557747
#   extra trees         0.783339502805047
#   gradient boosting   0.8431187344932869
# The R-squared / MSE / MAE figures recorded in that run were identical for
# all three models (0.8391590262557747 / 12.471817322834646 /
# 2.4255118110236227) because every prediction array came from the random
# forest; only the random-forest row of those figures was meaningful.