SVR的代碼(python)
項目中一個早期版本的代碼,PCA-SVR,參數尋優采用傳統的GridSearchCV。
from sklearn.decomposition import PCA
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from numpy import *
import numpy as np
import matplotlib.pyplot as plt
import xlrd
from svmutil import *
import pandas as pd

# --- Preface ---
# PCA + SVR pipeline.
# C / gamma ("CG") test run.


def _drop_outliers(y_true, y_pred, threshold):
    """Drop samples whose absolute prediction error exceeds *threshold*.

    Both inputs are flattened to 1-D before comparison.

    Returns:
        (y_true_kept, y_pred_kept, n_removed)
    """
    y_true = np.ravel(y_true)
    y_pred = np.ravel(y_pred)
    # Same criterion as a strict "> threshold" test, so NaN errors are kept.
    is_outlier = np.abs(y_pred - y_true) > threshold
    keep = ~is_outlier
    return y_true[keep], y_pred[keep], int(np.count_nonzero(is_outlier))


# --- Preset parameters ---
fname = "all01.xlsx"  # training data file to read (26hao)
random_1 = 34  # random seed of the sample/validation split
random_2 = 4  # random seed of the train/test split
newpca = 6  # number of components kept by PCA
yuzhi = 50  # outlier threshold on |prediction - truth|
rate_1 = 0.8  # test_size of the sample/validation split
rate_2 = 0.8  # test_size of the train/test split
bestc = 384  # SVR C
bestg = 9  # SVR gamma

# --- Data loading ---
# Create the xlrd object used to operate on the Excel sheet.
# NOTE: the loading code is elided in this listing. It must define `data` as
# a 2-D array: column 0 is the target, columns 1-10 are the input features.
...

# Split inputs and output.
data_x = data[:, 1:11]
data_y = data[:, 0:1]

# --- PCA ---
# NOTE(review): PCA is fitted on the full data set before any split, so the
# held-out rows influence the projection -- confirm this is intended.
pca = PCA(n_components=newpca)  # keep `newpca` principal components
data_x = pca.fit_transform(data_x)  # project the samples
# Rows = reduced dimensions, columns = coefficients of the original features.
print(pca.components_)
# Share of the total variance explained by each new component.
print(pca.explained_variance_ratio_)

# --- Data split ---
# Sample set vs. validation set; rate_1 is the share given to validation.
train_data_x, predict_data_x, train_data_y, predict_data_y = train_test_split(
    data_x, data_y, test_size=rate_1, random_state=random_1)

# Training set vs. test set, taken from the sample set.
train_x, test_x, train_y, test_y = train_test_split(
    train_data_x, train_data_y, test_size=rate_2, random_state=random_2)
predict_x = predict_data_x
predict_y = predict_data_y

# Flatten every target vector to 1-D so targets and predictions share a shape.
test_y = np.reshape(test_y, -1)
train_y = np.reshape(train_y, -1)
predict_y = np.reshape(predict_y, -1)

# Standardize x. The scaler is fitted on the whole sample set (the 20% left
# by the first split), i.e. on train_x and test_x together.
ss_X = StandardScaler()
ss_X.fit(train_data_x)
train_x = ss_X.transform(train_x)
test_x = ss_X.transform(test_x)
predict_x = ss_X.transform(predict_x)

# --- Parameter search and SVR ---
# Grid search with cross-validation (GridSearchCV) exhaustively tries every
# parameter combination. Kept here for experiments:
# param_grid = {'gamma': [bestg], 'C': [bestc]}
# rbf_svr_cg = GridSearchCV(SVR(kernel='rbf'), param_grid, cv=5)
# rbf_svr_cg.fit(train_x,train_y)
# bestc = rbf_svr_cg.best_params_.get('C')
# bestg = rbf_svr_cg.best_params_.get('gamma')

# Best parameters.
print(bestc, bestg)
# SVR takes C and gamma as keyword arguments; it has no `param_grid` parameter.
rbf_svr = SVR(kernel='rbf', C=bestc, gamma=bestg)

# Train.
rbf_svr.fit(train_x, train_y)

# Predict.
test_y_predict = rbf_svr.predict(test_x)
predict_y_predict = rbf_svr.predict(predict_x)

# --- Outlier removal ---
print('樣本集:', len(train_data_y))
print('驗證集:', len(predict_data_y))
test_y, test_y_predict, count = _drop_outliers(test_y, test_y_predict, yuzhi)
print('測試集異常點', count)
predict_y, predict_y_predict, count = _drop_outliers(
    predict_y, predict_y_predict, yuzhi)
print('驗證集異常點', count)

# --- Evaluation ---
# Test-set metrics (disabled):
# print('The value of R-squared of kernal=rbf is',r2_score(test_y,test_y_predict))
# print('The mean squared error of kernal=rbf is',mean_squared_error(test_y,test_y_predict))
# print('The mean absolute error of kernal=rbf is',mean_absolute_error(test_y,test_y_predict))

# R^2 (coefficient of determination) on the validation set.
print('The value of R-squared of kernal=rbf is',
      r2_score(predict_y, predict_y_predict))
# Mean squared error.
print('The mean squared error of kernal=rbf is',
      mean_squared_error(predict_y, predict_y_predict))
# Mean absolute error.
print('The mean absolute error of kernal=rbf is',
      mean_absolute_error(predict_y, predict_y_predict))
# Correlation coefficient r, computed two ways.
X1 = pd.Series(np.reshape(predict_y, -1))
Y1 = pd.Series(np.reshape(predict_y_predict, -1))
print('The r is', X1.corr(Y1, method="pearson"))
# NOTE: this is NaN whenever R^2 is negative.
print('The r is', np.sqrt(r2_score(predict_y, predict_y_predict)))

# --- Plots ---
# PRN
print('PRN:', fname)

# PCA
print()

# Residuals.
diff_predict = predict_y_predict - predict_y
plt.plot(diff_predict, color='black', label='error')
plt.xlabel("no.")
plt.ylabel("error(m)")
plt.title('xxx')
plt.grid()
plt.legend()
plt.show()

# Truth vs. model, 1: both curves on one axis.
plt.plot(predict_y, color='g', label='dtu15mss')
plt.plot(predict_y_predict, color='b', label='pre')
plt.xlabel("xxx")
plt.ylabel("error(m)")
plt.title('xxx')
plt.grid()
plt.legend()
plt.show()

# Truth vs. model, 2: stacked subplots.
fig = plt.figure(3)
ax1 = fig.add_subplot(2, 1, 1)
ax1.plot(predict_y, color='g', label='dtu15mss')
ax2 = fig.add_subplot(2, 1, 2)
ax2.plot(predict_y_predict, color='b', label='pre')
plt.show()

# Truth vs. model, 3: scatter against the y = x reference line.
p_x = list(range(int(np.min(predict_y)) - 5, int(np.max(predict_y)) + 5))
p_y = p_x
plt.plot(p_x, p_y, color='black', label='1')
plt.scatter(predict_y_predict, predict_y, s=10, color='b', marker='x',
            label='0')  # https://www.cnblogs.com/shanlizi/p/6850318.html
plt.xlabel('PRE')
plt.ylabel('DTU')
plt.show()