菜鳥,剛入機器學習不久,數據是導師給的
"""Average the test-set predictions of k-fold linear regressions.

A LinearRegression model is fitted once per fold of the training CSV, each
fitted model predicts the separate test CSV, and the per-fold predictions are
averaged. MSE, RMSE, MAE and MAPE are printed, the averaged predictions are
written to ``test2.csv`` and two diagnostic plots are shown.
"""
import numpy as np
import pandas as pd

# NOTE: scikit-learn and matplotlib are imported inside the functions that
# need them, so ``mape_cal`` stays importable when they are not installed.

TRAIN_CSV = "east訓練集.csv"
TEST_CSV = "測試集(1).csv"
PREDICTORS = ['x1', 'x2', 'x3', 'x4', 'x5']
TARGET = 'y'


def mape_cal(originalValue, predictValue, length=None):
    """Return the mean absolute percentage error as a fraction (not percent).

    Args:
        originalValue: Observed values (array-like; flattened before use).
        predictValue: Predicted values (array-like; flattened before use).
        length: Divisor for the summed relative errors. Defaults to the
            number of observed values.

    Returns:
        ``sum(|(original - predicted) / original|) / length``.

    Note:
        An observed value of 0 makes its term infinite/NaN (NumPy emits a
        RuntimeWarning instead of raising).
    """
    actual = np.asarray(originalValue, dtype=float).ravel()
    predicted = np.asarray(predictValue, dtype=float).ravel()
    if length is None:
        length = actual.size
    relative_error = (actual - predicted) / actual
    return np.abs(relative_error).sum() / length


def kfold_average_predictions(train_df, test_df, predictors=PREDICTORS,
                              target=TARGET, n_splits=3):
    """Fit one linear model per fold and average their test predictions.

    Args:
        train_df: Training DataFrame containing ``predictors`` and ``target``.
        test_df: DataFrame containing ``predictors`` to predict on.
        predictors: Feature column names.
        target: Target column name.
        n_splits: Number of folds.

    Returns:
        1-D array with the mean prediction per row of ``test_df``.
    """
    from sklearn.linear_model import LinearRegression
    from sklearn.model_selection import KFold

    model = LinearRegression()
    # No shuffling, as in the original script: its ``random_state=1`` had no
    # effect without ``shuffle=True``, and recent scikit-learn versions reject
    # that combination, so the argument is dropped.
    folds = KFold(n_splits=n_splits)

    fold_predictions = []
    # Only the training part of each split is used; the held-out part is
    # ignored because evaluation happens on the separate test file.
    for train_idx, _ in folds.split(train_df):
        # KFold yields positional indices, hence ``iloc`` rather than ``loc``.
        fold_features = train_df[predictors].iloc[train_idx]
        fold_target = train_df[target].iloc[train_idx]
        model.fit(fold_features, fold_target)
        fold_predictions.append(model.predict(test_df[predictors]))
    return np.mean(fold_predictions, axis=0)


def report_metrics(y_test, y_predict):
    """Print MSE, RMSE, MAE and MAPE of ``y_predict`` against ``y_test``."""
    from sklearn import metrics

    print("-----預測結果-----")
    mse = metrics.mean_squared_error(y_test, y_predict)
    print("均方差MSE:", mse)
    # RMSE is the square root of the MSE computed above.
    print("均根方差RMSE:", np.sqrt(mse))
    print("MAE:", metrics.mean_absolute_error(y_test, y_predict))
    mapeValue = mape_cal(y_test, y_predict, len(y_test))
    print("mape值:", mapeValue)


def plot_results(y_test, y_predict):
    """Show measured-vs-predicted scatter and both series by sample index."""
    import matplotlib.pyplot as plt

    fig = plt.figure(figsize=(15, 7))

    ax1 = fig.add_subplot(2, 1, 1)
    ax1.scatter(y_test, y_predict, linewidths=4, c='blue')
    low, high = y_test.min(), y_test.max()
    # Identity line: points on it are perfect predictions. The colour is
    # given once (the original passed both 'k--' and c='red').
    ax1.plot([low, high], [low, high], linestyle='--', lw=4, c='red')
    ax1.set_xlabel('Measured')
    ax1.set_ylabel('Predicted')

    ax3 = fig.add_subplot(2, 1, 2)
    sample_index = range(len(y_test))
    ax3.plot(sample_index, y_predict, c='red', label='predictValue', lw=2)
    ax3.plot(sample_index, y_test, c='blue', label='originalValue', lw=2)
    ax3.legend(loc='upper left')
    plt.show()


def main():
    """Load the CSV files, train, report metrics, save and plot predictions."""
    data = pd.read_csv(TRAIN_CSV)
    data2 = pd.read_csv(TEST_CSV)

    # The test target is selected by name, like the training target.
    # NOTE(review): assumes the test file also has a 'y' column — confirm.
    y_test = data2[TARGET].to_numpy()

    y_predict = kfold_average_predictions(data, data2)
    print(y_predict)

    report_metrics(y_test, y_predict)

    pd.Series(y_predict.ravel()).to_csv('test2.csv')
    plot_results(y_test, y_predict)


if __name__ == "__main__":
    main()
預測結果:
-----預測結果-----
均方差MSE: 1413.927330961531
均根方差RMSE: 37.60222507992753
MAE: 30.085778687356473
mape值: 0.18161724802211374