菜鳥,剛入機器學習不久,數據是導師給的
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.linear_model import LinearRegression

try:
    # Current location of KFold (scikit-learn >= 0.18).
    from sklearn.model_selection import KFold
except ImportError:
    # Legacy module, removed in scikit-learn 0.20; note its KFold takes
    # (n, n_folds=...) rather than (n_splits=...).
    from sklearn.cross_validation import KFold
def mape_cal(originalValue, predictValue, length=None):
    """Return the mean absolute percentage error as a fraction (not percent).

    Args:
        originalValue: Array-like of observed values.
        predictValue: Array-like of predicted values with the same number
            of elements as ``originalValue``.
        length: Divisor applied to the summed relative errors. Defaults to
            the number of elements in ``originalValue``.

    Returns:
        ``sum(abs((original - predicted) / original)) / length``.

    Note:
        Zeros in ``originalValue`` are not special-cased; the division
        follows NumPy semantics (inf/nan) and propagates into the result.
    """
    # Flatten both inputs so an (n, 1) column and an (n,) vector line up
    # element-wise instead of broadcasting to an (n, n) matrix.
    actual = np.ravel(np.asarray(originalValue, dtype=float))
    predicted = np.ravel(np.asarray(predictValue, dtype=float))
    if length is None:
        length = actual.size
    relative_error = (actual - predicted) / actual
    return np.abs(relative_error).sum() / length
# Load the training set and the test set.
data = pd.read_csv("east訓練集.csv")
data2 = pd.read_csv("測試集(1).csv")
cols = data.shape[1]  # number of columns in the training set
row = data.shape[0]  # number of rows in the training set
predictors = ['x1', 'x2', 'x3', 'x4', 'x5']
X_train = data.values[:, 0:cols-1]  # every column before the last one
y_train = data.values[:, cols-1:cols]  # last column only, kept 2-D
X_test = data2.values[:, 0:cols-1]  # same slicing applied to the test set
y_test = data2.values[:, cols-1:cols]

lr = LinearRegression()
# Three contiguous, unshuffled folds. random_state is intentionally omitted:
# it has no effect without shuffling and current scikit-learn rejects it.
kf = KFold(n_splits=3)
predictions = []
# Fit one model per fold on that fold's training rows and predict the whole
# test set each time; the held-out indices of each fold are not used.
for train, _ in kf.split(data):
    # kf.split yields positional indices, so select rows with iloc.
    train_predictors = data[predictors].iloc[train]
    train_target = data['y'].iloc[train]
    lr.fit(train_predictors, train_target)
    test_predictions = lr.predict(data2[predictors])
    predictions.append(test_predictions)
# Final prediction is the element-wise mean of the per-fold predictions.
y_predict = np.array(predictions).mean(axis=0)
print(y_predict)

# Evaluate the averaged prediction against the last column of the test set.
print("-----預測結果-----")
mse = metrics.mean_squared_error(y_test, y_predict)
print("均方差MSE:", mse)
print("均根方差RMSE:", np.sqrt(mse))
print("MAE:", metrics.mean_absolute_error(y_test, y_predict))
mapeValue = mape_cal(y_test.ravel(), y_predict.ravel(), X_test.shape[0])
print("mape值:", mapeValue)

# Persist the averaged predictions.
temp = pd.Series(y_predict.ravel())
temp.to_csv('test2.csv')

fig = plt.figure(figsize=(15, 7))
# Top panel: predicted vs. measured, with the y = x reference line.
ax1 = fig.add_subplot(2, 1, 1)
ax1.scatter(y_test, y_predict, linewidths=4, c='blue')
# Only the line style goes in the format string; the colour comes from c=
# (the original 'k--' also requested black, conflicting with c='red').
ax1.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], '--', lw=4, c='red')
ax1.set_xlabel('Measured')
ax1.set_ylabel('Predicted')
# Bottom panel: predicted and original values plotted by sample index.
ax3 = fig.add_subplot(2, 1, 2)
ax3.plot(range(X_test.shape[0]), y_predict, c='red', label='predictValue', lw=2)
ax3.plot(range(X_test.shape[0]), y_test, c='blue', label='originalValue', lw=2)
ax3.legend(loc='upper left')
plt.show()
預測結果:
-----預測結果----- 均方差MSE: 1413.927330961531 均根方差RMSE: 37.60222507992753 MAE: 30.085778687356473 mape值: 0.18161724802211374

