XGBoost對波士頓房價進行預測

本文為轉載（查看原文）｜2019-08-30 17:10｜1293｜機器學習 / xgboost

import numpy as np
import matplotlib as mpl
mpl.rcParams["font.sans-serif"] = ["SimHei"]
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.model_selection  import train_test_split
from sklearn.metrics import mean_squared_error

import xgboost as xgb

def notEmpty(s):
    """Return True unless ``s`` equals the empty string.

    Usable as a ``filter`` predicate to drop the empty tokens that
    ``str.split(' ')`` produces for runs of consecutive spaces.
    """
    return not s == ''

# Names of the 13 feature columns; the data file carries one extra trailing
# column, which is used as the prediction target below.
names = ['CRIM','ZN', 'INDUS','CHAS','NOX','RM','AGE','DIS','RAD','TAX','PTRATIO','B','LSTAT']
path = "datas/boston_housing.data"
# The file does not use a uniform separator, so every line is first read as a
# single raw string field and then tokenised row by row.
fd = pd.read_csv(path, header=None)
data = np.empty((len(fd), len(names) + 1))
for i, row in enumerate(fd.values):
    # str.split() with no argument splits on any run of whitespace (spaces or
    # tabs) and never yields empty tokens, so no separate filtering is needed.
    # Assigning into the pre-sized row raises if a line has the wrong number
    # of values.
    data[i] = [float(token) for token in row[0].split()]

# Leading len(names) columns are the features, the remaining column the target.
x, y = np.split(data, (len(names),), axis=1)
y = y.ravel()

print ("樣本數據量:%d, 特征個數：%d" % x.shape)
print ("target樣本數據量:%d" % y.shape[0])

樣本數據量:506, 特征個數：13
target樣本數據量:506

# Inspect the feature matrix
X_DF = pd.DataFrame(x)
# Prints the per-column non-null counts, dtypes and memory usage.
X_DF.info()
# NOTE(review): the two expressions below are neither assigned nor printed;
# presumably they rely on notebook-style display of a cell's last expression
# and produce no output when run as a plain script -- confirm.
X_DF.describe().T
X_DF.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 506 entries, 0 to 505
Data columns (total 13 columns):
0     506 non-null float64
1     506 non-null float64
2     506 non-null float64
3     506 non-null float64
4     506 non-null float64
5     506 non-null float64
6     506 non-null float64
7     506 non-null float64
8     506 non-null float64
9     506 non-null float64
10    506 non-null float64
11    506 non-null float64
12    506 non-null float64
dtypes: float64(13)
memory usage: 51.5 KB

# Split the samples into a training part (train_size=0.8) and a test part,
# with a fixed random_state so the split is repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.8,
    random_state=14,
)
print ("訓練數據集樣本數目：%d, 測試數據集樣本數目：%d" % (len(x_train), len(x_test)))

訓練數據集樣本數目：404, 測試數據集樣本數目：102

# Wrap the arrays in DMatrix, the data container XGBoost's native API consumes.
dtrain = xgb.DMatrix(x_train, label=y_train)
dtest = xgb.DMatrix(x_test)

# Build the XGBoost model
# 1. Parameters
#    - 'verbosity': 0 replaces the deprecated 'silent': 1 flag.
#    - 'reg:squarederror' is the current name of the squared-error regression
#      objective that used to be spelled 'reg:linear'.
params = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'objective': 'reg:squarederror'}
num_round = 2
# 2. Train for num_round boosting rounds
bst = xgb.train(params, dtrain, num_round)
# 3. Persist the trained booster to disk
bst.save_model('xgb.model')

# Predict on the held-out set and report the mean squared error
y_pred = bst.predict(dtest)
print(mean_squared_error(y_test, y_pred))

24.869737956719252

# 4. Restore the booster from the 'xgb.model' file
bst2 = xgb.Booster(model_file='xgb.model')
# 5. Predict with the restored booster and report the mean squared error
y_pred2 = bst2.predict(dtest)
print(mean_squared_error(y_test, y_pred2))

24.869737956719252

# 7. Plot the actual and the predicted target values over the test samples
plt.figure(figsize=(12,6), facecolor='w')
ln_x_test = range(len(x_test))

# Draw through the current Axes object instead of the pyplot state functions.
ax = plt.gca()
ax.plot(ln_x_test, y_test, 'r-', lw=2, label=u'實際值')
ax.plot(ln_x_test, y_pred, 'g-', lw=4, label=u'XGBoost模型')
ax.set_xlabel(u'數據編碼')
ax.set_ylabel(u'租賃價格')
ax.set_title(u'波士頓房屋租賃數據預測')
ax.legend(loc = 'lower right')
ax.grid(True)
plt.show()

from xgboost import plot_importance  
from matplotlib import pyplot  
# Find the most important features: plot the trained booster's feature
# importances using the 'cover' importance type, then display the figure.
plot_importance(bst,importance_type = 'cover')
pyplot.show()

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯繫本站郵箱 yoyou2525@163.com 刪除。

猜您在找：波士頓房價預測實驗01｜波士頓房價預測【線性回歸】｜波士頓房價預測｜深度學習3：波士頓房價預測（1）｜使用Tensorflow對波士頓房價進行預測（一元和多元）｜基於sklearn的波士頓房價預測_線性回歸學習筆記｜python 線性回歸（Linear Regression）預測波士頓房價｜機器學習第二練---波士頓房價預測｜【機器學習】基於線性回歸的波士頓房價預測｜用線性回歸簡單預測波士頓房價