一、引入相關庫
%matplotlib inline import matplotlib.pyplot as plt from matplotlib.font_manager import FontProperties font=FontProperties(fname=r'c:/windows/fonts/msyh.ttf',size=10)
二、一元回歸範例
def runplt():
    """Open a new figure pre-configured for the pizza price/diameter plot.

    Sets the title and both axis labels (rendered with the module-level
    ``font``), fixes both axes to the range 0-25 and switches the grid on.

    Returns:
        The ``plt`` object, so the caller can keep plotting on the figure.
    """
    plt.figure()

    # Every piece of text on the figure shares the same font settings.
    text_style = {'fontproperties': font}
    plt.title(u'披薩價格與直徑數據', **text_style)
    plt.xlabel(u'直徑(英寸)', **text_style)
    plt.ylabel(u'價格(美元)', **text_style)

    axis_limits = [0, 25, 0, 25]  # [xmin, xmax, ymin, ymax]
    plt.axis(axis_limits)
    plt.grid(True)  # show grid lines
    return plt
# Training samples, one single-element row per pizza:
# x holds the diameters, y the matching prices.
x = [[diameter] for diameter in (6, 8, 10, 14, 18)]
y = [[price] for price in (7, 9, 13, 17.5, 18)]

# Draw the samples as black dots on the pre-configured figure.
plt = runplt()
plt.plot(x, y, 'k.')
plt.show()
三、利用sklearn建立一元回歸
from sklearn.linear_model import LinearRegression

# Create the model and fit it to the samples (x: diameters, y: prices).
model = LinearRegression()
model.fit(x, y)

# The original printed only the label and never computed the prediction.
# predict() takes a 2-D array (one row per sample); because y was given as a
# column of single-element rows the result is 2-D as well, hence [0][0].
predicted_price = model.predict([[12]])[0][0]
print('預測一張12英寸的披薩價格:$%.2f' % predicted_price)
## 波士頓房屋價格,SGDRegressor
import numpy as np
from sklearn.datasets import load_boston
from sklearn.linear_model import SGDRegressor  # stochastic gradient descent
from sklearn.preprocessing import StandardScaler  # per-column standardization

# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in
# 0.20; the same helpers live in sklearn.model_selection. Fall back to the old
# module so the script still runs on very old installations.
try:
    from sklearn.model_selection import cross_val_score
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import cross_val_score
    from sklearn.cross_validation import train_test_split

# NOTE(review): load_boston was removed in scikit-learn 1.2; confirm the
# target scikit-learn version before relying on this loader.
data = load_boston()

# Split into training and test sets; by default 25% of the samples are held
# out for testing.
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target)
# Standardize the features and the target with separate scalers.
X_scaler = StandardScaler()
y_scaler = StandardScaler()

# Fit the scalers on the training data and transform it.
X_train = X_scaler.fit_transform(X_train)
# StandardScaler only accepts 2-D input, while the target is a 1-D vector:
# reshape it to a single column for scaling, then flatten it back so the
# regressor still receives a 1-D target.
y_train = y_scaler.fit_transform(np.reshape(y_train, (-1, 1))).ravel()

# Apply the statistics learned on the training data to the test samples.
X_test = X_scaler.transform(X_test)
y_test = y_scaler.transform(np.reshape(y_test, (-1, 1))).ravel()
# Train the regressor and evaluate it.
# Squared loss is SGDRegressor's default, so `loss` is left unset: the
# explicit 'squared_loss' spelling was renamed to 'squared_error' in
# scikit-learn 1.0 and removed in 1.2.
regression = SGDRegressor()

# cv=5: 5-fold cross-validation on the training set, one score per fold.
scores = cross_val_score(regression, X_train, y_train, cv=5)

# print(...) with a single pre-formatted argument produces the same output
# on Python 2 and Python 3 (the original used Python 2-only statements).
print(scores)
print('Cross validation r-squared score: %s' % np.mean(scores))

# Regressors are trained with fit(); the original called fit_transform(),
# which current SGDRegressor does not provide, and discarded its result.
regression.fit(X_train, y_train)
print('Test set r-squared score: %s' % regression.score(X_test, y_test))
輸出結果:
[ 0.65592082 0.71571537 0.79468123 0.69650452 0.67266115]
Cross validation r-squared score: 0.707096620395
Test set r-squared score: 0.677424272546