1 Simple Linear Regression
Linear regression is a simple model, but it is widely used: predicting commodity prices, estimating costs, and similar tasks can all be handled with a simple linear model. y = f(x) is called a function of one variable; "regression" means recovering unknown values from known data, and linear regression performs that recovery with a linear model.
Basic idea: given a batch of known (x, y) pairs, recover other unknown values. For example, given (1,1), (2,2), and (3,3), what is (4, ?)? It is easy to see the answer is (4,4); this is the problem simple linear regression solves.
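As a minimal sketch of that idea (using np.polyfit, which is not part of the original code in this post), fitting a line to the three known points recovers the value at x = 4:

import numpy as np

# Fit a degree-1 polynomial (a straight line) to the known points
x = np.array([1, 2, 3])
y = np.array([1, 2, 3])
slope, intercept = np.polyfit(x, y, 1)
print(slope * 4 + intercept)  # -> 4.0, up to floating-point rounding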
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

# Training samples: y is roughly linear in x
x = [[1], [2], [3], [4], [5], [6]]
y = [[1], [2.1], [2.9], [4.2], [5.1], [5.8]]

model = LinearRegression()
model.fit(x, y)

# predict() expects a 2-D array: one row per sample
predicted = model.predict([[13]])[0]
print(predicted)

# Predict at a few new inputs and plot them alongside the training data
x2 = [[0], [2.5], [5.3], [7.6], [9.1]]
y2 = model.predict(x2)
print(np.var(x2, ddof=0))  # population variance of the new inputs
# Note: scoring the model against its own predictions is trivially 1.0
print(model.score(x2, y2))

plt.figure()
plt.title("linearRegression")
plt.xlabel('x')
plt.ylabel('y')
plt.axis([0, 10, 0, 10])
plt.grid(True)
plt.plot(x, y, 'k.')
plt.plot(x2, y2, 'g.')
plt.show()
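Once fitted, the slope and intercept can be read directly off the model; this is standard scikit-learn API, and for the data above the slope comes out close to 1:

print(model.coef_)       # fitted slope, close to 1 for this nearly y = x data
print(model.intercept_)  # fitted intercept, close to 0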
2 Multiple Linear Regression
Suppose the model is $Y = X\beta$ and we want to solve for the coefficients $\beta$. Because $X$ is not necessarily square, we cannot invert it directly, so we multiply both sides by $X^T$, giving $X^T Y = X^T X \beta$. Since $X^T X$ is square (and invertible when the columns of $X$ are independent), multiplying both sides by $(X^T X)^{-1}$ yields: $\beta = (X^T X)^{-1} X^T Y$.
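As a quick numerical sanity check of that formula (synthetic data; the names and sizes here are illustrative, not from the original):

import numpy as np

np.random.seed(0)
X = np.random.rand(10, 3)             # tall, non-square design matrix
beta_true = np.array([1.0, 2.0, 3.0])
Y = X @ beta_true                     # noise-free targets, so recovery is exact

beta = np.linalg.inv(X.T @ X) @ X.T @ Y
print(beta)                           # ~ [1. 2. 3.]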
Design a linear equation in two variables: $y = 1 + 2x_1 + 3x_2$
Take the samples (1,1,1), (1,1,2), (1,2,1); substituting gives y = (6, 9, 8).
Note: the constant term 1 behaves like $1 \cdot x_0$, except that $x_0$ is always taken to be 1.
X = [[1,1,1],[1,1,2],[1,2,1]]
y = [[6],[9],[8]]
from numpy import dot, transpose
from numpy.linalg import inv, lstsq

x = [[1, 1, 1], [1, 1, 2], [1, 2, 1]]
y = [[6], [9], [8]]

# Normal equation: beta = (X^T X)^{-1} X^T y
print(dot(inv(dot(transpose(x), x)), dot(transpose(x), y)))

# ===== least squares =====
print(lstsq(x, y, rcond=None)[0])

from sklearn.linear_model import LinearRegression

model = LinearRegression()
model.fit(x, y)
x2 = [[1, 3, 5]]
y2 = model.predict(x2)
print(y2)
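As a hand check against the equation $y = 1 + 2x_1 + 3x_2$ defined above: the normal equation and lstsq both recover the coefficients (1, 2, 3), and for the input [1, 3, 5] (again with the constant term as the first entry) the prediction works out to 1*1 + 2*3 + 3*5 = 22, which is what the sklearn model prints.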
3 Polynomial Regression
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression      # linear regression model
from sklearn.preprocessing import PolynomialFeatures   # polynomial feature transformer

plt.figure()
plt.title('single variable')
plt.xlabel('x')
plt.ylabel('y')
plt.axis([30, 400, 100, 400])
plt.grid(True)

x = [[50], [100], [150], [200], [250], [300]]
y = [[150], [200], [250], [280], [310], [330]]
x_test = [[250], [300]]
y_test = [[310], [330]]
plt.plot(x, y, 'g.', markersize=20)

# Straight-line fit, drawn across the whole plotting range
model = LinearRegression()
model.fit(x, y)
x2 = [[30], [400]]
y2 = model.predict(x2)
plt.plot(x2, y2, label='$y = ax + c$')
plt.legend()

# Quadratic fit: transform x into [1, x, x^2] features, then fit linearly
xx = np.linspace(30, 400, 100)
quadratic_featurizer = PolynomialFeatures(degree=2)
x_train_quadratic = quadratic_featurizer.fit_transform(x)
# Apply the fitted polynomial transform to a dense grid of points for plotting
xx_quadratic = quadratic_featurizer.transform(xx.reshape(xx.shape[0], 1))
regressor_quadratic = LinearRegression()
regressor_quadratic.fit(x_train_quadratic, y)
plt.plot(xx, regressor_quadratic.predict(xx_quadratic), label='$y = ax^2 + bx + c$')
plt.legend()

# Cubic fit: same pattern with degree-3 features
cubic_featurizer = PolynomialFeatures(degree=3)
x_train_cubic = cubic_featurizer.fit_transform(x)
xx_cubic = cubic_featurizer.transform(xx.reshape(xx.shape[0], 1))
regressor_cubic = LinearRegression()
regressor_cubic.fit(x_train_cubic, y)
plt.plot(xx, regressor_cubic.predict(xx_cubic), label='$y = a_1x^3 + a_2x^2 + a_3x + c$')
plt.legend()

# Compare r-squared on the held-out test points
print('simple linear regression r-squared:', model.score(x_test, y_test))
x_test_quadratic = quadratic_featurizer.transform(x_test)
print('quadratic regression r-squared:', regressor_quadratic.score(x_test_quadratic, y_test))
x_test_cubic = cubic_featurizer.transform(x_test)
print('cubic regression r-squared:', regressor_cubic.score(x_test_cubic, y_test))
plt.show()
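The featurize-then-fit steps above can also be chained with scikit-learn's Pipeline. Below is a minimal sketch of the quadratic model in that style; it assumes the same x, y, x_test, y_test as defined above and is an alternative phrasing, not part of the original script:

from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures

# One estimator that transforms to quadratic features, then fits linearly
quadratic_model = make_pipeline(PolynomialFeatures(degree=2), LinearRegression())
quadratic_model.fit(x, y)
print(quadratic_model.score(x_test, y_test))  # same r-squared as the manual version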