Implementing the normal equation for simple and multiple linear regression in Python
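Both versions below solve ordinary least squares in closed form. For a design matrix X with one sample per row and a target vector y, the normal equation that the normalEquation function implements is

\theta = (X^{T} X)^{-1} X^{T} y

which only applies when X^T X is invertible, so the code checks its determinant before inverting.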
Simple linear regression
Data
Code
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler

# Gradient descent via sklearn's SGDRegressor
def gradientDecent(xmat, ymat):
    sgd = SGDRegressor(max_iter=1000000, tol=1e-7)
    sgd.fit(xmat, ymat)
    return sgd.coef_, sgd.intercept_

# Load the data: two feature columns and one target column, tab-separated
def loadDataSet(filename):
    x = [[], []]
    y = []
    with open(filename, 'r') as f:
        for line in f.readlines():
            lineDataList = line.split('\t')
            lineDataList = [float(x) for x in lineDataList]
            x[0].append(lineDataList[0])
            x[1].append(lineDataList[1])
            y.append(lineDataList[2])
    return x, y

# Convert to a matrix, one sample per row
def mat(x):
    return np.matrix(np.array(x)).T

# Visualize the fit
def dataVisual(xmat, ymat, k, g, intercept):
    k1, k2 = k[0], k[1]
    g1, g2 = g[0], g[1]
    matplotlib.rcParams['font.sans-serif'] = ['SimHei']
    plt.title('Fit visualization')
    plt.scatter(xmat[:, 1].flatten().A[0], ymat[:, 0].flatten().A[0])
    x = np.linspace(0, 1, 50)
    y = x * k2 + k1                 # normal equation line
    g = x * g2 + g1 + intercept    # gradient descent line, with the intercept added back
    plt.plot(x, g, c='yellow')
    plt.plot(x, y, c='r')
    plt.show()

# Solve for the regression parameters with the normal equation
def normalEquation(xmat, ymat):
    temp = xmat.T.dot(xmat)
    isInverse = np.linalg.det(xmat.T.dot(xmat))
    if isInverse == 0.0:
        print('Matrix is not invertible')
    else:
        inv = temp.I
        return inv.dot(xmat.T).dot(ymat)

# Main function
def main():
    xAll, y = loadDataSet('linearRegression/ex0.txt')
    xlines = []  # row-wise samples for the gradient descent call
    for i in range(len(xAll[0])):
        temp = []
        temp.append(xAll[0][i])
        temp.append(xAll[1][i])
        xlines.append(temp)
    ylines = np.array(y).reshape(len(y), 1)
    # xlines = StandardScaler().fit_transform(xlines)
    # ylines = StandardScaler().fit_transform(ylines)
    # print(xlines)
    # print(ylines)
    gradPara, intercept = gradientDecent(xlines, y)
    print('Gradient descent coefficients')
    print(gradPara)
    print('Gradient descent intercept')
    print(intercept)
    xmat = mat(xAll)
    ymat = mat(y)
    print('Normal equation parameters:')
    res = normalEquation(xmat, ymat)
    print(res)
    k1, k2 = res[0, 0], res[1, 0]
    dataVisual(xmat, ymat, [k1, k2], gradPara, intercept)

if __name__ == "__main__":
    main()
Results
Note that I hit a small pitfall here: when using SGDRegressor, the result was always off from the expected fit by the intercept. Changing g from g = x*g2 + g1 to g = x*g2 + g1 + intercept, i.e. adding the intercept back in, fixed it.
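An alternative fix (a minimal sketch, assuming the first column of ex0.txt is a constant 1.0 bias term, which the intercept mismatch above suggests) is to disable SGDRegressor's own intercept so its coefficients line up with the normal-equation parameters directly; gradientDecentNoIntercept is a hypothetical helper, not part of the original code:

from sklearn.linear_model import SGDRegressor

# Sketch: let the bias column already present in the data act as the intercept.
# fit_intercept=False makes SGDRegressor skip its own intercept term, so the
# learned coef_ matches the normal-equation theta directly.
def gradientDecentNoIntercept(xlines, y):
    sgd = SGDRegressor(max_iter=1000000, tol=1e-7, fit_intercept=False)
    sgd.fit(xlines, y)
    return sgd.coef_  # coef_[0] plays the role of the intercept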
In the figure, the red line is the normal equation fit and the yellow line is gradient descent. Because the tuned gradient descent fits so well, the two lines overlap heavily and are hard to tell apart.
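As a quick sanity check (a sketch, not in the original post, reusing the row-wise xlines layout built in main), NumPy's built-in least-squares solver should agree with the normal-equation result up to numerical noise:

import numpy as np

# Sketch: verify the normal-equation result with NumPy's least-squares solver.
# X has the bias column first, exactly as loaded by loadDataSet.
def lstsqCheck(xlines, y):
    X = np.array(xlines)
    Y = np.array(y)
    theta, residuals, rank, sv = np.linalg.lstsq(X, Y, rcond=None)
    return theta  # should match normalEquation's output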
Multiple linear regression
Data
Code
import numpy as np
import matplotlib.pyplot as plt
import re
from sklearn.linear_model import SGDRegressor

# Convert the data into a matrix, one sample per row
def matrix(x):
    return np.matrix(np.array(x)).T

# Linear function: dot product of features and parameters, plus an optional intercept
def linerfunc(xList, thList, *intercept):
    res = 0.0
    for i in range(len(xList)):
        res += xList[i] * thList[i]
    if len(intercept) == 0:
        return res
    else:
        return res + intercept[0][0]

# Load the data: eight feature columns and one target column, whitespace-separated
def loadData(fileName):
    x = []
    y = []
    regex = re.compile(r'\s+')
    with open(fileName, 'r') as f:
        readlines = f.readlines()
        for line in readlines:
            dataLine = regex.split(line)
            dataList = [float(x) for x in dataLine[0:-1]]
            xList = dataList[0:8]
            x.append(xList)
            y.append(dataList[-1])
    return x, y

# Solve for the regression parameters with the normal equation
def normalEquation(xmat, ymat):
    temp = xmat.T.dot(xmat)
    isInverse = np.linalg.det(xmat.T.dot(xmat))
    if isInverse == 0.0:
        print('Matrix is not invertible')
        return None
    else:
        inv = temp.I
        return inv.dot(xmat.T).dot(ymat)

# Gradient descent via sklearn's SGDRegressor
def gradientDecent(xmat, ymat):
    sgd = SGDRegressor(max_iter=1000000, tol=1e-7)
    sgd.fit(xmat, ymat)
    return sgd.coef_, sgd.intercept_

# Test code: count predictions that round to the true target value
def testTrainResult(normPara, gradPara, Interc, xTest, yTest):
    nright = 0
    for i in range(len(xTest)):
        if round(linerfunc(xTest[i], normPara)) == yTest[i]:
            nright += 1
    print('Normal equation prediction accuracy: {}'.format(nright / len(xTest)))
    gright = 0
    for i in range(len(xTest)):
        if round(linerfunc(xTest[i], gradPara, Interc)) == yTest[i]:
            gright += 1
    print('Gradient descent prediction accuracy: {}'.format(gright / len(xTest)))

# Run the program
def main():
    x, y = loadData('linearRegression/abalone1.txt')
    # Split into training and test sets
    lr = 0.8
    xTrain = x[:int(len(x) * lr)]
    yTrain = y[:int(len(y) * lr)]
    xTest = x[int(len(x) * lr):]
    yTest = y[int(len(y) * lr):]
    xmat = matrix(xTrain).T  # matrix() transposes, so .T restores one sample per row
    ymat = matrix(yTrain)
    # Compute the model parameters with the normal equation
    theta = normalEquation(xmat, ymat)
    print('Parameters computed with the normal equation')
    theta = theta.reshape(1, len(theta)).tolist()[0]
    print(theta)
    # Call sklearn's gradient descent to solve for the parameters
    print("Parameters from sklearn's gradient descent")
    gtheta, Interc = gradientDecent(xTrain, yTrain)
    print('Coefficients')
    print(gtheta)
    print('Intercept')
    print(Interc)
    testTrainResult(theta, gtheta, Interc, xTest, yTest)

if __name__ == "__main__":
    main()
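For a third opinion on the multiple-regression parameters (again a sketch, not part of the original code; sklearnCheck is a hypothetical helper), sklearn's closed-form LinearRegression with fit_intercept=False solves the same least-squares problem as normalEquation and should return nearly identical coefficients:

from sklearn.linear_model import LinearRegression

# Sketch: compare against sklearn's ordinary least squares.
# fit_intercept=False because the normal-equation code fits the raw
# eight features with no separate intercept term.
def sklearnCheck(xTrain, yTrain):
    lin = LinearRegression(fit_intercept=False)
    lin.fit(xTrain, yTrain)
    return lin.coef_  # should be close to the normal-equation theta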
Results
Directory structure
Data download
Link: https://pan.baidu.com/s/1JXrE4kbYsdVSSWjTUSDT3g
Extraction code: obxh
Link: https://pan.baidu.com/s/13wXq52wpKHbIlf3v21Qcgg
Extraction code: w4m5