Linear Regression
Dataset: housing-price data; the first column is the floor area, the second is the number of rooms, and the third is the price.
Link: https://pan.baidu.com/s/1VCtTde2vb3wOPG5dGfmucg&shfl=sharepset  Extraction code: 3ekz
Load the libraries
import numpy as np
import pandas as pd                    # a DataFrame can be used to inspect the data
from matplotlib import pyplot as plt
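Since the pandas import is only there for inspecting the data, here is a minimal sketch of that usage, assuming the downloaded file is saved as data.txt (the same name the main routine below uses) and with column labels of my own choosing:

df = pd.read_csv("data.txt", header=None, names=["area", "rooms", "price"])
print(df.head())        # first few rows of the dataset
print(df.describe())    # per-column count, mean, std, min, max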
Load the data file
def loadtxtAndcsv(fileName, split, datatype):
    return np.loadtxt(fileName, delimiter=split, dtype=datatype)

def loadnpy(fileName):
    return np.load(fileName)
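A quick usage sketch: load the text file once, optionally cache it in NumPy's binary format, and reload it later with loadnpy. The data.npy file name is only an illustration:

data = loadtxtAndcsv("data.txt", ",", np.float64)   # comma-separated text file
np.save("data.npy", data)                           # cache as a binary .npy file
data_again = loadnpy("data.npy")                    # reload the cached copy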
Normalization: the goal is to reduce the large differences in scale between features of the same dataset (here, the area is in the thousands while the room count is a single digit).
def featureNormalize(X):
    X_norm = np.array(X)                 # convert X to a NumPy array (a copy)
    mu = np.zeros((1, X.shape[1]))
    sigma = np.zeros((1, X.shape[1]))

    mu = np.mean(X_norm, 0)              # mean of each column (axis 0 = columns, 1 = rows)
    sigma = np.std(X_norm, 0)            # standard deviation of each column
    for i in range(X.shape[1]):
        X_norm[:, i] = (X_norm[:, i] - mu[i]) / sigma[i]

    return X_norm, mu, sigma
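featureNormalize is ordinary z-score standardization: each column is replaced by (x - mu) / sigma using that column's mean and standard deviation. As an optional cross-check (scikit-learn is not otherwise used in this post), StandardScaler computes the same statistics:

import numpy as np
from sklearn.preprocessing import StandardScaler

X_demo = np.array([[2104.0, 3.0], [1600.0, 3.0], [2400.0, 4.0]])   # made-up area/rooms rows
X_norm, mu, sigma = featureNormalize(X_demo)
scaled = StandardScaler().fit_transform(X_demo)                    # same per-column mean/std
print(np.allclose(X_norm, scaled))                                 # expected: True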
Plotting
def plot_X(X):
    # scatter plot of the two features (area vs. number of rooms)
    plt.scatter(X[:, 0], X[:, 1])
    plt.show()

def plotJ(J_history, num_iters):
    # plot the cost recorded at each iteration of gradient descent
    x = np.arange(1, num_iters + 1)
    plt.plot(x, J_history)
    plt.xlabel("Number of iterations")
    plt.ylabel("Value of costFunction")
    plt.title("The change of Value over the Number")
    plt.show()
Compute the cost function and the gradient
def computerCost(X, y, theta):
    m = len(y)
    diff = np.dot(X, theta) - y                          # prediction error
    J = np.dot(np.transpose(diff), diff) / (2 * m)       # J = 1/(2m) * sum(error^2)
    return J.item()                                      # return J as a plain scalar

def gradientDescent(X, y, theta, alpha, num_iters):
    m = len(y)
    n = len(theta)
    temp = np.zeros((n, num_iters))         # store the theta computed at every iteration
    J_history = np.zeros((num_iters, 1))
    for i in range(num_iters):
        h = np.dot(X, theta)                                                         # h is the hypothesis
        temp[:, i] = (theta - (alpha / m) * np.dot(np.transpose(X), h - y)).ravel()  # gradient update
        theta = temp[:, i].reshape(-1, 1)
        J_history[i] = computerCost(X, y, theta)
        print('.', end=' ')
    return theta, J_history
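The cost being minimized is J(theta) = (1/2m) * sum((X·theta - y)^2), and each gradient-descent step is theta := theta - (alpha/m) * X^T (X·theta - y). Since this least-squares problem also has a closed-form solution, the normal equation theta = (X^T X)^(-1) X^T y makes a convenient sanity check. The snippet below is only an optional verification I am adding, not part of the original pipeline:

import numpy as np

def normalEquation(X, y):
    # closed-form solution theta = (X^T X)^(-1) X^T y; pinv guards against a singular X^T X
    return np.dot(np.linalg.pinv(np.dot(X.T, X)), np.dot(X.T, y))

# Usage sketch (X must already contain the leading column of ones, y must be a column vector):
#   theta_gd, _ = gradientDescent(X, y, np.zeros((X.shape[1], 1)), 0.01, 400)
#   theta_ne = normalEquation(X, y)
# With normalized features and enough iterations, theta_gd should be close to theta_ne.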
Main routine and prediction function
def linearRegression(alpha=0.01, num_iters=400):
    print(u"Loading data...\n")
    data = loadtxtAndcsv("data.txt", ",", np.float64)
    X = data[:, 0:-1]
    y = data[:, -1]
    plot_X(X)                              # plot the raw features
    m = len(y)                             # number of training examples
    col = data.shape[1]                    # number of columns in data

    X, mu, sigma = featureNormalize(X)     # normalize the features
    plot_X(X)                              # plot the normalized features
    '''
    np.vstack(): stack arrays vertically
    np.hstack(): stack arrays horizontally
    '''
    X = np.hstack((np.ones((m, 1)), X))    # prepend a column of ones to X

    print(u"\nRunning gradient descent...\n")

    theta = np.zeros((col, 1))
    y = y.reshape(-1, 1)                   # reshape y into a column vector
    theta, J_history = gradientDescent(X, y, theta, alpha, num_iters)

    plotJ(J_history, num_iters)

    return mu, sigma, theta


def predict(mu, sigma, theta):
    result = 0
    predict = np.array([2000, 3])                           # area = 2000, rooms = 3
    norm_predict = (predict - mu) / sigma                   # apply the training normalization
    final_predict = np.hstack((np.ones(1), norm_predict))   # prepend the intercept term
    result = np.dot(final_predict, theta)
    return result


def testlinearRegression():
    mu, sigma, theta = linearRegression(0.01, 400)
    print(predict(mu, sigma, theta))


if __name__ == "__main__":
    testlinearRegression()
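The predict function above hard-codes a house with area 2000 and 3 rooms. As a small generalization (sketch only; the function and parameter names here are my own, not from the original post), the same steps work for any raw feature vector:

def predictPrice(features, mu, sigma, theta):
    # features: raw (un-normalized) feature vector, e.g. [2000, 3]
    features = np.asarray(features, dtype=np.float64)
    norm = (features - mu) / sigma                     # apply the training normalization
    with_bias = np.hstack((np.ones(1), norm))          # prepend the intercept term
    return np.dot(with_bias, theta).item()             # theta is (n, 1); return a plain float

# e.g. print(predictPrice([2000, 3], mu, sigma, theta)) inside testlinearRegression()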