Python實現LinearRegression


線性回歸

數據集:為房價的數據,第一列是面積,第二列是房間數,第三列是價格

鏈接:https://pan.baidu.com/s/1VCtTde2vb3wOPG5dGfmucg&shfl=sharepset 
提取碼:3ekz 

 

載入庫

import numpy as np
import pandas as pd  # available for DataFrame-based inspection of the data
from matplotlib import pyplot as plt

導入文件

def loadtxtAndcsv(fileName, split, datatype):
    """Load a delimited text/CSV file into a numpy array.

    fileName -- path of the file to read
    split    -- field delimiter (e.g. ",")
    datatype -- numpy dtype for the resulting array
    """
    data = np.loadtxt(fileName, delimiter=split, dtype=datatype)
    return data
3 
def loadnpy(fileName):
    """Load an array previously saved in numpy binary (.npy) format."""
    return np.load(fileName)

歸一化,目的是減少同一數據集中,數據相差過大的現象

 1 def featureNormalize(X):
 2     X_norm = np.array(X)       #將X轉化為numpy對象
 3     mu = np.zeros((1,X.shape[1]))
 4     sigma = np.zeros((1,X.shape[1]))
 5     
 6     mu = np.mean(X_norm,0)        # 求每一列的平均值(0指定為列,1代表行)
 7     sigma = np.std(X_norm,0)
 8     for i in range(X.shape[1]):
 9         X_norm[:,i] = (X_norm[:,i] - mu[i]) / sigma[i]
10     
11     return X_norm,mu,sigma

畫圖

def plot_X(X):
    """Scatter-plot the first feature column of X against the second."""
    col0, col1 = X[:, 0], X[:, 1]
    plt.scatter(col0, col1)
    plt.show()

def plotJ(J_history, num_iters):
    """Plot the cost value recorded at each gradient-descent iteration."""
    iterations = np.arange(1, num_iters + 1)
    plt.plot(iterations, J_history)
    plt.xlabel("Number of iterations")
    plt.ylabel("Value of costFunction")
    plt.title("The change of Value over the Number")
    plt.show()

計算梯度和代價函數

def computerCost(X, y, theta):
    """Mean-squared-error cost J(theta) = (X@theta - y)^T (X@theta - y) / (2m).

    X     -- (m, n) design matrix (including the intercept column)
    y     -- (m, 1) target column vector
    theta -- (n, 1) parameter column vector
    Returns the cost as a Python float.

    Fix vs. the original: the original used `*`, which is matrix
    multiplication only when the operands happen to be np.matrix; with
    plain ndarrays it is elementwise and broadcasts incorrectly (or
    raises). np.dot works identically for both types.
    """
    m = len(y)
    residual = np.dot(X, theta) - y                     # (m, 1) prediction errors
    J = float(np.dot(np.transpose(residual), residual)) / (2 * m)
    return J

def gradientDescent(X, y, theta, alpha, num_iters):
    """Run batch gradient descent for linear regression.

    X         -- (m, n) design matrix (including the intercept column)
    y         -- (m, 1) target column vector
    theta     -- (n, 1) initial parameter vector
    alpha     -- learning rate
    num_iters -- number of iterations
    Returns (theta, J_history): the final (n, 1) parameter vector and a
    (num_iters, 1) array with the cost after each iteration.

    Fix vs. the original: it allocated an (n, num_iters) `temp` matrix
    holding every iterate, of which only the current column was ever
    read — pure memory waste. The dead `n` variable existed only for
    that allocation.
    """
    m = len(y)
    # Keep theta as np.matrix (the type the original produced via temp[:, i])
    # so matrix-style arithmetic in computerCost keeps working.
    theta = np.matrix(theta)
    J_history = np.zeros((num_iters, 1))

    for i in range(num_iters):
        h = np.dot(X, theta)                                # current hypothesis values
        theta = theta - (alpha / m) * np.dot(np.transpose(X), h - y)
        J_history[i] = computerCost(X, y, theta)
        print('.', end=' ')                                 # progress indicator
    return theta, J_history

主函數體和預測函數

def linearRegression(alpha = 0.01, num_iters = 400):
    """Load the housing data from data.txt, normalize the features, and
    fit the parameters by gradient descent.

    alpha     -- learning rate (default 0.01)
    num_iters -- number of gradient-descent iterations (default 400)
    Returns (mu, sigma, theta) so callers can normalize and predict new
    inputs the same way.
    """
    print(u"加載數據中...\n")

    data = loadtxtAndcsv("data.txt", ",", np.float64)
    X, y = data[:, 0:-1], data[:, -1]       # all but last column = features, last = price
    plot_X(X)                               # scatter of the raw features
    m = len(y)
    col = data.shape[1]                     # feature count + 1 = size of theta

    X, mu, sigma = featureNormalize(X)
    plot_X(X)                               # scatter again after normalization
    X = np.hstack((np.ones((m, 1)), X))     # prepend a column of ones for the intercept

    print(u"\n執行梯度下降...\n")
    theta = np.zeros((col, 1))
    y = y.reshape(-1, 1)                    # targets as a column vector
    theta, J_history = gradientDescent(X, y, theta, alpha, num_iters)

    plotJ(J_history, num_iters)             # cost curve over the iterations

    return mu, sigma, theta

def predict(mu, sigma, theta, features=(2000, 3)):
    """Predict a price for one house from the trained model.

    mu, sigma -- per-feature mean and std returned by featureNormalize
    theta     -- fitted (n, 1) parameter vector (intercept first)
    features  -- raw feature values (area, number of rooms); the default
                 (2000, 3) preserves the original hard-coded behavior.
    Returns np.dot(...) of the normalized, intercept-augmented features
    with theta (same return type as the original).

    Changes vs. the original: the query point is now a parameter instead
    of a hard-coded constant, and the local variable no longer shadows
    the function's own name.
    """
    sample = np.asarray(features, dtype=np.float64)
    norm_sample = (sample - mu) / sigma                    # apply the training normalization
    augmented = np.hstack((np.ones((1)), norm_sample))     # prepend the intercept term
    result = np.dot(augmented, theta)
    return result

def testlinearRegression():
    """Train on the bundled data set and print the default prediction."""
    mu, sigma, theta = linearRegression(0.01, 400)
    result = predict(mu, sigma, theta)
    print(result)

if __name__ == "__main__":
    testlinearRegression()

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM