Implementing LinearRegression in Python


Linear Regression

Dataset: housing-price data; the first column is the floor area, the second is the number of rooms, and the third is the price.

Link: https://pan.baidu.com/s/1VCtTde2vb3wOPG5dGfmucg&shfl=sharepset
Extraction code: 3ekz
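
The file is plain comma-separated text with one example per line, in the order area, rooms, price. Illustrative rows only (not actual values from the file):

2104,3,399900
1600,3,329900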

 

Load the libraries

import numpy as np
import pandas as pd             # a pandas DataFrame is handy for inspecting the data
from matplotlib import pyplot as plt

Load the data file

def loadtxtAndcsv(fileName, split, datatype):
    """Load a .txt or .csv file with the given delimiter."""
    return np.loadtxt(fileName, delimiter=split, dtype=datatype)

def loadnpy(fileName):
    """Load a binary .npy file."""
    return np.load(fileName)
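
A quick usage sketch (assuming data.txt sits in the working directory, as the main routine below expects):

data = loadtxtAndcsv("data.txt", ",", np.float64)
print(data.shape)    # (m, 3): area, rooms, price
print(data[:3])      # peek at the first three rows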

Feature normalization (standardization). Its purpose is to bring features with very different scales in the same dataset (here, area vs. number of rooms) onto a comparable scale, which helps gradient descent converge faster.

def featureNormalize(X):
    X_norm = np.array(X)             # copy X into a NumPy array
    mu = np.mean(X_norm, 0)          # column-wise mean (axis 0 = columns, axis 1 = rows)
    sigma = np.std(X_norm, 0)        # column-wise standard deviation
    for i in range(X.shape[1]):
        X_norm[:, i] = (X_norm[:, i] - mu[i]) / sigma[i]   # standardize each column

    return X_norm, mu, sigma
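
A tiny worked example with toy numbers (not from the dataset) to confirm the behaviour:

X_toy = np.array([[100.0, 1.0], [200.0, 2.0], [300.0, 3.0]])
X_n, mu, sigma = featureNormalize(X_toy)
print(mu)     # [200.   2.]
print(sigma)  # approx. [81.6497  0.8165] (population std, ddof=0)
print(X_n)    # each column now has mean 0 and std 1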

Plotting

def plot_X(X):
    plt.scatter(X[:, 0], X[:, 1])    # scatter plot of the two features: area vs. rooms
    plt.show()

def plotJ(J_history, num_iters):
    x = np.arange(1, num_iters + 1)
    plt.plot(x, J_history)           # cost value at each iteration
    plt.xlabel("Number of iterations")
    plt.ylabel("Cost J")
    plt.title("Cost vs. number of iterations")
    plt.show()

Cost function and gradient descent

def computerCost(X, y, theta):
    m = len(y)
    err = np.dot(X, theta) - y        # residuals, shape (m, 1)
    J = np.sum(err ** 2) / (2 * m)    # half mean squared error
    return J
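
In matrix form this is the standard half mean-squared-error cost:

J(\theta) = \frac{1}{2m} (X\theta - y)^T (X\theta - y)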

def gradientDescent(X, y, theta, alpha, num_iters):
    m = len(y)
    J_history = np.zeros((num_iters, 1))   # cost recorded at every iteration

    for i in range(num_iters):
        h = np.dot(X, theta)                               # current hypothesis h = X·theta
        theta = theta - (alpha / m) * np.dot(X.T, h - y)   # batch gradient step
        J_history[i] = computerCost(X, y, theta)
        print('.', end=' ')                                # progress indicator
    return theta, J_history
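
Each pass of the loop applies the batch gradient-descent update rule

\theta := \theta - \frac{\alpha}{m} X^T (X\theta - y)

which is exactly the np.dot(X.T, h - y) step above.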

Main routine and prediction function

def linearRegression(alpha=0.01, num_iters=400):
    print("Loading data...\n")

    data = loadtxtAndcsv("data.txt", ",", np.float64)
    X = data[:, 0:-1]       # every column but the last holds features
    y = data[:, -1]         # the last column is the price
    plot_X(X)               # plot the raw features
    m = len(y)              # number of training examples
    col = data.shape[1]     # number of columns in data

    X, mu, sigma = featureNormalize(X)   # standardize the features
    plot_X(X)                            # plot the normalized features
    '''
    np.vstack(): stack arrays vertically (row-wise)

    np.hstack(): stack arrays horizontally (column-wise)
    '''
    X = np.hstack((np.ones((m, 1)), X))  # prepend a column of ones for the intercept term

    print("\nRunning gradient descent...\n")
    theta = np.zeros((col, 1))          # col = number of features + 1, matching X after the ones column
    y = y.reshape(-1, 1)                # reshape y into a column vector
    theta, J_history = gradientDescent(X, y, theta, alpha, num_iters)

    plotJ(J_history, num_iters)

    return mu, sigma, theta
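
As an optional cross-check (a minimal sketch, not part of the original flow; normalEquation is a name introduced here), the closed-form normal equation should give a theta close to the one found by gradient descent:

def normalEquation(X, y):
    # theta = (X^T X)^(-1) X^T y, using pinv for numerical safety
    return np.dot(np.dot(np.linalg.pinv(np.dot(X.T, X)), X.T), y)

data = loadtxtAndcsv("data.txt", ",", np.float64)
X_ne, _, _ = featureNormalize(data[:, 0:-1])
X_ne = np.hstack((np.ones((len(X_ne), 1)), X_ne))
print(normalEquation(X_ne, data[:, -1].reshape(-1, 1)))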

def predict(mu, sigma, theta):
    predict = np.array([2000, 3])                           # query: a 2000-square-foot house with 3 rooms
    norm_predict = (predict - mu) / sigma                   # normalize with the training-set mu and sigma
    final_predict = np.hstack((np.ones(1), norm_predict))   # prepend the intercept term

    result = np.dot(final_predict, theta)
    return result
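
The query [2000, 3] is hard-coded; a slightly more general variant (a sketch, where predict_one is a hypothetical helper) takes the raw feature vector as an argument:

def predict_one(features, mu, sigma, theta):
    x = (np.asarray(features, dtype=np.float64) - mu) / sigma   # scale with the training statistics
    x = np.hstack((np.ones(1), x))                              # prepend the intercept term
    return np.dot(x, theta).item()                              # scalar prediction

# e.g. predict_one([1650, 3], mu, sigma, theta)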

def testlinearRegression():
    mu,sigma,theta = linearRegression(0.01,400)
    print(predict(mu,sigma,theta))

if __name__ == "__main__":
    testlinearRegression()

 

