線性回歸和梯度下降代碼demo

本文轉載自查看原文 2019-10-29 22:34 386 machine learning

程序所用文件：https://files.cnblogs.com/files/henuliulei/%E5%9B%9E%E5%BD%92%E5%88%86%E7%B1%BB%E6%95%B0%E6%8D%AE.zip

線性回歸

決定系數（R²）越接近 1，預測效果越好

對於多元線性回歸和一元線性回歸推導理論是一致的，只不過參數是多個參數而已

梯度下降

梯度下降法可能收斂到局部最小值，而不是全局最小值

學習率太小，迭代次數多、收斂慢；學習率太大，將無法迭代到最優值

梯度下降法容易到達局部最小值

對於凸函數，使用梯度下降法（學習率合適時）一定可以到達全局最小值；也就是說，只有在目標函數不存在非全局的局部最小值時，才能保證得到全局最優解

下面兩個demo是一元函數的擬合

1使用梯度下降法的數學公式進行的機器學習代碼

 1 import numpy as np
 2 from matplotlib import  pyplot as plt
 3 #讀取數據
 4 data = np.genfromtxt('data.csv',delimiter=',')
 5 x_data = data[:, 0]
 6 y_data = data[:, 1]
 7 #plt.scatter(x_data, y_data)
 8 #plt.show()
 9 lr = 0.0001
10 k = 0
11 b = 0
12 epochs = 500
def compute_loss(x_data, y_data, b, k):
    """Return the halved mean squared error of the line ``y = k * x + b``.

    Computes ``sum((y_i - (k * x_i + b)) ** 2) / (2 * m)`` where ``m`` is
    the number of samples.

    Note the parameter order: the intercept ``b`` comes before the slope ``k``.

    Args:
        x_data: Sequence of feature values.
        y_data: Sequence of target values, paired element-wise with ``x_data``.
        b: Intercept of the line.
        k: Slope of the line.

    Returns:
        The loss as a float.

    Raises:
        ValueError: If ``x_data`` is empty (the mean would be undefined).
    """
    m = len(x_data)
    if m == 0:
        raise ValueError("x_data must contain at least one sample")
    # Avoid shadowing the builtin ``sum`` with a local accumulator.
    squared_error = sum((y - (k * x + b)) ** 2 for x, y in zip(x_data, y_data))
    return squared_error / (2 * m)
def gradient(x_data, y_data, k, b, lr, epochs, plot_every=50):
    """Fit ``y = k * x + b`` with full-batch gradient descent.

    Minimises the ``1 / (2m) * sum((k * x + b - y) ** 2)`` loss. Both
    gradients are evaluated from the current ``(k, b)`` before either
    parameter is updated.

    Args:
        x_data: 1-D sequence of feature values.
        y_data: 1-D sequence of target values, same length as ``x_data``.
        k: Initial slope.
        b: Initial intercept.
        lr: Learning rate (step size).
        epochs: Number of update steps to perform.
        plot_every: Every ``plot_every`` epochs (starting with epoch 0) print
            the epoch index and show the current fit with matplotlib, which
            blocks until the window is closed. Pass ``0`` or ``None`` to
            disable plotting. Defaults to 50.

    Returns:
        Tuple ``(k, b)`` with the fitted slope and intercept.

    Raises:
        ValueError: If ``x_data`` is empty.
    """
    x = np.asarray(x_data, dtype=float)
    y = np.asarray(y_data, dtype=float)
    if x.size == 0:
        raise ValueError("x_data must contain at least one sample")

    for i in range(epochs):
        residual = k * x + b - y
        # The slope gradient carries the extra factor x; the intercept
        # gradient is the plain mean residual.
        k_gradient = np.mean(residual * x)
        b_gradient = np.mean(residual)
        k -= lr * k_gradient
        b -= lr * b_gradient

        if plot_every and i % plot_every == 0:
            print(i)
            plt.plot(x, y, 'b.')
            plt.plot(x, k * x + b, 'r')
            plt.show()

    return k, b
# Fit the line starting from k = b = 0, draw it over the samples and
# report the final loss and parameters.
k, b = gradient(x_data, y_data, 0, 0, lr, epochs)
plt.plot(x_data, k * x_data + b, 'r')
plt.plot(x_data, y_data, 'b.')
print('loss =:', compute_loss(x_data, y_data, b, k), 'b =:', b, 'k =:', k)
plt.show()

2 使用Python的sklearn庫

 1 import numpy as np
 2 from matplotlib import  pyplot as plt
 3 from sklearn.linear_model import LinearRegression
 4 #讀取數據
 5 data = np.genfromtxt('data.csv',delimiter=',')
 6 x_data = data[:, 0]
 7 y_data = data[:, 1]
 8 plt.scatter(x_data, y_data)
 9 plt.show()
10 x_data = data[:, 0, np.newaxis]#使一位數據編程二維數據
11 y_data = data[:, 1, np.newaxis]
12 model =LinearRegression()
13 model.fit(x_data, y_data)#傳進的參數必須是二維的
14 plt.plot(x_data, y_data, 'b.')
15 plt.plot(x_data, model.predict(x_data), 'r')#畫出預測的線條
16 plt.show()

3使用梯度下降法完成多元線性回歸（以二元為例）

 1 import numpy as np
 2 from numpy import genfromtxt
 3 import matplotlib.pyplot as plt
 4 from mpl_toolkits.mplot3d import Axes3D #用來畫3D圖的包
 5 # 讀入數據
 6 data = genfromtxt(r"Delivery.csv",delimiter=',')
 7 print(data)
 8 # 切分數據
 9 x_data = data[:,:-1]
10 y_data = data[:,-1]
11 print(x_data)
12 print(y_data)
13 # 學習率learning rate
14 lr = 0.0001
15 # 參數
16 theta0 = 0
17 theta1 = 0
18 theta2 = 0
19 # 最大迭代次數
20 epochs = 1000
21 
def compute_error(theta0, theta1, theta2, x_data, y_data):
    """Return the mean squared error of a two-feature linear model.

    The prediction for a sample ``(x0, x1)`` is
    ``theta0 + theta1 * x0 + theta2 * x1``.

    Args:
        theta0: Intercept.
        theta1: Weight of the first feature (column 0 of ``x_data``).
        theta2: Weight of the second feature (column 1 of ``x_data``).
        x_data: Sequence of samples, each providing at least two feature
            values (a 2-D array or a list of rows).
        y_data: Sequence of target values, paired row-wise with ``x_data``.

    Returns:
        The sum of squared residuals divided by the number of samples.

    Raises:
        ValueError: If ``x_data`` is empty (the mean would be undefined).
    """
    m = len(x_data)
    if m == 0:
        raise ValueError("x_data must contain at least one sample")
    total_error = sum(
        (y - (theta1 * row[0] + theta2 * row[1] + theta0)) ** 2
        for row, y in zip(x_data, y_data)
    )
    return total_error / float(m)
def gradient_descent_runner(x_data, y_data, theta0, theta1, theta2, lr, epochs):
    """Fit ``y = theta0 + theta1 * x0 + theta2 * x1`` by batch gradient descent.

    Each epoch averages the gradient over all samples and then updates the
    three parameters together from the same residuals.

    Args:
        x_data: 2-D array-like of samples; column 0 and column 1 are the two
            features.
        y_data: 1-D array-like of target values, one per sample.
        theta0: Initial intercept.
        theta1: Initial weight of the first feature.
        theta2: Initial weight of the second feature.
        lr: Learning rate (step size).
        epochs: Number of update steps to perform.

    Returns:
        Tuple ``(theta0, theta1, theta2)`` with the fitted parameters.

    Raises:
        ValueError: If ``x_data`` is empty.
    """
    features = np.asarray(x_data, dtype=float)
    targets = np.asarray(y_data, dtype=float)
    if features.size == 0:
        raise ValueError("x_data must contain at least one sample")
    x0 = features[:, 0]
    x1 = features[:, 1]

    for _ in range(epochs):
        # Compute the residual once per epoch and reuse it for all three
        # partial derivatives.
        residual = theta0 + theta1 * x0 + theta2 * x1 - targets
        theta0_grad = np.mean(residual)
        theta1_grad = np.mean(x0 * residual)
        theta2_grad = np.mean(x1 * residual)
        # Update the intercept and both weights.
        theta0 = theta0 - (lr * theta0_grad)
        theta1 = theta1 - (lr * theta1_grad)
        theta2 = theta2 - (lr * theta2_grad)
    return theta0, theta1, theta2
print("Starting theta0 = {0}, theta1 = {1}, theta2 = {2}, error = {3}".
      format(theta0, theta1, theta2, compute_error(theta0, theta1, theta2, x_data, y_data)))
print("Running...")
theta0, theta1, theta2 = gradient_descent_runner(x_data, y_data, theta0, theta1, theta2, lr, epochs)
print("After {0} iterations theta0 = {1}, theta1 = {2}, theta2 = {3}, error = {4}".
      format(epochs, theta0, theta1, theta2, compute_error(theta0, theta1, theta2, x_data, y_data)))
# Create the 3-D axes through the figure so they are attached to it;
# a bare Axes3D(fig) is not added to the figure on recent Matplotlib releases.
ax = plt.figure().add_subplot(111, projection='3d')
ax.scatter(x_data[:, 0], x_data[:, 1], y_data, c='r', marker='o', s=100)  # samples as red circles
x0 = x_data[:, 0]
x1 = x_data[:, 1]
# Build coordinate matrices: the first axis takes its values from x0,
# the second axis from x1.
x0, x1 = np.meshgrid(x0, x1)
z = theta0 + x0 * theta1 + x1 * theta2
# Draw the fitted plane.
ax.plot_surface(x0, x1, z)
# Label the axes.
ax.set_xlabel('Miles')
ax.set_ylabel('Num of Deliveries')
ax.set_zlabel('Time')

# Show the figure.
plt.show()

4：使用Python的sklearn庫完成多元線性回歸

import numpy as np
from numpy import genfromtxt
from sklearn import linear_model
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# Load the data set and split it: the last column is the target,
# all preceding columns are features.
data = genfromtxt(r"Delivery.csv", delimiter=',')
print(data)
x_data, y_data = data[:, :-1], data[:, -1]
print(x_data)
print(y_data)

# Create and fit the linear model.
model = linear_model.LinearRegression()
model.fit(x_data, y_data)

# Report the fitted coefficients and intercept.
print("coefficients:", model.coef_)
print("intercept:", model.intercept_)

# Predict the target for one hand-written sample.
x_test = [[102, 4]]
predict = model.predict(x_test)
print("predict:", predict)

# Scatter the samples as red circles on 3-D axes.
figure = plt.figure()
ax = figure.add_subplot(111, projection='3d')
ax.scatter(x_data[:, 0], x_data[:, 1], y_data, c='r', marker='o', s=100)

# Build coordinate matrices from the two feature columns and evaluate the
# fitted plane on them; plot_surface takes 2-D arrays, whereas
# model.predict(x_data) would be 1-D.
x0, x1 = np.meshgrid(x_data[:, 0], x_data[:, 1])
z = model.intercept_ + x0 * model.coef_[0] + x1 * model.coef_[1]
ax.plot_surface(x0, x1, z)

# Label the axes.
ax.set_xlabel('Miles')
ax.set_ylabel('Num of Deliveries')
ax.set_zlabel('Time')

# Show the figure.
plt.show()

5 多項式回歸擬合

 1 import numpy as np
 2 import matplotlib.pyplot as plt
 3 from sklearn.preprocessing import PolynomialFeatures#多項式
 4 from sklearn.linear_model import LinearRegression
 5 
 6 # 載入數據
 7 data = np.genfromtxt("job.csv", delimiter=",")
 8 x_data = data[1:,1]
 9 y_data = data[1:,2]
10 plt.scatter(x_data,y_data)
11 plt.show()
12 x_data
13 x_data = x_data[:,np.newaxis]
14 y_data = y_data[:,np.newaxis]
15 x_data
16 # 創建並擬合模型
17 model = LinearRegression()
18 model.fit(x_data, y_data)
19 # 畫圖
20 plt.plot(x_data, y_data, 'b.')
21 plt.plot(x_data, model.predict(x_data), 'r')
22 plt.show()
23 # 定義多項式回歸,degree的值可以調節多項式的特征
24 poly_reg  = PolynomialFeatures(degree=5)
25 # 特征處理
26 x_poly = poly_reg.fit_transform(x_data)
27 # 定義回歸模型
28 lin_reg = LinearRegression()
29 # 訓練模型
30 lin_reg.fit(x_poly, y_data)
31 # 畫圖
32 plt.plot(x_data, y_data, 'b.')
33 plt.plot(x_data, lin_reg.predict(poly_reg.fit_transform(x_data)), c='r')
34 plt.title('Truth or Bluff (Polynomial Regression)')
35 plt.xlabel('Position level')
36 plt.ylabel('Salary')
37 plt.show()
38 # 畫圖
39 plt.plot(x_data, y_data, 'b.')
40 x_test = np.linspace(1,10,100)
41 x_test = x_test[:,np.newaxis]
42 plt.plot(x_test, lin_reg.predict(poly_reg.fit_transform(x_test)), c='r')
43 plt.title('Truth or Bluff (Polynomial Regression)')
44 plt.xlabel('Position level')
45 plt.ylabel('Salary')
46 plt.show()

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 線性回歸——梯度下降線性回歸與梯度下降算法線性回歸和批量梯度下降法python 線性回歸與梯度下降法[一]——原理與實現線性回歸的損失函數和梯度下降線性回歸與梯度下降法[二]——優化與比較線性回歸模型與梯度下降法梯度下降法求解多元線性回歸梯度下降算法&線性回歸算法梯度下降法求解線性回歸