1、概述
線性方程求解
線性回歸模型
最小二乘法
求解線性回歸
2、代碼實現
0.引入依賴
import matplotlib.pyplot as plt
import numpy as np
1.導入數據(data.csv)
# Load the comma-separated data set; every row is one (x, y) sample.
points = np.genfromtxt('data.csv', delimiter=',')

# Split the two columns: column 0 is x, column 1 is y.
x, y = points[:, 0], points[:, 1]

# Show the raw samples as a scatter plot.
plt.scatter(x, y)
plt.show()
2. 定義損失函數
# The loss is a function of the coefficients; the data's x and y values must
# be passed in as well.
def computer_cost(w, b, points):
    """Return the mean squared error of the line ``y = w * x + b`` over ``points``.

    Args:
        w: Slope of the candidate line.
        b: Intercept of the candidate line.
        points: Two-column data set (an ``(M, 2)`` array or any sequence of
            ``[x, y]`` pairs); column 0 holds x and column 1 holds y.

    Returns:
        The average of ``(y - w * x - b) ** 2`` over all M rows.

    Raises:
        ValueError: If ``points`` contains no rows, because the mean of zero
            residuals is undefined.
    """
    data = np.asarray(points, dtype=float)
    if data.size == 0:
        raise ValueError("points must contain at least one (x, y) row")

    x = data[:, 0]
    y = data[:, 1]

    # Compute every squared residual at once and average them.
    residuals = y - w * x - b
    return np.mean(residuals ** 2)
3. 定義核心算法擬合函數
# First define a helper that computes the arithmetic mean.
def average(data):
    """Return the arithmetic mean of ``data``.

    Args:
        data: A non-empty sized iterable of numbers (list, tuple, 1-D array).

    Returns:
        ``sum(data) / len(data)``.

    Raises:
        ValueError: If ``data`` is empty.
    """
    count = len(data)
    if count == 0:
        raise ValueError("average() requires at least one value")
    # The built-in sum replaces the hand-written index loop, whose accumulator
    # was itself named ``sum`` and shadowed the built-in.
    return sum(data) / count


# Core fitting routine.
def fit(points):
    """Fit the line ``y = w * x + b`` to ``points`` by ordinary least squares.

    Uses the closed-form solution

        w = sum(y_i * (x_i - x_bar)) / sum((x_i - x_bar) ** 2)
        b = mean(y_i - w * x_i)

    Args:
        points: Two-column data set (an ``(M, 2)`` array or any sequence of
            ``[x, y]`` pairs); column 0 holds x and column 1 holds y.

    Returns:
        A tuple ``(w, b)`` with the fitted slope and intercept.

    Raises:
        ValueError: If ``points`` is empty, or if every x value is identical
            (the slope is undefined for a vertical set of points).
    """
    data = np.asarray(points, dtype=float)
    if data.size == 0:
        raise ValueError("points must contain at least one (x, y) row")

    x = data[:, 0]
    y = data[:, 1]
    if np.all(x == x[0]):
        raise ValueError("cannot fit a line: all x values are identical")

    x_bar = average(x)
    centered = x - x_bar

    # Slope from the closed-form formula. The denominator is the centered sum
    # of squares; it is algebraically equal to sum(x ** 2) - M * x_bar ** 2
    # but does not cancel catastrophically when x is large relative to its
    # spread.
    w = np.sum(y * centered) / np.sum(centered ** 2)

    # The intercept is the mean of what is left after removing the slope term.
    b = average(y - w * x)
    return w, b
4. 測試
# Fit the line to the loaded data and report the coefficients.
w, b = fit(points)
print("w = ", w)
print("b = ", b)

# Report the mean squared error of the fitted line on the same data.
cost = computer_cost(w, b, points)
print("cost = ", cost)
5. 畫出擬合曲線
# Predicted y for every x, using the fitted slope and intercept.
pred_y = w * x + b

# Draw the raw samples, then overlay the fitted line in red.
plt.scatter(x, y)
plt.plot(x, pred_y, c='r')
plt.show()