Regression Models in Python


Creating a Linear Regression Model

import sklearn.linear_model as lm

sklearn.linear_model.LinearRegression() -> linear regressor
regressor.fit(input samples, output labels)
regressor.predict(input samples) -> predicted output labels
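A minimal sketch of this fit/predict workflow, using nothing beyond the API above (the tiny arrays are made up for illustration and are not part of the dataset that follows):

import numpy as np
import sklearn.linear_model as lm

# illustrative toy data: one input feature, one target
x = np.array([[1.0], [2.0], [3.0]])
y = np.array([2.1, 3.9, 6.2])

model = lm.LinearRegression()
model.fit(x, y)                   # learn slope and intercept
print(model.coef_, model.intercept_)
print(model.predict([[4.0]]))     # predict the label for a new sample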

4.94,4.37
-1.58,1.7
-4.45,1.88
-6.06,0.56
-1.22,2.23
-3.55,1.53
0.36,2.99
-3.24,0.48
1.31,2.76
2.17,3.99
2.94,3.25
-0.92,2.27
-0.91,2.0
1.24,4.75
1.56,3.52
-4.14,1.39
3.75,4.9
4.15,4.44
0.33,2.72
3.41,4.59
2.27,5.3
2.6,3.43
1.06,2.53
1.04,3.69
2.74,3.1
-0.71,2.72
-2.75,2.82
0.55,3.53
-3.45,1.77
1.09,4.61
2.47,4.24
-6.35,1.0
1.83,3.84
-0.68,2.42
-3.83,0.67
-2.03,1.07
3.13,3.19
0.92,4.21
4.02,5.24
3.89,3.94
-1.81,2.85
3.94,4.86
-2.0,1.31
0.54,3.99
0.78,2.92
2.15,4.72
2.55,3.83
-0.63,2.58
1.06,2.89
-0.36,1.99
Linear regression dataset (single.txt)
import numpy as np
import sklearn.linear_model as lm
import sklearn.metrics as sm
import matplotlib.pyplot as mp

# x and y hold the sample inputs and targets
x, y = [], []
with open('../ML/data/single.txt', 'r') as f:
    for line in f.readlines():
        data = [float(substr) for substr in line.split(',')]
        x.append(data[:-1])
        y.append(data[-1])

x = np.array(x)
y = np.array(y)
# print(x)
# Create the linear regression model
model = lm.LinearRegression()
# Fit it to the data
model.fit(x, y)
# Predict on the training inputs
pred_y = model.predict(x)

# Plot the training samples
mp.figure('Linear Regression', facecolor='lightgray')
mp.title('Linear Regression', fontsize=20)
mp.xlabel('x', fontsize=14)
mp.ylabel('y', fontsize=14)
mp.tick_params(labelsize=10)
mp.grid(linestyle=':')
mp.scatter(x, y, c='dodgerblue', alpha=0.55, s=60, label='Sample')

# Plot the fitted regression line
# argsort() returns the indices that would sort the x values in ascending
# order, so the line is drawn from left to right
sorted_indices = x.T[0].argsort()
mp.plot(x[sorted_indices], pred_y[sorted_indices],
        'o-', c='orangered', label='Regression')

mp.legend()
mp.show()
Linear regression code

Saving the Model

import numpy as np
import sklearn.linear_model as lm
import sklearn.metrics as sm
import matplotlib.pyplot as mp
# pickle is used to serialize the trained model
import pickle

x, y = [], []
with open('../ML/data/single.txt', 'r') as f:
    for line in f.readlines():
        data = [float(substr) for substr in line.split(',')]
        x.append(data[:-1])
        y.append(data[-1])

x = np.array(x)
y = np.array(y)
model = lm.LinearRegression()
model.fit(x, y)
pred_y = model.predict(x)

# Regressor performance metrics in sklearn

# Mean absolute error
print(sm.mean_absolute_error(y, pred_y))
# Mean squared error
print(sm.mean_squared_error(y, pred_y))
# Median absolute error
print(sm.median_absolute_error(y, pred_y))
# R2 coefficient of determination (goodness of fit)
print(sm.r2_score(y, pred_y))

# Save the model
with open('linear.pkl', 'wb') as f:
    pickle.dump(model, f)

mp.figure('Linear Regression', facecolor='lightgray')
mp.title('Linear Regression', fontsize=20)
mp.xlabel('x', fontsize=14)
mp.ylabel('y', fontsize=14)
mp.tick_params(labelsize=10)
mp.grid(linestyle=':')
mp.scatter(x, y, c='dodgerblue', alpha=0.55, s=60, label='Sample')

sorted_indices = x.T[0].argsort()
mp.plot(x[sorted_indices], pred_y[sorted_indices],
        'o-', c='orangered', label='Regression')

mp.legend()
mp.show()
Saving the model and evaluating regressor performance
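The R2 score printed above can be checked by hand from its definition, R2 = 1 - (residual sum of squares) / (total sum of squares). A minimal sketch, assuming y and pred_y from the script above are still in scope:

import numpy as np
import sklearn.metrics as sm

ss_res = np.sum((y - pred_y) ** 2)      # residual sum of squares
ss_tot = np.sum((y - y.mean()) ** 2)    # total sum of squares
print(1 - ss_res / ss_tot)              # should match the value below
print(sm.r2_score(y, pred_y))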

Loading the Model

import numpy as np
import sklearn.linear_model as lm
import sklearn.metrics as sm
import matplotlib.pyplot as mp
import pickle

x, y = [], []
with open('../ML/data/single.txt', 'r') as f:
    for line in f.readlines():
        data = [float(substr) for substr in line.split(',')]
        x.append(data[:-1])
        y.append(data[-1])
x = np.array(x)
y = np.array(y)

# Load the saved model
with open('linear.pkl', 'rb') as f:
    model = pickle.load(f)
pred_y = model.predict(x)

mp.figure('Linear Regression', facecolor='lightgray')
mp.title('Linear Regression', fontsize=20)
mp.xlabel('x', fontsize=14)
mp.ylabel('y', fontsize=14)
mp.tick_params(labelsize=10)
mp.grid(linestyle=':')
mp.scatter(x, y, c='dodgerblue', alpha=0.55, s=60, label='Sample')

sorted_indices = x.T[0].argsort()
mp.plot(x[sorted_indices], pred_y[sorted_indices],
        'o-', c='orangered', label='Regression')

mp.legend()
mp.show()
Loading the model

The Effect of Individual Outliers on the Linear Regression Line

4.94,15
-1.58,1.7
-4.45,1.88
-6.06,0.56
-1.22,2.23
-3.55,1.53
0.36,2.99
-3.24,0.48
1.31,2.76
2.17,3.99
2.94,3.25
-0.92,2.27
-0.91,2.0
1.24,4.75
1.56,3.52
-4.14,1.39
3.75,10
4.15,12
0.33,2.72
3.41,4.59
2.27,5.3
2.6,3.43
1.06,2.53
1.04,3.69
2.74,3.1
-0.71,2.72
-2.75,2.82
0.55,3.53
-3.45,1.77
1.09,4.61
2.47,4.24
-6.35,1.0
1.83,3.84
-0.68,2.42
-3.83,0.67
-2.03,1.07
3.13,3.19
0.92,4.21
4.02,5.24
3.89,3.94
-1.81,2.85
3.94,4.86
-2.0,1.31
0.54,3.99
0.78,2.92
2.15,4.72
2.55,3.83
-0.63,2.58
1.06,2.89
-0.36,1.99
Sample data 2 (abnormal.txt)
import numpy as np
import sklearn.linear_model as lm
import sklearn.metrics as sm
import matplotlib.pyplot as mp

x, y = [], []
# Load the data
with open('../../data/abnormal.txt', 'r') as f:
    for line in f.readlines():
        data = [float(substr) for substr in line.split(',')]
        x.append(data[:-1])
        y.append(data[-1])
x = np.array(x)
y = np.array(y)
model = lm.LinearRegression()
model.fit(x, y)
pred_y = model.predict(x)
print(sm.mean_absolute_error(y, pred_y))
print(sm.mean_squared_error(y, pred_y))
print(sm.median_absolute_error(y, pred_y))
print(sm.r2_score(y, pred_y))
mp.figure('Linear Regression', facecolor='lightgray')
mp.title('Linear Regression', fontsize=20)
mp.xlabel('x', fontsize=14)
mp.ylabel('y', fontsize=14)
mp.tick_params(labelsize=10)
mp.grid(linestyle=':')
mp.scatter(x, y, c='dodgerblue', alpha=0.75,
           s=60, label='Sample')
sorted_indices = x.T[0].argsort()
mp.plot(x[sorted_indices], pred_y[sorted_indices],
        'o-', c='orangered', label='Regression')
mp.legend()
mp.show()
Effect of outliers

  Effect of individual points on the linear regression model

Ridge Regression

  Ridge regression is used to mitigate the influence of individual points on a linear model.
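Ridge adds an L2 penalty on the coefficients to the ordinary least-squares objective, roughly minimizing ||y - Xw||^2 + alpha * ||w||^2, so a larger alpha shrinks the slope more. A minimal sketch of that effect on made-up data with one outlier (the numbers below are illustrative only):

import numpy as np
import sklearn.linear_model as lm

# toy data: a clean linear trend plus one outlier at the end
x = np.array([[0.0], [1.0], [2.0], [3.0], [4.0]])
y = np.array([1.0, 2.0, 3.0, 4.0, 20.0])

for alpha in (1, 50, 150):
    model = lm.Ridge(alpha=alpha, fit_intercept=True, max_iter=10000)
    model.fit(x, y)
    # the fitted slope shrinks toward zero as alpha grows
    print(alpha, model.coef_, model.intercept_)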

  

import numpy as np
import sklearn.linear_model as lm
import matplotlib.pyplot as mp

x, y = [], []
with open('../ML/data/abnormal.txt', 'r') as f:
    for line in f.readlines():
        data = [float(substr) for substr in line.split(',')]
        x.append(data[:-1])
        y.append(data[-1])
x = np.array(x)
y = np.array(y)
# Linear regression model
model_ln = lm.LinearRegression()
model_ln.fit(x, y)
pred_y_ln = model_ln.predict(x)

# Ridge regression model
# The first argument is the regularization strength alpha; fit_intercept controls
# whether an intercept is fitted; max_iter caps the number of solver iterations
model_rd = lm.Ridge(150, fit_intercept=True, max_iter=10000)
model_rd.fit(x, y)
pred_y_rd = model_rd.predict(x)

mp.figure('Ridge Regression', facecolor='lightgray')
mp.title('Ridge Regression', fontsize=20)
mp.xlabel('x', fontsize=14)
mp.ylabel('y', fontsize=14)
mp.tick_params(labelsize=10)
mp.grid(linestyle=':')
mp.scatter(x, y, c='dodgerblue', alpha=0.75, s=60,
           label='Sample')

sorted_indices = x.T[0].argsort()
mp.plot(x[sorted_indices], pred_y_ln[sorted_indices],
        c='orangered', label='Linear')
mp.plot(x[sorted_indices], pred_y_rd[sorted_indices],
        c='limegreen', label='Ridge')
mp.legend()
mp.show()
Comparison of the ridge regression model and the linear model

  Ridge regression reduces the influence of individual abnormal data points on the overall fit, as the coefficient comparison below suggests.
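One way to see this numerically, assuming model_ln and model_rd from the script above are still in scope, is to compare the fitted slopes and intercepts; with alpha=150 the ridge slope is shrunk relative to the ordinary least-squares slope:

# assumes model_ln and model_rd from the ridge script above
print('linear:', model_ln.coef_, model_ln.intercept_)
print('ridge :', model_rd.coef_, model_rd.intercept_)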

Polynomial Regression

  By adding higher-order terms to the original features, the regression curve takes on the shape of a polynomial, which can improve the accuracy of the fit.

  If the polynomial degree is too low the model may underfit, meaning accuracy is poor on both the training and test samples. Increasing the degree raises accuracy and helps avoid underfitting, but if the degree is too high the model becomes overly complex: it matches the training samples very closely while its error on the test samples becomes very large. This is called overfitting.
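What PolynomialFeatures does under the hood is expand each input into its powers (plus a bias column, and cross terms when there are several features). A minimal sketch with degree 3 on a single feature:

import numpy as np
import sklearn.preprocessing as sp

pf = sp.PolynomialFeatures(3)
# a single value 2.0 becomes [1, 2, 4, 8]: the bias term plus powers up to degree 3
print(pf.fit_transform(np.array([[2.0]])))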

import numpy as np
# Pipeline utilities
import sklearn.pipeline as pl
# Polynomial feature expansion
import sklearn.preprocessing as sp
import sklearn.linear_model as lm
import matplotlib.pyplot as mp

x, y = [], []
with open('../ML/data/single.txt', 'r') as f:
    for line in f.readlines():
        data = [float(substr) for substr in line.split(',')]
        x.append(data[:-1])
        y.append(data[-1])
x = np.array(x)
y = np.array(y)
# Create the polynomial feature expander (degree 10)
pf = sp.PolynomialFeatures(10)
lr = lm.LinearRegression()
model = pl.make_pipeline(pf, lr)

model.fit(x, y)
# np.newaxis inserts a new axis so the test points form a column vector
test_x = np.linspace(x.min(), x.max(), 1000)[:, np.newaxis]
pred_test_y = model.predict(test_x)

mp.figure('Polynomial Regression', facecolor='lightgray')
mp.title('Polynomial Regression', fontsize=20)
mp.xlabel('x', fontsize=14)
mp.ylabel('y', fontsize=14)
mp.tick_params(labelsize=10)
mp.grid(linestyle=':')
mp.scatter(x, y, c='dodgerblue', alpha=0.75, s=60,
           label='Sample')
mp.plot(test_x, pred_test_y, c='orangered', label='Regression')
mp.legend()
mp.show()
Polynomial regression

 

