創建線性回歸模型
import sklearn.linear_model as lm
sklearn.linear_model.LinearRegression()->線性回歸器
線性回歸器.fit(輸入樣本, 輸出標簽)
線性回歸器.predict(輸入樣本)->預測輸出標簽

4.94,4.37 -1.58,1.7 -4.45,1.88 -6.06,0.56 -1.22,2.23 -3.55,1.53 0.36,2.99 -3.24,0.48 1.31,2.76 2.17,3.99 2.94,3.25 -0.92,2.27 -0.91,2.0 1.24,4.75 1.56,3.52 -4.14,1.39 3.75,4.9 4.15,4.44 0.33,2.72 3.41,4.59 2.27,5.3 2.6,3.43 1.06,2.53 1.04,3.69 2.74,3.1 -0.71,2.72 -2.75,2.82 0.55,3.53 -3.45,1.77 1.09,4.61 2.47,4.24 -6.35,1.0 1.83,3.84 -0.68,2.42 -3.83,0.67 -2.03,1.07 3.13,3.19 0.92,4.21 4.02,5.24 3.89,3.94 -1.81,2.85 3.94,4.86 -2.0,1.31 0.54,3.99 0.78,2.92 2.15,4.72 2.55,3.83 -0.63,2.58 1.06,2.89 -0.36,1.99

import numpy as np
import sklearn.linear_model as lm
import sklearn.metrics as sm
import matplotlib.pyplot as mp

# Train a one-feature linear regression on single.txt and plot the fit.
# Each line of the data file is "feature,label".

# x collects feature rows, y the target labels.
x, y = [], []
with open('../ML/data/single.txt', 'r') as f:
    # Iterate the file object directly instead of materializing
    # the whole file with readlines().
    for line in f:
        data = [float(substr) for substr in line.split(',')]
        x.append(data[:-1])  # every column but the last is a feature
        y.append(data[-1])   # last column is the label

x = np.array(x)
y = np.array(y)

# Build the linear regression model.
model = lm.LinearRegression()
# Fit it on the training samples.
model.fit(x, y)
# Predict on the training inputs so the fitted line can be drawn.
pred_y = model.predict(x)

# Plot the training samples.
mp.figure('Linear Regression', facecolor='lightgray')
mp.title('Linear Regression', fontsize=20)
mp.xlabel('x', fontsize=14)
mp.ylabel('y', fontsize=14)
mp.tick_params(labelsize=10)
mp.grid(linestyle=':')
mp.scatter(x, y, c='dodgerblue', alpha=0.55, s=60, label='Sample')

# Plot the regression line.
# argsort() returns the indices that sort the first feature column
# ascending, so the line is drawn left to right without zig-zags.
sorted_indices = x.T[0].argsort()
mp.plot(x[sorted_indices], pred_y[sorted_indices],
        'o-', c='orangered', label='Regression')

mp.legend()
mp.show()
保存模型

import numpy as np
import sklearn.linear_model as lm
import sklearn.metrics as sm
import matplotlib.pyplot as mp
# pickle serializes the trained model to disk.
import pickle

# Train a linear regression, report sklearn's regression metrics,
# persist the model with pickle, then plot the fit.

x, y = [], []
with open('../ML/data/single.txt', 'r') as f:
    # Iterate the file object directly instead of materializing
    # the whole file with readlines().
    for line in f:
        data = [float(substr) for substr in line.split(',')]
        x.append(data[:-1])  # feature columns
        y.append(data[-1])   # label column

x = np.array(x)
y = np.array(y)
model = lm.LinearRegression()
model.fit(x, y)
pred_y = model.predict(x)

# sklearn regressor performance metrics, evaluated on the training set.

# Mean absolute error.
print(sm.mean_absolute_error(y, pred_y))
# Mean squared error.
print(sm.mean_squared_error(y, pred_y))
# Median absolute error.
print(sm.median_absolute_error(y, pred_y))
# R2 coefficient of determination (goodness of fit).
print(sm.r2_score(y, pred_y))

# Save the trained model for later reuse.
with open('linear.pkl', 'wb') as f:
    pickle.dump(model, f)

mp.figure('Linear Regression', facecolor='lightgray')
mp.title('Linear Regression', fontsize=20)
mp.xlabel('x', fontsize=14)
mp.ylabel('y', fontsize=14)
mp.tick_params(labelsize=10)
mp.grid(linestyle=':')
mp.scatter(x, y, c='dodgerblue', alpha=0.55, s=60, label='Sample')

# Sort by the first feature so the regression line is drawn in order.
sorted_indices = x.T[0].argsort()
mp.plot(x[sorted_indices], pred_y[sorted_indices],
        'o-', c='orangered', label='Regression')

mp.legend()
mp.show()
導入模型

import numpy as np
import sklearn.linear_model as lm
import sklearn.metrics as sm
import matplotlib.pyplot as mp
import pickle

# Reload the pickled regressor from disk and plot its predictions
# against the original training samples.

x, y = [], []
with open('../ML/data/single.txt', 'r') as f:
    for line in f.readlines():
        fields = [float(field) for field in line.split(',')]
        x.append(fields[:-1])
        y.append(fields[-1])
x = np.array(x)
y = np.array(y)

# Restore the previously saved model.
with open('linear.pkl', 'rb') as f:
    model = pickle.load(f)
pred_y = model.predict(x)

mp.figure('Linear Regression', facecolor='lightgray')
mp.title('Linear Regression', fontsize=20)
mp.xlabel('x', fontsize=14)
mp.ylabel('y', fontsize=14)
mp.tick_params(labelsize=10)
mp.grid(linestyle=':')
mp.scatter(x, y, c='dodgerblue', alpha=0.55, s=60, label='Sample')

# Draw the regression line left-to-right by sorting on the feature.
order = x.T[0].argsort()
mp.plot(x[order], pred_y[order],
        'o-', c='orangered', label='Regression')

mp.legend()
mp.show()
個別異常點對線性回歸線的影響

4.94,15 -1.58,1.7 -4.45,1.88 -6.06,0.56 -1.22,2.23 -3.55,1.53 0.36,2.99 -3.24,0.48 1.31,2.76 2.17,3.99 2.94,3.25 -0.92,2.27 -0.91,2.0 1.24,4.75 1.56,3.52 -4.14,1.39 3.75,10 4.15,12 0.33,2.72 3.41,4.59 2.27,5.3 2.6,3.43 1.06,2.53 1.04,3.69 2.74,3.1 -0.71,2.72 -2.75,2.82 0.55,3.53 -3.45,1.77 1.09,4.61 2.47,4.24 -6.35,1.0 1.83,3.84 -0.68,2.42 -3.83,0.67 -2.03,1.07 3.13,3.19 0.92,4.21 4.02,5.24 3.89,3.94 -1.81,2.85 3.94,4.86 -2.0,1.31 0.54,3.99 0.78,2.92 2.15,4.72 2.55,3.83 -0.63,2.58 1.06,2.89 -0.36,1.99

import numpy as np
import sklearn.linear_model as lm
import sklearn.metrics as sm
import matplotlib.pyplot as mp

# Fit a plain linear regression on data containing a few outliers and
# print the metrics, showing how much the outliers drag the line.

# Load the samples.
x, y = [], []
with open('../../data/abnormal.txt', 'r') as f:
    for line in f.readlines():
        row = [float(field) for field in line.split(',')]
        x.append(row[:-1])
        y.append(row[-1])
x = np.array(x)
y = np.array(y)

model = lm.LinearRegression()
model.fit(x, y)
pred_y = model.predict(x)

# Report the standard regression metrics on the training set.
print(sm.mean_absolute_error(y, pred_y))
print(sm.mean_squared_error(y, pred_y))
print(sm.median_absolute_error(y, pred_y))
print(sm.r2_score(y, pred_y))

mp.figure('Linear Regression', facecolor='lightgray')
mp.title('Linear Regression', fontsize=20)
mp.xlabel('x', fontsize=14)
mp.ylabel('y', fontsize=14)
mp.tick_params(labelsize=10)
mp.grid(linestyle=':')
mp.scatter(x, y, c='dodgerblue', alpha=0.75,
           s=60, label='Sample')

# Sort by the feature column so the line plots monotonically.
order = x.T[0].argsort()
mp.plot(x[order], pred_y[order],
        'o-', c='orangered', label='Regression')
mp.legend()
mp.show()
個別點對線性回歸模型影響
嶺回歸
用於消除個別點對線性模型的影響

import numpy as np
import sklearn.linear_model as lm
import matplotlib.pyplot as mp

# Compare plain linear regression with ridge regression on data that
# contains outliers; ridge's L2 penalty damps the outliers' influence.

x, y = [], []
with open('../ML/data/abnormal.txt', 'r') as f:
    # Iterate the file object directly instead of materializing
    # the whole file with readlines().
    for line in f:
        data = [float(substr) for substr in line.split(',')]
        x.append(data[:-1])  # feature columns
        y.append(data[-1])   # label column
x = np.array(x)
y = np.array(y)

# Plain linear regression model, for comparison.
model_ln = lm.LinearRegression()
model_ln.fit(x, y)
pred_y_ln = model_ln.predict(x)

# Ridge regression model.
# alpha: L2 regularization strength (named explicitly for clarity).
# fit_intercept: whether to fit the intercept term.
# max_iter: cap on solver iterations to guarantee termination.
model_rd = lm.Ridge(alpha=150, fit_intercept=True, max_iter=10000)
model_rd.fit(x, y)
pred_y_rd = model_rd.predict(x)

mp.figure('Ridge Regression', facecolor='lightgray')
mp.title('Ridge Regression', fontsize=20)
mp.xlabel('x', fontsize=14)
mp.ylabel('y', fontsize=14)
mp.tick_params(labelsize=10)
mp.grid(linestyle=':')
mp.scatter(x, y, c='dodgerblue', alpha=0.75, s=60,
           label='Sample')

# Sort by the feature column so both lines draw left to right.
sorted_indices = x.T[0].argsort()
mp.plot(x[sorted_indices], pred_y_ln[sorted_indices],
        c='orangered', label='Linear')
mp.plot(x[sorted_indices], pred_y_rd[sorted_indices],
        c='limegreen', label='Ridge')
mp.legend()
mp.show()
嶺回歸減小了個別異常數據對整體的影響
多項式回歸分析
通過在原有特征中增加高次項,使得回歸線呈現多項式方程曲線特性,借以提高回歸的精度。
多項式次數過低可能導致欠擬合,即訓練樣本和測試樣本的精度都比較低,通過增加多項式次數可以提高精度,進而避免欠擬合,但若次數過高又會使得模型過於復雜,表現為對訓練樣本的高度匹配而與測試樣本則誤差奇大,這種現象稱為過擬合。

import numpy as np
# Pipeline utilities: chain the feature expander and the regressor.
import sklearn.pipeline as pl
# Preprocessing: provides PolynomialFeatures.
import sklearn.preprocessing as sp
import sklearn.linear_model as lm
import matplotlib.pyplot as mp

# Polynomial regression: expand the single feature with polynomial
# terms, then fit a linear model on the expanded features.

x, y = [], []
with open('../ML/data/single.txt', 'r') as f:
    for line in f.readlines():
        row = [float(field) for field in line.split(',')]
        x.append(row[:-1])
        y.append(row[-1])
x = np.array(x)
y = np.array(y)

# Degree-10 polynomial feature expander feeding a linear regressor.
expander = sp.PolynomialFeatures(10)
regressor = lm.LinearRegression()
model = pl.make_pipeline(expander, regressor)

model.fit(x, y)

# Dense grid over the observed range; np.newaxis turns the 1-D grid
# into a column so predict() receives one feature per row.
test_x = np.linspace(x.min(), x.max(), 1000)[:, np.newaxis]
pred_test_y = model.predict(test_x)

mp.figure('Polynomial Regression', facecolor='lightgray')
mp.title('Polynomial Regression', fontsize=20)
mp.xlabel('x', fontsize=14)
mp.ylabel('y', fontsize=14)
mp.tick_params(labelsize=10)
mp.grid(linestyle=':')
mp.scatter(x, y, c='dodgerblue', alpha=0.75, s=60,
           label='Sample')
mp.plot(test_x, pred_test_y, c='orangered', label='Regression')
mp.legend()
mp.show()