Files used by the programs: https://files.cnblogs.com/files/henuliulei/%E5%9B%9E%E5%BD%92%E5%88%86%E7%B1%BB%E6%95%B0%E6%8D%AE.zip
Concepts
The partial derivative of the cost function with respect to the parameters
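In standard notation, with the hypothesis $h_\theta(x) = \sigma(\theta^T x) = \frac{1}{1 + e^{-\theta^T x}}$, the cross-entropy cost that the code below implements is

$$J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\Big[y^{(i)}\log h_\theta(x^{(i)}) + (1 - y^{(i)})\log\big(1 - h_\theta(x^{(i)})\big)\Big]$$

and its partial derivative with respect to each parameter $\theta_j$ works out to

$$\frac{\partial J(\theta)}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m}\big(h_\theta(x^{(i)}) - y^{(i)}\big)\,x_j^{(i)}$$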
The final gradient descent update rule is as follows
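$$\theta_j := \theta_j - \frac{\alpha}{m}\sum_{i=1}^{m}\big(h_\theta(x^{(i)}) - y^{(i)}\big)\,x_j^{(i)}$$

where $\alpha$ is the learning rate (lr in the code below).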
A multi-class problem can be converted into several binary (two-class) problems, as sketched below
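For example, the one-vs-rest strategy trains one binary logistic regression per class and predicts the class whose classifier is most confident. A minimal sketch using sklearn's OneVsRestClassifier (the iris dataset here is purely illustrative, not part of this post's data):

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier

# 3-class dataset; OneVsRestClassifier fits one binary model per class
x_data, y_data = load_iris(return_X_y=True)
ovr = OneVsRestClassifier(LogisticRegression(max_iter=1000))
ovr.fit(x_data, y_data)
print(len(ovr.estimators_))           # 3 binary classifiers, one per class
print('score:', ovr.score(x_data, y_data))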
Regularization helps prevent overfitting; below are the regularized cost function and its derivative
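With an L2 penalty the cost becomes

$$J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\Big[y^{(i)}\log h_\theta(x^{(i)}) + (1 - y^{(i)})\log\big(1 - h_\theta(x^{(i)})\big)\Big] + \frac{\lambda}{2m}\sum_{j=1}^{n}\theta_j^2$$

and the partial derivative picks up an extra term for every parameter except the bias $\theta_0$, which is conventionally left unregularized:

$$\frac{\partial J(\theta)}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m}\big(h_\theta(x^{(i)}) - y^{(i)}\big)\,x_j^{(i)} + \frac{\lambda}{m}\theta_j \qquad (j \ge 1)$$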
Precision, recall, and the F1 score
If we want the results we flag as positive to be as accurate as possible, precision is what matters; if we want to recover as many of the true positives as possible, recall matters more. The F1 score balances the two.
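In standard terms, with TP, FP, and FN the counts of true positives, false positives, and false negatives:

$$\text{precision} = \frac{TP}{TP + FP},\qquad \text{recall} = \frac{TP}{TP + FN},\qquad F_1 = \frac{2\cdot\text{precision}\cdot\text{recall}}{\text{precision} + \text{recall}}$$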
Implementing linear logistic regression with gradient descent
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import classification_report  # classification metrics
from sklearn import preprocessing  # feature standardization

# Whether to standardize the data: True to standardize (usually improves the
# result); plot the raw data first if you want to inspect it in its original scale
scale = False

# Load the data
data = np.genfromtxt("LR-testSet.csv", delimiter=",")
x_data = data[:, :-1]
y_data = data[:, -1]

def plot():
    x0 = []
    x1 = []
    y0 = []
    y1 = []
    # Split the data by class
    for i in range(len(x_data)):
        if y_data[i] == 0:
            x0.append(x_data[i, 0])
            y0.append(x_data[i, 1])
        else:
            x1.append(x_data[i, 0])
            y1.append(x_data[i, 1])

    # Scatter plot
    scatter0 = plt.scatter(x0, y0, c='b', marker='o')
    scatter1 = plt.scatter(x1, y1, c='r', marker='x')
    # Legend
    plt.legend(handles=[scatter0, scatter1], labels=['label0', 'label1'], loc='best')

plot()
plt.show()

# Data preparation: reload the features and keep y as a column vector
x_data = data[:, :-1]
y_data = data[:, -1, np.newaxis]

print(np.mat(x_data).shape)
print(np.mat(y_data).shape)
# Add a bias column of ones to the samples
X_data = np.concatenate((np.ones((len(x_data), 1)), x_data), axis=1)
print(X_data.shape)

def sigmoid(x):
    return 1.0 / (1 + np.exp(-x))

def cost(xMat, yMat, ws):
    # Cross-entropy cost for logistic regression
    left = np.multiply(yMat, np.log(sigmoid(xMat * ws)))
    right = np.multiply(1 - yMat, np.log(1 - sigmoid(xMat * ws)))
    return np.sum(left + right) / -len(xMat)

def gradAscent(xArr, yArr):
    # Despite the name, this performs gradient descent on the cost
    if scale == True:
        xArr = preprocessing.scale(xArr)
    xMat = np.mat(xArr)
    yMat = np.mat(yArr)

    lr = 0.001
    epochs = 10000
    costList = []
    # Rows are samples, columns are weights
    m, n = np.shape(xMat)
    # Initialize the weights
    ws = np.mat(np.ones((n, 1)))

    for i in range(epochs + 1):
        # Multiply xMat by the weights
        h = sigmoid(xMat * ws)
        # Compute the gradient of the cost
        ws_grad = xMat.T * (h - yMat) / m
        ws = ws - lr * ws_grad

        if i % 50 == 0:
            costList.append(cost(xMat, yMat, ws))
    return ws, costList

# Train the model; get the weights and the history of the cost
ws, costList = gradAscent(X_data, y_data)
print(ws)

if scale == False:
    # Plot the decision boundary: w0 + w1*x + w2*y = 0, so y = (-w0 - w1*x) / w2
    plot()
    x_test = [[-4], [3]]
    y_test = (-ws[0] - x_test * ws[1]) / ws[2]
    plt.plot(x_test, y_test, 'k')
    plt.show()

# Plot the loss curve (the cost was recorded every 50 epochs, 201 points in all)
x = np.linspace(0, 10000, 201)
plt.plot(x, costList, c='r')
plt.title('Train')
plt.xlabel('Epochs')
plt.ylabel('Cost')
plt.show()

# Prediction
def predict(x_data, ws):
    if scale == True:
        x_data = preprocessing.scale(x_data)
    xMat = np.mat(x_data)
    ws = np.mat(ws)
    return [1 if x >= 0.5 else 0 for x in sigmoid(xMat * ws)]

predictions = predict(X_data, ws)

print(classification_report(y_data, predictions))  # per-class precision/recall/F1
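A quick way to convince yourself that ws_grad above is correct is to compare it against a finite-difference estimate of the cost's gradient. A minimal, self-contained sketch (numeric_grad and the random toy data are illustrative, not part of the original program):

import numpy as np

def sigmoid(x):
    return 1.0 / (1 + np.exp(-x))

def cost(X, y, ws):
    h = sigmoid(X @ ws)
    return -np.mean(y * np.log(h) + (1 - y) * np.log(1 - h))

def analytic_grad(X, y, ws):
    # same expression as ws_grad in gradAscent above
    return X.T @ (sigmoid(X @ ws) - y) / len(X)

def numeric_grad(X, y, ws, eps=1e-6):
    # central finite differences, one coordinate at a time
    g = np.zeros_like(ws)
    for j in range(len(ws)):
        e = np.zeros_like(ws)
        e[j] = eps
        g[j] = (cost(X, y, ws + e) - cost(X, y, ws - e)) / (2 * eps)
    return g

rng = np.random.default_rng(0)
X = np.c_[np.ones(20), rng.normal(size=(20, 2))]  # bias column + 2 features
y = (rng.random((20, 1)) > 0.5).astype(float)
ws = rng.normal(size=(3, 1))
# the two gradients should agree to roughly 1e-9
print(np.max(np.abs(analytic_grad(X, y, ws) - numeric_grad(X, y, ws))))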
Implementing linear logistic regression with sklearn
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import classification_report
from sklearn import preprocessing
from sklearn import linear_model

# Whether to standardize the data
scale = False

# Load the data
data = np.genfromtxt("LR-testSet.csv", delimiter=",")
x_data = data[:, :-1]
y_data = data[:, -1]

def plot():
    x0 = []
    x1 = []
    y0 = []
    y1 = []
    # Split the data by class
    for i in range(len(x_data)):
        if y_data[i] == 0:
            x0.append(x_data[i, 0])
            y0.append(x_data[i, 1])
        else:
            x1.append(x_data[i, 0])
            y1.append(x_data[i, 1])

    # Scatter plot
    scatter0 = plt.scatter(x0, y0, c='b', marker='o')
    scatter1 = plt.scatter(x1, y1, c='r', marker='x')
    # Legend
    plt.legend(handles=[scatter0, scatter1], labels=['label0', 'label1'], loc='best')

plot()
plt.show()

logistic = linear_model.LogisticRegression()
logistic.fit(x_data, y_data)

if scale == False:
    # Plot the decision boundary from the fitted intercept and coefficients
    plot()
    x_test = np.array([[-4], [3]])
    y_test = (-logistic.intercept_ - x_test * logistic.coef_[0][0]) / logistic.coef_[0][1]
    plt.plot(x_test, y_test, 'k')
    plt.show()

predictions = logistic.predict(x_data)

print(classification_report(y_data, predictions))
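Note that sklearn's LogisticRegression applies L2 regularization by default (penalty='l2', with strength controlled by C, the inverse of λ), so its weights will not exactly match the unregularized gradient-descent model above. To weaken the penalty you can raise C, for example:

logistic = linear_model.LogisticRegression(C=1e4)  # larger C = weaker L2 penalty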
Implementing nonlinear logistic regression with gradient descent
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import classification_report
from sklearn import preprocessing
from sklearn.preprocessing import PolynomialFeatures

# Whether to standardize the data
scale = False

# Load the data
data = np.genfromtxt("LR-testSet2.txt", delimiter=",")
x_data = data[:, :-1]
y_data = data[:, -1, np.newaxis]

def plot():
    x0 = []
    x1 = []
    y0 = []
    y1 = []
    # Split the data by class
    for i in range(len(x_data)):
        if y_data[i] == 0:
            x0.append(x_data[i, 0])
            y0.append(x_data[i, 1])
        else:
            x1.append(x_data[i, 0])
            y1.append(x_data[i, 1])
    # Scatter plot
    scatter0 = plt.scatter(x0, y0, c='b', marker='o')
    scatter1 = plt.scatter(x1, y1, c='r', marker='x')
    # Legend
    plt.legend(handles=[scatter0, scatter1], labels=['label0', 'label1'], loc='best')

plot()
plt.show()

# Define the polynomial expansion; degree controls which polynomial features are generated
poly_reg = PolynomialFeatures(degree=3)
# Feature transformation
x_poly = poly_reg.fit_transform(x_data)

def sigmoid(x):
    return 1.0 / (1 + np.exp(-x))

def cost(xMat, yMat, ws):
    left = np.multiply(yMat, np.log(sigmoid(xMat * ws)))
    right = np.multiply(1 - yMat, np.log(1 - sigmoid(xMat * ws)))
    return np.sum(left + right) / -len(xMat)

def gradAscent(xArr, yArr):
    # Despite the name, this performs gradient descent on the cost
    if scale == True:
        xArr = preprocessing.scale(xArr)
    xMat = np.mat(xArr)
    yMat = np.mat(yArr)

    lr = 0.03
    epochs = 50000
    costList = []
    # The number of columns equals the number of weights
    m, n = np.shape(xMat)
    # Initialize the weights
    ws = np.mat(np.ones((n, 1)))

    for i in range(epochs + 1):
        # Multiply xMat by the weights
        h = sigmoid(xMat * ws)
        # Compute the gradient of the cost
        ws_grad = xMat.T * (h - yMat) / m
        ws = ws - lr * ws_grad

        if i % 50 == 0:
            costList.append(cost(xMat, yMat, ws))
    return ws, costList

# Train the model; get the weights and the history of the cost
ws, costList = gradAscent(x_poly, y_data)
print(ws)

# Get the range of the data
x_min, x_max = x_data[:, 0].min() - 1, x_data[:, 0].max() + 1
y_min, y_max = x_data[:, 1].min() - 1, x_data[:, 1].max() + 1

# Build the meshgrid
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                     np.arange(y_min, y_max, 0.02))
# xx and yy have the same shape: every row of xx is identical, every column
# of yy is identical, and the shape is determined by both ranges together
# np.r_ stacks arrays by row, np.c_ by column:
# >>> a = np.array([1, 2, 3])
# >>> b = np.array([5, 2, 5])
# >>> np.r_[a, b]
# array([1, 2, 3, 5, 2, 5])
# >>> np.c_[a, b]
# array([[1, 5],
#        [2, 2],
#        [3, 5]])
# >>> np.c_[a, [0, 0, 0], b]
# array([[1, 0, 5],
#        [2, 0, 2],
#        [3, 0, 5]])
# ravel is similar to flatten: both return a flattened array, but flatten
# always returns a copy, while ravel returns a view when possible (so
# writing through ravel's result can modify the original array)
z = sigmoid(poly_reg.fit_transform(np.c_[xx.ravel(), yy.ravel()]).dot(np.array(ws)))
for i in range(len(z)):
    if z[i] > 0.5:
        z[i] = 1
    else:
        z[i] = 0
print(z)
z = z.reshape(xx.shape)

# Filled contour plot
cs = plt.contourf(xx, yy, z)
plot()
plt.show()

# Prediction
def predict(x_data, ws):
    # if scale == True:
    #     x_data = preprocessing.scale(x_data)
    xMat = np.mat(x_data)
    ws = np.mat(ws)
    return [1 if x >= 0.5 else 0 for x in sigmoid(xMat * ws)]

predictions = predict(x_poly, ws)

print(classification_report(y_data, predictions))
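One small design note on the block above: the meshgrid is expanded with poly_reg.fit_transform, which refits the transformer on the grid. PolynomialFeatures depends only on degree and the number of input columns, so this happens to be harmless here, but the idiomatic call for an already-fitted transformer is transform:

# transform (rather than fit_transform) is the idiomatic call once the
# transformer has been fitted on the training data
z = sigmoid(poly_reg.transform(np.c_[xx.ravel(), yy.ravel()]).dot(np.array(ws)))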
Implementing nonlinear logistic regression with sklearn
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.datasets import make_gaussian_quantiles
from sklearn.preprocessing import PolynomialFeatures

# Generate a 2-D Gaussian dataset whose samples are split into classes by
# quantile: 500 samples, 2 features, 2 classes (more classes are possible)
x_data, y_data = make_gaussian_quantiles(n_samples=500, n_features=2, n_classes=2)
plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data)
plt.show()

logistic = linear_model.LogisticRegression()
logistic.fit(x_data, y_data)

# Get the range of the data
x_min, x_max = x_data[:, 0].min() - 1, x_data[:, 0].max() + 1
y_min, y_max = x_data[:, 1].min() - 1, x_data[:, 1].max() + 1
# Build the meshgrid
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                     np.arange(y_min, y_max, 0.02))
# ravel flattens the grid; flatten would work too, but it always copies,
# while ravel returns a view when possible
z = logistic.predict(np.c_[xx.ravel(), yy.ravel()])
z = z.reshape(xx.shape)
# Filled contour plot
cs = plt.contourf(xx, yy, z)
# Scatter plot of the samples
plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data)
plt.show()
print('score:', logistic.score(x_data, y_data))  # a plain linear model scores poorly on this data

# Define the polynomial expansion; degree controls which polynomial features are generated
poly_reg = PolynomialFeatures(degree=5)
# Feature transformation
x_poly = poly_reg.fit_transform(x_data)
# Define the logistic regression model
logistic = linear_model.LogisticRegression()
# Train the model
logistic.fit(x_poly, y_data)

# Get the range of the data
x_min, x_max = x_data[:, 0].min() - 1, x_data[:, 0].max() + 1
y_min, y_max = x_data[:, 1].min() - 1, x_data[:, 1].max() + 1

# Build the meshgrid
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                     np.arange(y_min, y_max, 0.02))
z = logistic.predict(poly_reg.fit_transform(np.c_[xx.ravel(), yy.ravel()]))
z = z.reshape(xx.shape)
# Filled contour plot
cs = plt.contourf(xx, yy, z)
# Scatter plot of the samples
plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data)
plt.show()
print('score:', logistic.score(x_poly, y_data))  # the polynomial model scores much higher
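The manual expand-then-fit steps above can also be packaged into a single estimator with a Pipeline, so that fit, predict, and score each take the raw features. A minimal sketch continuing the script above:

from sklearn.pipeline import make_pipeline

# PolynomialFeatures + LogisticRegression as one estimator; the pipeline
# applies the expansion automatically inside fit/predict/score
model = make_pipeline(PolynomialFeatures(degree=5),
                      linear_model.LogisticRegression())
model.fit(x_data, y_data)
print('score:', model.score(x_data, y_data))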