Python實現LogisticRegression


數據集:

鏈接:https://pan.baidu.com/s/1Hp8MYa6Q6QBzexlJ2z9mDw&shfl=sharepset 
提取碼:azbe 

引入庫,加載文件

 1 import matplotlib.pyplot as plt
 2 import numpy as np
 3 import pandas as pd
 4 from scipy import optimize
 5 
 6 def loadtxtAndcsv(fileName,split,datatype):
 7     return np.loadtxt(fileName,delimiter=split,dtype= datatype)
 8 
 9 def loadnpy(fileName):
10     return np.load(fileName)

畫圖函數

def plotData(X, y):
    """Scatter-plot the two classes: positives (y == 1) as '+', negatives (y == 0) as 'o'."""
    positives = np.where(y == 1)
    negatives = np.where(y == 0)

    plt.plot(X[positives, 0], X[positives, 1], '+')
    plt.plot(X[negatives, 0], X[negatives, 1], 'o')
    plt.title("Two kinds of data's comparison")
    plt.show()

邏輯回歸Sigmoid函數,后面在神經網絡中,作為激活函數:

def sigmoid(z):
    """Logistic sigmoid 1 / (1 + e^(-z)); maps any real input into (0, 1).

    Works element-wise on numpy arrays as well as on scalars.
    """
    exp_neg = np.exp(-z)
    return 1 / (1 + exp_neg)

代價函數和梯度的計算:

def costFunction(initial_theta, X, y, inital_lambda):
    """Regularized logistic-regression cost.

    J = [ -y'.log(h) - (1-y)'.log(1-h) + (lambda/2) * theta'.theta ] / m,
    where h = sigmoid(X.theta) and the bias term theta[0] is excluded
    from the regularization (standard convention: j starts at 1).
    """
    m = len(y)
    h = sigmoid(np.dot(X, initial_theta))        # hypothesis h(z)
    theta1 = initial_theta.copy()                # copy so the caller's theta is untouched
    theta1[0] = 0                                # do not regularize the bias term
    temp = np.dot(np.transpose(theta1), theta1)  # theta'.theta with bias zeroed out
    J = (-np.dot(np.transpose(y), np.log(h))
         - np.dot(np.transpose(1 - y), np.log(1 - h))
         + temp * inital_lambda / 2) / m
    return J


def gradient(initial_theta, X, y, inital_lambda):
    """Gradient of the regularized logistic-regression cost.

    grad = X'.(h - y)/m + (lambda/m) * theta, with theta[0] not regularized.
    """
    m = len(y)
    h = sigmoid(np.dot(X, initial_theta))  # hypothesis h(z)
    theta1 = initial_theta.copy()
    theta1[0] = 0                          # do not regularize the bias term
    grad = np.dot(np.transpose(X), h - y) / m + inital_lambda / m * theta1
    return grad

畫出邊界曲線:

def plotDecisionBoundary(theta, X, y):
    """Scatter the two classes and overlay the fitted linear decision boundary.

    X is the raw (un-augmented) feature matrix; theta comes from the
    bias-augmented fit, so the boundary is theta0 + theta1*x1 + theta2*x2 = 0.
    """
    pos = np.where(y == 1)
    neg = np.where(y == 0)

    plt.plot(X[pos, 0], X[pos, 1], '+')
    plt.plot(X[neg, 0], X[neg, 1], 'o')
    plt.title("DecisionBoundary")

    # Two x-coordinates suffice to define a line; pad 2 units past the data range.
    plot_x = np.array([np.min(X[:, 1]) - 2, np.max(X[:, 1]) + 2])
    # Solve theta0 + theta1*x + theta2*y = 0 for y to get the boundary line.
    plot_y = (-1 / theta[2]) * (theta[1] * plot_x + theta[0])
    plt.plot(plot_x, plot_y)
    plt.show()

預測:

def predict(X, theta):
    """Return hard 0/1 predictions for each row of X.

    Computes p = sigmoid(X.theta) and thresholds: probability strictly
    greater than 0.5 -> 1, otherwise 0 (same strict '>' as the original
    element-wise loop, now vectorized; the dead pre-allocation of p was
    removed).
    """
    probs = sigmoid(np.dot(X, theta))  # predicted probabilities in (0, 1)
    return np.where(probs > 0.5, 1.0, 0.0)

主函數:

def LogisticRegression():
    """End-to-end training script: load the data set, fit theta with BFGS,
    report training accuracy, and plot the decision boundary.
    """
    # Load comma-separated data: every column but the last is a feature,
    # the last column is the 0/1 label.
    data = loadtxtAndcsv("ex2data1.txt",",",np.float64);
    X = data[:,0:-1]
    y = data[:,-1]
    y = np.transpose(y)  # NOTE(review): transpose of a 1-D slice is a no-op — presumably left over from a column-vector version
    print("得到數據的圖像")
    plotData(X,y) 
    
    
    [m,n] = np.shape(X)
    # Prepend a column of ones so theta[0] acts as the intercept term.
    X = np.hstack((np.ones((m,1)),X))
    initial_theta = np.zeros((n+1,1))
    # Sanity-check cost/gradient at theta = 0 with lambda = 0 (results unused below).
    cost = costFunction(initial_theta,X,y,0)
    grad = gradient(initial_theta,X,y,0)
    # Minimize the (unregularized, lambda = 0) cost with BFGS.
    result = optimize.fmin_bfgs(costFunction, initial_theta, fprime=gradient, args=(X,y,0))    
    
    p = (predict(X,result)) # predicted 0/1 labels on the training set
    print(u'在訓練集上的准確度為%f%%'%np.mean(np.float64(p==y)*100))
    
    X = data[:,0:-1]        # reload raw features to draw the decision boundary
    y = data[:,-1] 
    plotDecisionBoundary(result,X,y)
    
# Run the full pipeline only when executed as a script.
if __name__ == "__main__":
    LogisticRegression()

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM