Dataset:
Link: https://pan.baidu.com/s/1Hp8MYa6Q6QBzexlJ2z9mDw&shfl=sharepset
Extraction code: azbe
Import the libraries and load the data files
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import optimize

def loadtxtAndcsv(fileName, split, datatype):
    # load a delimited text/csv file into a NumPy array
    return np.loadtxt(fileName, delimiter=split, dtype=datatype)

def loadnpy(fileName):
    # load a NumPy .npy binary file
    return np.load(fileName)
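As a quick sanity check of the loader (a hypothetical usage; it assumes ex2data1.txt from the download above sits in the working directory, with two feature columns and a 0/1 label in the last column):

data = loadtxtAndcsv("ex2data1.txt", ",", np.float64)  # assumed layout: feature1, feature2, label
print(data.shape)   # e.g. (100, 3): two features plus the label column
print(data[:3])     # first few rows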
Plotting function
def plotData(X, y):
    pos = np.where(y == 1)
    neg = np.where(y == 0)
    plt.plot(X[pos, 0], X[pos, 1], '+')   # positive examples
    plt.plot(X[neg, 0], X[neg, 1], 'o')   # negative examples
    plt.title("Two kinds of data's comparison")
    plt.show()
The sigmoid function for logistic regression, which later also serves as the activation function in neural networks:
def sigmoid(z):
    return 1 / (1 + np.exp(-z))
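A quick numerical check: sigmoid(0) is exactly 0.5, and the function saturates toward 0 and 1 for large negative and positive inputs:

print(sigmoid(np.array([-10.0, 0.0, 10.0])))  # approximately [4.54e-05, 0.5, 0.99995]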
Computing the cost function and the gradient:
def costFunction(initial_theta, X, y, initial_lambda):
    m = len(y)
    h = sigmoid(np.dot(X, initial_theta))        # compute h(z)
    # regularization runs over j = 1..n and excludes theta(0),
    # so work on a copy with theta(0) zeroed out
    theta1 = initial_theta.copy()
    theta1[0] = 0
    temp = np.dot(np.transpose(theta1), theta1)  # sum of theta_j^2 for j >= 1
    # regularized cost
    J = (-np.dot(np.transpose(y), np.log(h)) - np.dot(np.transpose(1 - y), np.log(1 - h)) + temp * initial_lambda / 2) / m
    return J

# compute the gradient
def gradient(initial_theta, X, y, initial_lambda):
    m = len(y)
    h = sigmoid(np.dot(X, initial_theta))        # compute h(z)
    theta1 = initial_theta.copy()
    theta1[0] = 0                                # theta(0) is not regularized
    grad = np.dot(np.transpose(X), h - y) / m + initial_lambda / m * theta1  # regularized gradient
    return grad
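For reference, the regularized cost and gradient computed above are (theta_0 is excluded from the penalty term):

J(\theta) = \frac{1}{m}\left[-y^{T}\log(h) - (1-y)^{T}\log(1-h)\right] + \frac{\lambda}{2m}\sum_{j=1}^{n}\theta_j^{2}

\frac{\partial J}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m}\left(h(x^{(i)}) - y^{(i)}\right)x_j^{(i)} + \frac{\lambda}{m}\theta_j \qquad (j \geq 1)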
Plot the decision boundary:
def plotDecisionBoundary(theta, X, y):
    pos = np.where(y == 1)
    neg = np.where(y == 0)
    plt.plot(X[pos, 0], X[pos, 1], '+')
    plt.plot(X[neg, 0], X[neg, 1], 'o')
    plt.title("DecisionBoundary")
    # two points are enough to define a line, so take the endpoints of the first feature's range
    plot_x = np.array([np.min(X[:, 0]) - 2, np.max(X[:, 0]) + 2])
    plot_y = (-1 / theta[2]) * (theta[1] * plot_x + theta[0])  # solve theta^T x = 0 for the second feature
    plt.plot(plot_x, plot_y)
    plt.show()
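The plotted line is simply the set of points where the model's linear score is zero:

\theta_0 + \theta_1 x_1 + \theta_2 x_2 = 0 \;\Longrightarrow\; x_2 = -\frac{1}{\theta_2}\left(\theta_0 + \theta_1 x_1\right)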
Prediction:
def predict(X, theta):
    m = X.shape[0]
    p = sigmoid(np.dot(X, theta))  # predicted probabilities
    for i in range(m):
        if p[i] > 0.5:             # predict 1 when the probability exceeds 0.5, otherwise 0
            p[i] = 1
        else:
            p[i] = 0
    return p
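The thresholding loop can also be collapsed into a single vectorized expression. The following predict_vectorized is an equivalent alternative sketch, not part of the original code:

def predict_vectorized(X, theta):
    # 1 where P(y=1|x) > 0.5, otherwise 0
    return (sigmoid(np.dot(X, theta)) > 0.5).astype(np.float64)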
Main function:
def LogisticRegression():
    data = loadtxtAndcsv("ex2data1.txt", ",", np.float64)
    X = data[:, 0:-1]
    y = data[:, -1]
    print("Plotting the data")
    plotData(X, y)

    m, n = np.shape(X)
    X = np.hstack((np.ones((m, 1)), X))  # prepend the intercept column
    initial_theta = np.zeros(n + 1)      # 1-D, so the shapes stay consistent inside the optimizer

    cost = costFunction(initial_theta, X, y, 0)  # cost and gradient at theta = 0, as a sanity check
    grad = gradient(initial_theta, X, y, 0)
    result = optimize.fmin_bfgs(costFunction, initial_theta, fprime=gradient, args=(X, y, 0))

    p = predict(X, result)  # predicted labels
    print('Accuracy on the training set: %f%%' % np.mean(np.float64(p == y) * 100))

    X = data[:, 0:-1]  # plot the decision boundary on the raw features
    y = data[:, -1]
    plotDecisionBoundary(result, X, y)

if __name__ == "__main__":
    LogisticRegression()
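fmin_bfgs is SciPy's legacy optimizer interface. The same fit can also be run through scipy.optimize.minimize; this is a sketch assuming the same costFunction/gradient signatures as above:

res = optimize.minimize(costFunction, initial_theta, args=(X, y, 0), method="BFGS", jac=gradient)
theta_opt = res.x  # plays the role of the `result` returned by fmin_bfgs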