Assignment 3: Gradient Descent with Minibatches
| Item | Content |
| --- | --- |
| Course this assignment belongs to | Artificial Intelligence Practice 2019 (Beihang University) |
| Assignment requirement | Assignment 3: perform gradient descent with minibatches |
| My goals in this course | Learn algorithms, gain project experience, and sharpen coding skills |
| How this assignment helps me reach those goals | Understand the concepts of batch, iteration, and epoch; learn to work with batched computation |
| Assignment body | See below |
| Other references | Microsoft sample code |
1. Assignment Requirements
- Perform gradient descent using minibatches.
- Review the lecture material (link) and answer the following questions about the 2D contour plot of the loss function:
  - Why is it an ellipse rather than a circle? How can this plot be turned into a circle?
  - Why is the center an elliptical region rather than a single point?
2. Implementing Minibatch Gradient Descent with Randomly Selected Data
Sample code location: /B-教學案例與實踐/B6-神經網絡基本原理簡明教程/微軟-方案1/NeuralNetwork/ch04/level4-BatchGradientDescent.py
```python
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import savefig
from pathlib import Path

x_data_name = "TemperatureControlXData.dat"
y_data_name = "TemperatureControlYData.dat"


class CData(object):
    # record of one training state: loss value, weights, and where it occurred
    def __init__(self, loss, w, b, epoch, iteration):
        self.loss = loss
        self.w = w
        self.b = b
        self.epoch = epoch
        self.iteration = iteration


def ReadData():
    Xfile = Path(x_data_name)
    Yfile = Path(y_data_name)
    if Xfile.exists() and Yfile.exists():
        X = np.load(Xfile)
        Y = np.load(Yfile)
        return X.reshape(1, -1), Y.reshape(1, -1)
    else:
        return None, None


def ForwardCalculationBatch(W, B, batch_x):
    Z = np.dot(W, batch_x) + B
    return Z


def BackPropagationBatch(batch_x, batch_y, batch_z):
    m = batch_x.shape[1]
    dZ = batch_z - batch_y
    dB = dZ.sum(axis=1, keepdims=True) / m
    dW = np.dot(dZ, batch_x.T) / m
    return dW, dB


def UpdateWeights(w, b, dW, dB, eta):
    w = w - eta * dW
    b = b - eta * dB
    return w, b


def InitialWeights(num_input, num_output, flag):
    if flag == 0:
        # zero initialization
        W = np.zeros((num_output, num_input))
    elif flag == 1:
        # normal (Gaussian) initialization
        W = np.random.normal(size=(num_output, num_input))
    elif flag == 2:
        # Xavier initialization
        W = np.random.uniform(
            -np.sqrt(6 / (num_input + num_output)),
            np.sqrt(6 / (num_input + num_output)),
            size=(num_output, num_input))
    B = np.zeros((num_output, 1))
    return W, B


def CheckLoss(W, B, X, Y):
    # mean squared error over the whole data set
    m = X.shape[1]
    Z = np.dot(W, X) + B
    LOSS = (Z - Y) ** 2
    loss = LOSS.sum() / m / 2
    return loss


def shuffle(X, Y):
    # randomly permute the sample order once per epoch
    num_example = X.shape[1]
    rank = np.arange(0, num_example)
    np.random.shuffle(rank)
    X_shuffle = []
    Y_shuffle = []
    for i in rank:
        X_shuffle.append(X[:, i])
        Y_shuffle.append(Y[:, i])
    X_shuffle = np.transpose(X_shuffle)
    Y_shuffle = np.transpose(Y_shuffle)
    return X_shuffle, Y_shuffle


def GetBatchSamples(X, Y, batch_size, iteration):
    # slice out the minibatch for the current iteration
    num_feature = X.shape[0]
    start = iteration * batch_size
    end = start + batch_size
    batch_x = X[0:num_feature, start:end].reshape(num_feature, batch_size)
    batch_y = Y[0, start:end].reshape(1, batch_size)
    return batch_x, batch_y


def GetMinimalLossData(dict_loss):
    # dict_loss is keyed by loss value, so the smallest key is the best record
    key = sorted(dict_loss.keys())[0]
    w = dict_loss[key].w
    b = dict_loss[key].b
    return w, b, dict_loss[key]


def ShowIterLossHistory(dict_loss, batch_size):
    loss = []
    for key in dict_loss:
        loss.append(key)
    plt.title("batch size :" + str(batch_size))
    plt.xlabel("iteration")
    plt.plot(loss[30:800])
    plt.ylabel("loss")
    savefig("/Users/souchiguu/Desktop/" + str(batch_size) + ".png")
    plt.show()


def ShowEpochLossHistory(list_epoch, Batchsize):
    color = ['b', 'g', 'y']
    for num_batch in range(len(Batchsize)):
        loss = []
        for key in list_epoch[num_batch]:
            loss.append(key)
        plt.plot(loss, color[num_batch], label='batchsize=' + str(Batchsize[num_batch]))
    plt.title("learning rate = 0.01")
    plt.xlabel("epoch")
    plt.ylabel("loss")
    plt.legend()
    savefig("/Users/souchiguu/Desktop/" + "0.1" + ".png")
    plt.show()


if __name__ == '__main__':
    # method = "MiniBatch"
    eta, max_epoch = 0.01, 50
    Batchsize = [5, 10, 15]
    list_epoch = []
    # read data
    X_origin, Y_origin = ReadData()
    # count of samples and features
    num_example = X_origin.shape[1]
    num_feature = X_origin.shape[0]
    for batch_size in Batchsize:
        W, B = InitialWeights(1, 1, 0)
        # record losses so the best weights can be picked afterwards
        dict_epoch_loss = {}
        dict_iter_loss = {}
        for epoch in range(max_epoch):
            # random shuffle
            X, Y = shuffle(X_origin, Y_origin)
            # if num_example=200, batch_size=10, then iteration=200/10=20
            max_iteration = (int)(num_example / batch_size)
            sum_loss = 0
            for iteration in range(max_iteration):
                # get x and y values for one minibatch
                batch_x, batch_y = GetBatchSamples(X, Y, batch_size, iteration)
                # get z from x,y
                batch_z = ForwardCalculationBatch(W, B, batch_x)
                # calculate gradient of w and b
                dW, dB = BackPropagationBatch(batch_x, batch_y, batch_z)
                # update w,b
                W, B = UpdateWeights(W, B, dW, dB, eta)
                # calculate loss on the whole data set after this update
                loss = CheckLoss(W, B, X, Y)
                # print("batchsize=%d, epoch=%d, iteration=%d, loss=%f" % (batch_size, epoch, iteration, loss))
                dict_iter_loss[loss] = CData(loss, W, B, epoch, iteration)
                sum_loss += loss
            # end for
            dict_epoch_loss[sum_loss] = CData(sum_loss, W, B, epoch, max_iteration)
        # end for
        list_epoch.append(dict_epoch_loss)
        ShowIterLossHistory(dict_iter_loss, batch_size)
        w, b, cdata = GetMinimalLossData(dict_epoch_loss)
        print("w:", cdata.w, "b:", cdata.b)
        print("batchsize=%d, epoch=%d, iteration=%d, loss=%f" % (batch_size, cdata.epoch, cdata.iteration, cdata.loss))
    ShowEpochLossHistory(list_epoch, Batchsize)
```
With a learning rate of 0.1, the weights at the minimal loss and the trend of the loss with epoch, iteration, and batch size are as follows (with 200 training samples, one epoch contains 200/5 = 40, 200/10 = 20, and 200/15 = 13 iterations, which matches the iteration counts printed below):
(Figure: loss curves, learning rate = 0.1)
```
w: [[1.99854936]] b: [[3.00973446]]
batchsize=5, epoch=27, iteration=40, loss=0.196999
w: [[1.99322693]] b: [[3.00426234]]
batchsize=10, epoch=37, iteration=20, loss=0.098183
w: [[1.99197605]] b: [[3.0102479]]
batchsize=15, epoch=46, iteration=13, loss=0.063790
```
With a learning rate of 0.01, the corresponding results are:
(Figure: loss curves, learning rate = 0.01)
```
w: [[1.90823943]] b: [[3.05213545]]
batchsize=5, epoch=49, iteration=40, loss=0.209842
w: [[1.82458827]] b: [[3.09514729]]
batchsize=10, epoch=49, iteration=20, loss=0.123555
w: [[1.78029477]] b: [[3.11700018]]
batchsize=15, epoch=49, iteration=13, loss=0.089676
```
3. Discussion Questions
(Figure: loss_2d, the 2D contour plot of the loss function)

- Why is it an ellipse rather than a circle? How can this plot be turned into a circle?
Intuitively, it is because \(w\) and \(b\) have different coefficients in the loss function. The underlying reason is that they play different roles in the forward computation, much like two orthogonal eigenvectors associated with different eigenvalues. The loss function is \(J(w,b)=\frac{1}{m}\sum_{i=1}^m(wx_i+b-y_i)^2\); expanding it, the coefficient of \(w^2\) is \(\frac{1}{m}\sum_{i=1}^m x_i^2\) times the coefficient of \(b^2\). When \(\frac{1}{m}\sum_{i=1}^m x_i^2 \neq 1\), the level sets satisfy the ellipse equation \(\frac{x^2}{a^2}+\frac{y^2}{b^2}=1\) rather than a circle. Forcing \(\frac{1}{m}\sum_{i=1}^m x_i^2 = 1\) turns the plot into circles, e.g. by shifting \(x\) to zero mean and rescaling it so that \(\sum_{i=1}^m x_i^2 = m\).
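To make this concrete, here is a minimal sketch, not part of the assignment code: the helper `loss_grid` and the synthetic data are my own stand-ins, since the TemperatureControl data files are not reproduced here. It draws the loss contours over \((w, b)\) for the raw feature and for a zero-mean, rescaled feature; the former gives ellipses, the latter circles.

```python
import numpy as np
import matplotlib.pyplot as plt

def loss_grid(x, y, w_range, b_range):
    # J(w, b) = 1/(2m) * sum_i (w*x_i + b - y_i)^2, evaluated on a (w, b) grid
    W, B = np.meshgrid(w_range, b_range)
    J = np.zeros_like(W)
    for xi, yi in zip(x, y):
        J += (W * xi + B - yi) ** 2
    return W, B, J / (2 * x.size)

np.random.seed(0)
x = np.random.uniform(0, 1, 200)               # raw feature: (1/m)*sum(x**2) != 1
y = 2 * x + 3 + np.random.normal(0, 0.2, 200)  # synthetic stand-in for the .dat files

# normalized feature: zero mean and sum(x_n**2) == m, so the w^2 and b^2 terms match
x_n = x - x.mean()
x_n = x_n / np.sqrt((x_n ** 2).mean())

w_range = np.linspace(-1, 5, 100)
b_range = np.linspace(1, 7, 100)
for idx, (feat, title) in enumerate([(x, "raw x: ellipses"), (x_n, "normalized x: circles")]):
    W, B, J = loss_grid(feat, y, w_range, b_range)
    ax = plt.subplot(1, 2, idx + 1)
    ax.contour(W, B, J, levels=20)
    ax.set_aspect("equal")                     # equal axis scale so circles look circular
    ax.set_title(title)
    ax.set_xlabel("w")
    ax.set_ylabel("b")
plt.tight_layout()
plt.show()
```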
- Why is the center an elliptical region rather than a single point?
The minimum of the loss should be attained at a unique point. The center shows up as an elliptical region rather than a point because \(w\) and \(b\) cannot be sampled continuously; the plot can only approximate them on a discrete grid, and the grid points near the true minimum, where the loss values are nearly identical, merge into that elliptical region.
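As a quick numerical check of this answer, the sketch below (again with synthetic data and hypothetical names, not the assignment's data) computes the unique least-squares minimum analytically and then evaluates the loss at grid points a small step away: the values are almost identical, which is why a contour plot drawn from a discrete \((w, b)\) grid shows a filled central region instead of a single point.

```python
import numpy as np

np.random.seed(0)
x = np.random.uniform(0, 1, 200)               # synthetic feature
y = 2 * x + 3 + np.random.normal(0, 0.2, 200)  # synthetic target
m = x.size

# unique least-squares solution of min_{w,b} 1/(2m) * sum (w*x_i + b - y_i)^2
A = np.stack([x, np.ones(m)], axis=1)
(w_star, b_star), *_ = np.linalg.lstsq(A, y, rcond=None)

def J(w, b):
    return ((w * x + b - y) ** 2).sum() / (2 * m)

print("minimum:", w_star, b_star, "loss:", J(w_star, b_star))
# losses at grid points a small step away from the minimum differ only slightly,
# so a contour plot drawn on a discrete grid shows a filled region, not a point
for dw in (-0.01, 0.0, 0.01):
    for db in (-0.01, 0.0, 0.01):
        print(f"w={w_star + dw:.3f} b={b_star + db:.3f} loss={J(w_star + dw, b_star + db):.6f}")
```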