1. Approach
- By definition,
\[\frac{dy}{dx}=\lim_{\Delta x\to 0}\frac{\Delta y}{\Delta x}\]
To make this expression computable on a computer, the limit has to be replaced by a small finite step, and this is where the Taylor expansion becomes important (see the sketch right after this list).
- Use PyTorch's automatic differentiation (nn.Parameter combined with backward() computes the gradient automatically).
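To spell out the step behind the first approach: a first-order Taylor expansion (standard calculus, independent of PyTorch) justifies replacing the limit with a finite step,
\[f(x+\Delta x)=f(x)+f'(x)\,\Delta x+O(\Delta x^{2})\quad\Longrightarrow\quad\frac{f(x+\Delta x)-f(x)}{\Delta x}=f'(x)+O(\Delta x)\]
so a small but finite step (0.01 in the examples below) approximates the derivative with an error on the order of the step size.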
2. Examples
'''Using PyTorch'''
import torch
import torch.nn as nn
import numpy as np
from matplotlib import pyplot as plt
aList = np.arange(-10, 10, 0.01)
resList = []
gradList = []
func = torch.sin
for a in aList:
    # Wrap each sample point in nn.Parameter so it carries requires_grad=True
    a = nn.Parameter(torch.tensor(a))
    b = func(a)
    resList.append(b.item())
    b.backward()                    # fills a.grad with d(sin a)/da
    gradList.append(a.grad.item())
plt.plot(aList, resList, label='sin')
plt.plot(aList, gradList, label='grad')
plt.plot(aList, [np.cos(i) for i in aList], '-.', label='cos')
plt.legend()
plt.savefig('求導.jpg')
plt.show()
'''Using reflection'''
import torch
import torch.nn as nn
import numpy as np
from matplotlib import pyplot as plt
aList = np.arange(-10, 10, 0.01)
resList = []
gradList = []
funcName = 'ReLU'
func = getattr(nn, funcName)()      # look up the activation class by name, then instantiate it
for a in aList:
    a = nn.Parameter(torch.tensor(a))
    b = func(a)
    resList.append(b.item())
    b.backward()
    gradList.append(a.grad.item())
plt.plot(aList, resList, label=funcName)
plt.plot(aList, gradList, label='grad')
plt.legend()
plt.savefig('求導.jpg')
plt.show()
'''Using the definition'''
import torch
import numpy as np
from matplotlib import pyplot as plt
aList = np.arange(-10, 10, 0.01)
resList = [np.sin(i) for i in aList]
# Forward difference with step 0.01, evaluated at each point of aList
gradList = [((torch.sin(torch.tensor(i + 0.01, dtype=torch.float64))
              - torch.sin(torch.tensor(i, dtype=torch.float64))) / 0.01).item()
            for i in aList]
plt.plot(aList, resList, label='sin')
plt.plot(aList, gradList, label='grad')
plt.plot(aList, [np.cos(i) for i in aList], '-.', label='cos')
plt.legend()
plt.savefig('求導.jpg')
plt.show()
3. Question
Here a question arises: even after converting the tensor to torch.float64, the definition-based approach is still inaccurate because of the finite-difference approximation. So what does PyTorch do under the hood to compute derivatives exactly?
A guess: at the lowest level PyTorch binds a corresponding backward() method to every function, and combined with the chain rule this lets it compute the derivative of any composition of functions directly (this is the role of torch.autograd.Function; in hindsight it should be one of the core classes, and perhaps every computational function inherits from it).
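To make the guess concrete, here is a minimal sketch of a custom torch.autograd.Function that implements sin together with its analytic backward rule; the class name MySin and the test values are made up for illustration, but this is the mechanism by which autograd pairs each primitive operation with an exact derivative that the chain rule then composes.
'''A sketch: a custom torch.autograd.Function'''
import torch

class MySin(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x):
        ctx.save_for_backward(x)            # remember the input for the backward pass
        return torch.sin(x)

    @staticmethod
    def backward(ctx, grad_output):
        (x,) = ctx.saved_tensors
        return grad_output * torch.cos(x)   # exact analytic derivative, composed via the chain rule

x = torch.linspace(-10, 10, steps=2001, requires_grad=True)
y = MySin.apply(x)
y.backward(torch.ones_like(y))
print(torch.allclose(x.grad, torch.cos(x.detach())))  # True: no finite-difference error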
Further reading:
- PyTorch automatic differentiation (Part 1), (Part 2), (Part 3)
- The official tutorial
- https://bqleng.blog.csdn.net/article/details/108642299
4. torch.autograd.grad
import torch
import torch.nn as nn
import torch.autograd as autograd
dataIn = torch.randn(10, 1, 10, requires_grad=True).cuda()
dataOut = nn.Sequential(nn.Linear(10, 10), nn.Tanh(),
                        nn.Conv1d(1, 1, kernel_size=5, padding=2),
                        nn.Linear(10, 10)).cuda()(dataIn)
# grad_outputs must have the same shape as dataOut (here it happens to equal dataIn's shape)
gradients = autograd.grad(outputs=dataOut, inputs=dataIn,
                          grad_outputs=torch.ones(dataOut.size()).cuda(),
                          create_graph=True, retain_graph=True, only_inputs=True)[0]
# With grad_outputs=None this raises an error, because dataOut is not a scalar:
# gradients = autograd.grad(outputs=dataOut, inputs=dataIn, grad_outputs=None,
#                           create_graph=True, retain_graph=True, only_inputs=True)[0]
print(gradients)
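As a side note on the commented-out variant: grad_outputs may only be omitted when the output is a scalar. Below is a minimal CPU-only sketch (independent of the snippet above, with made-up toy tensors) of the two equivalent ways to handle a non-scalar output.
'''Sketch: non-scalar outputs and grad_outputs'''
import torch
import torch.autograd as autograd

x = torch.randn(5, requires_grad=True)
y = x.tanh()                                    # non-scalar output
# autograd.grad(outputs=y, inputs=x)            # would raise an error: grad_outputs cannot be inferred for a non-scalar output
# Option 1: pass an explicit grad_outputs with the same shape as y
g1 = autograd.grad(outputs=y, inputs=x, grad_outputs=torch.ones_like(y),
                   retain_graph=True)[0]
# Option 2: reduce the output to a scalar first; the gradients are identical
g2 = autograd.grad(outputs=y.sum(), inputs=x)[0]
print(torch.allclose(g1, g2))  # True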
5. Consistency of backward and torch.autograd.grad, and how to differentiate when the output is an arbitrary tensor rather than a scalar
import torch
import torch.nn as nn
import torch.autograd as autograd
seed = 10
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        # Six parallel Conv1d branches with different kernel sizes, 'same' padding, each followed by MaxPool1d(10)
        self.conv1_1 = nn.Sequential(nn.Conv1d(1, 5, kernel_size=5, padding=2), nn.ReLU(), nn.MaxPool1d(10))
        self.conv1_2 = nn.Sequential(nn.Conv1d(1, 5, kernel_size=9, padding=4), nn.ReLU(), nn.MaxPool1d(10))
        self.conv1_3 = nn.Sequential(nn.Conv1d(1, 6, kernel_size=17, padding=8), nn.ReLU(), nn.MaxPool1d(10))
        self.conv1_4 = nn.Sequential(nn.Conv1d(1, 6, kernel_size=65, padding=32), nn.ReLU(), nn.MaxPool1d(10))
        self.conv1_5 = nn.Sequential(nn.Conv1d(1, 5, kernel_size=129, padding=64), nn.ReLU(), nn.MaxPool1d(10))
        self.conv1_6 = nn.Sequential(nn.Conv1d(1, 5, kernel_size=257, padding=128), nn.ReLU(), nn.MaxPool1d(10))
        self.feature = nn.Sequential(nn.Conv1d(32, 48, 55), nn.ReLU(), nn.MaxPool1d(10))
        self.class_classifier = nn.Sequential(nn.Linear(720, 128), nn.ReLU(), nn.Dropout(), nn.Linear(128, 1))

    def forward(self, dataRandom, oneHotLabel):
        x1_1 = self.conv1_1(dataRandom)
        print(x1_1)
        x1_2 = self.conv1_2(dataRandom)
        x1_3 = self.conv1_3(dataRandom)
        x1_4 = self.conv1_4(dataRandom)
        x1_5 = self.conv1_5(dataRandom)
        x1_6 = self.conv1_6(dataRandom)
        x1 = torch.cat([x1_1, x1_2, x1_3, x1_4, x1_5, x1_6], dim=1)   # concatenate along the channel dim: 32 channels
        feature = self.feature(x1)
        feature = feature.view(-1, 48 * 15)
        class_output = self.class_classifier(feature)
        return class_output
D = Discriminator()
dataIn = torch.randn(10, 2048).unsqueeze(1)
dataIn.requires_grad = True
dataOut = D(dataIn, None)
'''V1, backward'''
dataOut.backward(torch.ones_like(dataOut))
print(dataIn.grad)
'''V2, torch.autograd.grad'''
# gradients = autograd.grad(outputs=dataOut, inputs=dataIn,
#                           grad_outputs=torch.ones(dataOut.size()),
#                           create_graph=True, retain_graph=True, only_inputs=True)[0]
# print(gradients)
When the output dataOut is a tensor of arbitrary shape, the ? in backward(?) and the grad_outputs=? of torch.autograd.grad must be a tensor with the same shape as dataOut; in practice it is usually a tensor of ones (which is equivalent to differentiating the sum of all output elements).
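A quick self-contained check of the consistency claim, using an arbitrary toy function instead of the Discriminator above (a sketch; the function f and the tensor shapes are made up for illustration):
'''Sketch: backward vs. torch.autograd.grad give the same gradient'''
import torch
import torch.autograd as autograd

x1 = torch.randn(4, 3, requires_grad=True)
x2 = x1.detach().clone().requires_grad_(True)   # same values, separate graph

f = lambda t: (t * t).sin()                     # any differentiable function with a non-scalar output

y1 = f(x1)
y1.backward(torch.ones_like(y1))                # V1: backward with an all-ones tensor of y1's shape

y2 = f(x2)
g2 = autograd.grad(outputs=y2, inputs=x2,
                   grad_outputs=torch.ones_like(y2))[0]   # V2: the same ones tensor passed as grad_outputs

print(torch.allclose(x1.grad, g2))              # True: both APIs produce identical gradients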