Basic data types
Updated May 16
---------------------------------------------
- Save the model
print("Saving state, iter:", str(epoch))
torch.save(model.state_dict(), f'logs/Epoch{epoch}-acc{acc}.pth')
- Load the model / use for pretraining
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ResNet18().to(device)
# ----------------------------#
model_path = r"logs/Epoch2-acc0.6816.pth"
print('Loading weights into state dict...')
state_dict = torch.load(model_path, map_location=device)
model.load_state_dict(state_dict, strict=True)
- Use partial pretrained weights (updated May 21)
# -----Use partial pretrained weights------#
model_path = r"logs/Epoch2-acc0.6831.pth"
print('Loading weights into state dict...')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_dict = model.state_dict()
pretrained_dict = torch.load(model_path, map_location=device)
# keep only weights whose name exists in the model and whose shape matches
pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict and model_dict[k].shape == v.shape}
model_dict.update(pretrained_dict)
model.load_state_dict(model_dict)
print('Finished!')
# ---------------------------------------#
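A hedged sketch of a fuller checkpoint that also stores the optimizer state and the epoch, so training can be resumed exactly; the dictionary keys ('model', 'optimizer', 'epoch') are my own naming convention, and model / optimizer / epoch / device are assumed to exist as above.
# -----Save/resume a full checkpoint (sketch)-----#
checkpoint = {'model': model.state_dict(),
              'optimizer': optimizer.state_dict(),
              'epoch': epoch}
torch.save(checkpoint, f'logs/checkpoint-{epoch}.pth')
# resume
checkpoint = torch.load(f'logs/checkpoint-{epoch}.pth', map_location=device)
model.load_state_dict(checkpoint['model'])
optimizer.load_state_dict(checkpoint['optimizer'])
start_epoch = checkpoint['epoch'] + 1
# ---------------------------------------#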
---------------------------------------------
Data type
type check
a = torch.randn(2, 3)  # randomly generate a 2x3 matrix
print(a.shape)
print(a.size(1))  # returns the 2nd element of the shape
print(a.shape[1])  # 3
# on the CPU
print(a.type())  # torch.FloatTensor
print(type(a))
print(isinstance(a, torch.FloatTensor))
# on the GPU
data = a.cuda()
print(isinstance(data, torch.cuda.FloatTensor))
"""
在pytorch0.3的版本中dimention為0的tensor是不存在的,如果表達是標量返回[0.3]
在之后的版本中,標量返回0.3 (為了語義更加清晰,使用長度為0的標量)
區分dim/size/shape/tensor
[2, 2]
dim: 2 rank
size/shape: [2, 2]
tensor: 具體數字 [1, 3 ]
[2, 4]
"""
Dimension 0/1/2
# Dim=0, used for loss values
a = torch.tensor(2.2)
print(a.shape) # torch.Size([])
print(len(a.shape)) # 0
print(a.size()) # torch.Size([])
# Dim=1, used for bias / Linear input
b = torch.tensor([2])  # written this way, the dtype follows the data inside
print(b)
print(b.type())
c = torch.tensor([1.1, 2.2])
print(c)
print(c.type())
d = torch.FloatTensor(2)
print(d)
e = torch.IntTensor([2.2])
print(e)
data = np.ones(3)
print(data)
f = torch.from_numpy(data)  # convert the numpy array into a tensor
print(f)
# Dim=2, used for Linear input / a batch
g = torch.randn(2, 3)  # random values from a standard normal distribution
print(g)
print(g.shape)
print(g.size())
print(g.size(0))
print(g.size(1))
print(g.shape[1])
# Dim=3, used for RNN input / a batch
h = torch.rand(3, 2, 3)  # random values from a uniform distribution on [0, 1)
print(h)
print(h.shape)
print(h[0])
print(h[1])
print(list(h.shape))
# Dim=4, used for CNN input: [b, c, h, w]
# e.g. 2 images, 3 channels each, 28x28 pixels
i = torch.rand(2, 3, 28, 28)  # number of images, channels (3 for RGB), height, width
print(i)
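Two further attributes that are handy for such tensors (added; standard tensor API):
print(i.numel())  # 2*3*28*28 = 4704, the total number of elements
print(i.dim())    # 4, the number of dimensions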
Creating tensors
import from numpy
import torch
import numpy as np
# import from numpy
a = np.array([2, 3.3])
data = torch.from_numpy(a)
print(data)
b = np.ones([3, 4])
dd = torch.from_numpy(b)
print(dd)
import from list
# import from a list
# Uppercase Tensor() behaves like FloatTensor: it takes a shape as arguments; lowercase tensor() takes existing data
c = torch.tensor([2., 3.2])
d = torch.FloatTensor([2., 3.2])  # can also take existing data, but it must be wrapped in a list; plain numbers such as (2, 3) are interpreted as a shape
e = torch.tensor([[2., 3.2], [1., 22.3]])
print(c)
print(d)
print(e)
uninitialized
# create uninitialized data: just a container; the values will be overwritten later
# torch.empty() : 給shape
# torch.FloatTensor(d1, d2, d3)
# torch.IntTensor(d1, d2, d3)
f = torch.empty(2, 3)
print(f)
print(torch.Tensor(2, 3))  # values are arbitrary and can be extreme; remember to overwrite them, otherwise you may run into NaN or Inf
print(torch.IntTensor(2, 3))
print(torch.FloatTensor(2, 3))
set default type
# set default type: torch's default tensor type is torch.FloatTensor
print(torch.tensor([1.2, 3]).type())
torch.set_default_tensor_type(torch.DoubleTensor)
print(torch.tensor([1.2, 3]).type())
rand/rand_like, randint
# rand/rand_like, randint
# rand: uniform samples on [0, 1)
# rand_like: takes a tensor and samples with its shape
# randint: integers in [min, max), the maximum excluded; *_like variants exist too
print(torch.rand(3, 3))  # fairly uniform samples
a = torch.rand(3, 3)
print(torch.rand_like(a))  # rand_like takes a tensor: it reads a.shape and feeds it to rand
print(torch.randint(1, 10, [3, 3]))
randn
# randn: normal distribution
# N(0, 1), often used for bias initialisation
# N(u, std)
print(torch.randn(3, 3))
# full creates a length-10 tensor of zeros for the means; the std decreases gradually from 1 towards 0
print(torch.normal(mean=torch.full([10], 0.), std=torch.arange(1, 0, -0.1)))
full
# full
print(torch.full([2, 3], 7))
print(torch.full([], 7)) # dim=0
print(torch.full([1], 7)) # dim=1
arange/range
print(torch.arange(0, 10))  # 10 is excluded
print(torch.arange(0, 10, 2))
linspace/logspace
print(torch.linspace(0, 10, steps=4))  # evenly spaced, endpoints included (0 and 10)
print(torch.logspace(0, 1, steps=10))  # 10 values 10**x, with x evenly spaced from 0 to 1
Ones/zeros/eye
# ones: all ones, given a shape
# zeros: all zeros
# eye: ones on the diagonal; takes 1 or 2 arguments
print(torch.ones(3, 3))
print(torch.zeros(3, 3))
print(torch.eye(3, 4))
data = torch.ones(3, 3)
print(torch.ones_like(data))
randperm: random permutation (shuffle)
# randperm: random permutation (shuffle)
print(torch.randperm(10))
a = torch.rand(2, 3)
b = torch.rand(2, 2)
idx = torch.randperm(2)
print(idx)
print(a)
print(b)
print(a[idx])  # indexing a and b with the same permutation shuffles them consistently
print(b[idx])
Indexing and slicing
indexing
a = torch.rand(4, 3, 28, 28)
print(a[0])
print(a[0].shape)  # torch.Size([3, 28, 28]): index the first dimension, i.e. take image 0
print(a[0, 0].shape)  # torch.Size([28, 28]): channel 0 of image 0
print(a[0, 0, 2])
print(a[0, 0, 2, 4])  # e.g. tensor(0.9441): image 0, channel 0, row 2, column 4
select first/last N
# select first/last N
a = torch.rand(4, 3, 28, 28)
print(a.shape) # torch.Size([4, 3, 28, 28])
print(a[:2].shape) # torch.Size([2, 3, 28, 28])
print(a[:2, :1, :, :].shape) # torch.Size([2, 1, 28, 28])
print(a[:2, 1:, :, :].shape) # torch.Size([2, 2, 28, 28])
print(a[:2, -1:, :, :].shape) # torch.Size([2, 1, 28, 28])
select by steps
# select by steps
a = torch.rand(4, 3, 28, 28)
print(a[:, :, 0:28:2, 0:28:2].shape) # torch.Size([4, 3, 14, 14])
print(a[:, :, ::2, ::2].shape) # torch.Size([4, 3, 14, 14])
select by specific index
# select by specific index
a = torch.rand(4, 3, 28, 28)
print(a)
print(a.index_select(0, torch.tensor([0, 2])).shape)  # select indices 0 and 2 along dim 0 -> torch.Size([2, 3, 28, 28])
print(a.index_select(2, torch.arange(20)).shape)
... stands for any number of dimensions
# ... stands for any number of dimensions
a = torch.rand(4, 3, 28, 28)
print(a[...].shape) # torch.Size([4, 3, 28, 28])
print(a[:, 1, ...].shape) # torch.Size([4, 28, 28])
print(a[..., :2].shape) # torch.Size([4, 3, 28, 2])
select by mask
# select by mask
x = torch.randn(3, 4)
y = torch.randn(3, 4)
print(x)
mask = x.ge(0.5)  # True where the element is >= 0.5
print(mask)
print(torch.masked_select(y, mask))  # pick out the elements of y at the True positions
select by flatten index
# select by flatten index
src = torch.IntTensor(3, 4)
print(src)
print(torch.take(src, torch.tensor([0, 2, 5])))
Tensor dimension transforms
- view  # reinterpret one shape as another
- squeeze (remove size-1 dims) / unsqueeze (insert a new dim)
- transpose (swap two dims) / t (2-D transpose) / permute (reorder all dims)
- expand (only changes how the data is viewed) / repeat (actually duplicates the data, memory copied)
view: lost dim information
# view: lost dim information
a = torch.rand(4, 1, 28, 28)
print(a)
print(a.shape)
print(a.view(4, 28 * 28).shape)
print(a.view(4 * 28, 28).shape)
print(a.view(4*1, 28, 28).shape)
b = a.view(4, 784)
b.view(4, 28, 28, 1)  # the shape is legal but the original dimension semantics are lost (logic bug)
# flexible but easy to corrupt; the number of elements must match
# print(a.view(4, 783))  # RuntimeError: shape '[4, 783]' is invalid for input of size 3136
squeeze / unsqueeze
unsqueeze
"""
范圍:
[-a.dim()-1, a.dim()+1]
[-5, 5)
"""
a = torch.rand(4, 1, 28, 28)
print(a.shape)
print(a.unsqueeze(0).shape)
print(a.unsqueeze(-1).shape)
print(a.unsqueeze(4).shape)
print(a.unsqueeze(-5).shape)
# print(a.unsqueeze(5).shape)  # IndexError: Dimension out of range (expected to be in range of [-5, 4], but got 5)
a = torch.tensor([1.2, 2.3])
print(a)
print(a.unsqueeze(-1))
print(a.unsqueeze(0))
# example:
b = torch.rand(32)
f = torch.rand(4, 32, 14, 14)
b = b.unsqueeze(1).unsqueeze(2).unsqueeze(0)
print(b.shape)
squeeze
# squeeze
b = torch.rand(1, 32, 1, 1)
print(b.squeeze().shape)  # every dim of size 1 is squeezed away -> torch.Size([32])
print(b.squeeze(0).shape)  # squeeze dim 0
print(b.squeeze(-1).shape)
print(b.squeeze(1).shape)  # dim 1 has size 32, so it cannot be squeezed and stays
print(b.squeeze(-4).shape)
expand/repeat
# expand/repeat
# expand: broadcasting; only changes how the data is viewed
# repeat: memory copied; the data is actually duplicated
a = torch.rand(4, 32, 14, 14)
b = torch.rand(1, 32, 1, 1)
print(b)
print(b.expand(4, 32, 14, 14).shape)  # torch.Size([4, 32, 14, 14])
print(b.expand(-1, 32, -1, -1).shape)  # -1 keeps that dimension unchanged
# print(b.expand(-1, 32, -1, -4).shape)  # -4 is passed straight through: RuntimeError: invalid shape dimension -128
# repeat: not recommended
print(b.repeat(4, 32, 1, 1).shape)  # repeat counts multiply each dim: [1, 32, 1, 1] -> [4, 32*32, 1, 1] = [4, 1024, 1, 1]
print(b.repeat(4, 1, 1, 1).shape)
print(b.repeat(4, 1, 32, 32).shape)  # [4, 32, 32, 32]
t(): transpose, only for 2-D tensors
# t(): transpose, only for 2-D tensors
a = torch.randn(3, 4)
print(a)
print(a.t())
transpose: swap two dimensions
# transpose: swap two dimensions
a = torch.rand(4, 3, 32, 32)
print(a.shape)
"""
RuntimeError: view size is not compatible with input tensor's size and stride
(at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.
"""
a1 = a.transpose(1, 3).contiguous().view(4, 3*32*32).view(4, 3, 32, 32)  # .contiguous() is required before view
a2 = a.transpose(1, 3).contiguous().view(4, 3*32*32).view(4, 32, 32, 3).transpose(1, 3)  # view back in the transposed order, then transpose again to restore a
print(a1.shape)
print(a2.shape)
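A small added check of the point above: a1 scrambles the element order because the flattened data is viewed back in the wrong dimension order, while a2 transposes back and recovers a exactly.
print(torch.all(torch.eq(a, a1)))  # tensor(False) almost surely
print(torch.all(torch.eq(a, a2)))  # tensor(True)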
permute: reorder the dimensions directly; equivalent to applying transpose as many times as needed
# permute: reorder the dimensions directly; equivalent to applying transpose as many times as needed
a = torch.rand(4, 3, 28, 28)
print(a.transpose(1, 3).shape) # torch.Size([4, 28, 28, 3])
b = torch.rand(4, 3, 28, 32)
print(b.transpose(1, 3).shape) # torch.Size([4, 32, 28, 3])
print(b.transpose(1, 3).transpose(1, 3).shape) # torch.Size([4, 3, 28, 32])
print(b.permute(0, 2, 3, 1).shape) # torch.Size([4, 28, 32, 3])
Broadcasting (automatic expansion)
"""
expand
without copying data
insert a dim of size 1 in front
expand dims of size 1 to the required size
feature maps: [4, 32, 14, 14]
bias: [32, 1, 1] => [1, 32, 1, 1] => [4, 32, 14, 14]   this is how the bias gets expanded
"""
broadcast
# situation 1
# [4, 32, 14, 14]
# [1, 32, 1, 1] => [4, 32, 14, 14]
# situation 2
# [4, 32, 14, 14]
# [14, 14] => [1, 1, 14, 14] => [4, 32, 14, 14]  # unsqueeze first, then expand
# situation 3 (not broadcastable)
# [4, 32, 14, 14]
# [2, 32, 14, 14]
"""
match from last dim
1. no dim
2. dim of size 1
"""
Concatenation and splitting
"""
Merge or split
merge:
    cat
    stack
split:
    split
    chunk
"""
cat
# cat
a = torch.rand(4, 32, 8)
b = torch.rand(5, 32, 8)
print(torch.cat([a, b], dim=0).shape) # torch.Size([9, 32, 8])
a1 = torch.rand(4, 3, 32, 32)
a2 = torch.rand(4, 1, 32, 32)
# print(torch.cat([a1, a2], dim=0).shape)  # RuntimeError: all dims except the cat dim must match (here dim 1 differs: 3 vs 1)
print(torch.cat([a1, a2], dim=1).shape) # torch.Size([4, 4, 32, 32])
stack: creates a new dim; requires the shapes to be exactly the same
# stack: creates a new dim; requires the shapes to be exactly the same
a1 = torch.rand(4, 3, 16, 32)
a2 = torch.rand(4, 3, 16, 32)
print(torch.cat([a1, a2], dim=2).shape) # torch.Size([4, 3, 32, 32])
print(torch.stack([a1, a2], dim=2).shape) # torch.Size([4, 3, 2, 16, 32])
a = torch.rand(32, 8)
b = torch.rand(32, 8)
print(torch.stack([a, b], dim=0).shape) # torch.Size([2, 32, 8])
split: split by length
# split: split by length
b = torch.rand(32, 8)
a = torch.rand(32, 8)
# print(a.shape) # torch.Size([32, 8])
c = torch.stack([a, b], dim=0)
# print(c.shape) # torch.Size([2, 32, 8])
aa, bb = c.split([4, 4], dim=2)
print(aa.shape, bb.shape)  # torch.Size([2, 32, 4]) torch.Size([2, 32, 4])
# aa, bb = c.split(2, dim=0)  # ValueError: not enough values to unpack (expected 2, got 1) -- splitting dim 0 (size 2) into chunks of length 2 yields a single tensor
print(c.shape) # torch.Size([2, 32, 8])
chunk: split by number of chunks
# chunk: split by number of chunks
aa, bb = c.chunk(2, dim=2)  # c is torch.Size([2, 32, 8])
print(aa.shape, bb.shape)  # torch.Size([2, 32, 4]) torch.Size([2, 32, 4])
Math operations
"""
Math operation
1. add/minus/multiply/divide
2. matmul
3. pow
4. sqrt/rsqrt
5. round
"""
Basics
# basics
a = torch.rand(3, 4)
b = torch.rand(4)
print(a)
print(b)
print(a + b)  # b is broadcast
# all() returns True only if every element of the tensor is True, otherwise False
b = torch.tensor([1, 1, 1, 1])
print(torch.all(torch.eq(a-b, torch.sub(a, b))))
matmul
# matmul
# torch.mm
# only for 2d
# torch.matmul
# @
a = torch.tensor([[3., 3.],
[3., 3.]])
print(a)
b = torch.ones(2, 2)
print(b)
print(torch.mm(a, b))  # only for 2-D matrices
print(torch.matmul(a, b))
print(a@b)
# example:
# == 2-D tensor multiplication
a = torch.rand(4, 784)
x = torch.rand(4, 784)
w = torch.rand(512, 784)  # (ch_out, ch_in)
print((x @ w.t()).shape)  # torch.Size([4, 512]); w is stored as (ch_out, ch_in), so it must be transposed before multiplying
print(torch.matmul(x, w.t()).shape)  # torch.Size([4, 512])
# > 2-D tensor multiplication
a = torch.rand(4, 3, 28, 64)
b = torch.rand(4, 3, 64, 32)
print(torch.matmul(a, b).shape) # torch.Size([4, 3, 28, 32])
b = torch.rand(4, 1, 64, 32)
print(torch.matmul(a, b).shape)  # torch.Size([4, 3, 28, 32]); b is broadcast first, then the last two dims are matrix-multiplied
power
# power
a = torch.full([2, 2], 3.)  # use a float fill value so sqrt/rsqrt work
print(a.pow(2))
print(a**2)
aa = a**2
print(aa.sqrt())
print(aa.rsqrt())
print(aa**(0.5))
exp/log
# exp/log
a = torch.exp(torch.ones(2, 2))
print(a)
print(torch.log(a))
approximation
# approximation
a = torch.tensor(3.14)
print(a.floor(), a.ceil(), a.trunc(), a.frac())  # tensor(3.) tensor(4.) tensor(3.) tensor(0.1400)
# floor: round down; ceil: round up; trunc: keep the integer part; frac: keep the fractional part
a = torch.tensor(3.499)
print(a.round())  # tensor(3.): round to the nearest integer
a = torch.tensor(3.5)
print(a.round()) # tensor(4.)
clamp: clipping
# clamp: clipping
"""
gradient clipping
(min)
(min, max)
"""
grad = torch.rand(2, 3)*15
print(grad)
print(grad.max())
print(grad.median())
print(grad.clamp(10))  # every element smaller than 10 becomes 10
print(grad.clamp(2, 10))  # elements below 2 are clipped to 2, elements above 10 are clipped to 10
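Since the docstring above mentions gradient clipping: a minimal sketch of clipping gradients in a training step, assuming a toy linear model and random data (all names are mine); torch.nn.utils.clip_grad_norm_ clips the global norm, while clamping each p.grad element-wise would mirror clamp() above.
import torch
from torch import nn, optim
model = nn.Linear(4, 2)
optimizer = optim.SGD(model.parameters(), lr=0.1)
x, y = torch.randn(8, 4), torch.randn(8, 2)
loss = nn.functional.mse_loss(model(x), y)
optimizer.zero_grad()
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0)  # clip before the update
optimizer.step()
print([p.grad.norm().item() for p in model.parameters()])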
Tensor statistics
"""
statistics
norm: vector/matrix norms
mean, sum
prod
max, min, argmin (index of the minimum), argmax (index of the maximum)
kthvalue (k-th smallest value, e.g. the 8th smallest), topk (the top k values)
"""
norm:
# norm:
a = torch.full([8], 1.)  # use floats so that norm() works
b = a.view(2, 4)
c = a.view(2, 2, 2)
print(a)
print(b)
print(c)
print(a.norm(1), b.norm(1), c.norm(1))  # tensor(8.) tensor(8.) tensor(8.)
print(a.norm(2), b.norm(2), c.norm(2))  # tensor(2.8284) tensor(2.8284) tensor(2.8284)
print(b.norm(1, dim=1))  # norm taken along dim=1, so the 2-D tensor reduces to 1-D: tensor([4., 4.])
print(b.norm(2, dim=1))  # tensor([2., 2.])
print(c.norm(1, dim=0))
print(c.norm(2, dim=0))
mean / sum / min / max / prod (product of all elements)
# mean / sum / min / max / prod (product of all elements)
a = torch.arange(8).view(2, 4).float()
print(a)
"""
tensor([[0., 1., 2., 3.],
[4., 5., 6., 7.]])
"""
print(a.min(), a.max(), a.mean(), a.prod()) # tensor(0.) tensor(7.) tensor(3.5000) tensor(0.)
print(a.sum()) # tensor(28.)
print(a.argmin(), a.argmax()) # tensor(0) tensor(7)
argmin/argmax along a specific dimension
# argmin/argmax along a specific dimension
a = torch.rand(4, 5)
print(a)
print(a.argmax())
print(a.argmax(dim=1))  # along dim=1: the index of the maximum within each row
keepdim
# keepdim
a = torch.rand(4, 10)
print(a)
# print(a.max(dim=1))
print(a.argmax(dim=1))
print(a.max(dim=1, keepdim=True))  # returns both the maxima along dim=1 and their indices, keeping the reduced dim
top-k or k-th
# top-k or k-th
a = torch.rand(4, 10)
print(a.topk(3, dim=1))
print(a.topk(3, dim=1, largest=False))
print(a.kthvalue(8, dim=1))  # the 8th smallest value along dim=1 (kthvalue always counts from the smallest)
"""
torch.return_types.kthvalue(
values=tensor([0.7363, 0.8011, 0.6856, 0.6297]),
indices=tensor([4, 0, 7, 8]))
"""
compare
# compare
"""
> >= < <= != ==
torch.eq(a, b)
"""
a = torch.rand(4, 10)
print(a > 0.5)  # element-wise comparison (rand values lie in [0, 1), so compare against 0.5)
print(torch.gt(a, 0))
print(a != 0)
a = torch.ones(2, 3)
b = torch.randn(2, 3)
"""
疑問: torch.rand()和torch.randn()的區分?
答:rand()是均勻分布,randn()是標准正太分布
"""
print(a)
print(b)
print(torch.eq(a,b))
print(torch.eq(a, a))  # element-wise result
"""
tensor([[True, True, True],
[True, True, True]])
"""
print(torch.equal(a, a))  # True: equal() returns a single bool that is True only if every element matches
Advanced tensor operations
"""
advanced operations
where
gather: collects values by index, like a table lookup; designed so the lookup can run on the GPU rather than the CPU
"""
where
# where
# torch.where(condition,x,y) --> Tensor
# example:
cond = torch.tensor([[0.6769, 0.7271],
[0.8884, 0.4163]])
print(cond)
a = torch.zeros(2, 2)
print(a)
b = torch.ones(2, 2)
print(b)
print(torch.where(cond > 0.5, a, b))  # where the condition holds, take the element from a; otherwise take it from b
gather
# example: retrieve labels (a lookup-table style retrieval)
prob = torch.randn(4, 10)
# print(prob)
idx = prob.topk(dim=1, k=3)
# print(idx)
idx = idx[1]
# print(idx)
label = torch.arange(10) + 100
# print(label)
label_expand = label.expand(4, 10)
print(label_expand)
print(idx)  # these are the indices
print('------------------')
# print(idx.long())  # convert to the LongTensor dtype
print(torch.gather(label_expand, dim=1, index=idx.long()))  # collect values according to the index tensor
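A tiny added example to make the "table lookup" semantics of gather concrete: with dim=1, out[i][j] = table[i][index[i][j]].
table = torch.tensor([[1, 2], [3, 4]])
index = torch.tensor([[0, 0], [1, 0]])
print(torch.gather(table, dim=1, index=index))  # tensor([[1, 1], [4, 3]])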
Gradient
"""
1. magnitude (length): how strongly the function changes
2. direction: the direction of change
"""
Activation functions
Sigmoid / logistic and its gradient
# activation functions
z = torch.linspace(-100, 100, 10)
# sigmoid activation
print(z)
print(torch.sigmoid(z))  # outputs lie in (0, 1)
Tanh
# tanh activation: common in RNNs; outputs lie in (-1, 1)
a = torch.linspace(-1, 1, 10)
print(torch.tanh(a))
Relu
# ReLU activation
# two ways to call it in PyTorch: 1. the nn module (nn.ReLU / F.relu)  2. torch.relu
from torch.nn import functional as F
a = torch.linspace(-1, 1, 10)
print(torch.relu(a))
print(F.relu(a))
Loss functions and their gradients
"""
1. Mean Squared Error
2. Cross Entropy Loss
1. binary
2. multi-class
"""
MSE
Method 1: autograd.grad
# Method 1: autograd.grad
# Mean Squared Error
# note: compared with the L2 norm, MSE does not take the square root
# a simple derivative computed with PyTorch
# here pred = w * x + b
from torch.nn import functional as F
x = torch.ones(1)
w = torch.full([1], 2.)  # use a float so w can require gradients
mse = F.mse_loss(torch.ones(1), x*w)  # first argument: pred, second argument: label
# print(torch.autograd.grad(mse, [w]))  # first argument: the loss, then the parameters w1, w2, w3, ...
"""
RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn?
w函數在初始化的時候沒有設置他需要導數信息,pytorch在建圖的時候標注torch不需要求導信息
"""
# 改變如下:告訴pytorch w需要梯度信息
w.requires_grad_()
print(torch.autograd.grad(mse, [w]))
"""
RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn
更新之后還是會報錯,因為pytorch是一個動態圖
這里更新了w但是圖還沒有更新
因為pytorch是做一步計算一次圖
"""
# 必須經過計算圖的過程重新更新一遍
mse = F.mse_loss(torch.ones(1), x*w) # 動態圖的建圖
print(torch.autograd.grad(mse, [w])) # (tensor([2.]),) 圖重新更新后可以計算出結果
print(mse)
Method 2: loss.backward
# Method 2: loss.backward
from torch.nn import functional as F
x = torch.ones(1)
w = torch.full([1], 2.)
mse = F.mse_loss(torch.ones(1), x*w)
# torch.autograd.grad(mse, [w])
w.requires_grad_()  # let w receive gradients
mse = F.mse_loss(torch.ones(1), x*w)  # recompute to rebuild the dynamic graph
# torch.autograd.grad(mse, [w])  # option 1: ask for the gradients explicitly
mse.backward()  # option 2: backpropagate; the gradient tensor([2.]) is accumulated into w.grad
print(w.grad)
Summary
"""
Gradient API
1. torch.autograd.grad(loss, [w1, w2, ...]) returns the gradients directly:
   [w1 grad, w2 grad, ...]
2. loss.backward() returns nothing; the gradients are accumulated onto each parameter:
   w1.grad
   w2.grad
"""
Softmax
"""
softmax求導:
pi(1-pj) if i=j
-pj*pi if i!=j
1 if i=j
0 if i!=j
"""
import torch
from torch.nn import functional as F
a = torch.rand(3)  # e.g. tensor([0.4207, 0.2955, 0.8440])
print(a.requires_grad_())  # after this, gradients can flow through a: tensor([...], requires_grad=True)
p = F.softmax(a, dim=0)  # the graph is built automatically: tensor([...], grad_fn=<SoftmaxBackward>)
# each call to backward/grad frees the graph's buffers unless retain_graph=True is passed
print(torch.autograd.grad(p[1], [a], retain_graph=True))  # e.g. (tensor([-0.0755, 0.1879, -0.1125]),): the i=1 entry is positive, the others negative
print(torch.autograd.grad(p[2], [a]))  # e.g. (tensor([-0.1349, -0.1125, 0.2473]),): the i=2 entry is positive, the others negative
Perceptron
Single-output perceptron
import torch
from torch.nn import functional as F
x = torch.randn(1, 10)
w = torch.randn(1, 10, requires_grad=True)
print(x)
print(w)
o = torch.sigmoid(x@w.t())  # the bias is omitted here
print(o)
print(torch.ones(1, 1))
loss = F.mse_loss(torch.ones(1, 1), o)
print(loss)
loss.backward()
print(w.grad)
Multi-output perceptron
import torch
from torch.nn import functional as F
x = torch.randn(1, 10)
w = torch.randn(2, 10, requires_grad=True)
print(x)
print(w)
o = torch.sigmoid(x@w.t())
loss = F.mse_loss(torch.ones(1, 2), o)
loss.backward()
print(w.grad)
Chain rule
import torch
x = torch.tensor(1.)
w1 = torch.tensor(2., requires_grad=True)
b1 = torch.tensor(1.)
w2 = torch.tensor(2., requires_grad=True)
b2 = torch.tensor(1.)
y1 = x*w1 + b1
y2 = y1*w2 + b2
dy2_dy1 = torch.autograd.grad(y2, [y1], retain_graph=True)[0]
dy1_dw1 = torch.autograd.grad(y1, [w1], retain_graph=True)[0]
dy2_dw1 = torch.autograd.grad(y2, w1, retain_graph=True)[0]  # w1 may be passed directly or wrapped in a list; grad accepts either
print(dy2_dy1*dy1_dw1)
print(dy2_dw1)
Optimization example
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import pyplot as plt
import torch
def himmelblau(x):
return (x[0] ** 2 + x[1] - 11) ** 2 + (x[0] + x[1] ** 2 - 7) ** 2
# plot the surface
x = np.arange(-6, 6, 0.1)
y = np.arange(-6, 6, 0.1)
print('x,y range:', x.shape, y.shape)
X, Y = np.meshgrid(x, y)  # build 2-D coordinate grids from the 1-D x and y ranges
print('X,Y maps:', X.shape, Y.shape)
Z = himmelblau([X, Y])
fig = plt.figure('himmelblau')
ax = fig.add_subplot(projection='3d')  # fig.gca(projection='3d') is deprecated in newer matplotlib
ax.plot_surface(X, Y, Z)  # plot z = himmelblau(x, y) over the X, Y grid
ax.view_init(60, -30)
ax.set_xlabel('x')
ax.set_ylabel('y')
plt.show()
# find a minimum -- different starting points converge to different minima
# try [1., 0.], [-4, 0.], [4, 0.]
x = torch.tensor([4., 0.], requires_grad=True)  # different initialisations change both the convergence speed and the minimum found, so initialisation matters for gradient descent
optimizer = torch.optim.Adam([x], lr=1e-3)
for step in range(20000):
    pred = himmelblau(x)  # forward pass; the goal is to minimise this value
    optimizer.zero_grad()  # clear the old gradients
    pred.backward()  # compute the gradients of both coordinates, stored in x.grad
    optimizer.step()  # update both coordinates using the gradients
if step % 2000 == 0:
print('step {}: x = {}, f(x) = {}'
.format(step, x.tolist(), pred.item()))
Logistic Regression
Cross Entropy
Entropy
import torch
a = torch.full([4], 1/4)
print(a)
print(a*torch.log2(a))
print(-(a*torch.log2(a)).sum())  # tensor(2.): the uniform distribution has maximum entropy, i.e. maximum uncertainty
b = torch.tensor([0.1, 0.1, 0.1, 0.7])
print(-(b*torch.log2(b)).sum())  # tensor(1.3568): lower entropy, less uncertainty
c = torch.tensor([0.001, 0.001, 0.001, 0.999])
print(-(c*torch.log2(c)).sum())  # tensor(0.0313): very low entropy, the outcome is almost certain
numerical stability
import torch
from torch.nn import functional as F
x = torch.randn(1, 784)
w = torch.randn(10, 784)
logits = x@w.t()
print(logits.shape)
pred = F.softmax(logits, dim=1)
print(pred)
pred_log = torch.log(pred)
loss1 = F.nll_loss(pred_log, torch.tensor([3]))
print(loss1)
loss2 = F.cross_entropy(logits, torch.tensor([3]))  # pass the logits here: cross_entropy = softmax + log + nll_loss combined
print(loss2)
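Equivalently (added check), the same value can be computed with log_softmax, which is the numerically stable way to combine softmax and log:
loss3 = F.nll_loss(F.log_softmax(logits, dim=1), torch.tensor([3]))
print(loss3)  # matches loss2 (and loss1, up to floating-point error)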
Fully connected layers
import torch
import torch.nn as nn
x = torch.randn(1, 784) # torch.Size([1, 784])
print(x.shape)
layer1 = nn.Linear(784, 200)  # first argument: in_features, second argument: out_features
layer2 = nn.Linear(200, 200)
layer3 = nn.Linear(200, 10)
x = layer1(x)
print(x.shape) # torch.Size([1, 200])
x = layer2(x)
print(x.shape) # torch.Size([1, 200])
x = layer3(x)
print(x.shape) # torch.Size([1, 10])
print(x)
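The stack above applies no non-linearity between the layers; a minimal sketch adding ReLU in between, reusing layer1/layer2/layer3 from above (F.relu assumed, as in the next section):
from torch.nn import functional as F
x = torch.randn(1, 784)
x = F.relu(layer1(x), inplace=True)  # [1, 200]
x = F.relu(layer2(x), inplace=True)  # [1, 200]
x = layer3(x)                        # [1, 10]
print(x.shape)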
nn.ReLU vs F.relu
import torch
import torch.nn as nn
from torch.nn import functional as F
x = torch.randn(1, 10)
print(x.shape)
# style 1: the class/module API
class ML(nn.Module):
def __init__(self):
super(ML, self).__init__()
        self.model = nn.Sequential(  # build the model
nn.Linear(784, 200),
nn.ReLU(inplace=True),
nn.Linear(200, 200),
nn.ReLU(inplace=True),
nn.Linear(200, 10),
nn.ReLU(inplace=True),
)
# style 2: the functional API
x = F.relu(x, inplace=True)
GPU acceleration
device = torch.device('cuda:0')  # pick a device; whatever needs accelerating can be moved onto it
# move the computation onto the GPU
criteon = nn.CrossEntropyLoss().to(device)  # for an nn.Module, .to() moves it in place and returns the same reference; for a tensor, .to() returns a new tensor
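A minimal sketch of the usual device pattern with a toy model and random data (the names are mine, not from the notes); note the difference spelled out above: modules are moved in place, tensors get a new copy on the device.
import torch
from torch import nn, optim
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = nn.Linear(10, 2).to(device)   # module: moved in place, same object returned
criteon = nn.CrossEntropyLoss().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01)
x = torch.randn(4, 10).to(device)     # tensor: .to() returns a new tensor on the device
y = torch.tensor([0, 1, 0, 1]).to(device)
loss = criteon(model(x), y)
optimizer.zero_grad()
loss.backward()
optimizer.step()
print(loss.item())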
Computing accuracy
"""
code for computing accuracy
"""
import torch
from torch.nn import functional as F
logits = torch.rand(4, 10)
pred = F.softmax(logits, dim=1)
print(pred)
pred_label = pred.argmax(dim=1)  # index of the largest probability
print(pred_label)
label = torch.tensor([9, 3, 2, 9])
correct = torch.eq(pred_label, label)
print(correct)
print(correct.sum().float().item()/4)  # item() extracts the Python number from a scalar tensor
Visdom visualisation
"""
Visualisation options for PyTorch:
Option 1:
    pip install tensorboardX
    1. requires starting a listening process
Option 2: Visdom
    1. pip install visdom
    2. python -m visdom.server (this starts a web server that renders the data in the browser)
    Possible problem: ERROR:root:Error 404 while downloading https://unpkg.com/layout-bin-packer@1.4.0
    Fix: install from source (download facebookresearch/visdom from GitHub)
        step 1: pip uninstall visdom
        step 2: download the source, cd into visdom-master, then run pip install -e .
        step 3: go back to the home directory and run python -m visdom.server again
        step 4: open the browser at the address it prints
"""
# test:
from visdom import Visdom
viz = Visdom()
"""
{Y的值,X的值} win可以理解為ID(還有一個id叫做env(默認使用main)) opts是額外的配置信息
對於非image還是numpy數據,image數據是tensor
"""
# viz.line([0.], [0.], win='train_loss', opts=dict(title='train loss'))
# viz.line([loss.item()], [global_step], win='train_loss', update='append')
During training
global_step += 1
viz.line([loss.item()], [global_step], win='train_loss', update='append')
During testing
# visualise with viz
viz.line([[test_loss, correct / len(test_loader.dataset)]],
[global_step], win='test', update='append')
viz.images(data.view(-1, 1, 28, 28), win='x')
viz.text(str(pred.detach().cpu().numpy()), win='pred',
opts=dict(title='pred'))
Regularization
optimizer = optim.SGD(net.parameters(), lr=learning_rate, weight_decay=0.01)  # weight_decay=0.01 adds L2 regularization (an L2 penalty on the weights)
Dropout
import torch
net_droped = torch.nn.Sequential(
torch.nn.Linear(784, 200),
torch.nn.Dropout(0.5),  # drop 50% of the neurons (randomly severs connections between the two layers)
torch.nn.ReLU(),
torch.nn.Linear(200, 200),
torch.nn.Dropout(0.5), # drop 50% of the neuron
torch.nn.ReLU(),
torch.nn.Linear(200, 10),
)
"""
在訓練是需要加上Dropout()
但是在test/val是不需要Dropout()
例如:
for epoch in range(epochs):
# train
net_dropped.train()
for batch_idx, (data, targt) in enumerate(train_loader):
...
net_dropped.eval() # 在測試是需要加上這句話去掉dropout
test_loss = 0
correct = 0
for data, target in test_loader:
"""
Convolutional neural networks
import torch.nn as nn
import torch
from torch.nn import functional as F
# first argument: input channels; second: number of kernels (output channels); kernel_size=3x3 -> output [1, 3, 26, 26]
layer = nn.Conv2d(1, 3, kernel_size=3, stride=1, padding=0)
x = torch.rand(1, 1, 28, 28)
out = layer.forward(x)
print(out.shape) # torch.Size([1, 3, 26, 26]) # 26 = (28-3)/1 + 1
layer = nn.Conv2d(1, 3, kernel_size=3, stride=1, padding=1)
out = layer.forward(x)
print(out.shape) # torch.Size([1, 3, 28, 28])
layer = nn.Conv2d(1, 3, kernel_size=3, stride=2, padding=1)
out = layer.forward(x)
print(out.shape) # torch.Size([1, 3, 14, 14])
out = layer(x)  # the preferred call style: nn.Module.__call__ (Python's __call__ magic method) runs forward plus hooks
print(out.shape) # torch.Size([1, 3, 14, 14])
print(layer.weight) # 查看layer的權重
print(layer.weight.shape) # torch.Size([3, 1, 3, 3])
print(layer.bias.shape) # torch.Size([3])
# F.conv2d
# x above was torch.rand(1, 1, 28, 28)
w = torch.rand(16, 3, 5, 5)
b = torch.rand(16)
# out = F.conv2d(x, w, b, stride=1, padding=1)
# print(out)  # errors, because the channel counts of x and w do not match
"""
RuntimeError: Given groups=1, weight of size 16 3 5 5, expected input[1, 1, 28, 28] to have 3 channels,
but got 1 channels instead
"""
x = torch.randn(1, 3, 28, 28)
out = F.conv2d(x, w, b, stride=1, padding=1)
print(out.shape) # torch.Size([1, 16, 26, 26])
out = F.conv2d(x, w, b, stride=2, padding=2)
print(out.shape) # torch.Size([1, 16, 14, 14])
Pooling layers
"""
outline:
    pooling
    upsample
    ReLU
"""
import torch
import torch.nn as nn
from torch.nn import functional as F
x = torch.randn(1, 16, 14, 14)
print(x.shape) # torch.Size([1, 16, 14, 14])
# max pooling, via the nn module
layer = nn.MaxPool2d(2, stride=2)
out = layer(x)
print(out.shape)  # torch.Size([1, 16, 7, 7]): (14-2)/2 + 1 = 7
# average pooling, via the functional API
out = F.avg_pool2d(x, 2, stride=2) # torch.Size([1, 16, 7, 7])
print(out.shape)
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++#
# upsample
# 采用F.interpolate
# interpolate performs interpolation-based resizing
# +++++++++++++++++++++++++++++++++++++++++++++++++++++#
x = out
out = F.interpolate(x, scale_factor=2, mode='nearest')  # nearest-neighbour upsampling
print(out.shape) # torch.Size([1, 16, 14, 14])
out = F.interpolate(x, scale_factor=3, mode='nearest')
print(out.shape) # torch.Size([1, 16, 21, 21])
#------------------------------------------------#
# ReLU activation
#
# ------------------------------------------------#
x = torch.randn(1, 16, 7, 7)
print(x.shape) # torch.Size([1, 16, 7, 7])
# option 1: the nn module API
layer = nn.ReLU(inplace=True)  # inplace=True: the output x' reuses x's memory
out = layer(x)
print(out.shape) # torch.Size([1, 16, 7, 7])
# option 2: the functional API
out = F.relu(x)
print(out.shape) # torch.Size([1, 16, 7, 7])
BatchNorm
import torch
import torch.nn as nn
# ----------------------------#
# BatchNorm1d
# ----------------------------#
x = torch.randn(100, 16) + 0.5
print(x.shape)
layer = torch.nn.BatchNorm1d(16)  # num_features must match the input's feature dimension, otherwise it errors
print(layer.running_mean, layer.running_var)
"""
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])
"""
out = layer(x)
print(layer.running_mean, layer.running_var)
"""
tensor([0.0452, 0.0446, 0.0516, 0.0671, 0.0644, 0.0622, 0.0514, 0.0449, 0.0520,
0.0546, 0.0461, 0.0620, 0.0332, 0.0450, 0.0384, 0.0580])
tensor([0.9868, 0.9935, 1.0214, 1.0137, 1.0009, 0.9895, 1.0065, 1.0319, 0.9841,
1.0051, 0.9967, 0.9968, 1.0045, 0.9877, 1.0011, 1.0031])
"""
#----------------------------------------#
# here x is distributed as N(0.5, 1)
#
# ---------------------------------------#
x = torch.randn(100, 16) + 0.5
layer = torch.nn.BatchNorm1d(16)
for i in range(5):  # each pass through BatchNorm moves running_mean/running_var further towards the batch statistics
out = layer(x)
print(layer.running_mean, layer.running_var)
# ---------------------------#
# nn.BatchNorm2d
# ---------------------------#
x = torch.rand(1, 16, 7, 7)
print(x.shape)
layer = nn.BatchNorm2d(16)
out = layer(x)
print(out.shape) # torch.Size([1, 16, 7, 7])
print(layer.weight)
"""
這里的weight,bias更權重的那個不太一樣
"""
print(layer.weight.shape) # torch.Size([16])
print(layer.bias.shape) # torch.Size([16])
# -----------------------------------#
# class variables
# -----------------------------------#
print(vars(layer))
# ------------------------------------#
# Test
# ------------------------------------#
layer.eval()  # switch the layer into test/eval mode
out = layer(x)
print(vars(layer))
nn.Module
import torch
from torch import nn
from torch import optim
# -----------------------------------#
# Benefits of nn.Module
# 1. all the common building blocks are provided, e.g. Linear / ReLU / Sigmoid
# 2. the nn.Sequential() container (sequential = run in series) accepts both built-in modules and your own
# 3. nn.Module manages parameters automatically
# 4. modules: all nodes / children: direct children
# 5. to(device) (line 84)
# 6. save and load (line 90)
# 7. convenient switching between train/test (line 87)
# 8. implement your own layer as a class (lines 31 / 41); only classes can be placed inside nn.Sequential (line 48)
# -----------------------------------#
class MyLinear(nn.Module):
def __init__(self, inp, outp):
super(MyLinear, self).__init__()
# requires_grad = True
        self.w = nn.Parameter(torch.randn(outp, inp))  # wrapping a tensor in nn.Parameter registers it automatically in the module's parameters()
self.b = nn.Parameter(torch.randn(outp))
def forward(self, x):
x = x @ self.w.t() + self.b
return x
class Flatten(nn.Module):  # flattens everything after the batch dimension
def __init__(self):
super(Flatten, self).__init__()
def forward(self, input):
        return input.view(input.size(0), -1)  # -1 flattens all the remaining dims
class TestNet(nn.Module):
def __init__(self):
super(TestNet, self).__init__()
        self.net = nn.Sequential(nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1),  # kernel_size is required
                                 nn.MaxPool2d(2, 2),
                                 Flatten(),  # custom layer; only class instances can go inside Sequential
                                 nn.Linear(16*14*14, 10))  # 16 channels x 14 x 14 after pooling
def forward(self, x):
return self.net(x)
class BasicNet(nn.Module):
def __init__(self):
super(BasicNet, self).__init__()
self.net = nn.Linear(4, 3)
def forward(self, x):
return self.net(x)
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
        # the nn.Sequential() container (sequential = run in series) accepts built-in modules as well as your own
self.net = nn.Sequential(BasicNet(),
nn.ReLU(),
nn.Linear(3, 2))
def forward(self, x):
return self.net(x)
def main():
device = torch.device('cuda')
net = Net()
    net.to(device)  # for a module, .to() returns the same reference (moved in place); tensors behave differently
    # train
    net.train()
    # test
    net.eval()
    # net.load_state_dict(torch.load('ckpt.mdl'))  # load a saved model at start-up
    #
    #
    # torch.save(net.state_dict(), 'ckpt.mdl')  # save the current model state (e.g. when training is interrupted)
    for name, t in net.named_parameters():
        print('parameters:', name, t.shape)  # print the parameters: weights and biases
    for name, m in net.named_children():  # print the direct children of net (the Sequential)
        print('children:', name, m)
    for name, m in net.named_modules():
        print('modules:', name, m)
if __name__ == '__main__':
main()
Data augmentation
# Data augmentation
# ---------------------------------------#
# these transforms live in the torchvision package
# 1. Flip
# 2. Rotate
# 3. Random move & crop
# 4. GAN: generate additional samples
# 5. Noise: add Gaussian white noise, e.g. N(0, 0.001)
# ---------------------------------------#
batch_size=200
learning_rate=0.01
epochs=10
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([  # Compose chains transforms, similar to nn.Sequential
                       transforms.RandomHorizontalFlip(),  # horizontal flip (applied at random, so it may or may not flip)
                       transforms.RandomVerticalFlip(),  # vertical flip
                       transforms.RandomRotation(15),  # rotate by a random angle within +/-15 degrees
                       transforms.RandomRotation((90, 270)),  # changed from [90, 180, 270]: RandomRotation expects a number or a (min, max) pair, not a list of discrete angles
                       transforms.Resize([32, 32]),  # takes a list/tuple
                       transforms.RandomCrop([28, 28]),  # random crop
                       transforms.ToTensor(),
                       # transforms.Normalize((0.1307,), (0.3081,))
                   ])),  # x is transformed into x'
    batch_size=batch_size, shuffle=True)
CIFAR-10 and ResNet18 in practice
resnet.py
import torch
from torch import nn
from torch.nn import functional as F  # F and nn are often used together
class ResBlk(nn.Module):
"""
resnet block:這里是resnet的一個基本模塊
"""
def __init__(self, ch_in, ch_out, stride=1):
"""
:param ch_in:
:param ch_out:
"""
super(ResBlk, self).__init__()
        # we add stride support for ResBlk, which is distinct from the tutorial version.
self.conv1 = nn.Conv2d(ch_in, ch_out, kernel_size=3, stride=stride, padding=1)
self.bn1 = nn.BatchNorm2d(ch_out)
self.conv2 = nn.Conv2d(ch_out, ch_out, kernel_size=3, stride=1, padding=1)
self.bn2 = nn.BatchNorm2d(ch_out)
        self.extra = nn.Sequential()  # an empty Sequential acts as an identity shortcut
        if ch_out != ch_in:  # if the channel counts differ, map ch_in to ch_out: this is the shortcut branch of the residual block
            # [b, ch_in, h, w] => [b, ch_out, h, w]
            self.extra = nn.Sequential(
                nn.Conv2d(ch_in, ch_out, kernel_size=1, stride=stride),
                nn.BatchNorm2d(ch_out)
            )
    # -------------------------------#
    # Q: why is forward never called explicitly when the module is used?
    # A: nn.Module implements __call__, which runs forward (plus hooks).
    #    Calling the instance like a function, e.g. blk(x), therefore ends up in the
    #    subclass's forward, because the subclass overrides it.
    # -------------------------------#
def forward(self, x):
"""
:param x: [b, ch, h, w]
:return:
"""
        out = F.relu(self.bn1(self.conv1(x)))  # conv -> BN -> ReLU
        out = self.bn2(self.conv2(out))  # conv -> BN
        # short cut (the residual connection)
        # extra module: [b, ch_in, h, w] => [b, ch_out, h, w]
        out = self.extra(x) + out  # element-wise add
        out = F.relu(out)  # final ReLU before returning
        # print('out shape:', out.shape)  # debug print, disabled
return out
class ResNet18(nn.Module):
def __init__(self):
super(ResNet18, self).__init__()
self.conv1 = nn.Sequential(
nn.Conv2d(3, 64, kernel_size=3, stride=3, padding=0),
nn.BatchNorm2d(64)
)
# followed 4 blocks
        # [b, 64, h, w] => [b, 128, h, w]  (note that h and w shrink too, because of the stride)
self.blk1 = ResBlk(64, 128, stride=2)
# [b, 128, h, w] => [b, 256, h, w]
self.blk2 = ResBlk(128, 256, stride=2)
# # [b, 256, h, w] => [b, 512, h, w]
self.blk3 = ResBlk(256, 512, stride=2)
# # [b, 512, h, w] => [b, 1024, h, w]
        self.blk4 = ResBlk(512, 512, stride=2)  # the video used ResBlk(512, 1024) here
        self.outlayer = nn.Linear(512*1*1, 10)  # final fully connected layer
def forward(self, x):
"""
:param x:
:return:
"""
        x = F.relu(self.conv1(x))  # first conv layer followed by ReLU; afterwards x.shape = [128, 64, 10, 10]
        # [b, 64, h, w] => [b, 512, h, w]
        x = self.blk1(x)  # x.shape = torch.Size([128, 128, 5, 5])
        x = self.blk2(x)  # x.shape = torch.Size([128, 256, 3, 3])
        x = self.blk3(x)  # x.shape = torch.Size([128, 512, 2, 2])
        x = self.blk4(x)  # x.shape = torch.Size([128, 512, 2, 2])
        # print('after conv:', x.shape)  # [b, 512, 2, 2]
        # [b, 512, h, w] => [b, 512, 1, 1]
        x = F.adaptive_avg_pool2d(x, [1, 1])
        # print('after pool:', x.shape)
        x = x.view(x.size(0), -1)  # flatten: x.shape = torch.Size([128, 512]); x.size(0) = 128
        x = self.outlayer(x)  # fully connected layer: x.shape = torch.Size([128, 10])
return x
def main():
# ResBlk
blk = ResBlk(64, 128, stride=4)
tmp = torch.randn(2, 64, 32, 32)
out = blk(tmp)
print('block:', out.shape) # block: torch.Size([2, 128, 8, 8])
# ResNet18
x = torch.randn(2, 3, 32, 32)
model = ResNet18()
out = model(x)
print('resnet:', out.shape) # resnet: torch.Size([2, 10])
if __name__ == '__main__':
main()
# ---------------ResNet18 model----------------------------#
"""
ResNet18(
(conv1): Sequential(
(0): Conv2d(3, 64, kernel_size=(3, 3), stride=(3, 3))
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(blk1): ResBlk(
(conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(extra): Sequential(
(0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2))
(1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(blk2): ResBlk(
(conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(extra): Sequential(
(0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2))
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(blk3): ResBlk(
(conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(extra): Sequential(
(0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2))
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(blk4): ResBlk(
(conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(extra): Sequential()
)
(outlayer): Linear(in_features=512, out_features=10, bias=True)
)
"""
# ----------------------------------------------------------------------#
main.py
import torch
from torch.utils.data import DataLoader  # DataLoader loads the data in batches
from torchvision import datasets  # standard datasets shipped with torchvision
from torchvision import transforms
from torch import nn, optim
# from lenet5 import Lenet5  # only needed when the Lenet5 model below is used
from resnet import ResNet18
def main():
    batchsz = 128  # the batch size
    # torchvision provides several standard datasets.  Arguments: target directory, train=True, transform: transforms applied to the data
    cifar_train = datasets.CIFAR10('cifar', True, transform=transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ]), download=False)  # download=True downloads the dataset automatically
    cifar_train = DataLoader(cifar_train, batch_size=batchsz, shuffle=True)  # DataLoader yields a batch at a time; shuffle randomises the order
    cifar_test = datasets.CIFAR10('cifar', False, transform=transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ]), download=False)
    cifar_test = DataLoader(cifar_test, batch_size=batchsz, shuffle=True)
    x, label = next(iter(cifar_train))  # .next() was Python 2; use next()
    print('x:', x.shape, 'label:', label.shape)  # x: torch.Size([128, 3, 32, 32]) label: torch.Size([128])
    device = torch.device('cuda')  # run the computation on the GPU
    # model = Lenet5().to(device)
    model = ResNet18().to(device)
    criteon = nn.CrossEntropyLoss().to(device)  # classification loss; it applies softmax internally, so it takes logits
    optimizer = optim.Adam(model.parameters(), lr=1e-3)  # pass the network's parameters to the optimizer
    print(model)
    for epoch in range(1000):
        model.train()  # switch to train mode
        for batchidx, (x, label) in enumerate(cifar_train):  # iterate over the batches of this epoch
            # x: [b, 3, 32, 32]
            # label: [b]
            x, label = x.to(device), label.to(device)  # move the batch onto the GPU
            logits = model(x)  # logits differ from predictions in that softmax has not been applied yet
            # logits: [b, 10]
            # label: [b]  (the label needs no probabilities)
            # loss: a 0-dim scalar tensor
            loss = criteon(logits, label)  # label plays the role of y
            # backprop
            optimizer.zero_grad()  # without zeroing, the gradients would accumulate
            loss.backward()
            optimizer.step()  # update the weights held by the optimizer
        print(epoch, 'loss:', loss.item())  # item() converts the scalar tensor to a Python number
        # test
        model.eval()  # switch to eval/test mode
        with torch.no_grad():  # no gradients (no graph) are needed for evaluation
            # test
            total_correct = 0  # number of correct predictions
            total_num = 0  # total number of samples
            for x, label in cifar_test:
                # [b, 3, 32, 32]
                # [b]
                x, label = x.to(device), label.to(device)
                # [b, 10]
                logits = model(x)
                # [b]
                pred = logits.argmax(dim=1)
                # [b] vs [b] => scalar tensor
                correct = torch.eq(pred, label).float().sum().item()
                total_correct += correct
                total_num += x.size(0)
                # print(correct)
            acc = total_correct / total_num
            print(epoch, 'test acc:', acc)
if __name__ == '__main__':
main()
Dataset directory layout
-- cifar
--cifar-10-batches-py
--batches.meta
--data_batch_1
--data_batch_2
--data_batch_3
--data_batch_4
--data_batch_5
--readme.html
--test_batch