1. 基本數據結構
和pytorch等中的tensor類似,mxnet中的ndarray或者nd,用來操作矩陣或者張量數據。基本操作類似於Numpy, 支持計算,索引等。
創建矩陣
# Create NDArrays in several ways.
from mxnet import nd  # or: from mxnet import ndarray as nd

# build a matrix
x1 = nd.array([[1, 2], [3, 4]])              # from a nested list
x2 = nd.random.uniform(1, 10, shape=(3, 3))  # 3x3, uniform in [1, 10)
x3 = nd.random.randn(2, 3)                   # 2x3, standard normal
x4 = nd.random.randint(1, 10, shape=(2, 3))  # 2x3, random integers
x5 = nd.ones(shape=(2, 2))                   # 2x2, all ones
x6 = nd.full(shape=(2, 3), val=2)            # 2x3, filled with the value 2
print(x1.shape, x1.size, x1.dtype)           # (2, 2) 4 <class 'numpy.float32'>
操作矩陣
# Elementwise and matrix operations on NDArrays.
x = nd.random.randn(2, 3)
y = nd.random.randn(2, 3)
print(y.exp())         # elementwise exp, 2x3
print(x*y)             # elementwise product, 2x3
print(nd.dot(x, y.T))  # matrix product, 2x2

# Convert between NDArray and numpy.
import numpy as np  # fix: np was used below but never imported

a = y.asnumpy()                # NDArray -> numpy.ndarray
print(a)
a = nd.array(np.ones((2, 3)))  # numpy.ndarray -> NDArray
print(a)
矩陣索引
# Indexing and slice assignment.
y = nd.random.randint(1, 10, shape=(3, 3))
print(y[1, 2])    # one element (value is random, e.g. [5])
print(y[:, 1:3])  # a 3x2 slice
y[:, 1:3] = 2     # assign a scalar to a slice
y[1:2, 0:2] = 4   # assign to a sub-block
print(y)
2. 創建神經網絡
mxnet中gluon包中包含神經網絡創建中的相關操作,和pytorch類似,可以繼承block來創建神經網絡,只需定義網絡結構和實現前向傳播函數。
方式一: 繼承nn.Block
class MyNet(nn.Block):
    """Small LeNet-style CNN: two conv+pool stages followed by three dense layers."""

    def __init__(self):
        super(MyNet, self).__init__()
        self.features = nn.Sequential()
        self.features.add(
            # Unlike pytorch: no input-channel count needed, and the
            # activation can be passed directly to the layer.
            nn.Conv2D(channels=16, kernel_size=5, strides=(1, 1),
                      padding=(0, 0), activation="relu"),
            # fix: keyword was misspelled "stides" in the original
            nn.MaxPool2D(pool_size=(2, 2), strides=2, padding=0),
            nn.Conv2D(channels=32, kernel_size=3, strides=(1, 1),
                      padding=(0, 0), activation="relu"),
            # fix: keyword was misspelled "stides" in the original
            nn.MaxPool2D(pool_size=(2, 2), strides=2, padding=0),
        )
        self.fc = nn.Sequential()
        self.fc.add(
            # Unlike pytorch: no input-vector size needed.
            nn.Dense(units=120, activation="relu"),
            nn.Dense(units=84, activation="relu"),
            nn.Dense(units=10)
        )

    def forward(self, x):
        x = self.features(x)
        x = self.fc(x)
        return x
net = MyNet()
# Parameters must be initialized before the first forward pass
# (pytorch initializes layer by layer instead).
net.initialize()
data = nd.random.uniform(shape=(1, 3, 300, 300))
print(net(data))
方式二:直接利用nn.Sequential
# Build the same network directly with nn.Sequential.
net = nn.Sequential()
net.add(
    # Unlike pytorch: no input-channel count needed; activation set inline.
    nn.Conv2D(channels=16, kernel_size=5, strides=(1, 1),
              padding=(0, 0), activation="relu"),
    nn.MaxPool2D(pool_size=(2, 2), strides=2, padding=0),
    nn.Conv2D(channels=32, kernel_size=3, strides=(1, 1),
              padding=(0, 0), activation="relu"),
    nn.MaxPool2D(pool_size=(2, 2), strides=2, padding=0),
    # Unlike pytorch: no input-vector size needed.
    nn.Dense(units=120, activation="relu"),
    nn.Dense(units=84, activation="relu"),
    nn.Dense(units=10)
)
net.initialize()  # must initialize before the first forward pass
x = nd.random.uniform(shape=(1, 3, 300, 300))
print(net(x))
3. 神經網絡訓練
梯度反向傳播,mxnet會自動求導,需要利用mxnet的autograd,如下:
from mxnet import nd
from mxnet import autograd

x = nd.array([[1, 2], [3, 4]])
x.attach_grad()           # 1. allocate storage for the gradient
with autograd.record():   # 2. ops inside this scope are recorded for autodiff
    y = 2*x*x
y.backward()              # 3. backprop; y is summed first, same as y.sum().backward()
print(x.grad)             # 4. read the gradient
3.1. 加載數據
自己加載數據,主要需要繼承mxnet.gluon.data.Dataset,然後傳遞給mxnet.gluon.data.DataLoader。有幾個坑:
A. Dataset返回img和label, label不能為字符串格式
B. Dataloader中的num_workers設置大於0時, 對於windows系統,由於采用多進程,需要寫在__main__中;若還是報錯時,num_workers改為0
#coding:utf-8
import mxnet as mx
from mxnet import gluon
from mxnet.gluon.data import Dataset, DataLoader
from mxnet.gluon.data.vision import transforms
import os
import cv2


# 1. Subclass mxnet.gluon.data.Dataset; implement __len__ and __getitem__
#    (returning one image and its label).
class MyDataset(Dataset):
    def __init__(self, img_root, anno_file):
        # fix: the original used `assert cond, print(...)` — print() runs
        # eagerly and returns None, so the assertion message was None.
        assert os.path.exists(anno_file), "Annotation file {} not exist".format(anno_file)
        self.img_root = img_root
        self.anno_file = anno_file
        with open(anno_file, "r", encoding="utf-8") as f:
            lines = f.readlines()
        self.items = [line.strip().split() for line in lines if line.strip()]

    def __len__(self):
        return len(self.items)

    def __getitem__(self, x):
        img_name, label = self.items[x]
        img_path = os.path.join(self.img_root, img_name)
        assert os.path.exists(img_path), "img_file {} does not exist".format(img_path)
        img = mx.image.imread(img_path)
        return img, label  # NOTE: a str label raises an error in the DataLoader


if __name__ == "__main__":
    # 2. Hand the dataset to mxnet.gluon.data.DataLoader.
    img_root = r"D:\data\synthtext"
    anno_file = r"D:\data\synthtext\labels.txt"
    dataset = MyDataset(img_root, anno_file)
    transformer = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406),
                             std=(0.229, 0.224, 0.225))
    ])
    # transform_first applies the transform to the first item returned by
    # __getitem__ (i.e. the image only, not the label).
    train_data = DataLoader(dataset.transform_first(transformer),
                            batch_size=2, shuffle=True, num_workers=0)
    print(train_data)
    for img, label in train_data:
        print(label)
        print(img.shape)
3.2 定義網絡
見文章上面第二點
3.3 定義損失函數
gluon.loss包含了部分常用的Loss,如下:
# Common losses available in gluon.loss.
loss = gluon.loss.SoftmaxCrossEntropyLoss()  # cross-entropy loss
loss = gluon.loss.L2Loss()                   # mean-squared-error loss
loss = gluon.loss.CTCLoss()                  # CTC loss
loss = gluon.loss.L1Loss()                   # L1 loss
# No SmoothL1 found in gluon.loss; two related APIs exist:
# mx.nd.smooth_l1(); mx.metric.Loss("SmoothL1")
__all__ = ['Loss', 'L2Loss', 'L1Loss',
'SigmoidBinaryCrossEntropyLoss', 'SigmoidBCELoss',
'SoftmaxCrossEntropyLoss', 'SoftmaxCELoss',
'KLDivLoss', 'CTCLoss', 'HuberLoss', 'HingeLoss',
'SquaredHingeLoss', 'LogisticLoss', 'TripletLoss', 'PoissonNLLLoss', 'CosineEmbeddingLoss']
3.4 定義優化器
優化器定義在gluon.Trainer() ,第一個參數params為網絡參數,第二個參數optimizer為優化器的名字,第三個參數optimizer_params為傳給優化器的參數
支持的optimizer如下:
__all__ = [ 'AdaDelta', 'AdaGrad', 'Adam', 'Adamax', 'DCASGD', 'FTML', 'Ftrl', 'LBSGD', 'NAG', 'NDabs', 'Nadam', 'Optimizer', 'RMSProp', 'SGD', 'SGLD', 'Signum', 'Test', 'ccSGD', ]
共同支持的optimizer_params如下: (不同優化器還有其特定的參數)

Parameters rescale_grad (float, optional, default 1.0) – Multiply the gradient with rescale_grad before updating. Often choose to be 1.0/batch_size. param_idx2name (dict from int to string, optional, default None) – A dictionary that maps int index to string name. clip_gradient (float, optional, default None) – Clip the gradient by projecting onto the box [-clip_gradient, clip_gradient]. learning_rate (float) – The initial learning rate. If None, the optimization will use the learning rate from lr_scheduler. If not None, it will overwrite the learning rate in lr_scheduler. If None and lr_scheduler is also None, then it will be set to 0.01 by default. lr_scheduler (LRScheduler, optional, default None) – The learning rate scheduler. wd (float, optional, default 0.0) – The weight decay (or L2 regularization) coefficient. Modifies objective by adding a penalty for having large weights. sym (Symbol, optional, default None) – The Symbol this optimizer is applying to. begin_num_update (int, optional, default 0) – The initial number of updates. multi_precision (bool, optional, default False) – Flag to control the internal precision of the optimizer. False: results in using the same precision as the weights (default), True: makes internal 32-bit copy of the weights and applies gradients in 32-bit precision even if actual weights used in the model have lower precision. Turning this on can improve convergence and accuracy when training with float16. param_dict (dict of int -> gluon.Parameter, default None) – Dictionary of parameter index to gluon.Parameter, used to lookup parameter attributes such as lr_mult, wd_mult, etc. param_dict shall not be deep copied. aggregate_num (int, optional, default None) – Number of weights to be aggregated in a list. They are passed to the optimizer for a single optimization step. In default, only one weight is aggregated. When aggregate_num is set to numpy.inf, all the weights are aggregated. 
use_fused_step (bool, optional, default None) – Whether or not to use fused kernels for optimizer. When use_fused_step=False, step is called, otherwise, fused_step is called. Properties – ---------- – learning_rate – The current learning rate of the optimizer. Given an Optimizer object optimizer, its learning rate can be accessed as optimizer.learning_rate.
常用優化器使用如下:
# Optimizers
# 1. SGD with momentum
gluon.Trainer(params=net.collect_params(), optimizer="SGD",
              optimizer_params={"learning_rate": 0.001, "wd": 0.00005, "momentum": 0.9})
# 2. Adaptive methods
# AdaGrad
gluon.Trainer(params=net.collect_params(), optimizer="AdaGrad",
              optimizer_params={"learning_rate": 0.001, "wd": 0.00005})
# RMSProp
gluon.Trainer(params=net.collect_params(), optimizer="RMSProp",
              optimizer_params={"learning_rate": 0.001, "wd": 0.00005, "momentum": 0.9})
# Adam — fix: the original passed optimizer="RMSProp" here despite the label
gluon.Trainer(params=net.collect_params(), optimizer="Adam",
              optimizer_params={"learning_rate": 0.001, "wd": 0.00005})
3.5 模型訓練
# Standard train/validate loop (assumes net, trainer, train_data, valid_data,
# softmax_cross_entropy, acc, batch_size and time are already defined).
for epoch in range(10):
    train_loss, train_acc, valid_acc = 0., 0., 0.
    tic = time.time()
    for data, label in train_data:
        # forward + backward
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        # update parameters
        trainer.step(batch_size)
        # accumulate training metrics
        train_loss += loss.mean().asscalar()
        train_acc += acc(output, label)
    # validation accuracy
    for data, label in valid_data:
        valid_acc += acc(net(data), label)
    print("Epoch %d: loss %.3f, train acc %.3f, test acc %.3f, in %.1f sec" % (
        epoch, train_loss / len(train_data), train_acc / len(train_data),
        valid_acc / len(valid_data), time.time() - tic))
4. 網絡參數保存和加載
Block 只能保存網絡參數,如下:
net = nn.Sequential()
# Unlike pytorch, Conv2D/Dense need no input-channel/input-size argument,
# and the activation can be given directly to the layer.
net.add(nn.Conv2D(channels=16, kernel_size=5, strides=(1, 1),
                  padding=(0, 0), activation="relu"))
net.add(nn.MaxPool2D(pool_size=(2, 2), strides=2, padding=0))
net.add(nn.Conv2D(channels=32, kernel_size=3, strides=(1, 1),
                  padding=(0, 0), activation="relu"))
net.add(nn.MaxPool2D(pool_size=(2, 2), strides=2, padding=0))
net.add(nn.Dense(units=120, activation="relu"))
net.add(nn.Dense(units=84, activation="relu"))
net.add(nn.Dense(units=10))
# 1. Save only the network's weights.
net.save_parameters("checkpoint.params")

# 2. Load the weights back.
#    ctx: target device, defaults to CPU
#    allow_missing: if True, parameters present in the network but absent
#                   from the file are skipped instead of raising
#    ignore_extra: if True, parameters present in the file but absent
#                  from the network are skipped instead of raising
net.load_parameters("checkpoint.params", ctx=None, allow_missing=False,
                    ignore_extra=False, cast_dtype=False, dtype_source='current')
HybridBlock可以像Block一樣保存網絡參數,也可以同時保存網絡結構和網絡參數, 如下:
net = nn.HybridSequential()
net.add(
    # Unlike pytorch: no input-channel count needed; activation set inline.
    nn.Conv2D(channels=16, kernel_size=5, strides=(1, 1),
              padding=(0, 0), activation="relu"),
    nn.MaxPool2D(pool_size=(2, 2), strides=2, padding=0),
    nn.Conv2D(channels=32, kernel_size=3, strides=(1, 1),
              padding=(0, 0), activation="relu"),
    nn.MaxPool2D(pool_size=(2, 2), strides=2, padding=0),
    # Unlike pytorch: no input-vector size needed.
    nn.Dense(units=120, activation="relu"),
    nn.Dense(units=84, activation="relu"),
    nn.Dense(units=10)
)
# 1. A HybridBlock can export structure AND weights together.
#    hybridize() plus one forward pass are required before export().
net.initialize()
net.hybridize()
x = mx.nd.zeros((1, 3, 100, 100))
print(net(x))
# writes checkpoint-0001.params and checkpoint-symbol.json
net.export(path="./checkpoint", epoch=1)
# net.save_parameters("./checkpoint.params")
# 2. Load the exported structure (json) and weights (params).
#    (also reachable as mx.gluon.SymbolBlock.imports())
net = gluon.SymbolBlock.imports(symbol_file="./checkpoint-symbol.json",
                                input_names=["data"],
                                # fix: export above used epoch=1, so the file
                                # is checkpoint-0001.params, not -0100
                                param_file="./checkpoint-0001.params",
                                ctx=mx.cpu())
net.hybridize()
x = mx.nd.zeros((1, 3, 100, 100))
print(net(x))
# net = mx.mod.Module.load(prefix="./checkpoint", epoch=1)
5. 使用GPU
在進行訓練和計算時網絡參數和數據必須在同一環境下,同在CPU或同在GPU,采用GPU計算矩陣時能加速運算;可以在GPU上操作數據和網絡,如下:
數據:可以在GPU上創建數據,也可以在CPU上創建數據,再移動到GPU
# 1. Create data on the GPU directly, or create on CPU and move to GPU.
# create on GPU
x = mx.nd.zeros((1, 3, 100, 100), ctx=mx.gpu(0))
print(x)
# create on CPU, copy a replica to GPU
x = mx.nd.zeros((1, 3, 100, 100))
x = x.copyto(mx.gpu(0))
print(x)
# create on CPU, move to GPU
x = mx.nd.zeros((1, 3, 100, 100))
x = x.as_in_context(mx.gpu(0))
print(x)
網絡:可以在GPU上加載網絡參數,或者在CPU上加載,隨后移動到GPU
# 2. Initialize/load network parameters on the GPU, or move them there later.
net = nn.Sequential()
net.add(nn.Conv2D(channels=16, kernel_size=3, strides=1, padding=1),
        nn.Dense(18))
# initialize / load parameters directly on the GPU
net.initialize(init=mx.init.Xavier(), ctx=mx.gpu(0))
net.load_parameters("./checkpoint.params", ctx=mx.gpu(0))
# or initialize on the CPU, then move everything to the GPU
net.initialize(init=mx.init.Xavier())
net.collect_params().reset_ctx(mx.gpu())
https://github.com/apache/incubator-mxnet
https://zhuanlan.zhihu.com/p/39420301
http://mxnet.incubator.apache.org/