http://blog.csdn.net/zzzzzzz0407/article/details/69831388
网上有许多FCN网络的安装和训练教程,但却没有代码解读的详细教程,这让我这种刚刚入门深度学习的萌新不知所措;为了弄清楚FCN,不知走了多少弯路,想把它记录下来,给自己看看,也希望能帮助到那些和我一样刚刚入门的人。
以下只是小弟的一些拙见,若有错误与不足欢迎指出~
话不多说,上代码:
首先是solve.py文件,fcn没有用sh脚本去写训练文件,应该是和他自己写的surgery.py和score.py有关;里面有两个问题我希望有大牛可以帮忙解决:
1. for _ in range(50):
solver.step(2000)
加上这个循环以后,我发现solver.prototxt中设置的max_iter不再起作用;
2. score.seg_tests(solver, False, test, layer='score_sem', gt='sem')
score.seg_tests(solver, False, test, layer='score_geo', gt='geo')
语义分割和几何分割的解读
# solve.py
# Training driver: builds the SGD solver, initialises the network from VGG
# weights via surgery.transplant, sets the upsampling layers to bilinear
# kernels, then alternates training and scoring.
import caffe
import surgery
import score

import numpy as np
import os  # operating-system directory and file helpers
import sys

# Optional nicety: name the process after the current working directory,
# e.g. a cwd of /home/zhangrf/fcn gives the process title "fcn".
# Only a missing setproctitle package is tolerated; other errors surface.
try:
    import setproctitle
except ImportError:
    pass
else:
    setproctitle.setproctitle(os.path.basename(os.getcwd()))

# Weights for fine-tuning; only referenced by the disabled copy_from call below.
weights = '../ilsvrc-nets/vgg16-fcn.caffemodel'
# Weights and model definition loaded into the donor VGG network.
vgg_weights = '../ilsvrc-nets/vgg16-fcn.caffemodel'
vgg_proto = '../ilsvrc-nets/VGG_ILSVRC_16_layers_deploy.prototxt'

# init
# Disabled alternative: take the device id from the first command-line
# argument (sys.argv[0] is the script name, sys.argv[1] the argument after it).
#caffe.set_device(int(sys.argv[1]))
caffe.set_device(0)   # device id 0
caffe.set_mode_gpu()  # run in GPU mode

# Disabled alternative initialisation (copy the weights file directly):
#solver.net.copy_from(weights)
solver = caffe.SGDSolver('solver.prototxt')  # SGD solver configured by solver.prototxt
vgg_net = caffe.Net(vgg_proto, vgg_weights, caffe.TRAIN)  # donor net with its weights
surgery.transplant(solver.net, vgg_net)  # move the donor parameters into the solver's net
del vgg_net  # the donor net is no longer needed

# surgeries
# Every parameterised layer whose name contains 'up' is set to a bilinear
# interpolation kernel.
interp_layers = [k for k in solver.net.params.keys() if 'up' in k]
surgery.interp(solver.net, interp_layers)

# scoring
test = np.loadtxt('../data/sift-flow/test.txt', dtype=str)  # test image identifiers

# The loop itself fixes the schedule: 50 rounds of 2000 solver steps, with
# scoring after each round.
# NOTE(review): this is presumably why max_iter in solver.prototxt appears to
# have no effect -- confirm against the Caffe solver documentation.
for _ in range(50):
    solver.step(2000)
    # N.B. metrics on the semantic labels are off b.c. of missing classes;
    # score manually from the histogram instead for proper evaluation
    score.seg_tests(solver, False, test, layer='score_sem', gt='sem')  # semantic labels
    score.seg_tests(solver, False, test, layer='score_geo', gt='geo')  # geometric labels
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
surgery.py是精髓,全连接层参数到全卷积层的参数转化是靠它完成的,每一步都很巧妙,如沐春风~
# surgery.py
from __future__ import division  # true division for `/` on Python 2 as well
import caffe
import numpy as np


def _log(*parts):
    """Print the parts separated by single spaces (same text on Python 2 and 3)."""
    print(' '.join(str(part) for part in parts))


def transplant(new_net, net, suffix=''):
    """
    Transfer weights by copying matching parameters, coercing parameters of
    incompatible shape, and dropping unmatched parameters.

    The coercion is useful to convert fully connected layers to their
    equivalent convolutional layers, since the weights are the same and only
    the shapes are different.  In particular, equivalent fully connected and
    convolution layers have shapes O x I and O x I x H x W respectively for O
    output channels, I input channels, H kernel height, and W kernel width.

    new_net: destination net (the FCN in the accompanying solve script).
    net: source net (the VGG net in the accompanying solve script).
    suffix: appended to each source layer name to form the destination name.

    Both `net` and `new_net` arguments must be instantiated `caffe.Net`s.
    """
    for p in net.params:
        p_new = p + suffix
        if p_new not in new_net.params:
            # Source layer has no counterpart in the destination net.
            _log('dropping', p)
            continue
        src_blobs = net.params[p]
        dst_blobs = new_net.params[p_new]
        for i in range(len(src_blobs)):
            if i >= len(dst_blobs):
                # The source layer has more parameter blobs than the
                # destination layer; the extra ones are skipped.
                _log('dropping', p, i)
                break
            src_data = src_blobs[i].data
            dst_data = dst_blobs[i].data
            if src_data.shape != dst_data.shape:
                # Same values, different layout (e.g. fully connected -> conv).
                _log('coercing', p, i, 'from', src_data.shape, 'to', dst_data.shape)
            else:
                _log('copying', p, ' -> ', p_new, i)
            # Element-order copy: works for both the matching and the coerced
            # case because only the flattened order is used.
            # NOTE(review): this does not check that both blobs hold the same
            # number of elements.
            dst_blobs[i].data.flat = src_data.flat


def upsample_filt(size):
    """
    Make a 2D bilinear kernel suitable for upsampling.

    size: side length of the square kernel.
    Returns a (size, size) array formed as the product of a column and a row
    triangular profile.
    """
    factor = (size + 1) // 2
    if size % 2 == 1:
        center = factor - 1
    else:
        center = factor - 0.5
    # og[0] is a (size, 1) column of indices and og[1] a (1, size) row;
    # multiplying the two profiles broadcasts to (size, size).
    og = np.ogrid[:size, :size]
    return (1 - abs(og[0] - center) / factor) * \
           (1 - abs(og[1] - center) / factor)


def interp(net, layers):
    """
    Set weights of each layer in layers to bilinear kernels for interpolation.

    Raises ValueError if a layer's first two weight dimensions differ (unless
    the second is 1) or if its filters are not square.
    """
    for l in layers:
        m, k, h, w = net.params[l][0].data.shape
        if m != k and k != 1:
            raise ValueError('input + output channels need to be the same or |output| == 1')
        if h != w:
            raise ValueError('filters need to be square')
        filt = upsample_filt(h)
        # Paired indexing writes the kernel only at positions (i, i) -- or
        # (i, 0) when k == 1; every other filter is left as it was, so each
        # channel is upsampled on its own without mixing channels.
        net.params[l][0].data[range(m), range(k), :, :] = filt


def expand_score(new_net, new_layer, net, layer):
    """
    Transplant an old score layer's parameters into a new score layer that has
    at least as many classes, so that the first outputs of the new layer are
    the old classes.

    Not called by the accompanying solve script.
    """
    old_cl = net.params[layer][0].num  # number of outputs in the old layer
    new_net.params[new_layer][0].data[:old_cl][...] = net.params[layer][0].data
    # The bias blob is indexed as 4-D here, with the classes on the last axis.
    new_net.params[new_layer][1].data[0,0,0,:old_cl][...] = net.params[layer][1].data
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 61
- 62
- 63
- 64
- 65
- 66
- 67
- 68
- 69
- 70
- 71
- 72
- 73
- 74
- 75
- 76
- 77
- 78
score.py是评估文件。FCN没有使用caffe框架自带的test测试,而是自己写了一个评估文件,里面包含许多评估指标。不过小弟才疏学浅,对其中很多评估指标的概念还完全没有接触过,比方说IU的概念、overall accuracy和mean accuracy的区别,希望有大牛可以科普。由于对这些概念不了解,加上数据量巨大,这部分没能很好地解读,实在有愧。
# score.py
from __future__ import division  # true division for `/` on Python 2 as well
import caffe
import numpy as np
import os
import sys
from datetime import datetime
from PIL import Image


def _log(*parts):
    """Print the parts separated by single spaces (same text on Python 2 and 3)."""
    print(' '.join(str(part) for part in parts))


def fast_hist(a, b, n):
    """
    Build an n x n confusion matrix from two flat label arrays.

    a: first label array (rows of the result); entries outside [0, n) are
       ignored together with the matching entries of b.
    b: second label array (columns of the result).
    n: number of classes.
    """
    k = (a >= 0) & (a < n)
    # Encode each (a, b) pair as one integer, count, and unfold to n x n.
    return np.bincount(n * a[k].astype(int) + b[k], minlength=n**2).reshape(n, n)


def compute_hist(net, save_dir, dataset, layer='score', gt='label'):
    """
    Run the net once per dataset entry and accumulate the confusion matrix.

    net: net to run; must expose blobs named by `layer`, `gt` and 'loss'.
    save_dir: directory for the predicted label images, or a falsy value to
        skip saving.
    dataset: sequence of sample identifiers (used for file names and count).
    layer: blob holding the per-class scores.
    gt: blob holding the ground-truth labels.

    Returns (hist, mean_loss); rows of hist are ground truth, columns are
    predictions.
    """
    n_cl = net.blobs[layer].channels  # number of classes = score channels
    if save_dir and not os.path.isdir(save_dir):
        # Tolerate an existing directory and create missing parents.
        os.makedirs(save_dir)
    hist = np.zeros((n_cl, n_cl))
    loss = 0
    for idx in dataset:
        net.forward()
        # Prediction is the arg-max over the class axis of the first item.
        prediction = net.blobs[layer].data[0].argmax(0)
        hist += fast_hist(net.blobs[gt].data[0, 0].flatten(),
                          prediction.flatten(),
                          n_cl)

        if save_dir:
            im = Image.fromarray(prediction.astype(np.uint8), mode='P')
            im.save(os.path.join(save_dir, idx + '.png'))
        # compute the loss as well
        loss += net.blobs['loss'].data.flat[0]
    return hist, loss / len(dataset)


def seg_tests(solver, save_format, dataset, layer='score', gt='label'):
    """
    Score the solver's first test net on `dataset`.

    solver: solver object providing `net`, `test_nets` and `iter`.
    save_format: format string for the output directory (formatted with the
        iteration count), or a falsy value to skip saving.
    dataset, layer, gt: forwarded to do_seg_tests.
    """
    _log('>>>', datetime.now(), 'Begin seg tests')
    # NOTE(review): presumably makes the test net use the training net's
    # current weights -- confirm against the pycaffe Net.share_with docs.
    solver.test_nets[0].share_with(solver.net)
    do_seg_tests(solver.test_nets[0], solver.iter, save_format, dataset, layer, gt)


def do_seg_tests(net, iter, save_format, dataset, layer='score', gt='label'):
    """
    Compute and print the segmentation metrics for `net`; return the
    confusion matrix.

    iter: iteration count, used in the log lines and to fill `save_format`.
    """
    if save_format:
        save_format = save_format.format(iter)
    hist, loss = compute_hist(net, save_format, dataset, layer, gt)
    # mean loss
    _log('>>>', datetime.now(), 'Iteration', iter, 'loss', loss)
    # overall accuracy: correctly labelled pixels / all counted pixels
    acc = np.diag(hist).sum() / hist.sum()
    _log('>>>', datetime.now(), 'Iteration', iter, 'overall accuracy', acc)
    # per-class accuracy: correct pixels of a class / ground-truth pixels of
    # that class, averaged over classes (classes with no pixels give NaN and
    # are skipped by nanmean)
    acc = np.diag(hist) / hist.sum(1)
    _log('>>>', datetime.now(), 'Iteration', iter, 'mean accuracy', np.nanmean(acc))
    # per-class IU: intersection / union of prediction and ground truth,
    # i.e. diag / (row sum + column sum - diag)
    iu = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
    _log('>>>', datetime.now(), 'Iteration', iter, 'mean IU', np.nanmean(iu))
    # frequency-weighted IU: each class's IU weighted by its share of pixels
    freq = hist.sum(1) / hist.sum()
    _log('>>>', datetime.now(), 'Iteration', iter, 'fwavacc',
         (freq[freq > 0] * iu[freq > 0]).sum())
    return hist
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
以上是我对fcn代码的一点拙见,这次经历也让我明白,未接触过的代码并不可怕,只要沉下心来,一条一条的进行调试,你也可以知其所以然,发现其中奥秘。在研究生生涯开始之际写下我的第一篇博客,希望自己能在今后的学习生涯中保持这份初心,送给自己,也送给看过这篇博客的有缘人。