DLT（DeepLearningTracker）學習與代碼理解 (1)

本文轉載自查看原文 2014-12-13 15:14 2153 代碼/ 深度學習/ 目標跟蹤/ DLT

持續更新中……

DLT是香港科大的Naiyan Wang和Dit-Yan Yeung提出的，文章和代碼見 http://winsty.net/dlt.html

相關論文是 Learning A Deep Compact Image Representation for Visual Tracking (NIPS 2013)

論文本身並無太多亮點，可以說基本沒講啥。

大體思路是先用SDAE（Stacked Denoising AutoEncoder）對很多數據進行離線訓練，離線訓練的目的是提取特征。然后再用來在線跟蹤。跟蹤時使用粒子濾波來完成。

還是從研究代碼開始吧。

run_individual.m只是提供了一些圖像信息及參數，核心的函數在run_DLT.m里實現。

在這里必須吐槽一下MATLAB 2012a，如果系統語言為英文的話，居然沒法在M文件編輯器里用中文寫注釋。

  1 function results=run_DLT(seq, res_path, bSaveImage) 
  2     addpath('affineUtility');
  3     addpath('drawUtility');
  4     addpath('imageUtility');
  5     addpath('NN');
  6     rand('state',0);  randn('state',0);
  7     
  8     %  如果opt未定義  
  9     if isfield(seq, 'opt')
 10         opt = seq.opt;
 11     else
 12         trackparam_DLT;
 13     end
 14     rect=seq.init_rect; % [水平左側 垂直上方 寬度 高度 0] 
 15     % 下面它會再蛋疼地改成 [水平中心 垂直中心 寬度 高度 0] 
 16     p = [rect(1)+rect(3)/2, rect(2)+rect(4)/2, rect(3), rect(4), 0];
 17     
 18     frame = imread(seq.s_frames{1});    % seq.s_frames 里放着所有圖像的路徑
 19     if size(frame,3)==3
 20         frame = double(rgb2gray(frame));   
 21     end
 22     
 23     scaleHeight = size(frame, 1) / opt.normalHeight;    % 讀入圖像的高度/標准圖像的高度，下同 24     scaleWidth = size(frame, 2) / opt.normalWidth;
 25     p(1) = p(1) / scaleWidth;   % 將GT中的目標位置參數按比例轉成標准圖像大小的對應值 26     p(3) = p(3) / scaleWidth;
 27     p(2) = p(2) / scaleHeight;
 28     p(4) = p(4) / scaleHeight;
 29     frame = imresize(frame, [opt.normalHeight, opt.normalWidth]); % 將讀入圖像進行比例放縮
 30     frame = double(frame) / 255;    
 31     
 32     
 33     % paramOld [ 模板水平中心 模板垂直中心, 寬度/標准模板寬度(32)
 34     %               0, 模板寬高比/標准模板寬高比(1:1), 0  ]
 35     paramOld = [p(1), p(2), p(3)/opt.tmplsize(2), p(5), p(4) /p(3) / (opt.tmplsize(1) / opt.tmplsize(2)), 0];
 36     param0 = affparam2mat(paramOld);    
 37     % param0 [ 模板水平中心 模板垂直中心, 寬度/標准模板寬度(32), 0, 高度/標准模板高度(32), 0 ]
 39    
 40     
 41     if ~exist('opt','var')  opt = [];  end
 42     if ~isfield(opt,'minopt')
 43       opt.minopt = optimset; opt.minopt.MaxIter = 25; opt.minopt.Display='off';
 44     end
 45     reportRes = [];
 46     
 47     % 處理模板（ 變換后） 
 48     tmpl.mean = warpimg(frame, param0, opt.tmplsize); % tmlsize = 32X32
 49     tmpl.basis = [];
 50     
 51     % 10個正樣本，在模板內取 
 52     for i = 1 : opt.maxbasis / 10
 53         tmpl.basis(:, (i - 1) * 10 + 1 : i * 10) = samplePos_DLT(frame, param0, opt.tmplsize);
 54     end
 55     % 100個負樣本（好像是在模板外隨機……待驗證） 
 56     p0 = paramOld(5);
 57     tmpl.basis(:, opt.maxbasis + 1 : 100 + opt.maxbasis) = sampleNeg(frame, param0, opt.tmplsize, 100, opt, 8);
 58 
 59     param.est = param0;
 60     param.lastUpdate = 1;
 61 
 62     wimgs = [];
 63 
 64     % draw initial track window
 65     drawopt = drawtrackresult([], 0, frame, tmpl, param, []);
 66     drawopt.showcondens = 0;  drawopt.thcondens = 1/opt.numsample;
 67     if (bSaveImage)
 68         imwrite(frame2im(getframe(gcf)),sprintf('%s0000.jpg',res_path));    
 69     end
 70     
 71     % track the sequence from frame 2 onward
 72     duration = 0; tic;
 73     if (exist('dispstr','var'))  dispstr='';  end
 74     L = [ones(opt.maxbasis, 1); (-1) * ones(100, 1)];
 75     
 76     % initialize NN and train
 77     nn = initDLT(tmpl, L);
 78     L = [];
 79     % add tmpl.basis to pos(:,11) cuz opt.maxbasis = 10
 80     pos = tmpl.basis(:, 1 : opt.maxbasis);
 81     pos(:, opt.maxbasis + 1) = tmpl.basis(:, 1);
 82     opts.numepochs = 5 ;
 83     
 84     % image iteration start
 85     for f = 1:size(seq.s_frames,1)  
 86       frame = imread(seq.s_frames{f});
 87       
 88       % if RGB ->gray
 89       if size(frame,3)==3
 90         frame = double(rgb2gray(frame));
 91       end  
 92       frame = imresize(frame, [opt.normalHeight, opt.normalWidth]);
 93       frame = double(frame) / 255;
 94 
 95       % do tracking
 96        param = estwarp_condens_DLT(frame, tmpl, param, opt, nn, f);
 97 
 98       % do update
 99 
100       temp = warpimg(frame, param.est', opt.tmplsize);
101       pos(:, mod(f - 1, opt.maxbasis) + 1) = temp(:);
102       if  param.update
103           opts.batchsize = 10;
104           % Sample two set of negative samples at different range.
105           neg = sampleNeg(frame, param.est', opt.tmplsize, 49, opt, 8);
106           neg = [neg sampleNeg(frame, param.est', opt.tmplsize, 50, opt, 4)];
107           nn = nntrain(nn, [pos neg]', [ones(opt.maxbasis + 1, 1); zeros(99, 1)], opts);
108       end
109 
110       duration = duration + toc;
111       
112       res = affparam2geom(param.est);
113       p(1) = round(res(1));
114       p(2) = round(res(2)); 
115       p(3) = round(res(3) * opt.tmplsize(2));
116       p(4) = round(res(5) * (opt.tmplsize(1) / opt.tmplsize(2)) * p(3));
117       p(5) = res(4);
118       p(1) = p(1) * scaleWidth;
119       p(3) = p(3) * scaleWidth;
120       p(2) = p(2) * scaleHeight;
121       p(4) = p(4) * scaleHeight;
122       paramOld = [p(1), p(2), p(3)/opt.tmplsize(2), p(5), p(4) /p(3) / (opt.tmplsize(1) / opt.tmplsize(2)), 0];
123       
124       reportRes = [reportRes;  affparam2mat(paramOld)];
125       
126       tmpl.basis = [pos];
127       drawopt = drawtrackresult(drawopt, f, frame, tmpl, param, []);
128       if (bSaveImage)
129           imwrite(frame2im(getframe(gcf)),sprintf('%s/%04d.jpg',res_path,f));
130       end
131       tic;
132     end
133     duration = duration + toc
134     fprintf('%d frames took %.3f seconds : %.3fps\n',f,duration,f/duration);
135     results.res=reportRes;
136     results.type='ivtAff';
137     results.tmplsize = opt.tmplsize;
138     results.fps = f/duration;
139 end

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 LearnOpenGL學習筆記（一）——現有代碼理解學習筆記163—理解模型正則化：L1正則、L2正則（理論+代碼） Pytorch學習筆記13----LSTM+CRF模型的CRF層原理與代碼理解從代碼角度理解什么是蜜罐 StarGAN論文及代碼理解 Faster rcnn代碼理解（2） Vitess 學習（1）理解Vitess 深度學習—卷積的理解 BERT學習理解關於線性基的學習與理解