Annotations on Hinton's Original Deep Learning Code


The MATLAB example code comes in two parts, corresponding to two different papers:

1. Reducing the Dimensionality of Data with Neural Networks

  mnistdeepauto.m   backprop.m   rbmhidlinear.m

2. A Fast Learning Algorithm for Deep Belief Nets

  mnistclassify.m   backpropclassify.m

The remaining files are shared by both pipelines.
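
To run either pipeline end to end, call the corresponding entry script from the MATLAB prompt. A minimal usage sketch, assuming the raw MNIST files and all of the scripts are in the working directory:

mnistdeepauto   % deep autoencoder pipeline (the Science paper)
mnistclassify   % DBN classification pipeline (annotated below)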

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
mnistclassify.m
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

clear all
close all

maxepoch=50; % number of pretraining epochs per RBM
numhid=500; numpen=500; numpen2=2000; % sizes of the three hidden layers

fprintf(1,'Converting Raw files into Matlab format \n');
converter;

fprintf(1,'Pretraining a deep autoencoder. \n');
fprintf(1,'The Science paper used 50 epochs. This uses %3i \n', maxepoch);

makebatches; % split the data into mini-batches
[numcases numdims numbatches]=size(batchdata); % dimensions of batchdata:
%% numcases   number of cases per batch
%% numdims    dimensionality of each data vector
%% numbatches number of batches

fprintf(1,'Pretraining Layer 1 with RBM: %d-%d \n',numdims,numhid); % image input layer to the first hidden layer
restart=1; % tell rbm.m to (re)initialize its parameters
rbm; % train this RBM (rbm.m is a script that reads batchdata from the workspace)
hidrecbiases=hidbiases; % keep the hidden-unit biases
save mnistvhclassify vishid hidrecbiases visbiases; % save the layer-1 weights and biases

fprintf(1,'\nPretraining Layer 2 with RBM: %d-%d \n',numhid,numpen); % first hidden layer to the second hidden layer
batchdata=batchposhidprobs; % the previous RBM's hidden-layer outputs become this RBM's input
numhid=numpen; % set the number of hidden units; the number of visible units is implied by the input data
restart=1;
rbm;
hidpen=vishid; penrecbiases=hidbiases; hidgenbiases=visbiases; % as above: keep the weights and biases
save mnisthpclassify hidpen penrecbiases hidgenbiases;

fprintf(1,'\nPretraining Layer 3 with RBM: %d-%d \n',numpen,numpen2); % second hidden layer to the third; the rest is as above
batchdata=batchposhidprobs;
numhid=numpen2;
restart=1;
rbm;
hidpen2=vishid; penrecbiases2=hidbiases; hidgenbiases2=visbiases;
save mnisthp2classify hidpen2 penrecbiases2 hidgenbiases2;

backpropclassify; % discriminative fine-tuning (next listing)
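
The three pretraining blocks above are one idiom repeated: train an RBM, save its parameters, then feed its hidden-unit probabilities in as the visible data of the next RBM. A condensed sketch of that loop, with illustrative variable names (the actual script unrolls it by hand so that each layer's parameters can be saved under distinct names):

% Greedy layer-wise pretraining written as a loop (sketch only).
layersizes = [numhid numpen numpen2]; % 500, 500, 2000
weights = cell(1,3); recbiases = cell(1,3); genbiases = cell(1,3);
for layer = 1:3
    numhid = layersizes(layer);
    restart = 1;                   % make rbm.m reinitialize its parameters
    rbm;                           % trains on whatever batchdata currently holds
    weights{layer} = vishid;       % keep this layer's weights and biases
    recbiases{layer} = hidbiases;
    genbiases{layer} = visbiases;
    batchdata = batchposhidprobs;  % hidden probabilities feed the next RBM
end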

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
backpropclassify.m
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
maxepoch=200;
fprintf(1,'\nTraining discriminative model on MNIST by minimizing cross entropy error. \n'); % fine-tune the whole network by minimizing the cross-entropy
fprintf(1,'60 batches of 1000 cases each. \n');

load mnistvhclassify % load the pretrained weights and biases of every layer
load mnisthpclassify
load mnisthp2classify

makebatches; % split the data into mini-batches
[numcases numdims numbatches]=size(batchdata);
N=numcases; % number of cases per batch

%%%% PREINITIALIZE WEIGHTS OF THE DISCRIMINATIVE MODEL%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

w1=[vishid; hidrecbiases]; % layer-1 weights with the hidden biases appended as an extra row
w2=[hidpen; penrecbiases]; % likewise for layer 2
w3=[hidpen2; penrecbiases2]; % likewise for layer 3
w_class = 0.1*randn(size(w3,2)+1,10); % random (size(w3,2)+1)-by-10 matrix of top-layer classifier weights

%%%%%%%%%% END OF PREINITIALIZATION OF WEIGHTS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

l1=size(w1,1)-1; % number of units in each layer, excluding the bias unit
l2=size(w2,1)-1;
l3=size(w3,1)-1;
l4=size(w_class,1)-1; % number of units in the top hidden layer
l5=10; % number of label units
test_err=[];
train_err=[];


for epoch = 1:maxepoch

%%%%%%%%%%%%%%%%%%%% COMPUTE TRAINING MISCLASSIFICATION ERROR %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
err=0;
err_cr=0;
counter=0;
[numcases numdims numbatches]=size(batchdata);
%% numcases   number of cases per batch
%% numdims    dimensionality of each data vector
%% numbatches number of batches
N=numcases; % number of cases per batch
for batch = 1:numbatches
data = [batchdata(:,:,batch)]; % read one batch of data
target = [batchtargets(:,:,batch)]; % read the targets for this batch
data = [data ones(N,1)]; % append a column of N ones so the bias row of each weight matrix takes effect
w1probs = 1./(1 + exp(-data*w1)); w1probs = [w1probs ones(N,1)]; % sigmoid activations of each layer, as in standard backprop
w2probs = 1./(1 + exp(-w1probs*w2)); w2probs = [w2probs ones(N,1)];
w3probs = 1./(1 + exp(-w2probs*w3)); w3probs = [w3probs ones(N,1)];

targetout = exp(w3probs*w_class); % unnormalized outputs of the label layer (N-by-10)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% The 10 label units form a "softmax" group: w3probs*w_class is the input
% to the label units, and the normalization below turns the exponentiated
% inputs into a probability distribution over the 10 classes, so the most
% probable unit can be taken as the prediction.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
targetout = targetout./repmat(sum(targetout,2),1,10); % divide each of the 10 label outputs by their row sum

[I J]=max(targetout,[],2); % J is the predicted class: the column index of each row's largest output
[I1 J1]=max(target,[],2); % J1 is the true class from the 1-of-10 target coding
counter=counter+length(find(J==J1)); % count the correctly classified cases
err_cr = err_cr- sum(sum( target(:,1:end).*log(targetout))) ; % accumulate the cross-entropy error
end
train_err(epoch)=(numcases*numbatches-counter); % total number of misclassified training cases
train_crerr(epoch)=err_cr/numbatches; % average cross-entropy error per batch

%%%%%%%%%%%%%% END OF COMPUTING TRAINING MISCLASSIFICATION ERROR %%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%% COMPUTE TEST MISCLASSIFICATION ERROR %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
err=0;
err_cr=0;
counter=0;
[testnumcases testnumdims testnumbatches]=size(testbatchdata);

N=testnumcases;
for batch = 1:testnumbatches
data = [testbatchdata(:,:,batch)];
target = [testbatchtargets(:,:,batch)];
data = [data ones(N,1)];
w1probs = 1./(1 + exp(-data*w1)); w1probs = [w1probs ones(N,1)];
w2probs = 1./(1 + exp(-w1probs*w2)); w2probs = [w2probs ones(N,1)];
w3probs = 1./(1 + exp(-w2probs*w3)); w3probs = [w3probs ones(N,1)];
targetout = exp(w3probs*w_class);
targetout = targetout./repmat(sum(targetout,2),1,10);

[I J]=max(targetout,[],2);
[I1 J1]=max(target,[],2);
counter=counter+length(find(J==J1));
err_cr = err_cr- sum(sum( target(:,1:end).*log(targetout))) ;
end
test_err(epoch)=(testnumcases*testnumbatches-counter);
test_crerr(epoch)=err_cr/testnumbatches;
fprintf(1,'Before epoch %d Train # misclassified: %d (from %d). Test # misclassified: %d (from %d) \t \t \n',...
epoch,train_err(epoch),numcases*numbatches,test_err(epoch),testnumcases*testnumbatches);

%%%%%%%%%%%%%% END OF COMPUTING TEST MISCLASSIFICATION ERROR %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

tt=0;
for batch = 1:numbatches/10
fprintf(1,'epoch %d batch %d\r',epoch,batch);

%%%%%%%%%%% COMBINE 10 MINIBATCHES INTO 1 LARGER MINIBATCH %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% combine 10 mini-batches into one batch of 1000 cases, then fine-tune it with conjugate gradient
tt=tt+1;
data=[];
targets=[];
for kk=1:10
data=[data
batchdata(:,:,(tt-1)*10+kk)]; % stack the 10 mini-batches vertically
targets=[targets
batchtargets(:,:,(tt-1)*10+kk)];
end

%%%%%%%%%%%%%%% PERFORM CONJUGATE GRADIENT WITH 3 LINESEARCHES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
max_iter=3; % number of conjugate-gradient line searches per call to minimize

if epoch<6 % First update top-level weights holding other weights fixed.
N = size(data,1); % number of cases in the combined batch
XX = [data ones(N,1)]; % append a column of ones for the biases
w1probs = 1./(1 + exp(-XX*w1)); w1probs = [w1probs ones(N,1)];
w2probs = 1./(1 + exp(-w1probs*w2)); w2probs = [w2probs ones(N,1)];
w3probs = 1./(1 + exp(-w2probs*w3)); %w3probs = [w3probs ones(N,1)];

VV = [w_class(:)']'; % unroll w_class into a single column, because minimize expects the parameters as one D-by-1 vector
Dim = [l4; l5]; % sizes of the top two layers: the 2000-unit hidden layer and the 10-unit label layer
[X, fX] = minimize(VV,'CG_CLASSIFY_INIT',max_iter,Dim,w3probs,targets); % train only the top layer; see the function definition below
% minimize is Carl Rasmussen's conjugate-gradient "minimize" code.
%%------------------ parameters ------------------%%
% VV       the unrolled weight vector; must be a column (D by 1)
% X        the optimized parameters of f = 'CG_CLASSIFY_INIT'
% fX       the function values at the points visited during optimization
% max_iter if positive, the number of line searches; if negative, the maximum allowed number of function evaluations
%%-------------------------------------------------%
w_class = reshape(X,l4+1,l5); % restore the weight-matrix shape

else % fine-tune all the layers jointly
VV = [w1(:)' w2(:)' w3(:)' w_class(:)']'; % unroll every weight matrix into one column vector
Dim = [l1; l2; l3; l4; l5]; % pass in the number of units in each layer
[X, fX] = minimize(VV,'CG_CLASSIFY',max_iter,Dim,data,targets);

w1 = reshape(X(1:(l1+1)*l2),l1+1,l2); % restore the w1 weight matrix
xxx = (l1+1)*l2; % running offset into X, used to recover each subsequent matrix
w2 = reshape(X(xxx+1:xxx+(l2+1)*l3),l2+1,l3);
xxx = xxx+(l2+1)*l3;
w3 = reshape(X(xxx+1:xxx+(l3+1)*l4),l3+1,l4);
xxx = xxx+(l3+1)*l4;
w_class = reshape(X(xxx+1:xxx+(l4+1)*l5),l4+1,l5);

end
%%%%%%%%%%%%%%% END OF CONJUGATE GRADIENT WITH 3 LINESEARCHES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%

end

save mnistclassify_weights w1 w2 w3 w_class
save mnistclassify_error test_err test_crerr train_err train_crerr;

end
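
A side note on the softmax used throughout this file: exponentiating w3probs*w_class directly can overflow once the label inputs grow large. The standard remedy, which leaves the normalized outputs unchanged because softmax is shift-invariant, is to subtract each row's maximum before exponentiating. A minimal sketch, not part of Hinton's code:

% Numerically stable version of the two softmax lines above.
classin = w3probs*w_class;                          % N-by-10 label inputs
classin = classin - repmat(max(classin,[],2),1,10); % shift each row by its max
targetout = exp(classin);                           % now safe to exponentiate
targetout = targetout./repmat(sum(targetout,2),1,10);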

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
rbm.m
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
epsilonw = 0.1; % Learning rate for weights
epsilonvb = 0.1; % Learning rate for biases of visible units
epsilonhb = 0.1; % Learning rate for biases of hidden units
weightcost = 0.0002; % L2 weight-decay coefficient
initialmomentum = 0.5; % momentum used for the first five epochs
finalmomentum = 0.9; % momentum used afterwards

[numcases numdims numbatches]=size(batchdata);
%% numcases   number of cases per batch
%% numdims    dimensionality of each data vector
%% numbatches number of batches

if restart ==1,
restart=0;
epoch=1;

% Initializing symmetric weights and biases.
vishid = 0.1*randn(numdims, numhid); % small random weights between the visible and hidden layers
hidbiases = zeros(1,numhid); % hidden-unit biases
visbiases = zeros(1,numdims); % visible-unit biases

poshidprobs = zeros(numcases,numhid); % hidden-unit probabilities, positive phase
neghidprobs = zeros(numcases,numhid); % hidden-unit probabilities, negative phase
posprods = zeros(numdims,numhid); % positive-phase pairwise statistics <v_i h_j>
negprods = zeros(numdims,numhid); % negative-phase pairwise statistics <v_i h_j>
vishidinc = zeros(numdims,numhid); % increments for the visible-to-hidden weights
hidbiasinc = zeros(1,numhid); % increments for the hidden biases
visbiasinc = zeros(1,numdims); % increments for the visible biases
batchposhidprobs=zeros(numcases,numhid,numbatches); % hidden probabilities for every batch; becomes the input to the next RBM
end

for epoch = epoch:maxepoch, % loop over training epochs, printing the epoch and batch being processed
fprintf(1,'epoch %d\r',epoch);
errsum=0; % reset the reconstruction error for this epoch
for batch = 1:numbatches, % process one mini-batch at a time
fprintf(1,'epoch %d batch %d\r',epoch,batch);

%%%%%%%%% START POSITIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
data = batchdata(:,:,batch); % read all the cases in the current batch: v_0
poshidprobs = 1./(1 + exp(-data*vishid - repmat(hidbiases,numcases,1))); % forward pass: hidden probabilities h_0
batchposhidprobs(:,:,batch)=poshidprobs; % store them; after the final epoch these become the next RBM's input
posprods = data' * poshidprobs; % positive statistics <v_0 h_0> for contrastive divergence

poshidact = sum(poshidprobs); % hidden activities summed over the batch
posvisact = sum(data); % visible activities summed over the batch

%%%%%%%%% END OF POSITIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
poshidstates = poshidprobs > rand(numcases,numhid); % Gibbs sampling: binary hidden states

%%%%%%%%% START NEGATIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
negdata = 1./(1 + exp(-poshidstates*vishid' - repmat(visbiases,numcases,1))); % reconstruct the visible units v_1 from h_0
neghidprobs = 1./(1 + exp(-negdata*vishid - repmat(hidbiases,numcases,1))); % compute h_1 from v_1
negprods = negdata'*neghidprobs; % negative statistics <v_1 h_1>

neghidact = sum(neghidprobs);
negvisact = sum(negdata);

%%%%%%%%% END OF NEGATIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
err= sum(sum( (data-negdata).^2 )); % squared reconstruction error for this batch
errsum = err + errsum; % accumulated over the whole epoch

if epoch>5, % raise the momentum after the first five epochs
momentum=finalmomentum;
else
momentum=initialmomentum;
end;

%%%%%%%%% UPDATE WEIGHTS AND BIASES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
vishidinc = momentum*vishidinc + ...
epsilonw*( (posprods-negprods)/numcases - weightcost*vishid); % weight increment: CD gradient estimate plus weight decay
visbiasinc = momentum*visbiasinc + (epsilonvb/numcases)*(posvisact-negvisact); % visible-bias increment
hidbiasinc = momentum*hidbiasinc + (epsilonhb/numcases)*(poshidact-neghidact); % hidden-bias increment

vishid = vishid + vishidinc;
visbiases = visbiases + visbiasinc;
hidbiases = hidbiases + hidbiasinc;

%%%%%%%%%%%%%%%% END OF UPDATES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

end
fprintf(1, 'epoch %4i error %6.1f \n', epoch, errsum);
end;
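
Note that rbm.m is a script, not a function: everything it reads (batchdata, numhid, maxepoch, restart) must already exist in the workspace, and its results (vishid, hidbiases, visbiases, batchposhidprobs) are left there. A quick smoke test on synthetic data, with hypothetical sizes:

% Drive rbm.m on random binary data and watch the reconstruction error fall.
batchdata = double(rand(100,784,10) > 0.5); % 10 batches of 100 cases, 784 dims
numhid   = 50;                              % small hidden layer for the test
maxepoch = 5;
restart  = 1;                               % reinitialize all parameters
rbm;                                        % prints "epoch ... error ..." per epoch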

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
CG_CLASSIFY_INIT.M
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [f, df] = CG_CLASSIFY_INIT(VV,Dim,w3probs,target); % conjugate-gradient objective for training the top two layers only
l1 = Dim(1);
l2 = Dim(2);
N = size(w3probs,1);
% Do deconversion.
w_class = reshape(VV,l1+1,l2); % restore the weight-matrix shape
w3probs = [w3probs ones(N,1)]; % append a column of ones for the biases

targetout = exp(w3probs*w_class); % unnormalized label outputs, an N-by-10 matrix
targetout = targetout./repmat(sum(targetout,2),1,10); % softmax normalization, as in backpropclassify.m
f = -sum(sum( target(:,1:end).*log(targetout))) ; % cross-entropy loss

IO = (targetout-target(:,1:end)); % difference between the softmax outputs and the targets
Ix_class=IO; % for softmax with cross-entropy, the error signal is simply (output - target): the softmax derivative cancels against the log
dw_class = w3probs'*Ix_class; % gradient of f with respect to w_class

df = [dw_class(:)']'; % unroll the gradient into a column vector, as minimize requires
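
Since the error signal reduces to (output - target), the analytic gradient in df is easy to verify against finite differences. A hedged sketch of such a check; the sizes and probe point below are arbitrary test values, not part of the original distribution:

% Compare the analytic gradient with a central finite-difference estimate.
l4 = 20; l5 = 10; N = 5;                            % tiny sizes for speed
Dim = [l4; l5];
w3probs = rand(N,l4);                               % fake top-layer activations
target = eye(l5); target = target(randi(l5,N,1),:); % random 1-of-10 targets
VV = 0.1*randn((l4+1)*l5,1);                        % random unrolled weights
[f, df] = CG_CLASSIFY_INIT(VV,Dim,w3probs,target);
h = 1e-6; i = randi(numel(VV));                     % probe one random coordinate
e = zeros(size(VV)); e(i) = h;
fp = CG_CLASSIFY_INIT(VV+e,Dim,w3probs,target);
fm = CG_CLASSIFY_INIT(VV-e,Dim,w3probs,target);
fprintf('analytic %g vs numeric %g\n', df(i), (fp-fm)/(2*h));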

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
CG_CLASSIFY.M
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This function fine-tunes all the weights of the network jointly.
% Each step mirrors the annotations in CG_CLASSIFY_INIT.m.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [f, df] = CG_CLASSIFY(VV,Dim,XX,target);
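
The listing above was truncated to the signature. For completeness, here is a sketch of what the body must do, following the forward pass shown in backpropclassify.m and the gradient pattern of CG_CLASSIFY_INIT.m: standard backpropagation through the three sigmoid layers, not a verbatim copy of Hinton's file:

function [f, df] = CG_CLASSIFY(VV,Dim,XX,target);
l1=Dim(1); l2=Dim(2); l3=Dim(3); l4=Dim(4); l5=Dim(5);
N = size(XX,1);
% Unpack the unrolled parameter vector back into the weight matrices.
w1 = reshape(VV(1:(l1+1)*l2),l1+1,l2);
xxx = (l1+1)*l2;
w2 = reshape(VV(xxx+1:xxx+(l2+1)*l3),l2+1,l3);
xxx = xxx+(l2+1)*l3;
w3 = reshape(VV(xxx+1:xxx+(l3+1)*l4),l3+1,l4);
xxx = xxx+(l3+1)*l4;
w_class = reshape(VV(xxx+1:xxx+(l4+1)*l5),l4+1,l5);

% Forward pass, identical to the one in backpropclassify.m.
XX = [XX ones(N,1)];
w1probs = 1./(1 + exp(-XX*w1)); w1probs = [w1probs ones(N,1)];
w2probs = 1./(1 + exp(-w1probs*w2)); w2probs = [w2probs ones(N,1)];
w3probs = 1./(1 + exp(-w2probs*w3)); w3probs = [w3probs ones(N,1)];
targetout = exp(w3probs*w_class);
targetout = targetout./repmat(sum(targetout,2),1,10);
f = -sum(sum( target.*log(targetout))); % cross-entropy loss

% Backward pass: (output - target) at the top, then the sigmoid
% derivative probs.*(1-probs) at each hidden layer, dropping the
% bias column before propagating further down.
Ix_class = targetout-target;
dw_class = w3probs'*Ix_class;
Ix3 = (Ix_class*w_class').*w3probs.*(1-w3probs); Ix3 = Ix3(:,1:end-1);
dw3 = w2probs'*Ix3;
Ix2 = (Ix3*w3').*w2probs.*(1-w2probs); Ix2 = Ix2(:,1:end-1);
dw2 = w1probs'*Ix2;
Ix1 = (Ix2*w2').*w1probs.*(1-w1probs); Ix1 = Ix1(:,1:end-1);
dw1 = XX'*Ix1;

df = [dw1(:)' dw2(:)' dw3(:)' dw_class(:)']'; % unroll for minimize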


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
rbmhidlinear.m
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Identical to rbm.m except that the hidden-unit values are computed by linear units; everything else is the same.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
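
Concretely, the change is confined to how the hidden-unit values are computed and sampled (Hinton's version of this file also uses much smaller learning rates, since linear units make the updates less stable). A hedged sketch of the lines that differ from rbm.m:

% Positive phase: hidden units are linear, with no sigmoid.
poshidprobs = (data*vishid) + repmat(hidbiases,numcases,1);
% Sampling adds unit-variance Gaussian noise instead of thresholding.
poshidstates = poshidprobs + randn(numcases,numhid);
% Negative phase: the hidden activities are computed the same linear way.
neghidprobs = (negdata*vishid) + repmat(hidbiases,numcases,1);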
