卷積神經網絡
深度神經網絡的重要性在於,它開啟了通向複雜非線性模型和對知識進行分層處理的系統方法的大門。人們開發了很多提取圖像特征的技術:SIFT、HoG、Textons、Spin image(旋轉圖像)、RIFT、GLOH等。卷積神經網絡的特點和優勢在於自動提取特征。
卷積層生成稱為特征映射圖(feature map)的新圖像,它突出了原始圖像的獨特特征。卷積濾波器矩陣的值是通過訓練過程確定的。
池化是一種在其他許多圖像處理方法中采用的典型技術。池化層在一定程度上補償了目標的偏心和傾斜。由於池化縮減了圖像大小,它對減輕計算負載和防止過擬合是非常有益的。
MNIST示例
MNIST數據集中,60000幅圖像用於訓練,10000幅圖像用於測試,每幅都是28*28黑白圖像。本例使用10000幅圖像,訓練測試比為8:2。網絡結構如下表所示:
層 | 備注 | 激活函數 |
輸入層 | 28*28個節點 | |
卷積層 | 20個9*9卷積濾波器 | ReLU |
池化層 | 一個平均池化(2*2) | |
隱含層 | 100個節點 | ReLU |
輸出層 | 10個節點 | Softmax |
網絡結構定義如下:
function [W1, W5, Wo] = MnistConv(W1, W5, Wo, X, D)
% MnistConv  Train the CNN for one epoch with mini-batch SGD and momentum.
%
%   W1 : convolution filter bank; W1(:, :, c) is the c-th filter.
%   W5 : weight matrix from the flattened pooling output to the hidden layer.
%   Wo : weight matrix from the hidden layer to the output layer.
%   X  : input images; X(:, :, k) is the k-th sample.
%   D  : labels; D(k) is the 1-based class index of sample k.
%
% Returns the three updated weight arrays.
%
% Network evaluated per sample:
%   Conv(x, W1) -> ReLU -> Pool (2x2 mean) -> flatten -> W5 -> ReLU
%   -> Wo -> Softmax
%
% Samples are consumed in consecutive batches of bsize. A trailing partial
% batch (N not a multiple of bsize) is skipped, and no update happens at
% all when N < bsize.
%
% The number of filters and the number of classes are read from
% size(W1, 3) and size(Wo, 1) instead of being fixed to 20 and 10.
% The pooling back-propagation assumes 2x2 mean pooling and feature maps
% with even height and width (the kron-upsampled error must match y2).

alpha = 0.01;                       % learning rate
beta  = 0.95;                       % momentum coefficient

momentum1 = zeros(size(W1));
momentum5 = zeros(size(W5));
momentumo = zeros(size(Wo));

N          = length(D);
numFilters = size(W1, 3);
numClasses = size(Wo, 1);

% Mini-batch start indices
bsize = 100;
blist = 1:bsize:(N-bsize+1);

% Loop-invariant pieces of the mean-pooling gradient: every input pixel of
% a 2x2 window receives 1/4 of the pooled error.
poolWindow = ones([2 2]);
poolWeight = 1 / (2*2);

% One epoch loop
for batch = 1:length(blist)
    dW1 = zeros(size(W1));
    dW5 = zeros(size(W5));
    dWo = zeros(size(Wo));

    % Mini-batch loop: accumulate the weight updates of bsize samples,
    % they are averaged after the loop.
    first = blist(batch);
    for k = first:first+bsize-1
        % Forward pass = inference
        % (sizes in the comments are for 28x28 inputs, 9x9x20 filters and
        %  a 100x2000 W5)
        x  = X(:, :, k);            % input image
        y1 = Conv(x, W1);           % convolution, 20x20x20
        y2 = ReLU(y1);
        y3 = Pool(y2);              % pooling, 10x10x20
        y4 = reshape(y3, [], 1);    % flattened, 2000x1
        v5 = W5*y4;                 % hidden pre-activation, size(W5,1)x1
        y5 = ReLU(v5);
        v  = Wo*y5;                 % output pre-activation, numClasses x 1
        y  = Softmax(v);

        % One-hot encoding (sub2ind rejects labels outside 1..numClasses)
        d = zeros(numClasses, 1);
        d(sub2ind(size(d), D(k), 1)) = 1;

        % Backpropagation
        e      = d - y;             % output layer
        delta  = e;                 % used directly, no derivative factor

        e5     = Wo' * delta;       % hidden (ReLU) layer
        delta5 = (y5 > 0) .* e5;

        e4     = W5' * delta5;      % pooling layer
        e3     = reshape(e4, size(y3));

        % Spread every pooled error evenly over its 2x2 source window
        e2 = zeros(size(y2));
        for c = 1:numFilters
            e2(:, :, c) = kron(e3(:, :, c), poolWindow) .* poolWeight;
        end

        delta2 = (y2 > 0) .* e2;    % ReLU layer

        % Convolutional layer: filter gradient for each channel
        delta1_x = zeros(size(W1));
        for c = 1:numFilters
            delta1_x(:, :, c) = conv2(x, rot90(delta2(:, :, c), 2), 'valid');
        end

        dW1 = dW1 + delta1_x;
        dW5 = dW5 + delta5*y4';
        dWo = dWo + delta *y5';
    end

    % Average the accumulated updates over the batch
    dW1 = dW1 / bsize;
    dW5 = dW5 / bsize;
    dWo = dWo / bsize;

    % Momentum update; the momentum terms persist across batches
    momentum1 = alpha*dW1 + beta*momentum1;
    W1        = W1 + momentum1;

    momentum5 = alpha*dW5 + beta*momentum5;
    W5        = W5 + momentum5;

    momentumo = alpha*dWo + beta*momentumo;
    Wo        = Wo + momentumo;
end
end
其中卷積定義為:
function y = Conv(x, W)
% CONV  Apply every filter of a filter bank to one image.
%
%   x : 2-D input image.
%   W : filter bank; W(:, :, k) is the k-th filter.
%   y : stacked feature maps of size
%       (rows(x)-rows(W)+1) x (cols(x)-cols(W)+1) x numFilters.
%
% Each filter is rotated by 180 degrees before it is passed to conv2 with
% the 'valid' option, so only positions where the filter fully overlaps
% the image contribute to the output.
[kRows, kCols, nKernels] = size(W);
outRows = size(x, 1) - kRows + 1;
outCols = size(x, 2) - kCols + 1;

y = zeros(outRows, outCols, nKernels);
for idx = 1:nKernels
    flipped = rot90(squeeze(W(:, :, idx)), 2);
    y(:, :, idx) = conv2(x, flipped, 'valid');
end
end
池化定義為:
function y = Pool(x)
% POOL  2x2 mean pooling with stride 2, applied to every page of x.
%
%   x : feature maps; x(:, :, k) is pooled independently for each k.
%   y : pooled maps of size floor(rows/2) x floor(cols/2) x numFilters.
%
% The mean of every 2x2 window is obtained with a 'valid' convolution
% against a constant 1/4 kernel; keeping every second row and column of
% that result yields the non-overlapping windows. For odd heights or
% widths the last row/column is left out, which is why the output size is
% computed with floor (a fractional size would make zeros() fail).
[xrow, xcol, numFilters] = size(x);
y = zeros(floor(xrow/2), floor(xcol/2), numFilters);

meanKernel = ones(2) / (2*2);       % same kernel for every page
for k = 1:numFilters
    windowMeans = conv2(x(:, :, k), meanKernel, 'valid');
    y(:, :, k)  = windowMeans(1:2:end, 1:2:end);
end
end
測試代碼如下:
% Train the CNN on the first 8000 images of the MNIST t10k set and measure
% the accuracy on the remaining 2000.
clear all
% loadMNISTImages returns one column per image; restore the 28x28 layout.
Images = loadMNISTImages('./MNIST/t10k-images.idx3-ubyte');
Images = reshape(Images, 28, 28, []);
Labels = loadMNISTLabels('./MNIST/t10k-labels.idx1-ubyte');
% Digit 0 becomes class 10 so that every label is a valid 1-based index
% (labels are compared with the index returned by max() further below).
Labels(Labels == 0) = 10; % 0 --> 10
% Fixed seed; the weights below are drawn in this order: W1, W5, Wo.
rng(1);
% Learning
%
% W1: 20 convolution filters of 9x9, small Gaussian values.
W1 = 1e-2*randn([9 9 20]);
% W5 (100x2000) and Wo (10x100): uniform values scaled by sqrt(6)/sqrt(...).
% NOTE(review): Wo is scaled with its own dimensions (10 + 100), whereas W5
% uses 360 + 2000 although it has 100 rows -- 360 looks like a leftover
% from a different hidden-layer size; confirm before changing, since it
% affects the trained result.
W5 = (2*rand(100, 2000) - 1) * sqrt(6) / sqrt(360 + 2000);
Wo = (2*rand( 10, 100) - 1) * sqrt(6) / sqrt( 10 + 100);
% Training split: first 8000 samples.
X = Images(:, :, 1:8000);
D = Labels(1:8000);
for epoch = 1:3
% No semicolon: the current epoch number is echoed to the console.
epoch
[W1, W5, Wo] = MnistConv(W1, W5, Wo, X, D);
end
% No variable list is given, so the whole workspace is written; at this
% point X and D still hold the training split.
save('MnistConv.mat');
% Test
%
% Test split: remaining 2000 samples.
X = Images(:, :, 8001:10000);
D = Labels(8001:10000);
acc = 0;
N = length(D);
for k = 1:N
x = X(:, :, k); % Input, 28x28
y1 = Conv(x, W1); % Convolution, 20x20x20
y2 = ReLU(y1); %
y3 = Pool(y2); % Pool, 10x10x20
y4 = reshape(y3, [], 1); % Flattened, 2000x1
v5 = W5*y4; % Hidden layer pre-activation, 100x1
y5 = ReLU(v5); %
v = Wo*y5; % Output layer pre-activation, 10x1
y = Softmax(v); %
% Predicted class = index of the largest output.
[~, i] = max(y);
if i == D(k)
acc = acc + 1;
end
end
% Fraction of correctly classified test samples.
acc = acc / N;
fprintf('Accuracy is %f\n', acc);
三輪運算后,結果為Accuracy is 0.946500。同時生成MnistConv.mat文件。下面分析圖像在卷積神經網絡每一層中的實際演變過程。代碼如下:
% Visualise how one sample is transformed by the trained network:
% input image, learned filters, and the feature maps after the
% convolution, ReLU and pooling stages.
clear all
load('MnistConv.mat')

% Sample to inspect
k = 2;
x = X(:, :, k);

% Forward pass through the whole network
y1 = Conv(x, W1);               % convolution feature maps
y2 = ReLU(y1);                  % rectified feature maps
y3 = Pool(y2);                  % mean-pooled feature maps
y4 = reshape(y3, [], 1);        % flattened pooling output
v5 = W5*y4;                     % hidden layer pre-activation
y5 = ReLU(v5);
v = Wo*y5;                      % output layer pre-activation
y = Softmax(v);

% --- Input image ---------------------------------------------------------
figure;
display_network(x(:));
title('Input Image')

% --- Convolution filters: one 9x9 filter per column ----------------------
convFilters = reshape(W1(:, :, 1:20), 9*9, 20);
figure
display_network(convFilters);
title('Convolution Filters')

% --- Feature maps after the convolution: one 20x20 map per column --------
fList = reshape(y1(:, :, 1:20), 20*20, 20);
figure
display_network(fList);
title('Features [Convolution]')

% --- Feature maps after the ReLU ------------------------------------------
fList = reshape(y2(:, :, 1:20), 20*20, 20);
figure
display_network(fList);
title('Features [Convolution + ReLU]')

% --- Feature maps after mean pooling: one 10x10 map per column -----------
fList = reshape(y3(:, :, 1:20), 10*10, 20);
figure
display_network(fList);
title('Features [Convolution + ReLU + MeanPool]')
輸出的結果為:
圖1
圖2
圖2由20個經過訓練的卷積濾波器組成。值越大,陰影越亮。
圖3
圖3呈現的是卷積層圖像處理的結果。
圖4
圖4是卷積層采用ReLU處理特征映射圖所得的結果。去除之前圖像中的暗色像素,當前圖像的字符主要是白色像素。但其中有一個格子變暗了,這不是一個好現象,因為它未能捕捉到輸入圖像“2”的任何特征。
圖5
圖5顯示的是特征提取神經網絡的最終結果。這些圖像被轉換為一個一維向量,並存儲在分類神經網絡中。
上述代碼中用到的輔助函數如下:
function images = loadMNISTImages(filename)
%loadMNISTImages returns a [#pixels]x[number of MNIST images] matrix
%containing the raw MNIST images, one image per column, as doubles scaled
%to [0,1].
%
%   filename : path of an IDX image file (magic number 2051).
%
% Each column holds one image in column-major order of the row/column
% layout stored in the file, so reshape(images, numRows, numCols, [])
% restores the individual pictures.
%
% Raises an error when the file cannot be opened or the magic number is
% wrong. The file is closed on every exit path, including those errors.

fp = fopen(filename, 'rb');
assert(fp ~= -1, ['Could not open ', filename, '']);
% Close the file when the function ends, normally or through an error
closeOnExit = onCleanup(@() fclose(fp));

% Header: four big-endian 32-bit integers
magic = fread(fp, 1, 'int32', 0, 'ieee-be');
assert(magic == 2051, ['Bad magic number in ', filename, '']);

numImages = fread(fp, 1, 'int32', 0, 'ieee-be');
numRows = fread(fp, 1, 'int32', 0, 'ieee-be');
numCols = fread(fp, 1, 'int32', 0, 'ieee-be');

% Pixel data: one byte per pixel, rows stored consecutively in the file
images = fread(fp, inf, 'unsigned char=>unsigned char');
% The data is read column-first, so fill cols x rows and swap the first
% two dimensions to get rows x cols x images
images = reshape(images, numCols, numRows, numImages);
images = permute(images,[2 1 3]);

% Reshape to #pixels x #examples
images = reshape(images, size(images, 1) * size(images, 2), size(images, 3));
% Convert to double and rescale to [0,1]
images = double(images) / 255;
end
function labels = loadMNISTLabels(filename)
%loadMNISTLabels returns a [number of MNIST images]x1 matrix containing
%the labels for the MNIST images
%
%   filename : path of an IDX label file (magic number 2049).
%
% Raises an error when the file cannot be opened, the magic number is
% wrong, or the number of labels read differs from the count in the
% header. The file is closed on every exit path, including those errors.

fp = fopen(filename, 'rb');
assert(fp ~= -1, ['Could not open ', filename, '']);
% Close the file when the function ends, normally or through an error
closeOnExit = onCleanup(@() fclose(fp));

% Header: two big-endian 32-bit integers
magic = fread(fp, 1, 'int32', 0, 'ieee-be');
assert(magic == 2049, ['Bad magic number in ', filename, '']);

numLabels = fread(fp, 1, 'int32', 0, 'ieee-be');

% One byte per label, returned as a column of doubles
labels = fread(fp, inf, 'unsigned char');

assert(size(labels,1) == numLabels, 'Mismatch in label count');
end
function [h, array] = display_network(A, opt_normalize, opt_graycolor, cols, opt_colmajor)
% This function visualizes filters in matrix A. Each column of A is a
% filter. We will reshape each column into a square image and visualizes
% on each cell of the visualization panel.
% All other parameters are optional, usually you do not need to worry
% about it.
% opt_normalize: whether we need to normalize the filter so that all of
% them can have similar contrast. Default value is true.
% opt_graycolor: whether we use gray as the heat map. Default is true.
% cols: how many columns are there in the display. Default value is the
% squareroot of the number of columns in A.
% opt_colmajor: you can switch convention to row major for A. In that
% case, each row of A is a filter. Default value is false.
%
% Returns:
% h: the handle returned by imagesc for the drawn panel.
% array: the assembled panel (all tiles plus separators) that was drawn.
%
% NOTE(review): the opt_colmajor branch below still reads filters from the
% columns of A (A(:,k)) and only changes the order in which the grid cells
% are filled (down the columns first) -- this does not match the
% description of opt_colmajor above; confirm which one is intended.
% NOTE(review): the function body is not terminated by an 'end' keyword,
% while the other functions shown alongside it are; keep that in mind if
% they are ever placed in the same file.

% Every warning is switched off here and switched on again on the last
% line, regardless of the warning state the caller had before.
warning off all
% Fill in defaults for arguments that were omitted or passed as []
if ~exist('opt_normalize', 'var') || isempty(opt_normalize)
opt_normalize= true;
end
if ~exist('opt_graycolor', 'var') || isempty(opt_graycolor)
opt_graycolor= true;
end
if ~exist('opt_colmajor', 'var') || isempty(opt_colmajor)
opt_colmajor = false;
end
% rescale
% (subtracts the mean over all entries of A, not a per-filter mean)
A = A - mean(A(:));
if opt_graycolor, colormap(gray); end
% compute rows, cols
% L = entries per filter, M = number of filters, sz = tile side length
% (assumes L is a perfect square so that sz is an integer -- TODO confirm
% for all callers), buf = width of the separator between tiles.
[L M]=size(A);
sz=sqrt(L);
buf=1;
% Grid layout: n tiles per row, m rows.
% NOTE(review): unlike the other optional arguments, cols is not checked
% with isempty, so passing [] for cols is not treated as "use default".
if ~exist('cols', 'var')
if floor(sqrt(M))^2 ~= M
% M is not a perfect square: start at ceil(sqrt(M)) and increase n while
% it does not divide M and stays below 1.2*sqrt(M)
n=ceil(sqrt(M));
while mod(M, n)~=0 && n<1.2*sqrt(M), n=n+1; end
m=ceil(M/n);
else
n=sqrt(M);
m=n;
end
else
n = cols;
m = ceil(M/n);
end
% Panel initialised to -1 everywhere; tiles overwrite their own cells, so
% the separators and any unused cells keep this value (-0.1 when gray
% colouring is off).
array=-ones(buf+m*(sz+buf),buf+n*(sz+buf));
if ~opt_graycolor
array = 0.1.* array;
end
if ~opt_colmajor
% Fill the grid row by row
k=1;
for i=1:m
for j=1:n
% Skip the remaining cells once all M filters have been placed
if k>M,
continue;
end
% Largest absolute value of this filter, used for per-filter scaling
clim=max(abs(A(:,k)));
if opt_normalize
array(buf+(i-1)*(sz+buf)+(1:sz),buf+(j-1)*(sz+buf)+(1:sz))=reshape(A(:,k),sz,sz)/clim;
else
% Without per-filter normalisation the tile is scaled by the largest
% absolute value over the whole of A
array(buf+(i-1)*(sz+buf)+(1:sz),buf+(j-1)*(sz+buf)+(1:sz))=reshape(A(:,k),sz,sz)/max(abs(A(:)));
end
k=k+1;
end
end
else
% Fill the grid column by column
k=1;
for j=1:n
for i=1:m
% Skip the remaining cells once all M filters have been placed
if k>M,
continue;
end
clim=max(abs(A(:,k)));
if opt_normalize
array(buf+(i-1)*(sz+buf)+(1:sz),buf+(j-1)*(sz+buf)+(1:sz))=reshape(A(:,k),sz,sz)/clim;
else
% NOTE(review): here the tile is copied unscaled, whereas the row-by-row
% branch above divides by max(abs(A(:))) -- looks like an inconsistency;
% confirm which behaviour is wanted.
array(buf+(i-1)*(sz+buf)+(1:sz),buf+(j-1)*(sz+buf)+(1:sz))=reshape(A(:,k),sz,sz);
end
k=k+1;
end
end
end
% Draw the panel with the colour limits fixed to [-1 1]. Both branches
% issue the same call.
% NOTE(review): the 'EraseMode' image property is, to my knowledge, not
% supported by newer MATLAB graphics releases -- verify against the target
% version.
if opt_graycolor
h=imagesc(array,'EraseMode','none',[-1 1]);
else
h=imagesc(array,'EraseMode','none',[-1 1]);
end
axis image off
drawnow;
% Re-enables all warnings (see the note at the top of the body)
warning on all
function rng(x)
% RNG  Seed the generators behind randn and rand with the value x.
%
%   x : seed value, passed unchanged to both generators.
%
% Returns nothing; the only effect is on the global random number state.
%
% NOTE(review): this function carries the same name as MATLAB's built-in
% rng and would take precedence over it when on the path -- presumably a
% compatibility shim for environments without the built-in; confirm.
% NOTE(review): the ('seed', x) form is, as far as I know, a legacy seeding
% syntax -- verify it selects the intended generator on the target release.
randn('seed', x)
rand('seed', x)
end
function y = Softmax(x)
% SOFTMAX  Normalised exponential of a vector.
%
%   x : vector of scores.
%   y : exp(x) divided by the sum of exp(x); same size as x, entries sum
%       to 1.
%
% The largest score is subtracted before exponentiating. This leaves the
% result mathematically unchanged (the common factor cancels in the
% division) but keeps exp() from overflowing to Inf for large scores,
% which would otherwise turn the output into NaN.
ex = exp(x - max(x(:)));
y = ex / sum(ex);
end
function y = ReLU(x)
% RELU  Rectified linear unit, applied element-wise.
%
%   x : array of any size.
%   y : array of the same size in which every entry is the larger of 0
%       and the corresponding entry of x.
lowerBound = 0;
y = max(lowerBound, x);
end