前言:最近接觸到一些神經網絡的東西,看到很多人使用PSO(粒子群優化算法)優化BP神經網絡中的權值和偏置,經過一段時間的研究,寫了一些代碼,能夠跑通,嫌棄速度慢的可以改一下訓練次數或者適應度函數。
在我的理解里,PSO優化BP的初始權值w和偏置b,有點像數據遷徙,等於用粒子去嘗試作為網絡的參數,然后訓練網絡的閾值,所以總是會看到PSO優化了權值和閾值的說法,(一開始我是沒有想通為什么能夠優化閾值的),下面是我的代碼實現過程,關於BP和PSO的原理就不一一贅述了,網上有很多大佬解釋的很詳細了……
首先是利用BP作為適應度函數
function [error] = BP_fit(gbest,input_num,hidden_num,output_num,net,inputn,outputn) %BP_fit 此函數為PSO的適應度函數 % gbest:最優粒子 % input_num:輸入節點數目; % output_num:輸出層節點數目; % hidden_num:隱含層節點數目; % net:網絡; % inputn:網絡訓練輸入數據; % outputn:網絡訓練輸出數據; % error : 網絡輸出誤差,即PSO適應度函數值 w1 = gbest(1:input_num * hidden_num); B1 = gbest(input_num * hidden_num + 1:input_num * hidden_num + hidden_num); w2 = gbest(input_num * hidden_num + hidden_num + 1:input_num * hidden_num... + hidden_num + hidden_num * output_num); B2 = gbest(input_num * hidden_num+ hidden_num + hidden_num * output_num + 1:... input_num * hidden_num + hidden_num + hidden_num * output_num + output_num); net.iw{1,1} = reshape(w1,hidden_num,input_num); net.lw{2,1} = reshape(w2,output_num,hidden_num); net.b{1} = reshape(B1,hidden_num,1); net.b{2} = B2'; %建立BP網絡 net.trainParam.epochs = 200; net.trainParam.lr = 0.05; net.trainParam.goal = 0.000001; net.trainParam.show = 100; net.trainParam.showWindow = 0; net = train(net,inputn,outputn); ty = sim(net,inputn); error = sum(sum(abs((ty - outputn)))); end
然后是PSO部分:
%%基於多域PSO_RBF的6R機械臂逆運動學求解的研究 clear; close; clc; %定義BP參數: % input_num:輸入層節點數; % output_num:輸出層節點數; % hidden_num:隱含層節點數; % inputn:網絡輸入; % outputn:網絡輸出; %定義PSO參數: % max_iters:算法最大迭代次數 % w:粒子更新權值 % c1,c2:為粒子群更新學習率 % m:粒子長度,為BP中初始W、b的長度總和 % n:粒子群規模 % gbest:到達最優位置的粒子 format long input_num = 3; output_num = 3; hidden_num = 25; max_iters =10; m = 500; %種群規模 n = input_num * hidden_num + hidden_num + hidden_num * output_num + output_num; %個體長度 w = 0.1; c1 = 2; c2 = 2; %加載網絡輸入(空間任意點)和輸出(對應關節角的值) load('pfile_i2.mat') load('pfile_o2.mat') % inputs_1 = angle_2'; inputs_1 = inputs_2'; outputs_1 = outputs_2'; train_x = inputs_1(:,1:490); % train_y = outputs_1(4:5,1:490); train_y = outputs_1(1:3,1:490); test_x = inputs_1(:,491:500); test_y = outputs_1(1:3,491:500); % test_y = outputs_1(4:5,491:500); [inputn,inputps] = mapminmax(train_x); [outputn,outputps] = mapminmax(train_y); net = newff(inputn,outputn,25); %設置粒子的最小位置與最大位置 % w1閾值設定 for i = 1:input_num * hidden_num MinX(i) = -0.01*ones(1); MaxX(i) = 3.8*ones(1); end % B1閾值設定 for i = input_num * hidden_num + 1:input_num * hidden_num + hidden_num MinX(i) = 1*ones(1); MaxX(i) = 8*ones(1); end % w2閾值設定 for i = input_num * hidden_num + hidden_num + 1:input_num * hidden_num + hidden_num + hidden_num * output_num MinX(i) = -0.01*ones(1); MaxX(i) = 3.8*ones(1); end % B2閾值設定 for i = input_num * hidden_num+ hidden_num + hidden_num * output_num + 1:input_num * hidden_num + hidden_num + hidden_num * output_num + output_num MinX(i) = 1*ones(1); MaxX(i) = 8*ones(1); end %%初始化位置參數 %產生初始粒子位置 pop = rands(m,n); %初始化速度和適應度函數值 V = 0.15 * rands(m,n); BsJ = 0; %對初始粒子進行限制處理,將粒子篩選到自定義范圍內 for i = 1:m for j = 1:input_num * hidden_num if pop(i,j) < MinX(j) pop(i,j) = MinX(j); end if pop(i,j) > MaxX(j) pop(i,j) = MaxX(j); end end for j = input_num * hidden_num + 1:input_num * hidden_num + hidden_num if pop(i,j) < MinX(j) pop(i,j) = MinX(j); end if pop(i,j) > MaxX(j) pop(i,j) = MaxX(j); end end for j = input_num * hidden_num + hidden_num + 1:input_num * hidden_num + hidden_num + hidden_num * output_num if pop(i,j) < MinX(j) pop(i,j) = MinX(j); end if pop(i,j) > MaxX(j) pop(i,j) = MaxX(j); end end for j = input_num * hidden_num+ hidden_num + hidden_num * output_num + 1:input_num * hidden_num + hidden_num + hidden_num * output_num + output_num if pop(i,j) < MinX(j) pop(i,j) = MinX(j); end if pop(i,j) > MaxX(j) pop(i,j) = MaxX(j); end end end %評估初始粒子 for s = 1:m indivi = pop(s,:); fitness = BP_fit(indivi,input_num,hidden_num,output_num,net,inputn,outputn); BsJ = fitness; %調用適應度函數,更新每個粒子當前位置 Error(s,:) = BsJ; %儲存每個粒子的位置,即BP的最終誤差 end [OderEr,IndexEr] = sort(Error);%將Error數組按升序排列 Errorleast = OderEr(1); %記錄全局最小值 for i = 1:m %記錄到達當前全局最優位置的粒子 if Error(i) == Errorleast gbest = pop(i,:); break; end end ibest = pop; %當前粒子群中最優的個體,因為是初始粒子,所以最優個體還是個體本身 for kg = 1:max_iters %迭代次數 for s = 1:m %個體有52%的可能性變異 for j = 1:n %粒子長度 for i = 1:m %種群規模,變異是針對某個粒子的某一個值的變異 if rand(1)<0.04 pop(i,j) = rands(1); end end end %r1,r2為粒子群算法參數 r1 = rand(1); r2 = rand(1); %個體位置和速度更新 V(s,:) = w * V(s,:) + c1 * r1 * (ibest(s,:)-pop(s,:)) + c2 * r2 * (gbest(1,:)-pop(s,:)); pop(s,:) = pop(s,:) + 0.3 * V(s,:); %對更新的位置進行判斷,超過設定的范圍就處理下。粒子中不同的值對應不同的范圍 for j = 1:input_num * hidden_num if pop(s,j) < MinX(j) pop(s,j) = MinX(j); end if pop(s,j) > MaxX(j) pop(s,j) = MaxX(j); end end for j = input_num * hidden_num + 1:input_num * hidden_num + hidden_num if pop(s,j) < MinX(j) pop(s,j) = MinX(j); end if pop(s,j) > MaxX(j) pop(s,j) = MaxX(j); end end for j = input_num * hidden_num + hidden_num + 1:input_num * hidden_num + hidden_num + hidden_num * output_num if pop(s,j) < MinX(j) pop(s,j) = MinX(j); end if pop(s,j) > MaxX(j) pop(s,j) = MaxX(j); end end for j = input_num * hidden_num+ hidden_num + hidden_num * output_num + 1:input_num * hidden_num + hidden_num + hidden_num * output_num + output_num if pop(i,j) < MinX(j) pop(i,j) = MinX(j); end if pop(i,j) > MaxX(j) pop(i,j) = MaxX(j); end end %更新后的每個個體適應度值 BsJ = BP_fit(indivi,input_num,hidden_num,output_num,net,inputn,outputn); error(s,:) = BsJ; %根據適應度值對個體最優和群體最優進行更新 if error(s)<Error(s) ibest(s,:) = pop(s,:); Error(s,:) = error(s); end %更新全局最優粒子以及最小誤差 if error(s)<Errorleast gbest(s,:) = pop(s,:); Errorleast = error(s); end end Best(kg,:) = Errorleast; end %plot(Best); save pfile_gbest gbest;
最后是利用訓練好的最優粒子去訓練網絡:
clear clc close; load pfile_gbest; input_num = 3; output_num = 3; hidden_num = 25; w1 = gbest(1:input_num * hidden_num); B1 = gbest(input_num * hidden_num + 1:input_num * hidden_num + hidden_num); w2 = gbest(input_num * hidden_num + hidden_num + 1:input_num * hidden_num... + hidden_num + hidden_num * output_num); B2 = gbest(input_num * hidden_num+ hidden_num + hidden_num * output_num + 1:... input_num * hidden_num + hidden_num + hidden_num * output_num + output_num); net.iw{1,1} = reshape(w1,hidden_num,input_num); net.lw{2,1} = reshape(w2,output_num,hidden_num); net.b{1} = reshape(B1,hidden_num,1); net.b{2} = B2'; load('pfile_i2.mat') % load('pfile_a2.mat') load('pfile_o2.mat') % inputs_1 = angle_2'; inputs_1 = inputs_2'; outputs_1 = outputs_2'; train_x = inputs_1(:,1:490); % train_y = outputs_1(4:5,1:490); train_y = outputs_1(1:3,1:490); test_x = inputs_1(:,491:500); test_y = outputs_1(1:3,491:500); % test_y = outputs_1(4:5,491:500); [inputn,inputps] = mapminmax(train_x); [outputn,outputps] = mapminmax(train_y); %建立BP網絡 net.trainParam.epochs = 200; net.trainParam.lr = 0.05; net.trainParam.goal = 0.000001; net = newff(inputn,outputn,25); [net,per2] = train(net,inputn,outputn); inputn_test = mapminmax('apply',test_x,inputps); ty = sim(net,inputn_test); net_J = mapminmax('reverse',ty,outputps); error = abs(test_y - net_J);
水平有限,希望能給大家參考一下..........