%% Ensemble classifier based on the random-forest idea
% Runs K-fold cross-validation of a random-forest classifier. For every
% fold the decision threshold on the positive-class score is swept over
% 0.1:0.01:0.9 and the metrics (MCC, accuracy, F-measure, G-mean) at the
% threshold with the highest MCC are recorded, together with the fold AUC.
% The script finally prints the metrics averaged over all folds.
%
% External functions used (not defined in this file): crossvalind,
% classRF_train, classRF_predict, AUC.
%
% NOTE(review): the threshold is chosen on the test fold itself, so the
% reported MCC/Acc/F-measure/G-mean are optimistically biased. Consider
% selecting the threshold on a validation split of the training fold.

%% Reset the environment
close all;
clear;
clc;

%% Load the data
Data=load('E:\study\研究生\實驗\dataset\new_housing.txt');
Label=load('E:\study\研究生\實驗\dataset\new_housingLabel.txt');

M=size(Data,1); % Data is M-by-N; each row is one sample
if size(Label,1)~=M
    error('Data has %d rows but Label has %d rows.',M,size(Label,1));
end

numFolds=5; % number of cross-validation folds

sum_Acc=0;
sum_MCC=0;
sum_F_measure=0;
sum_G_mean=0;
sum_AUC=0;

indices=crossvalind('Kfold',M,numFolds); % random fold assignment per sample

for k=1:numFolds % each fold serves as the test set exactly once
    test=(indices==k); % logical mask of the test samples
    train=~test;       % every remaining sample is used for training
    train_data=Data(train,:);
    train_target=Label(train,:); % ground-truth classes of the training samples
    test_data=Data(test,:);
    test_target=Label(test,:);

    %% Train the random forest
    model=classRF_train(train_data,train_target);

    %% Predict on the held-out fold
    [Predict_label,votes]=classRF_predict(test_data,model); %#ok<ASGLU>

    % Turn per-class vote counts into score estimates. Normalising by the
    % row sum avoids hard-coding the number of trees; double() guards
    % against integer division should votes be an integer array.
    % NOTE(review): assumes each row of votes sums to the tree count - confirm.
    votes=double(votes);
    prob_estimates=bsxfun(@rdivide,votes,sum(votes,2));
    % NOTE(review): column 2 is taken as the positive class (label 1), which
    % presumes the classes are ordered with the positive label last - confirm.
    output=prob_estimates(:,2);

    %% Sweep the decision threshold and keep the metrics at the best MCC
    is_pos=(test_target(:,1)==1); % label 1 is positive, anything else negative
    max_MCC=-Inf;                 % -Inf so the best threshold is always kept,
                                  % even when every MCC is <= 0
    evaluation=[0,0,0,0];         % [MCC, Acc, F-measure, G-mean]
    for T=0.1:0.01:0.9 % step 0.01
        pred_pos=(output>=T); % score >= T is predicted positive

        % Confusion-matrix counts
        TP=sum( is_pos &  pred_pos);
        FN=sum( is_pos & ~pred_pos);
        FP=sum(~is_pos &  pred_pos);
        TN=sum(~is_pos & ~pred_pos);

        % MCC is undefined (0/0) when any marginal is zero; use the
        % conventional value 0 in that case instead of NaN.
        denom=sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN));
        if denom==0
            MCC=0;
        else
            MCC=(TP*TN-FP*FN)/denom;
        end

        if MCC>max_MCC % keep the metrics of the threshold with the largest MCC
            max_MCC=MCC;
            % Each numerator is 0 whenever its denominator is 0, so
            % clamping the denominator to 1 yields 0 instead of NaN.
            TPR=TP/max(TP+FN,1); % recall / sensitivity
            TNR=TN/max(TN+FP,1); % specificity
            Acc=(TP+TN)/(TP+TN+FP+FN);
            % Algebraically equal to 2*Precision*Recall/(Precision+Recall)
            F_measure=(2*TP)/max(2*TP+FP+FN,1);
            G_mean=sqrt(TPR*TNR);
            evaluation=[max_MCC,Acc,F_measure,G_mean];
        end
    end

    auc=AUC(test_target,output); % threshold-independent, once per fold

    sum_AUC=sum_AUC+auc;
    sum_MCC=sum_MCC+evaluation(1,1);
    sum_Acc=sum_Acc+evaluation(1,2);
    sum_F_measure=sum_F_measure+evaluation(1,3);
    sum_G_mean=sum_G_mean+evaluation(1,4);
end

%% Report the averages over all folds (intentionally echoed)
avg_Acc=sum_Acc/numFolds
avg_MCC=sum_MCC/numFolds
avg_F_measure=sum_F_measure/numFolds
avg_G_mean=sum_G_mean/numFolds
avg_AUC=sum_AUC/numFolds