library(data.table)
library(randomForest)
data <- iris
str(data)

# k-fold cross-validation: predict Sepal.Length with a random forest
k <- 5
# Randomly assign each row to one of k folds
data$id <- sample(seq_len(k), nrow(data), replace = TRUE)

# Per-fold predictions and held-out actuals are collected in lists and
# bound once at the end (avoids O(k^2) rbind-in-a-loop copying)
prediction_list <- vector("list", k)
actual_list <- vector("list", k)

# Text progress bar to track CV progress
# (base utils::txtProgressBar — plyr::create_progress_bar was used before
# but plyr is never loaded, which made the script error here)
progress_bar <- txtProgressBar(min = 0, max = k, style = 3)

for (i in seq_len(k)) {
  # Train on every fold except i; test on fold i.
  # Drop the fold-id column so it cannot leak in as a predictor.
  trainingset <- subset(data, id != i, select = -id)
  testset <- subset(data, id == i, select = -id)

  # Fit a random forest. The formula must be Sepal.Length ~ . , NOT
  # trainingset$Sepal.Length ~ . — with the $-qualified response, "."
  # still expands to ALL columns of data, leaving Sepal.Length itself
  # among the predictors.
  mymodel <- randomForest(Sepal.Length ~ ., data = trainingset, ntree = 100)

  # Predict on the held-out fold (predict ignores the response column)
  prediction_list[[i]] <- data.table(Predicted = predict(mymodel, testset))
  # Keep only the actual Sepal.Length values for this fold
  actual_list[[i]] <- data.table(Actual = testset$Sepal.Length)

  setTxtProgressBar(progress_bar, i)
}
close(progress_bar)

# Put predictions and actual values side by side
result <- cbind(rbindlist(prediction_list), rbindlist(actual_list))
result$Difference <- abs(result$Actual - result$Predicted)
# Evaluate via the distribution of absolute errors
summary(result$Difference)
# Pseudocode: validation-based early stopping during training
# (note: despite the original label, this is early stopping, not
# cross-validation — kept here as a reference sketch, commented out
# so the script remains valid R)
#
# for each epoch
#   for each training data instance
#     propagate error through the network
#     adjust the weights
#     calculate the accuracy over training data
#   for each validation data instance
#     calculate the accuracy over the validation data
#   if the threshold validation accuracy is met
#     exit training
#   else
#     continue training