R 的 cluster 包提供了多種聚類算法。下面通過一個數據集,對三種聚類算法(K-Means、CLARA 和模糊 C-Means)進行評估。
# ============================
# Clustering evaluation
#
# Runs three clustering algorithms (k-means, CLARA, fuzzy C-means) on the
# same data set and collects a few validity statistics for each into one
# comparison table (`info`).
# ============================

# fpc provides cluster.stats()
library(fpc)
# cluster provides clara() and fanny()
library(cluster)

# ===== Run the clustering algorithms ====================================

# Number of clusters requested from every algorithm
cluster_num <- 3

# Load the data set; the first row of the file holds the column names
data <- read.csv("data.csv", header = TRUE)

# k-means
km <- kmeans(data, cluster_num)

# CLARA
cl <- clara(data, cluster_num)

# Fuzzy C-means
fan <- fanny(data, cluster_num)

# ===== Evaluate the clusterings =========================================

# The distance matrix depends only on the data, so compute it once and
# share it across the three evaluations.
dist_mat <- dist(data)

# Validity statistics for each partition.
# clara and fanny objects are read through their full `clustering`
# component name rather than relying on `$` partial matching.
km_stats <- cluster.stats(dist_mat, km$cluster)
cl_stats <- cluster.stats(dist_mat, cl$clustering)
fcm_stats <- cluster.stats(dist_mat, fan$clustering)

# Assemble one row per algorithm
info <- data.frame(
  Algorithm = c("KMeans", "Clara", "FCM"),
  Silwidth = c(
    km_stats$avg.silwidth,
    cl_stats$avg.silwidth,
    fcm_stats$avg.silwidth
  ),
  AverageWithin = c(
    km_stats$average.within,
    cl_stats$average.within,
    fcm_stats$average.within
  ),
  averageBetween = c(
    km_stats$average.between,
    cl_stats$average.between,
    fcm_stats$average.between
  ),
  ch = c(km_stats$ch, cl_stats$ch, fcm_stats$ch)
)

# Replace the statistic column names with display labels.
# NOTE(review): the third label reads "簇間平均聚類" but the column holds
# `average.between`; it is probably meant to be "簇間平均距離". Left
# unchanged here in case other code indexes by the current name - confirm.
names(info)[2:5] <- c("輪廓系數", "簇內平均距離", "簇間平均聚類", "Calinski和Harabasz指數")

