scale的作用
1、中心化: 減去平均值
2、標準化: 除以標準差(即方差的平方根);若未先中心化,scale() 除以的是均方根 sqrt(sum(x^2)/(n-1))
# Demonstrates what base::scale() does to a numeric vector for each
# combination of the `center` and `scale` arguments, and re-derives every
# result by hand so the two can be compared.
# Bare names on their own line rely on top-level auto-printing.

test <- c(2, 3, 4, 6) # sample data

# Neither centre nor rescale: the values are returned unchanged.
a <- scale(test, center = FALSE, scale = FALSE)
a

# Centre only: subtract the mean.
b <- scale(test, center = TRUE, scale = FALSE)
b
b_test <- test - mean(test) # manual check: subtract the mean
b_test

# Rescale only: with center = FALSE, scale() divides by the root mean square
# sqrt(sum(x^2) / (n - 1)); this is NOT the standard deviation, because the
# mean has not been removed.
# Note: the variable `c` does not break calls to c(...); R still finds the
# function when the name is used in call position.
c <- scale(test, center = FALSE, scale = TRUE)
c
c_test <- test / sqrt(sum(test^2) / (length(test) - 1)) # manual check
c_test

# Centre and rescale: subtract the mean, then divide by the root mean square
# of the centred values, which equals the sample standard deviation.
d <- scale(test, center = TRUE, scale = TRUE)
d
d_temp <- test - mean(test) # step 1: centre
d_test <- d_temp / sqrt(sum(d_temp^2) / (length(d_temp) - 1)) # step 2: rescale
d_test
運行過程:
> test <- c(2,3,4,6)
> a <- scale(test,center = F, scale = F)
> a ## a沒有變化
     [,1]
[1,]    2
[2,]    3
[3,]    4
[4,]    6
> b <- scale(test, center = T, scale = F) ## 中心化
> b
      [,1]
[1,] -1.75
[2,] -0.75
[3,]  0.25
[4,]  2.25
attr(,"scaled:center")
[1] 3.75
> b_test = test - mean(test) ## 減去平均值驗證
> b_test
[1] -1.75 -0.75  0.25  2.25
> c <- scale(test, center = F, scale = T) ## 標準化
> c
          [,1]
[1,] 0.4296689
[2,] 0.6445034
[3,] 0.8593378
[4,] 1.2890068
attr(,"scaled:scale")
[1] 4.654747
> c_test <- test/sqrt(sum(test^2)/(length(test) - 1)) ## 標準化,除以均方根驗證
> c_test
[1] 0.4296689 0.6445034 0.8593378 1.2890068
> d <- scale(test, center = T, scale = T) ## 既中心化, 又標準化
> d
          [,1]
[1,] -1.024695
[2,] -0.439155
[3,]  0.146385
[4,]  1.317465
attr(,"scaled:center")
[1] 3.75
attr(,"scaled:scale")
[1] 1.707825
> d_temp <- test - mean(test) ## 中心化
> d_test <- d_temp/sqrt(sum(d_temp^2)/(length(d_temp) - 1)) ## 除以標準差,標準化
> d_test
[1] -1.024695 -0.439155  0.146385  1.317465
參考:https://blog.csdn.net/hac_kill_you/article/details/120498460