實驗中的Friedman檢驗方法代碼實現,代碼如下:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# def friedman(n,k,rank_matrix):
# sumr = sum(list(map(lambda x:np.mean(x) ** 2, rank_matrix.T)))
# result = 12 * n / (k * (k + 1)) * (sumr - k * (k + 1) ** 2 / 4)
# result = (n - 1) * result / (n * (k - 1) - result)
# return result
def friedman(n, k, data_matrix, *, verbose=True):
    """Compute the F-distributed variant of the Friedman test statistic.

    The column means of ``data_matrix`` are used directly as the average
    rank of each algorithm, so the matrix is expected to already contain
    ranks (one row per dataset, one column per algorithm), not raw scores.

    Args:
        n: Number of datasets (rows of ``data_matrix``).
        k: Number of algorithms (columns of ``data_matrix``).
        data_matrix: n-by-k table of ranks. A pandas DataFrame, a NumPy
            array, or a nested sequence is accepted.
        verbose: When true (the default), print the shape, the matrix,
            every column and the list of average ranks.

    Returns:
        The statistic ``(n - 1) * chi2 / (n * (k - 1) - chi2)`` where
        ``chi2 = 12n / (k(k + 1)) * (sum(mean_rank_i ** 2) - k(k + 1)^2 / 4)``.
        It is ``inf`` when the denominator is exactly zero, which happens
        when every dataset ranks the algorithms identically.

    Raises:
        ValueError: If ``data_matrix`` is not two-dimensional or its shape
            is not ``(n, k)``.
    """
    # Compute on a plain ndarray so DataFrames, arrays and lists all work;
    # the caller's original object is kept only for the verbose dump.
    ranks = np.asarray(data_matrix)
    if ranks.ndim != 2:
        raise ValueError(
            f"data_matrix must be 2-dimensional, got {ranks.ndim} dimension(s)"
        )
    n_rows, n_cols = ranks.shape
    if (n_rows, n_cols) != (n, k):
        # A mismatch would silently produce a meaningless statistic.
        raise ValueError(
            f"data_matrix has shape {(n_rows, n_cols)}, expected (n, k) = {(n, k)}"
        )

    if verbose:
        print(n_rows)
        print(n_cols)
        print(data_matrix)
        for column in ranks.T:
            print(column)

    # Average rank of each algorithm across all datasets.
    mean_ranks = [column.mean() for column in ranks.T]
    if verbose:
        print(mean_ranks)

    sum_sq_mean_ranks = (np.array(mean_ranks) ** 2).sum()
    # Chi-square form of the Friedman statistic.
    chi2 = (12 * n) * (sum_sq_mean_ranks - ((k * (k + 1) ** 2) / 4)) / (k * (k + 1))
    # Perfect agreement between datasets makes the denominator zero; the
    # resulting inf is the intended answer, so silence the divide warning.
    with np.errstate(divide="ignore"):
        return (n - 1) * chi2 / (n * (k - 1) - chi2)
# Usage: n is the number of datasets, k is the number of algorithms and
# data_matrix is the n-row, k-column table handed to friedman().
# NOTE(review): n, k and data_matrix are not defined anywhere in this
# snippet; the caller has to provide them before this line runs.
result = friedman(n,k,data_matrix)
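算法間是否存在顯著性差異,需要將 result 與臨界值作比較:若 result 大於臨界值,則拒絕「所有算法性能相同」的原假設,即算法間存在顯著性差異。臨界值可查 F 分佈臨界值表得到。
對於 k 個算法、n 個數據集,result 服從自由度為 (k-1) 和 (k-1)*(n-1) 的 F 分佈;在給定的顯著性水平 α(例如 0.05)下,表中 (k-1, (n-1)*(k-1)) 處的值即為臨界值。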
