import pandas as pd
import numpy as np
# x = pd.DataFrame([[2000,0.732,0.836,0.628,0.743], [2001,0.758,0.883,0.688,0.787], [2002,0.859,0.914,0.781,0.929],[2003,1.0125,1.0440,1.0237,0.9847],[2004,1.2356,1.1069,1.2833,1.2363],[2005,1.4013,1.2152,1.5405,1.3182]])
# x=pd.read_excel('D:\date\winequality\winequality-red1.xlsx')
# Raw string so the backslashes in the Windows path are never parsed as escapes.
CSV_FILE = r"D:\date\winequality\winequality-red1.csv"


def grey_relational_degree(data, rho=0.5):
    """Rank comparison sequences by grey relational degree to a reference.

    The first column of ``data`` is discarded. Every remaining column is
    treated as one sequence: the first of them is the reference sequence
    and the others are the comparison sequences.

    Args:
        data: DataFrame whose columns (after the first) hold numeric
            sequences of equal length.
        rho: Distinguishing coefficient used in the relational
            coefficient formula. Defaults to 0.5.

    Returns:
        A Series of relational degrees sorted in descending order. It is
        indexed by the 0-based position of each comparison sequence
        (0 is the first column after the reference column).
    """
    # One row per sequence, one column per observation.
    sequences = data.iloc[:, 1:].T

    # 1. Mean normalisation: divide each sequence by its own mean.
    #    Label-aligned division avoids integer lookups on a Series that is
    #    indexed by column names.
    sequences = sequences.div(sequences.mean(axis=1), axis=0)

    # 2. Split into the reference sequence and the comparison sequences.
    reference = sequences.iloc[0, :]
    comparison = sequences.iloc[1:, :]

    # Absolute difference between each comparison sequence and the reference.
    # The index is reset so rows are numbered 0..k-1.
    abs_diff = comparison.sub(reference, axis=1).abs().reset_index(drop=True)

    # Global maximum and minimum difference over all sequences and observations.
    max_diff = abs_diff.max().max()
    min_diff = abs_diff.min().min()

    # 3. Relational coefficient for every (sequence, observation) pair.
    coefficients = (min_diff + rho * max_diff) / (abs_diff + rho * max_diff)

    # 4. Relational degree: row sum divided by the number of observations.
    degree = coefficients.sum(axis=1) / coefficients.columns.size

    # 5. Highest degree first.
    return degree.sort_values(ascending=False)


def main():
    """Load the CSV file and print the ranked relational degrees."""
    # low_memory=False suppresses the mixed-dtype warning from chunked parsing.
    csv_data = pd.read_csv(CSV_FILE, low_memory=False)
    print(grey_relational_degree(csv_data))


if __name__ == "__main__":
    main()