GRA算法步驟
step1:確定參考序列 \(x_0\) 和比較序列 \(x_i\);
step2:對原始數據變換,將第 i 個屬性的第 k 個數據 \(x_i(k)\) 轉換為 \(y_i(k)\) 。方法有:
- 初值變換: \(y_i(k) = \frac{x_i(k)}{x_i(1)}\).
- 均值變換: \(y_i(k) = \frac{x_i(k)}{\overline{x}_i}\).
- 百分比變換(數據為正時,變換成不大於1的數):\(y_i(k) = \frac{x_i(k)}{\max_k x_i(k)}\) .
- 倍數變換(數據為正時,變換成不小於1的數):\(y_i(k) = \frac{x_i(k)}{\min_k x_i(k)}\) .
- 歸一化變換:\(y_i(k) = \frac{x_i(k)}{\sum_k x_i(k)}\) .
- 區間變換:\(y_i(k) = \frac{x_i(k) - \min_k {x}_i(k)}{\max_k {x}_i(k) - \min_k {x}_i(k)}\) .
step3:求絕對差序列,即比較序列與參考序列逐點之差的絕對值 \(\Delta_{0i}(k) = |x_0(k) - x_i(k)|\) (此處的 x 指經過 step2 變換後的數據).
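step4:使用下面公式計算灰關聯系數,\(\gamma(x_0(k),x_i(k))=\frac{\Delta_{\min}+\rho\Delta_{\max}}{\Delta_{0i}(k)+\rho\Delta_{\max}}\),其中 \(\Delta_{\min}=\min_i\min_k\Delta_{0i}(k)\)、\(\Delta_{\max}=\max_i\max_k\Delta_{0i}(k)\) 分別為所有比較序列絕對差中的最小值與最大值,一般取分辨系數 \(\rho=0.5\)。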
step5:使用下面公式計算灰關聯度,\(\gamma(x_0,x_i)=\frac{1}{N}\sum_{k=1}^{N} w_k\,\gamma(x_0(k),x_i(k))\) ,其中 N 為數據條數,\(w_k\) 為第 k 條數據的權重(各條數據同等重要時取 \(w_k=1\),即對灰關聯系數取簡單平均)。
step6:得到關鍵屬性
- 如果參考序列 \(\{x_0\}\) 為最優值數據列,那么灰關聯度 \(γ(x_0,x_i)\) 越大,則第 i 個屬性越好;
- 如果參考序列 \(\{x_0\}\) 為最劣值數據列,那么灰關聯度 \(γ(x_0,x_i)\) 越大,則第 i 個屬性越不好;
GRA算法的Python實現
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.sparse import issparse
def check_array(array, dtype="numeric"):
    """Validate the input matrix and return it as a new 2-D float64 array.

    Parameters
    ----------
    array : array-like
        Candidate data matrix (ndarray, nested list, ...).
    dtype : str, default "numeric"
        Accepted for backward compatibility; the value is not used and the
        result is always converted to ``numpy.float64``.

    Returns
    -------
    numpy.ndarray
        A fresh float64 copy of ``array``.

    Raises
    ------
    TypeError
        If ``array`` is a SciPy sparse matrix.
    ValueError
        If ``array`` is not two-dimensional.
    """
    if issparse(array):
        # Sparse matrices are rejected rather than silently densified.
        raise TypeError('GRA does not support sparse input.')
    # Convert before inspecting ``ndim`` so that plain nested lists, which
    # have no ``ndim`` attribute, are validated instead of raising
    # AttributeError.
    array = np.array(array, dtype=np.float64)
    if array.ndim != 2:
        raise ValueError('Expected 2D array.')
    return array
class GRA():
    """Grey Relational Analysis (GRA) between the columns of a data matrix.

    Each row of the input is one record and each column is one feature
    (sequence). One column acts as the reference sequence; every column is
    scored by its grey relational grade with respect to that reference.

    Parameters
    ----------
    k : int, default 0
        Column index of the reference sequence.
    norm_method : str, default 'norm'
        Column-wise transform applied before comparison. One of
        ``'mean'``, ``'initial'``, ``'norm'``, ``'section'``, ``'max'``,
        ``'min'``.
    rho : float, default 0.5
        Distinguishing coefficient used in the relational coefficient.

    Attributes
    ----------
    r : numpy.ndarray
        Grades against reference column ``k``; set by :meth:`fit`.
    data : numpy.ndarray
        Square matrix of grades whose column ``j`` holds the grades computed
        with column ``j`` as reference; set by :meth:`fit_all`.
    """

    def __init__(self, k=0, norm_method='norm', rho=0.5):
        self.k = k
        self.norm_method = norm_method
        self.rho = rho

    def fit(self, X, k=None):
        """Compute grades of every column against a single reference column.

        Parameters
        ----------
        X : array-like, 2-D
            Data matrix (rows are records, columns are features).
        k : int, optional
            If given, replaces the stored reference column index.

        Returns
        -------
        numpy.ndarray
            One grade per column, also stored in ``self.r``.
        """
        if k is not None:
            self.k = k
        X = check_array(X)
        Y = self.__normalization(X)
        self.r = self.__calculation_relevancy(Y)
        return self.r

    def fit_all(self, X):
        """Use every column in turn as the reference sequence.

        Returns
        -------
        numpy.ndarray
            Square matrix, also stored in ``self.data``, whose column ``j``
            contains the grades of all columns against reference ``j``.
            ``self.k`` is left at the last column index afterwards.
        """
        X = check_array(X)
        n_features = X.shape[1]
        # The transform does not depend on the reference column, so it is
        # applied exactly once. Re-applying it per reference would rescale
        # the data again for the 'min' method (it divides by min + 1e-6).
        Y = self.__normalization(X)
        self.data = np.zeros([n_features, n_features])
        for k in range(n_features):
            self.k = k
            self.data[:, k] = self.__calculation_relevancy(Y)
        return self.data

    def __normalization(self, X):
        """Apply the transform named by ``self.norm_method`` to a copy of X.

        Raises
        ------
        ValueError
            If ``self.norm_method`` is not a recognised name.
        """
        if self.norm_method == 'mean':
            return self.__mean(X)
        if self.norm_method == 'initial':
            return self.__initial(X)
        if self.norm_method == 'norm':
            return self.__norm(X)
        if self.norm_method == 'section':
            return self.__section(X)
        if self.norm_method == 'max':
            return self.__max(X)
        if self.norm_method == 'min':
            return self.__min(X)
        raise ValueError("Unrecognized norm_method='{0}'".format(self.norm_method))

    # Every transform below returns a new array and leaves its argument
    # untouched, so it can be applied repeatedly to the same input.

    def __norm(self, X):
        """Divide each column by its sum."""
        return X / np.sum(X, axis=0)

    def __mean(self, X):
        """Divide each column by its mean."""
        return X / np.mean(X, axis=0)

    def __initial(self, X):
        """Divide each column by its first value."""
        return X / X[0, :]

    def __max(self, X):
        """Divide each column by its maximum (percentage transform)."""
        return X / np.max(X, axis=0)

    def __min(self, X):
        """Divide each column by its minimum (multiple transform)."""
        # The small offset avoids division by zero for a zero minimum.
        return X / (np.min(X, axis=0) + 0.000001)

    def __section(self, X):
        """Rescale each column to the interval spanned by its min and max."""
        col_max = np.max(X, axis=0)
        col_min = np.min(X, axis=0)
        return (X - col_min) / (col_max - col_min)

    def __calculation_relevancy(self, X):
        """Return the grey relational grade of every column against ``self.k``.

        Raises
        ------
        ValueError
            If there is no column other than the reference to compare with.
        """
        # Absolute difference between every sequence and the reference.
        Delta = np.fabs(X - X[:, [self.k]])
        # Global extrema are taken over the comparison sequences only; the
        # reference column's own (all-zero) differences are excluded.
        others = np.delete(Delta, self.k, axis=1)
        if others.size == 0:
            raise ValueError(
                'GRA needs at least one comparison sequence besides the reference.')
        mmax = others.max()
        mmin = others.min()
        if mmax == 0:
            # Every sequence coincides with the reference; the formula would
            # evaluate 0/0, so report the grade as 1 for all columns.
            return np.ones(X.shape[1])
        # Relational coefficient for every record and column.
        ksi = (mmin + self.rho * mmax) / (Delta + self.rho * mmax)
        # Grade = mean coefficient over the records (equal weights).
        return ksi.sum(axis=0) / ksi.shape[0]

    def sort_comparison(self):
        """Print and return the features ranked by grade, highest first.

        Uses ``self.r`` as set by :meth:`fit`; the reference column itself is
        left out of the ranking.

        Returns
        -------
        pandas.DataFrame
            Columns '特征' (feature label) and '相關度' (numeric grade),
            indexed by rank starting at "1".
        """
        # Resolve a negative reference index so that it is still skipped.
        ref = self.k % len(self.r)
        order = np.argsort(-self.r)
        rows = [
            ['第{}個特征'.format(idx), self.r[idx]]
            for idx in order
            if idx != ref
        ]
        # Build the frame from the row list directly: going through
        # np.array() would coerce the numeric grades to strings.
        df = pd.DataFrame(
            rows,
            columns=['特征', '相關度'],
            index=[f"{i+1}" for i in range(len(rows))],
        )
        print('\n與第{}個特征相關度的從大到小排序:'.format(self.k))
        print(df)
        return df

    def ShowGRAHeatMap(self):
        """Plot the grade matrix stored in ``self.data`` as a heat map.

        Reads ``self.data``, which is set by :meth:`fit_all`.
        """
        df = pd.DataFrame(
            data=self.data,
            columns=[f"{i}" for i in range(self.data.shape[1])],
            index=[f"{i}" for i in range(self.data.shape[0])],
        )
        colormap = plt.cm.RdBu
        plt.figure()
        # The matrix holds grey relational grades, not Pearson correlations.
        plt.title('Grey Relational Grade of Features')
        sns.heatmap(df.astype(float), linewidths=0.1, vmax=1.0, square=True,
                    cmap=colormap, linecolor='white', annot=True)
        plt.show()
接口調用
test 1
import numpy as np

# Demo 1: rank every feature against feature 0 using the 'min' transform.
# The literal lists one feature per row; it is transposed below so that
# each row is one record and each column is one feature.
X = np.transpose(np.array([
    [0.732, 0.646, 0.636, 0.598, 0.627],
    [0.038, 0.031, 0.042, 0.036, 0.043],
    [0.507, 0.451, 0.448, 0.411, 0.122],
    [0.048, 0.034, 0.030, 0.030, 0.031],
    [183.25, 207.28, 240.98, 290.80, 370.00],
    [24.03, 44.98, 62.79, 83.44, 127.22],
    [85508, 74313, 85966, 100554, 109804],
    [175.87, 175.72, 183.69, 277.11, 521.26],
    [10, 13, 13, 1, 1],
]))
print(X.shape)
print(X)

gra = GRA(k=0, norm_method='min')
r = gra.fit(X)
print(r)
gra.sort_comparison()
test 2
from GRA import GRA
import numpy as np

# Demo 2: pairwise grey relational grades between all features, shown as a
# heat map.
X = np.array([[0.732, 0.646, 0.636, 0.598, 0.627],
              [0.038, 0.031, 0.042, 0.036, 0.043],
              [0.507, 0.451, 0.448, 0.411, 0.122],
              [0.048, 0.034, 0.030, 0.030, 0.031],
              [183.25, 207.28, 240.98, 290.80, 370.00],
              [24.03, 44.98, 62.79, 83.44, 127.22],
              [85508, 74313, 85966, 100554, 109804],
              [175.87, 175.72, 183.69, 277.11, 521.26],
              [10, 13, 13, 1, 1],])
# The literal lists one feature per row, while GRA treats every column as a
# feature; transpose so that rows are records and columns are features
# (same layout as demo 1). Without this the matrix would compare the five
# observations with each other instead of the nine features.
X = X.T
gra = GRA(norm_method='initial')
data = gra.fit_all(X)
print(data)
gra.ShowGRAHeatMap()