標準化互信息 NMI(Normalized Mutual Information)常用於聚類評估中,用來衡量兩組標籤劃分之間的一致程度。
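標準化互信息 NMI 計算步驟

1. 計算互信息:$$MI(X,Y)=\sum_{x}\sum_{y} p(x,y)\,\log_2\frac{p(x,y)}{p(x)\,p(y)}$$
2. 計算兩組標籤各自的熵:$$H(X)=-\sum_{x} p(x)\,\log_2 p(x),\qquad H(Y)=-\sum_{y} p(y)\,\log_2 p(y)$$
3. 標準化:$$NMI(X,Y)=\frac{2\,MI(X,Y)}{H(X)+H(Y)}$$
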
Python 實現
代碼:
''' 利用Python實現NMI計算'''
import math
import numpy as np
from sklearn import metrics
def NMI(A, B):
    """Return the normalized mutual information (NMI) of two labelings.

    The score is ``2 * MI(A, B) / (H(A) + H(B))`` with all logarithms taken
    in base 2, i.e. mutual information normalized by the arithmetic mean of
    the two entropies.

    Args:
        A: Cluster/class labels, one per sample (any array-like; it is
            flattened to 1-D).
        B: A second labeling of the same samples, same length as ``A``.

    Returns:
        The NMI as a float. ``1.0`` is returned when both entropies are zero
        (both labelings consist of a single cluster) or when the input is
        empty, since the two partitions are then trivially identical.

    Raises:
        ValueError: If ``A`` and ``B`` do not contain the same number of
            labels.
    """
    # Accept lists/tuples as well as ndarrays: comparing a plain list with a
    # scalar yields a single bool rather than an element-wise mask.
    A = np.asarray(A).ravel()
    B = np.asarray(B).ravel()

    # Number of samples.
    total = A.shape[0]
    if B.shape[0] != total:
        raise ValueError(
            "A and B must have the same length, got %d and %d"
            % (total, B.shape[0])
        )
    if total == 0:
        return 1.0

    # Build one membership mask per distinct label once, instead of
    # re-scanning the arrays for every (label in A, label in B) pair.
    a_masks = [A == label for label in np.unique(A)]
    b_masks = [B == label for label in np.unique(B)]
    a_counts = [int(np.count_nonzero(mask)) for mask in a_masks]
    b_counts = [int(np.count_nonzero(mask)) for mask in b_masks]

    # Mutual information.
    MI = 0.0
    for a_mask, a_count in zip(a_masks, a_counts):
        for b_mask, b_count in zip(b_masks, b_counts):
            joint = int(np.count_nonzero(a_mask & b_mask))
            if joint == 0:
                # An empty cell contributes 0 * log(0), taken as 0.
                continue
            # pxy / (px * py) reduces to joint * total / (a_count * b_count).
            ratio = joint * total / (a_count * b_count)
            MI += (joint / total) * math.log2(ratio)

    # Entropies of the two labelings.
    Hx = -sum((c / total) * math.log2(c / total) for c in a_counts)
    Hy = -sum((c / total) * math.log2(c / total) for c in b_counts)

    # Normalization; guard the degenerate case where both labelings have a
    # single cluster and the denominator would be zero.
    if Hx + Hy == 0:
        return 1.0
    return 2.0 * MI / (Hx + Hy)
if __name__ == '__main__':
    # Demo: score a 17-sample example with the hand-written NMI, then with
    # scikit-learn's implementation for comparison.
    A = np.array([1] * 6 + [2] * 6 + [3] * 5)
    B = np.array([1, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 1, 1, 3, 3, 3])

    own_score = NMI(A, B)
    print(own_score)

    # Call the scikit-learn function directly.
    reference_score = metrics.normalized_mutual_info_score(A, B)
    print(reference_score)
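運行結果:
0.3645617718571898
0.3646247961942429

(兩個結果略有差異,是因為標準化方式不同:上面的實現以算術平均 $(H(X)+H(Y))/2$ 作為分母,而得到第二個結果的 sklearn 版本以幾何平均 $\sqrt{H(X)\,H(Y)}$ 作為分母;較新版本的 sklearn 預設 `average_method='arithmetic'`,結果會與上面的實現一致。)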
