Python 聚類分析


 

#!/usr/bin/env python
#-*- coding:utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn import preprocessing
from scipy.spatial.distance import cdist
from sklearn import metrics

# Load the raw data set. Each non-empty line is a comma-separated record whose
# first field is parsed as an integer label and whose remaining fields are
# parsed as float features.
# NOTE(review): the path looks like it may be a typo for 'data/wine.data' — confirm.
features = []
labels = []

with open('data/wina.data') as f:  # context manager guarantees the file is closed
    for line in f:
        line = line.strip()
        if not line:
            # A blank (e.g. trailing) line would otherwise crash in int().
            continue
        label, *values = line.split(",")
        labels.append(int(label))
        features.append([float(value) for value in values])

# Convert to NumPy arrays: X holds the feature rows, y_true the label read
# from the first column of each row.
X = np.array(features)
y_true = np.array(labels)


# Min-max scale the features (scaler used with its default settings) so that
# no single feature dominates the Euclidean distances used below.
min_max_scaler = preprocessing.MinMaxScaler()
X = min_max_scaler.fit_transform(X)


# Elbow method: for every candidate k, fit K-means and record the mean
# distance from each sample to its nearest cluster centre.
K = range(1, 10)
meandistortions = []
for k in K:
    kmeans = KMeans(n_clusters=k)
    kmeans.fit(X)
    # Distance of every sample to every centre, reduced to the closest centre
    # per sample. The reductions stay inside NumPy instead of iterating the
    # array with Python's builtin sum().
    nearest = cdist(X, kmeans.cluster_centers_, 'euclidean').min(axis=1)
    meandistortions.append(nearest.mean())
plt.plot(K, meandistortions, 'bx-')
plt.xlabel('k')
plt.ylabel('meandistortions')
plt.title('best K of the model')
plt.show()
# Cluster count for the final model. It is hard-coded here, presumably chosen
# by reading the elbow plot above; it is not derived from meandistortions.
n_clusters = 3


# Fit the final K-means model with the chosen cluster count and assign every
# sample to a cluster.
cls = KMeans(n_clusters)
cls.fit(X)
y_pre = cls.predict(X)

# Bookkeeping values kept for inspection; they are not used further below.
n_samples, n_features = X.shape   # number of rows and columns of X
inertias = cls.inertia_           # the fitted model's inertia_ attribute

# Score the clustering: two metrics compare against the labels in y_true,
# the silhouette coefficient uses only X and the predicted assignment.
adjusted_rand_s = metrics.adjusted_rand_score(y_true, y_pre)
homogeneity_s = metrics.homogeneity_score(y_true, y_pre)
silhouette_s = metrics.silhouette_score(X, y_pre, metric='euclidean')

for caption, score in (
    ("蘭德指數ART", adjusted_rand_s),
    ("同質化得分homo", homogeneity_s),
    ("平均輪廓系數", silhouette_s),
):
    print(caption, score)

centers = cls.cluster_centers_  # centre coordinates, indexed by cluster number

# Scatter the clusters on two feature columns. The x and y axes must be
# different columns: plotting column 0 against itself puts every point on the
# diagonal and hides the cluster structure. Fall back to column 0 for both
# axes only when the data has a single feature.
x_col = 0
y_col = 1 if X.shape[1] > 1 else 0

colors = ['#ff0000', '#00ff00', '#0000ff']   # one colour per cluster
plt.figure()
for i in range(n_clusters):
    cluster = X[y_pre == i]            # samples assigned to cluster i
    color = colors[i % len(colors)]    # cycle so more than 3 clusters cannot raise IndexError
    plt.scatter(cluster[:, x_col], cluster[:, y_col], c=color, marker='.')
    # Mark the cluster centre with a star in the same colour.
    plt.plot(centers[i][x_col], centers[i][y_col], '*',
             markerfacecolor=color, markeredgecolor='k', markersize=6)
plt.show()

 

 

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM