# -*- coding: utf-8 -*-
"""
Randomised K-medoids clustering with a t-SNE scatter plot of the result.

Created on Mon Feb 18 14:59:53 2019

@author: Administrator
"""
# from pyclust import KMedoids  # kept so the implementation can be swapped
import random

import numpy as np

# Legacy module-level sample count.  main() sets it, and initianlize_centers()
# falls back to it when the caller does not pass n_samples explicitly.
n_data = None


def im_txt(file):
    """Load a whitespace-delimited numeric text file as a float32 array.

    Args:
        file: Path (or file object) accepted by ``numpy.loadtxt``.

    Returns:
        The parsed ``numpy.ndarray`` with dtype ``float32``.
    """
    return np.loadtxt(file, dtype=np.float32)


def out_txt(outfile, line):
    """Write every item of ``line`` to ``outfile``, one item per text line.

    Saving is best-effort: a failure while writing prints a warning instead
    of raising.  A failure to open the file still propagates.

    Args:
        outfile: Destination path; an existing file is overwritten.
        line: Iterable of values; each one is written via ``str()``.
    """
    # The context manager closes the handle on every path.
    with open(outfile, "w", encoding="utf-8") as handle:
        try:
            handle.writelines(str(item) + "\n" for item in line)
        except (OSError, ValueError):
            # ValueError also covers UnicodeEncodeError.
            print("分類數據未保存!!!!")


def initianlize_centers(n_clusters, n_samples=None):
    """Pick ``n_clusters`` distinct random sample indices as initial medoids.

    Args:
        n_clusters: Number of medoids to draw.
        n_samples: Number of samples to draw from.  When omitted, the
            module-level ``n_data`` (set by ``main``) is used.

    Returns:
        A list of distinct indices, e.g. ``[101, 205, 5, 3, 7]``.

    Raises:
        ValueError: If the sample count is unknown, or if ``n_clusters`` is
            negative or larger than the number of samples.
    """
    if n_samples is None:
        n_samples = n_data
    if n_samples is None:
        raise ValueError("n_samples was not given and n_data is not set")
    # random.sample draws without replacement and fails fast when more
    # medoids are requested than there are samples.
    return random.sample(range(n_samples), n_clusters)


def _as_points(data):
    """Return ``data`` as a 2-D float64 array with one row per sample."""
    points = np.asarray(data, dtype=np.float64)
    if points.size == 0:
        return points.reshape(len(points), 0)
    return points.reshape(len(points), -1)


def _distances_to(points, index):
    """Return the Euclidean distance from every row to row ``index``."""
    return np.sqrt(np.sum(np.square(points - points[index]), axis=1))


def clus_process(centers, data):
    """Assign every sample to its nearest medoid.

    Args:
        centers: Sample indices of the current medoids.
        data: Array-like with one sample per row.

    Returns:
        A list with one entry per sample: the position in ``centers`` of the
        closest medoid.  Ties go to the medoid listed first.
    """
    points = _as_points(data)
    if len(points) == 0:
        return []
    # One column per medoid keeps the temporary at n_samples x n_clusters.
    distances = np.stack(
        [_distances_to(points, center) for center in centers], axis=1
    )
    # argmin reports the first minimum, which gives the tie rule above.
    return np.argmin(distances, axis=1).tolist()


def chose_centers(result_clusters, n_clusters):
    """Draw one random member of every cluster as its candidate medoid.

    Args:
        result_clusters: Cluster label of every sample.
        n_clusters: Number of clusters.

    Returns:
        A list of ``n_clusters`` sample indices; entry ``i`` is the candidate
        medoid of cluster ``i``.

    Raises:
        ValueError: If a cluster is empty and no unused sample is left to
            re-seed it.
    """
    # Group sample indices by label in a single pass.
    members = [[] for _ in range(n_clusters)]
    for sample, label in enumerate(result_clusters):
        if 0 <= label < n_clusters:
            members[label].append(sample)

    centers = [random.choice(group) if group else None for group in members]

    # A cluster can be empty (e.g. duplicated points shadowing a medoid).
    # Re-seed it from samples that are not already medoids.
    used = {center for center in centers if center is not None}
    for cluster, center in enumerate(centers):
        if center is not None:
            continue
        spare = [s for s in range(len(result_clusters)) if s not in used]
        if not spare:
            raise ValueError(
                "cluster %d is empty and no unused sample is available"
                % cluster
            )
        centers[cluster] = random.choice(spare)
        used.add(centers[cluster])
    return centers


def count_E(centers_new, data, result_clusters_new):
    """Compute the clustering cost.

    The cost is the sum of Euclidean distances between every sample and the
    medoid of the cluster it is assigned to.  Samples whose label does not
    refer to an entry of ``centers_new`` are ignored.

    Args:
        centers_new: Sample indices of the medoids.
        data: Array-like with one sample per row.
        result_clusters_new: Cluster label of every sample.

    Returns:
        The total cost as a Python ``float``.
    """
    points = _as_points(data)
    medoids = np.asarray(centers_new, dtype=int)
    labels = np.asarray(result_clusters_new, dtype=int)
    if len(points) == 0 or len(medoids) == 0:
        return 0.0
    valid = (labels >= 0) & (labels < len(medoids))
    assigned = medoids[labels[valid]]
    offsets = points[valid] - points[assigned]
    return float(np.sqrt(np.sum(np.square(offsets), axis=1)).sum())


def KMedoids(n_clusters, data, max_iter):
    """Cluster ``data`` with a randomised K-medoids search.

    Each round draws a random member of every current cluster as a candidate
    medoid, re-assigns all samples to the candidates and keeps the candidate
    set only when it lowers the cost.  The search stops once the counter of
    consecutive rounds without improvement exceeds ``max_iter``.

    Args:
        n_clusters: Number of clusters.
        data: Array-like with one sample per row.
        max_iter: Threshold for the no-improvement counter.

    Returns:
        A tuple ``(centers, result_clusters)``: the medoid sample indices and
        the cluster label of every sample.
    """
    points = _as_points(data)  # convert once instead of in every round

    # Random initial medoids and the clustering / cost they induce.
    centers = initianlize_centers(n_clusters, len(points))
    result_clusters = clus_process(centers, points)
    cost = count_E(centers, points, result_clusters)

    stale_rounds = 0  # rounds since the last accepted improvement
    while stale_rounds <= max_iter:
        centers_new = chose_centers(result_clusters, n_clusters)
        # The labels must come from the candidate medoids, otherwise the cost
        # below would mix new medoids with the old assignment.
        result_clusters_new = clus_process(centers_new, points)
        cost_new = count_E(centers_new, points, result_clusters_new)

        # Accept the candidates only when they lower the cost.
        if cost_new < cost:
            centers = centers_new
            result_clusters = result_clusters_new
            cost = cost_new
            print("價值函數為:%s" % cost)
            print("聚類中心:%s" % centers)
            stale_rounds = 0

        stale_rounds += 1
        if stale_rounds % 10 == 0:
            print(stale_rounds)
    return centers, result_clusters


def randomcolor(x):
    """Generate ``x`` distinct random colours as ``#RRGGBB`` strings.

    Args:
        x: Number of colours to generate.

    Returns:
        A list of ``x`` unique hexadecimal colour codes.
    """
    # The digit '0' is not part of the alphabet, as in the original palette.
    alphabet = "123456789ABCDEF"
    colors = []
    while len(colors) < x:
        color = "#" + "".join(random.choice(alphabet) for _ in range(6))
        if color not in colors:
            colors.append(color)
    return colors


def main():
    """Cluster ``text.txt``, plot the t-SNE embedding and save the labels."""
    global n_data
    # Imported here so the clustering helpers above can be imported without
    # the plotting stack being installed.
    import matplotlib.pyplot as plt
    from sklearn.manifold import TSNE

    file = "text.txt"
    data = im_txt(file)
    n_data = len(data)

    # Dimension-reduced data used only for the visualisation.
    # NOTE(review): newer scikit-learn releases rename `n_iter` to
    # `max_iter` - confirm against the installed version.
    data_TSNE = TSNE(learning_rate=100, n_iter=5000).fit_transform(data)

    # Exploratory K-medoids clustering and its scatter plot.
    plt.figure(figsize=(12, 8))
    k = 18  # number of clusters
    centers, result_clusters = KMedoids(k, data, 10)

    palette = randomcolor(k)
    colors = [palette[label] for label in result_clusters]
    plt.subplot(222)
    # NOTE(review): this is set after the figure was created, so it probably
    # does not affect the current figure - confirm the intent.
    plt.rcParams['figure.dpi'] = 300
    plt.scatter(data_TSNE[:, 0], data_TSNE[:, 1], s=10, c=colors)
    plt.title('K-medoids Result of k={}'.format(k))
    out_txt("分類數數(ture).txt", result_clusters)


if __name__ == "__main__":
    main()