# -*- coding: utf-8 -*-
"""
Created on Mon Feb 18 14:59:53 2019
@author: Administrator
"""
#from pyclust import KMedoids #保留,用於切換函數
import numpy as np
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import random
def im_txt(file):
    """Load numeric data from a text file.

    Args:
        file: Path or file-like object accepted by ``np.loadtxt``.

    Returns:
        The parsed contents as a ``float32`` NumPy array.
    """
    return np.loadtxt(file, dtype=np.float32)
def out_txt(outfile, line):
    """Write each item of ``line`` to ``outfile``, one per text line.

    The file is opened in write mode (truncating existing content). Writing
    is best-effort: if converting or writing an item fails, a message is
    printed and whatever was written so far is kept.

    Args:
        outfile: Path of the file to create or overwrite.
        line: Iterable of items; each is written as ``str(item)`` followed
            by a newline.
    """
    # The context manager guarantees the handle is closed on every exit
    # path, including KeyboardInterrupt/SystemExit.
    with open(outfile, "w") as f:
        try:
            for item in line:
                f.write(str(item) + "\n")
        except Exception:
            # Only ordinary errors are reported and swallowed; interpreter
            # exit signals are allowed to propagate.
            print("分類數據未保存!!!!")
def initianlize_centers(n_clusters):
    """Pick distinct random sample indices to serve as initial centers.

    Reads the module-level ``n_data`` (the number of samples), which must be
    set before this function is called.

    Args:
        n_clusters: Number of centers to draw.

    Returns:
        A list of ``n_clusters`` distinct integers in ``[0, n_data - 1]``,
        e.g. ``[101, 205, 5, 3, 7]``. An empty list when ``n_clusters <= 0``.

    Raises:
        ValueError: If more centers are requested than there are samples.
            (Drawing by rejection, as done previously, would loop forever.)
    """
    global n_data
    if n_clusters <= 0:
        return []
    if n_clusters > n_data:
        raise ValueError(
            "n_clusters (%s) exceeds the number of samples (%s)"
            % (n_clusters, n_data)
        )
    # Sampling without replacement yields distinct indices directly.
    return random.sample(range(n_data), n_clusters)
def clus_process(centers, data):
    """Assign every sample to its nearest center.

    Args:
        centers: Indices into ``data`` identifying the current centers.
        data: Indexable collection of sample vectors.

    Returns:
        A list with one entry per sample: the position (within ``centers``)
        of the center at the smallest Euclidean distance. On a tie the
        earliest center wins.
    """
    center_ids = np.array(centers)
    assignments = []
    for sample in data:
        # Euclidean distance from this sample to each center, in order.
        distances = [
            np.sqrt(np.sum(np.square(sample - data[cid])))
            for cid in center_ids
        ]
        nearest = min(distances)
        # list.index returns the first occurrence of the minimum.
        assignments.append(distances.index(nearest))
    return assignments
def chose_centers(result_clusters, n_clusters):
    """Draw one random member of each cluster as that cluster's new center.

    Args:
        result_clusters: Sequence of cluster labels, one per sample; labels
            outside ``range(n_clusters)`` are ignored.
        n_clusters: Number of clusters.

    Returns:
        A list of ``n_clusters`` sample indices; entry ``i`` is the new
        center for cluster ``i``.

    Raises:
        ValueError: If a cluster is empty and no unused sample index is left
            to reseed it.
    """
    # Group sample indices by label in a single pass instead of rescanning
    # the label list once per cluster.
    members = {}
    for idx, label in enumerate(result_clusters):
        members.setdefault(label, []).append(idx)

    centers = []
    for cluster_id in range(n_clusters):
        candidates = members.get(cluster_id)
        centers.append(random.choice(candidates) if candidates else None)

    if None in centers:
        # An empty cluster has no member to draw from. Reseed it with a
        # random sample index not already used as a center, rather than
        # reusing a stale value from the previous cluster (or failing with
        # an unbound local when the first cluster is the empty one).
        used = set(c for c in centers if c is not None)
        spare = [i for i in range(len(result_clusters)) if i not in used]
        random.shuffle(spare)
        for pos, center in enumerate(centers):
            if center is None:
                if not spare:
                    raise ValueError(
                        "cluster %s is empty and no unused sample is "
                        "available to reseed it" % pos
                    )
                centers[pos] = spare.pop()
    return centers
def count_E(centers_new, data, result_clusters_new):
    """Compute the clustering cost.

    The cost is the sum, over all samples, of the Euclidean distance between
    a sample and the center of the cluster it is assigned to.

    Args:
        centers_new: Indices into ``data`` of the cluster centers.
        data: Indexable collection of sample vectors.
        result_clusters_new: Cluster label (position in ``centers_new``) of
            each sample.

    Returns:
        The accumulated distance; ``0`` when there are no centers.
    """
    total = 0
    sample_count = len(data)
    for cluster_id, center_idx in enumerate(centers_new):
        for sample_idx in range(sample_count):
            # Skip samples that belong to a different cluster.
            if result_clusters_new[sample_idx] != cluster_id:
                continue
            offset = data[sample_idx] - data[center_idx]
            total += np.sqrt(np.sum(np.square(offset)))
    return total
def KMedoids(n_clusters, data, max_iter):
    """Cluster ``data`` by randomised search over candidate centers.

    Starting from random centers, each iteration draws a candidate center
    from every current cluster, re-assigns all samples to the candidates and
    keeps the candidates only if the total cost decreases.

    Args:
        n_clusters: Number of clusters.
        data: Indexable collection of sample vectors.
        max_iter: Patience: the search stops once the non-improvement counter
            exceeds this value.

    Returns:
        A tuple ``(centers, result_clusters)``: the accepted center indices
        and the cluster label of every sample.
    """
    # Random initial centers and the clustering they induce.
    centers = initianlize_centers(n_clusters)
    result_clusters = clus_process(centers, data)

    # Start from the real cost of the initial clustering. A fixed initial
    # bound would block every update on data whose cost exceeds it.
    E = count_E(centers, data, result_clusters)

    xie = 0  # iterations since the last improvement
    while xie <= max_iter:
        # Candidate centers and the assignment induced by those candidates.
        # The assignment must use the candidates, not the current centers,
        # otherwise the cost below is measured on a stale clustering.
        centers_new = chose_centers(result_clusters, n_clusters)
        result_clusters_new = clus_process(centers_new, data)

        E_new = count_E(centers_new, data, result_clusters_new)

        # Accept the candidates only when the cost decreases.
        if E_new < E:
            centers = centers_new
            result_clusters = result_clusters_new
            E = E_new
            print("價值函數為:%s"%E)
            print("聚類中心:%s"%centers)
            xie = 0

        # Patience counter, with a progress trace every 10 steps.
        xie = xie + 1
        if xie % 10 == 0:
            print(xie)
    return centers, result_clusters
def randomcolor(x):
    """Generate ``x`` distinct random colours as ``#RRGGBB`` strings.

    Each of the six digits is drawn from ``1``-``9`` and ``A``-``F``
    (``0`` is never used). A colour that was already generated is discarded
    and redrawn.

    Args:
        x: Number of colours to produce.

    Returns:
        A list of ``x`` unique colour strings.
    """
    digits = ['1','2','3','4','5','6','7','8','9','A','B','C','D','E','F']
    palette = []
    while len(palette) < x:
        candidate = "#" + "".join(
            digits[random.randint(0, 14)] for _ in range(6)
        )
        if candidate not in palette:
            palette.append(candidate)
    return palette
def main():
    """Run the full pipeline: load, cluster, plot and save the labels.

    Reads samples from ``text.txt``, publishes their count through the
    module-level ``n_data``, clusters them with ``KMedoids``, draws a t-SNE
    scatter plot coloured by cluster and writes the labels to a text file.
    """
    global n_data
    file = "text.txt"
    data = im_txt(file)
    n_data = len(data)

    # 2-D embedding used only for visualisation.
    # NOTE(review): newer scikit-learn releases rename ``n_iter`` to
    # ``max_iter`` - confirm against the installed version.
    data_TSNE = TSNE(learning_rate=100, n_iter=5000).fit_transform(data)

    plt.figure(figsize=(12, 8))

    # Number of clusters.
    k = 18
    centers, result_clusters = KMedoids(k, data, 10)

    # One random colour per cluster, then one colour per sample.
    color = randomcolor(k)
    colors = [color[label] for label in result_clusters]

    plt.subplot(222)
    # NOTE(review): set after the figure was created, so it presumably only
    # affects figures created later - confirm intent.
    plt.rcParams['figure.dpi'] = 300
    plt.scatter(data_TSNE[:, 0], data_TSNE[:, 1], s=10, c=colors)
    # The previous format string had no placeholder, so k never appeared.
    plt.title('K-medoids Result of k={}'.format(k))
    out_txt("分類數數(ture).txt", result_clusters)
# Script entry point: this call is unconditional, so the pipeline runs
# whenever the module is executed or imported.
main()