python-k中心聚類代碼


# -*- coding: utf-8 -*-
"""
Created on Mon Feb 18 14:59:53 2019

@author: Administrator
"""

#from pyclust import KMedoids #保留,用於切換函數
import numpy as np
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import random

def im_txt(file):
    """
    Load a numeric table from a text source.

    Parameters
    ----------
    file : anything ``np.loadtxt`` accepts as its first argument
        (for example a file name or an open text stream).

    Returns
    -------
    numpy.ndarray
        The parsed values, stored as ``float32``.
    """
    return np.loadtxt(file, dtype=np.float32)

def out_txt(outfile,line):
    """
    Write every element of ``line`` to ``outfile``, one element per text line.

    Saving is best-effort: if writing fails part-way through, a notice is
    printed and the function returns normally instead of raising.  Failure
    to open the file still propagates to the caller.

    Parameters
    ----------
    outfile : path of the file to create or overwrite.
    line : iterable whose items are written via ``str(item)``.
    """
    f = open(outfile, "w")
    try:
        # The context manager guarantees the handle is closed on every path.
        with f:
            for item in line:
                f.write(str(item) + "\n")
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt and SystemExit
        # must still be able to stop the program.
        print("分類數據未保存!!!!")

def initianlize_centers(n_clusters, n_samples=None):
    """
    Pick ``n_clusters`` distinct random sample indices as initial medoids.

    Parameters
    ----------
    n_clusters : number of medoids to draw.
    n_samples : total number of samples to draw from.  When omitted, the
        module-level ``n_data`` is used, which keeps existing one-argument
        calls working.

    Returns
    -------
    list of int
        Distinct indices in ``range(n_samples)``, e.g. ``[101, 205, 5, 3, 7]``.
        An empty list when ``n_clusters`` is zero or negative.

    Raises
    ------
    ValueError
        If more medoids are requested than there are samples.  Drawing
        distinct indices would otherwise never finish.
    """
    global n_data
    if n_samples is None:
        n_samples = n_data
    if n_clusters <= 0:
        return []
    if n_clusters > n_samples:
        raise ValueError(
            "cannot pick %d distinct centers from %d samples"
            % (n_clusters, n_samples)
        )
    # random.sample draws without replacement, so no retry loop is needed.
    return random.sample(range(n_samples), n_clusters)

def clus_process(centers,data):
    """
    Assign every sample to its nearest medoid.

    Parameters
    ----------
    centers : sequence of sample indices into ``data`` acting as medoids.
    data : indexable collection of samples supporting element-wise
        subtraction (e.g. a 2-D numpy array).

    Returns
    -------
    list of int
        For each sample, the position within ``centers`` of the medoid with
        the smallest Euclidean distance.  Ties go to the earliest medoid.
    """
    medoid_ids = np.array(centers)
    labels = []
    for point in data:
        # Euclidean distance from this sample to each medoid, in medoid order
        distances = [
            np.sqrt(np.sum(np.square(point - data[medoid])))
            for medoid in medoid_ids
        ]
        nearest = min(distances)
        labels.append(distances.index(nearest))
    return labels

def chose_centers(result_clusters,n_clusters):
    """
    Draw a new random medoid for every cluster.

    Parameters
    ----------
    result_clusters : sequence giving, for each sample index, the cluster
        label (``0 .. n_clusters-1``) it currently belongs to.
    n_clusters : number of clusters.

    Returns
    -------
    list of int
        ``n_clusters`` sample indices; entry ``i`` is a randomly chosen
        member of cluster ``i``.  If cluster ``i`` has no members, a random
        sample not already used as a medoid is taken instead, so the result
        always has ``n_clusters`` distinct entries.

    Raises
    ------
    ValueError
        If empty clusters exist and there are not enough unused samples to
        reseed them.
    """
    # Group sample indices by label in a single pass over the labels.
    members = {cluster_id: [] for cluster_id in range(n_clusters)}
    for sample_idx, label in enumerate(result_clusters):
        bucket = members.get(label)
        if bucket is not None:
            bucket.append(sample_idx)

    centers = []
    empty_clusters = []
    for cluster_id in range(n_clusters):
        if members[cluster_id]:
            centers.append(random.choice(members[cluster_id]))
        else:
            # Placeholder; filled below once all regular medoids are known.
            centers.append(None)
            empty_clusters.append(cluster_id)

    if empty_clusters:
        taken = set(c for c in centers if c is not None)
        spare = [i for i in range(len(result_clusters)) if i not in taken]
        if len(spare) < len(empty_clusters):
            raise ValueError(
                "%d empty cluster(s) but only %d unused sample(s) to reseed from"
                % (len(empty_clusters), len(spare))
            )
        replacements = random.sample(spare, len(empty_clusters))
        for cluster_id, sample_idx in zip(empty_clusters, replacements):
            print("empty cluster %s: reseeded with sample %s"
                  % (cluster_id, sample_idx))
            centers[cluster_id] = sample_idx

    return centers

def count_E(centers_new,data,result_clusters_new):
    """
    Compute the clustering cost.

    The cost is the sum, over all samples, of the Euclidean distance between
    a sample and the medoid of the cluster it is labelled with.  Distances
    are accumulated cluster by cluster, in the order of ``centers_new``.

    Parameters
    ----------
    centers_new : sequence of sample indices into ``data`` (the medoids).
    data : indexable collection of samples supporting subtraction.
    result_clusters_new : per-sample cluster label (position in
        ``centers_new``).

    Returns
    -------
    The accumulated cost; ``0`` when nothing is accumulated.
    """
    total = 0
    for cluster_id, medoid in enumerate(centers_new):
        for sample_idx, point in enumerate(data):
            if result_clusters_new[sample_idx] == cluster_id:
                total += np.sqrt(np.sum(np.square(point - data[medoid])))
    return total
            
def KMedoids(n_clusters,data,max_iter):
    """
    Cluster ``data`` into ``n_clusters`` groups with a randomized K-medoids search.

    Starting from random medoids, each round draws one random candidate
    medoid from every current cluster, reassigns all samples to the nearest
    candidate, and keeps the candidate solution only if it lowers the total
    cost computed by ``count_E``.

    Parameters
    ----------
    n_clusters : number of clusters.
    data : the samples; passed through to ``clus_process`` and ``count_E``.
    max_iter : patience threshold.  The search stops once the counter of
        consecutive rounds without improvement exceeds this value; the
        counter restarts whenever a better solution is accepted.

    Returns
    -------
    (centers, result_clusters)
        ``centers`` is the list of medoid sample indices and
        ``result_clusters`` the per-sample cluster label of the best
        solution found.
    """
    # Initial random medoids and the assignment they induce.
    centers = initianlize_centers(n_clusters)
    result_clusters = clus_process(centers, data)
    # Use the real cost of the initial solution as the baseline.  A fixed
    # constant would block every update on data whose cost exceeds it.
    E = count_E(centers, data, result_clusters)

    xie = 0  # rounds since the last accepted improvement
    while xie <= max_iter:
        centers_new = chose_centers(result_clusters, n_clusters)
        # Reassign against the CANDIDATE medoids so that labels and cost
        # describe the same solution.
        result_clusters_new = clus_process(centers_new, data)
        E_new = count_E(centers_new, data, result_clusters_new)
        # Accept the candidate only when it lowers the cost.
        if E_new < E:
            centers = centers_new
            result_clusters = result_clusters_new
            E = E_new
            print("價值函數為:%s"%E)
            print("聚類中心:%s"%centers)
            xie = 0
        xie = xie + 1
        # Progress output every 10 stagnant rounds (xie is at least 1 here).
        if xie % 10 == 0:
            print(xie)

    return centers,result_clusters


def randomcolor(x):
    """
    Generate ``x`` distinct random colour codes.

    Each code is ``#`` followed by six characters drawn independently from
    the digits ``1``-``9`` and the letters ``A``-``F``.  A code that was
    already produced is discarded and drawn again.

    Parameters
    ----------
    x : how many colour codes to produce.

    Returns
    -------
    list of str
        The codes, in the order they were generated.
    """
    palette = ['1','2','3','4','5','6','7','8','9','A','B','C','D','E','F']
    colors = []
    while len(colors) < x:
        candidate = "#" + "".join(
            palette[random.randint(0, 14)] for _ in range(6)
        )
        if candidate not in colors:
            colors.append(candidate)
    return colors
                    
def main():
    """
    Run the whole pipeline: load, embed, cluster, plot, and save labels.

    Reads samples from ``text.txt``, computes a t-SNE embedding for
    plotting, clusters the original samples with ``KMedoids`` (k = 18,
    patience 10), draws a scatter plot coloured by cluster, and writes the
    per-sample cluster labels to a text file.

    Side effect: sets the module-level ``n_data`` to the number of samples,
    which ``initianlize_centers`` reads.
    """
    global n_data
    file="text.txt"
    data=im_txt(file)
    n_data=len(data)
    # Low-dimensional embedding used only for visualisation.
    # NOTE(review): recent scikit-learn releases rename TSNE's ``n_iter`` to
    # ``max_iter`` - confirm against the installed version.
    data_TSNE = TSNE(learning_rate=100,n_iter=5000).fit_transform(data)

    plt.figure(figsize=(12,8))
    # Number of clusters.
    k=18
    centers,result_clusters = KMedoids(k,data,10)
    color=randomcolor(k)
    # One colour per sample, looked up by its cluster label.
    colors = [color[label] for label in result_clusters]
    plt.subplot(222)
    plt.rcParams['figure.dpi'] = 300
    plt.scatter(data_TSNE[:,0],data_TSNE[:,1],s=10,c=colors)
    # The original format string had no placeholder, so k never appeared.
    plt.title('K-medoids Result of k={}'.format(k))
    # NOTE(review): the figure is never shown or saved explicitly - confirm
    # the runtime environment renders it automatically.
    out_txt("分類數數(ture).txt",result_clusters)

# Run the pipeline only when this file is executed as a script, so that
# importing it does not trigger file reads, t-SNE and clustering.
if __name__ == "__main__":
    main()

  


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM