python fuzzy c-means demo


摘自:http://pythonhosted.org/scikit-fuzzy/auto_examples/plot_cmeans.html#example-plot-cmeans-py,加入了自己的理解!

# coding: utf-8

from __future__ import division, print_function
import numpy as np
import matplotlib.pyplot as plt
import skfuzzy as fuzz

# Matplotlib colour names; the plots below index this list by cluster label.
colors = ['b', 'orange', 'g', 'r', 'c', 'm', 'y', 'k', 'Brown', 'ForestGreen']

# Define three cluster centers as (x, y)
centers = [[4, 2],
           [1, 7],
           [5, 6]]

# Define three cluster sigmas in x and y, respectively
sigmas = [[0.8, 0.3],
          [0.3, 0.5],
          [1.1, 0.7]]

# Generate test data
np.random.seed(42)  # Set seed for reproducibility

# Start from EMPTY accumulators. Seeding them with a single zero would inject
# a spurious sample at (0, 0) labelled as cluster 0, giving 601 points
# instead of 200 per cluster. Cluster centers computed from this data will
# therefore differ slightly from runs that included that extra sample.
xpts = np.zeros(0)
ypts = np.zeros(0)
labels = np.zeros(0)

# Fake three Gaussian blobs: each one is described by its mean (center) and
# its per-axis standard deviation (sigma). 200 samples are drawn per blob and
# `labels` records which blob every sample came from.
for i, ((xmu, ymu), (xsigma, ysigma)) in enumerate(zip(centers, sigmas)):
    xpts = np.hstack((xpts, np.random.standard_normal(200) * xsigma + xmu))
    ypts = np.hstack((ypts, np.random.standard_normal(200) * ysigma + ymu))
    labels = np.hstack((labels, np.ones(200) * i))

# Scatter the generated samples, one colour per ground-truth label.
fig0, ax0 = plt.subplots()
for label, colour in zip(range(3), colors):
    # Boolean mask selecting the samples that belong to this label
    selected = labels == label
    ax0.plot(xpts[selected], ypts[selected], '.', color=colour)
ax0.set_title('Test data: 200 points x3 clusters.')

# Stack the coordinates row-wise: row 0 holds x values, row 1 holds y values,
# so every column of `alldata` is one sample.
alldata = np.vstack([xpts, ypts])

# Regenerate fuzzy model with 3 cluster centers - note that center ordering
# is random in this clustering algorithm, so the centers may change places.
#
# Train the fuzzy c-means model; the resulting cluster centers end up in
# `cntr`. The original author reported the following centers for their run
# (exact values depend on the input data and the random initialisation):
#   [ 5.26724628  6.14961671]
#   [ 1.01594428  6.98518109]
#   [ 3.95895105  2.05785626]
cntr, u_orig, _, _, _, _, _ = fuzz.cluster.cmeans(
    alldata,
    c=3,            # number of clusters
    m=2,            # fuzziness exponent
    error=0.005,
    maxiter=1000,
)

# Show 3-cluster model
fig2, ax2 = plt.subplots()
ax2.set_title('Trained model')

# Harden the fuzzy memberships once (index of the largest membership in each
# column of `u_orig`) instead of recomputing the argmax for every cluster and
# every axis inside the loop.
trained_membership = u_orig.argmax(axis=0)
for j in range(3):
    ax2.plot(alldata[0, trained_membership == j],
             alldata[1, trained_membership == j], 'o',
             label='series ' + str(j))

# Mark the cluster centers on the figure (and print them to stdout)
for pt in cntr:
    print(pt)
    ax2.plot(pt[0], pt[1], 's')

ax2.legend()


# Draw 1100 two-dimensional points uniformly from the unit square, then scale
# them so they cover the range [0, 10] in both x and y.
unit_samples = np.random.uniform(0, 1, (1100, 2))
newdata = unit_samples * 10

# Predict cluster membership for the new points with `cmeans_predict`, reusing
# the centers `cntr` from the trained 3-cluster model. `newdata` is transposed
# so that, like `alldata`, each column is one sample.
u, u0, d, jm, p, fpc = fuzz.cluster.cmeans_predict(
    newdata.T,
    cntr,
    m=2,
    error=0.005,
    maxiter=1000,
)

# Plot the classified uniform data. For visualization the maximum membership
# value is taken at each point (i.e. these are hardened, not fuzzy, results),
# but the full fuzzy result is the output from cmeans_predict.
#
# Notes carried over from the original author:
#   * `u` looks like this (one row per cluster, one column per point):
#       [[ 0.54256489  0.0631068   0.00291562 ...,  0.15580619  0.17543005
#          0.15652909]
#        [ 0.35176643  0.02712891  0.99530463 ...,  0.2065651   0.31637093
#          0.22570475]
#        [ 0.10566868  0.90976429  0.00177975 ...,  0.63762871  0.50819901
#          0.61776617]]
#   * `u` holds the degree to which each point belongs to each cluster (the
#     author reads it as a probability); the values in any one column add up
#     to 1.
#   * Taking the argmax along axis 0 yields, for every column, the row index
#     (0..2) of its largest value, e.g. [0 2 1 ..., 2 2 2]. That array is the
#     hard clustering result: 0 means cluster 0, 2 means cluster 2, and so on.
#   * Example: np.argmax([[1,2,3],[4,1,4],[2,8,9]], axis=0) gives [1,2,2].
cluster_membership = u.argmax(axis=0)  # Hardening for visualization

fig3, ax3 = plt.subplots()
ax3.set_title('Random points classifed according to known centers')

# Draw the points of each predicted cluster as its own series
for j in range(3):
    in_cluster = cluster_membership == j
    members = newdata[in_cluster]
    ax3.plot(members[:, 0], members[:, 1], 'o', label='series ' + str(j))
ax3.legend()
plt.show()

預測:

 

經過測試,是可以處理三維數據聚類的:

    # Same experiment with three-dimensional data: each center / sigma now has
    # an extra z component. Relies on `np` and `fuzz` imported earlier.
    centers = [[4, 2, 100],
               [1, 7, 200],
               [5, 6, 300]]
    sigmas = [[0.8, 0.3, 0.1],
              [0.3, 0.5, 0.2],
              [1.1, 0.7, 0.9]]
    # Generate test data
    np.random.seed(42)  # Set seed for reproducibility
    # Start from EMPTY accumulators. Seeding them with a single zero would add
    # a spurious sample at (0, 0, 0), far away from every cluster along z,
    # which drags the fitted centers toward the origin. Center values printed
    # below will therefore differ slightly from runs that included that sample.
    xpts = np.zeros(0)
    ypts = np.zeros(0)
    zpts = np.zeros(0)
    labels = np.zeros(0)

    # print() is called as a function so the snippet runs on Python 3 and
    # alongside `from __future__ import print_function`.
    print("*********************")
    # Draw 200 Gaussian samples per cluster along each of the three axes
    for i, ((xmu, ymu, zmu), (xsigma, ysigma, zsigma)) in enumerate(zip(centers, sigmas)):
        xpts = np.hstack((xpts, np.random.standard_normal(200) * xsigma + xmu))
        ypts = np.hstack((ypts, np.random.standard_normal(200) * ysigma + ymu))
        zpts = np.hstack((zpts, np.random.standard_normal(200) * zsigma + zmu))
        labels = np.hstack((labels, np.ones(200) * i))
    # One row per coordinate axis, one column per sample
    alldata = np.vstack((xpts, ypts, zpts))
    cntr, u_orig, _, _, _, _, _ = fuzz.cluster.cmeans(alldata, 3, 2, error=0.005, maxiter=1000)

    print("*********************")
    # Print the fitted cluster centers, one per line
    for pt in cntr:
        print(pt)
    print("*********************")

結果為:

*********************
*********************
[  3.95666441   2.02029976  99.72164117]
[   5.00568412    6.05765152  300.09246382]
[   1.00252037    7.06293498  199.99320473]
*********************

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM