Python實現PCA


PCA的主要方法是:奇異值分解。

具體內容見如下鏈接:

https://www.cnblogs.com/leftnoteasy/archive/2011/01/19/svd-and-applications.html

數據集

鏈接:https://pan.baidu.com/s/1ObxOER_eXsfZa4qQNHxUsw
提取碼:4zga

代碼

import numpy as np
import pandas as pd
import scipy.io as spio
import matplotlib.pyplot as plt
from scipy import misc      # 圖片操作

def featureNormalize(X):
    n = X.shape[1]
    mu = np.mean(X,axis=0)
    sigma = np.std(X,axis=0)
    
    for i in range(n):
        X[:,i] = (X[:,i] - mu[i]) / sigma[i]
    return X,mu,sigma

def plot_data_2d(X,marker):
    plt.plot(X[:,0],X[:,1],marker) 
    return plt

def drawline(plt,p1,p2,line_type):
    plt.plot(np.array([p1[0],p2[0]]),np.array([p1[1],p2[1]]),line_type) 

def projectData(X_norm,U,K):
    U_reduce = U[:,0:K]
    return np.dot(X_norm,U_reduce)

def display_imageData(imgData):
    sum = 0
    '''
    顯示100個數(若是一個一個繪制將會非常慢,可以將要畫的圖片整理好,放到一個矩陣中,顯示這個矩陣即可)
    - 初始化一個二維數組
    - 將每行的數據調整成圖像的矩陣,放進二維數組
    - 顯示即可
    '''
    m,n = imgData.shape
    width = np.int32(np.round(np.sqrt(n)))
    height = np.int32(n/width);
    rows_count = np.int32(np.floor(np.sqrt(m)))
    cols_count = np.int32(np.ceil(m/rows_count))
    pad = 1
    display_array = -np.ones((pad+rows_count*(height+pad),pad+cols_count*(width+pad)))
    for i in range(rows_count):
        for j in range(cols_count):
            max_val = np.max(np.abs(imgData[sum,:]))
            display_array[pad+i*(height+pad):pad+i*(height+pad)+height,pad+j*(width+pad):pad+j*(width+pad)+width] = imgData[sum,:].reshape(height,width,order="F")/max_val    # order=F指定以列優先,在matlab中是這樣的,python中需要指定,默認以行
            sum += 1
    plt.figure(figsize=(10,8))        
    plt.imshow(display_array,cmap='gray')   #顯示灰度圖像
    plt.axis('off')
    plt.show()

def recoverData(Z,U,K):
    X_rec = np.zeros((Z.shape[0],U.shape[0]))
    U_recude = U[:,0:K]
    X_rec = np.dot(Z,np.transpose(U_recude))  # 還原數據(近似)
    return X_rec

def PCA_Face():
    print("加載圖像數據")
    data = spio.loadmat("data_faces.mat")
    X = data['X']
    m,n = X.shape
    #print(pd.DataFrame(X))
    display_imageData(X[0:100,:])
    
    X_norm,mu,sigma = featureNormalize(X)
    Sigma = np.dot(np.transpose(X_norm),X_norm)/m  # 求Sigma
    U,S,V = np.linalg.svd(Sigma)                   # 奇異值分解
    display_imageData(np.transpose(U[:,0:100]))     # 顯示U的數據
    
    K = 100    # 降維100維(原先是32*32=1024維的)
    Z = projectData(X_norm, U, K)
    print (u'投影之后Z向量的大小:%d %d' %Z.shape)
    
    print (u'顯示降維之后的數據......')
    X_rec = recoverData(Z, U, K) # 恢復數據
    display_imageData(X_rec[0:100,:])

#數據降維
def PCA_2D():
    data = spio.loadmat("pcadata.mat")
    X = data['X']
    #print(X)
    m,n = X.shape
    #畫圖
    plt = plot_data_2d(X,'o')
    plt.show()
    
    X_copy = X.copy()
    X_norm,mu,sigma = featureNormalize(X_copy)
    #print(X_norm,mu,sigma)
    #plot_data_2d(X_norm,'o') #畫歸一化數據
    #plt.show()
    
    Sigma = np.dot(np.transpose(X_norm),X_norm) / m
    U,S,V = np.linalg.svd(Sigma)
    #print(U,S,V)
    
    plt = plot_data_2d(X,'o')
    drawline(plt, mu, mu+S[0]*(U[:,0]), 'r--')
    plt.show()
    
    K = 1 #設置降維維數, 這里是降了 1 維
    Z = projectData(X_norm,U,K) #投影
    #print(Z)   #能觀察到數據以及是1維的了, 說明成功將數據降到了一維
    
    # 恢復數據
    X_rec = recoverData(Z,U,K)    # 恢復   此處的 U 等於 V的轉置
    plt = plot_data_2d(X_norm,'bo')
    plot_data_2d(X_rec,'ro')
    for i in range(X_norm.shape[0]):
        drawline(plt, X_norm[i,:], X_rec[i,:], '--k')
    plt.axis('square')
    plt.show()  
    
    
    
if __name__ == "__main__":
    PCA_2D()
    PCA_Face()

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM