PCA的主要方法是:奇異值分解。
具體內容見如下鏈接:
https://www.cnblogs.com/leftnoteasy/archive/2011/01/19/svd-and-applications.html
數據集
鏈接:https://pan.baidu.com/s/1ObxOER_eXsfZa4qQNHxUsw
提取碼:4zga
代碼
import numpy as np import pandas as pd import scipy.io as spio import matplotlib.pyplot as plt from scipy import misc # 圖片操作 def featureNormalize(X): n = X.shape[1] mu = np.mean(X,axis=0) sigma = np.std(X,axis=0) for i in range(n): X[:,i] = (X[:,i] - mu[i]) / sigma[i] return X,mu,sigma def plot_data_2d(X,marker): plt.plot(X[:,0],X[:,1],marker) return plt def drawline(plt,p1,p2,line_type): plt.plot(np.array([p1[0],p2[0]]),np.array([p1[1],p2[1]]),line_type) def projectData(X_norm,U,K): U_reduce = U[:,0:K] return np.dot(X_norm,U_reduce) def display_imageData(imgData): sum = 0 ''' 顯示100個數(若是一個一個繪制將會非常慢,可以將要畫的圖片整理好,放到一個矩陣中,顯示這個矩陣即可) - 初始化一個二維數組 - 將每行的數據調整成圖像的矩陣,放進二維數組 - 顯示即可 ''' m,n = imgData.shape width = np.int32(np.round(np.sqrt(n))) height = np.int32(n/width); rows_count = np.int32(np.floor(np.sqrt(m))) cols_count = np.int32(np.ceil(m/rows_count)) pad = 1 display_array = -np.ones((pad+rows_count*(height+pad),pad+cols_count*(width+pad))) for i in range(rows_count): for j in range(cols_count): max_val = np.max(np.abs(imgData[sum,:])) display_array[pad+i*(height+pad):pad+i*(height+pad)+height,pad+j*(width+pad):pad+j*(width+pad)+width] = imgData[sum,:].reshape(height,width,order="F")/max_val # order=F指定以列優先,在matlab中是這樣的,python中需要指定,默認以行 sum += 1 plt.figure(figsize=(10,8)) plt.imshow(display_array,cmap='gray') #顯示灰度圖像 plt.axis('off') plt.show() def recoverData(Z,U,K): X_rec = np.zeros((Z.shape[0],U.shape[0])) U_recude = U[:,0:K] X_rec = np.dot(Z,np.transpose(U_recude)) # 還原數據(近似) return X_rec def PCA_Face(): print("加載圖像數據") data = spio.loadmat("data_faces.mat") X = data['X'] m,n = X.shape #print(pd.DataFrame(X)) display_imageData(X[0:100,:]) X_norm,mu,sigma = featureNormalize(X) Sigma = np.dot(np.transpose(X_norm),X_norm)/m # 求Sigma U,S,V = np.linalg.svd(Sigma) # 奇異值分解 display_imageData(np.transpose(U[:,0:100])) # 顯示U的數據 K = 100 # 降維100維(原先是32*32=1024維的) Z = projectData(X_norm, U, K) print (u'投影之后Z向量的大小:%d %d' %Z.shape) print (u'顯示降維之后的數據......') X_rec = recoverData(Z, U, K) # 恢復數據 display_imageData(X_rec[0:100,:]) #數據降維 def PCA_2D(): data = spio.loadmat("pcadata.mat") X = data['X'] #print(X) m,n = X.shape #畫圖 plt = plot_data_2d(X,'o') plt.show() X_copy = X.copy() X_norm,mu,sigma = featureNormalize(X_copy) #print(X_norm,mu,sigma) #plot_data_2d(X_norm,'o') #畫歸一化數據 #plt.show() Sigma = np.dot(np.transpose(X_norm),X_norm) / m U,S,V = np.linalg.svd(Sigma) #print(U,S,V) plt = plot_data_2d(X,'o') drawline(plt, mu, mu+S[0]*(U[:,0]), 'r--') plt.show() K = 1 #設置降維維數, 這里是降了 1 維 Z = projectData(X_norm,U,K) #投影 #print(Z) #能觀察到數據以及是1維的了, 說明成功將數據降到了一維 # 恢復數據 X_rec = recoverData(Z,U,K) # 恢復 此處的 U 等於 V的轉置 plt = plot_data_2d(X_norm,'bo') plot_data_2d(X_rec,'ro') for i in range(X_norm.shape[0]): drawline(plt, X_norm[i,:], X_rec[i,:], '--k') plt.axis('square') plt.show() if __name__ == "__main__": PCA_2D() PCA_Face()