Computing PCA with scikit-learn
In [64]: data
Out[64]:
matrix([[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2. , 1. , 1.5, 1.1],
        [2.4, 0.7, 2.9, 2.2, 3. , 2.7, 1.6, 1.1, 1.6, 0.9]])

In [65]: from sklearn.decomposition import PCA

In [66]: pca = PCA(n_components=1)

In [69]: newData = pca.fit_transform(data.T)

In [70]: newData
Out[70]:
array([[-0.82797019],
       [ 1.77758033],
       [-0.99219749],
       [-0.27421042],
       [-1.67580142],
       [-0.9129491 ],
       [ 0.09910944],
       [ 1.14457216],
       [ 0.43804614],
       [ 1.22382056]])
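For reference, here is a minimal, self-contained script that reproduces the session above. PCA expects samples as rows, so the 2 x 10 data matrix is transposed before fitting. The final print of explained_variance_ratio_ is an addition for illustration; it was not part of the original session.

import numpy as np
from sklearn.decomposition import PCA

data = np.array([[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1],
                 [2.4, 0.7, 2.9, 2.2, 3.0, 2.7, 1.6, 1.1, 1.6, 0.9]])

pca = PCA(n_components=1)              # keep only the first principal component
new_data = pca.fit_transform(data.T)   # shape (10, 1): one score per sample

print(new_data)
print(pca.explained_variance_ratio_)   # fraction of total variance retained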
Computing PCA with numpy
In [11]: data
Out[11]:
array([[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2. , 1. , 1.5, 1.1],
       [2.4, 0.7, 2.9, 2.2, 3. , 2.7, 1.6, 1.1, 1.6, 0.9]])

In [12]: data = np.mat(data)

In [13]: data
Out[13]:
matrix([[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2. , 1. , 1.5, 1.1],
        [2.4, 0.7, 2.9, 2.2, 3. , 2.7, 1.6, 1.1, 1.6, 0.9]])

# Center the data: subtract each row's (feature's) mean
In [16]: data_mean = data - data.mean(axis=1)

In [17]: data_mean
Out[17]:
matrix([[ 0.69, -1.31,  0.39,  0.09,  1.29,  0.49,  0.19, -0.81, -0.31, -0.71],
        [ 0.49, -1.21,  0.99,  0.29,  1.09,  0.79, -0.31, -0.81, -0.31, -1.01]])

# Compute the covariance matrix
In [31]: data_cov = np.cov(data_mean)

In [32]: data_cov
Out[32]:
array([[0.61655556, 0.61544444],
       [0.61544444, 0.71655556]])

# Compute the eigenvalues and eigenvectors
In [34]: tzz, tzxl = np.linalg.eig(data_cov)

In [35]: tzz
Out[35]: array([0.0490834 , 1.28402771])

In [36]: tzxl
Out[36]:
array([[-0.73517866, -0.6778734 ],
       [ 0.6778734 , -0.73517866]])

# Select the eigenvector with the largest eigenvalue
# (note: the eigenvectors are the columns, not the rows)
In [54]: xl = tzxl.T[1]

In [55]: xl
Out[55]: array([-0.6778734 , -0.73517866])

# Project the centered data onto the chosen eigenvector
In [63]: data_mean.T * np.mat(xl).T
Out[63]:
matrix([[-0.82797019],
        [ 1.77758033],
        [-0.99219749],
        [-0.27421042],
        [-1.67580142],
        [-0.9129491 ],
        [ 0.09910944],
        [ 1.14457216],
        [ 0.43804614],
        [ 1.22382056]])
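The same manual steps can be collected into one small function. The sketch below uses plain ndarrays and the @ operator instead of the deprecated np.mat interface; the helper name pca_1d is illustrative and not from the original session.

import numpy as np

def pca_1d(x):
    """Project 2-D data (features x samples) onto its first principal component."""
    centered = x - x.mean(axis=1, keepdims=True)  # center each feature (row)
    cov = np.cov(centered)                        # covariance matrix of the features
    eigvals, eigvecs = np.linalg.eig(cov)         # eigenvectors are the COLUMNS
    top = eigvecs[:, np.argmax(eigvals)]          # direction of largest variance
    return centered.T @ top                       # one score per sample

data = np.array([[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1],
                 [2.4, 0.7, 2.9, 2.2, 3.0, 2.7, 1.6, 1.1, 1.6, 0.9]])

print(pca_1d(data))   # matches the scikit-learn scores up to sign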