import numpy as np
# Wildcard import kept from the original listing: code elsewhere in the file
# may rely on bare NumPy names (e.g. ``linalg``). The functions below only use
# the explicit ``np`` namespace.
from numpy import *  # noqa: F401,F403


def loadExData():
    """Return the example data set as a nested list.

    The data has 7 rows and 5 columns (the original comment said "5*7",
    which had the dimensions swapped).
    """
    return [
        [1, 1, 1, 0, 0],
        [2, 2, 2, 0, 0],
        [1, 1, 1, 0, 0],
        [5, 5, 5, 0, 0],
        [1, 1, 0, 2, 2],
        [0, 0, 0, 3, 3],
        [0, 0, 0, 1, 1],
    ]


def svd_():
    """Run a singular value decomposition on the example data set.

    Prints the singular values as a side effect.

    Returns:
        tuple: ``(U, Sigma, VT)`` exactly as returned by ``numpy.linalg.svd``
        for the matrix produced by ``loadExData()``; ``Sigma`` is the 1-D
        array of singular values.
    """
    data = loadExData()
    U, Sigma, VT = np.linalg.svd(data)
    print('SVD分解Sigma的結果為:', Sigma)
    return U, Sigma, VT


# Singular values reported for the example data:
#   array([9.72140007e+00, 5.29397912e+00, 6.84226362e-01,
#          1.52344501e-15, 2.17780259e-16])
# The last two values are negligibly small, so they can be dropped.


def reconstructMat(num_sv=3):
    """Approximately rebuild the example matrix from its leading singular values.

    Args:
        num_sv: How many of the leading singular values (and the matching
            columns of ``U`` / rows of ``VT``) to keep. Defaults to 3, the
            value hard-coded in the original listing.

    Returns:
        numpy.matrix: The product ``U[:, :num_sv] * diag(Sigma[:num_sv]) *
        VT[:num_sv, :]``.

    Raises:
        ValueError: If ``num_sv`` is negative or larger than the number of
            singular values returned by ``svd_()``.
    """
    U, Sigma, VT = svd_()
    available = Sigma.shape[0]
    if not 0 <= num_sv <= available:
        raise ValueError(
            "num_sv must be between 0 and %d, got %r" % (available, num_sv)
        )
    # np.diag builds the square diagonal matrix that the original wrote out
    # by hand for the 3x3 case.
    approx = U[:, :num_sv] @ np.diag(Sigma[:num_sv]) @ VT[:num_sv, :]
    # ``numpy.mat`` was removed in NumPy 2.0; ``asmatrix`` keeps the
    # original ``numpy.matrix`` return type for existing callers.
    return np.asmatrix(approx)
重構的矩陣結果如下:
>>> reconMat
matrix([[ 1.00000000e+00,  1.00000000e+00,  1.00000000e+00, -1.51788304e-17, -1.02999206e-17],
        [ 2.00000000e+00,  2.00000000e+00,  2.00000000e+00,  1.73472348e-18,  1.12757026e-17],
        [ 1.00000000e+00,  1.00000000e+00,  1.00000000e+00,  7.61977287e-16,  7.66747776e-16],
        [ 5.00000000e+00,  5.00000000e+00,  5.00000000e+00,  6.59194921e-17,  9.02056208e-17],
        [ 1.00000000e+00,  1.00000000e+00, -7.21644966e-16,  2.00000000e+00,  2.00000000e+00],
        [ 1.66533454e-16,  1.30451205e-15, -8.88178420e-16,  3.00000000e+00,  3.00000000e+00],
        [ 6.24500451e-17,  4.57966998e-16, -3.33066907e-16,  1.00000000e+00,  1.00000000e+00]])
原始矩陣為:
>>> loadExData()
[[1, 1, 1, 0, 0],
[2, 2, 2, 0, 0],
[1, 1, 1, 0, 0],
[5, 5, 5, 0, 0],
[1, 1, 0, 2, 2],
[0, 0, 0, 3, 3],
[0, 0, 0, 1, 1]]
主要問題:如何確定應該保留多少個奇異值?
有很多啟發式策略,兩種典型的方法是:(1)保留矩陣中90%的能量信息,其中總能量定義為所有奇異值的平方和,即保留前 k 個奇異值,使它們的平方和達到總能量的90%;(2)當有成千上萬個奇異值時,只保留前2000或3000個奇異值。