package com.excellence.splitsentence; import java.net.UnknownHostException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; import org.jblas.ComplexDoubleMatrix; import org.jblas.ComplexFloatMatrix; import org.jblas.DoubleMatrix; import org.jblas.Eigen; import org.jblas.FloatMatrix; import com.mongodb.BasicDBList; import com.mongodb.DB; import com.mongodb.DBCollection; import com.mongodb.DBCursor; import com.mongodb.MongoClient; import com.mongodb.MongoCredential; import com.mongodb.ServerAddress; public class PCA { /** * Reduce matrix dimension 減少矩陣維度 * @param source 源矩陣 * @param dimension 目標維度 * @return Target matrix 返回目標矩陣 */ public static FloatMatrix dimensionReduction(FloatMatrix source, int dimension) { //C=X*X^t/m 矩陣*矩陣^異或/列數 FloatMatrix covMatrix = source.mmul(source.transpose()).div(source.columns); ComplexFloatMatrix eigVal = Eigen.eigenvalues(covMatrix); ComplexFloatMatrix[] eigVectorsVal = Eigen.eigenvectors(covMatrix); ComplexFloatMatrix eigVectors = eigVectorsVal[0]; //通過特征值將符號向量從大到小排序 List<PCABean> beans = new ArrayList<PCA.PCABean>(); for (int i = 0; i < eigVectors.columns; i++) { beans.add(new PCABean(eigVal.get(i).real(), eigVectors.getColumn(i))); } Collections.sort(beans); FloatMatrix newVec = new FloatMatrix(dimension, beans.get(0).vector.rows); for (int i = 0; i < dimension; i++) { ComplexFloatMatrix dm = beans.get(i).vector; FloatMatrix real = dm.getReal(); newVec.putRow(i, real); } return newVec.mmul(source); } static class PCABean implements Comparable<PCABean> { float eigenValue; ComplexFloatMatrix vector; public PCABean(Float eigenValue, ComplexFloatMatrix vector) { super(); this.eigenValue = eigenValue; this.vector = vector; } @Override public int compareTo(PCABean o) { return Float.compare(o.eigenValue, eigenValue); } @Override public String toString() { return "PCABean [eigenValue=" + eigenValue + ", vector=" + vector + "]"; } }
}
如何調用?
// Usage example: reduce one document vector to 10 dimensions.
// `docvector` is defined outside this snippet; only its getElementArray() call is visible here.
float[] vector = docvector.getElementArray();
// Wrap the raw values in a jblas matrix.
// NOTE(review): the float[] constructor presumably builds a single column vector — confirm against the jblas docs.
FloatMatrix d = new FloatMatrix(vector);
// Project onto the 10 eigenvectors with the largest eigenvalues.
FloatMatrix result = PCA.dimensionReduction(d, 10);