20171029 機器學習之特徵值選擇


在處理具有很多特徵值的數據時,我們往往需要找出對結果 Y 權重(影響)最大的幾個特徵值,以便進行降維。

於是我們可以用以下這段代碼:

GitHub:https://github.com/chenjunhaolefa/AI/blob/master/MachineLearning/FeatureSelection.py

# coding=utf-8
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.ensemble import ExtraTreesClassifier

# Work around garbled Chinese text in plots: use the SimHei font for
# sans-serif text and disable the Unicode minus glyph on the axes.
mpl.rcParams.update({
    'font.sans-serif': [u'simHei'],
    'axes.unicode_minus': False,
})

# Demo data: a random 3x3 feature matrix X and the label vector Y = [1, 2, 3].
X = np.random.random((3,3))
Y = np.arange(1,4)
# Format into a single string so the call is valid (and prints the same
# "X Y" text) on both Python 2 and Python 3; the bare `print X,Y` statement
# is a SyntaxError on Python 3.
print("%s %s" % (X, Y))

def FeatureSelection(X, Y):
    """Rank the features of X by importance for predicting Y, then plot them.

    Fits an ExtraTreesClassifier (10 trees, random_state=0) on (X, Y),
    prints the feature indices ordered from most to least important,
    prints one ranking line per feature, and shows a bar chart of the
    importances with the per-tree standard deviation as error bars.

    Args:
        X: Feature matrix handed to ``forest.fit``; one column per feature.
        Y: Target labels handed to ``forest.fit``.

    Returns:
        None. The ranking is printed and the chart is displayed.

    Example:
        A synthetic task with 3 informative features out of 10 can be
        built with ``sklearn.datasets.make_classification``::

            X, Y = make_classification(n_samples=10,
                                       n_features=10,
                                       n_informative=3,
                                       n_redundant=0,
                                       n_repeated=0,
                                       n_classes=2,
                                       random_state=0,
                                       shuffle=False)
            FeatureSelection(X, Y)
    """
    # Build an extra-trees forest; RandomForestClassifier(n_estimators=10)
    # can be substituted here to use a random forest instead.
    forest = ExtraTreesClassifier(n_estimators=10, random_state=0)

    # Fit, then read the aggregated importances and their spread across
    # the individual trees of the forest.
    forest.fit(X, Y)
    importances = forest.feature_importances_
    std = np.std([tree.feature_importances_ for tree in forest.estimators_],
                 axis=0)

    # Feature indices sorted by decreasing importance.
    indices = np.argsort(importances)[::-1]
    # Take the feature count from the fitted model rather than X.shape so
    # that inputs without a .shape attribute (e.g. nested lists) also work.
    n_features = len(importances)

    print(indices)
    # Print the feature ranking
    print(u"特征排名 :")
    for rank, feature in enumerate(indices, start=1):
        print("%d. feature %d (%f)" % (rank, feature, importances[feature]))

    # Plot the feature importances of the forest
    positions = range(n_features)
    plt.figure()
    plt.title(u"特征選擇")
    plt.bar(positions, importances[indices],
            color="r", yerr=std[indices], align="center")
    plt.xticks(positions, indices)
    plt.xlim([-1, n_features])
    plt.show()

# Run the feature ranking on the module-level demo data X and Y.
FeatureSelection(X,Y)

 


免責聲明!

本站轉載的文章僅供個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私或權益,請聯繫本站郵箱 yoyou2525@163.com 刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM