"""Train a small XGBoost regressor and plot its feature importance.

Running this script writes a feature map to ``xgb.fmap``, trains a booster,
and saves a horizontal bar chart of the normalised per-feature split counts
(fscore) to ``feature_importance_xgb.png``.
"""
import operator

import pandas as pd
import xgboost as xgb
from matplotlib import pylab as plt


def ceate_feature_map(features):
    """Write an XGBoost feature map for *features* to ``xgb.fmap``.

    One tab-separated line is written per feature: its zero-based index,
    its name, and the type flag ``q``.

    The (misspelled) function name is kept so existing callers keep working.

    Args:
        features: iterable of feature names, in column order.
    """
    # The context manager closes the file even if a write raises.
    with open('xgb.fmap', 'w') as outfile:
        for index, feat in enumerate(features):
            outfile.write('{0}\t{1}\tq\n'.format(index, feat))


def get_data():
    """Load ``../input/train.csv`` and mean-encode its object columns.

    Every object-dtype column has each value replaced by the mean of
    ``Hazard`` over the rows sharing that value.

    Returns:
        A ``(features, x_train, y_train)`` tuple: the column names from the
        third column onward, the frame restricted to those columns, and the
        ``Hazard`` column.
    """
    train = pd.read_csv("../input/train.csv")
    features = list(train.columns[2:])
    y_train = train.Hazard

    for feat in train.select_dtypes(include=['object']).columns:
        hazard_means = train.groupby([feat])['Hazard'].mean()
        # Assign the encoded column back explicitly: an in-place replace on
        # ``train[feat]`` may operate on a temporary and leave ``train``
        # unchanged.
        train[feat] = train[feat].map(hazard_means)

    x_train = train[features]
    return features, x_train, y_train


def get_data2():
    """Build a training set from scikit-learn's iris dataset.

    Returns:
        A ``(features, x_train, y_train)`` tuple: the four column names, a
        DataFrame of ``iris.data`` labelled with them, and a one-column
        DataFrame of ``iris.target``.
    """
    from sklearn.datasets import load_iris

    # Load the data.
    iris = load_iris()
    x_train = pd.DataFrame(iris.data)
    features = ["sepal_length", "sepal_width", "petal_length", "petal_width"]
    x_train.columns = features
    y_train = pd.DataFrame(iris.target)
    return features, x_train, y_train


# Alternative data source (needs ../input/train.csv):
# features, x_train, y_train = get_data()
features, x_train, y_train = get_data2()
ceate_feature_map(features)

# NOTE(review): newer XGBoost releases deprecate "reg:linear" (in favour of
# "reg:squarederror") and "silent" (in favour of "verbosity"). They are kept
# here for compatibility with old releases such as 0.6 -- confirm the target
# version before changing them.
xgb_params = {
    "objective": "reg:linear",
    "eta": 0.01,
    "max_depth": 8,
    "seed": 42,
    "silent": 1,
}
num_rounds = 1000

dtrain = xgb.DMatrix(x_train, label=y_train)
gbdt = xgb.train(xgb_params, dtrain, num_rounds)

# Sort ascending by score so the most important feature ends up at the top
# of the horizontal bar chart.
importance = gbdt.get_fscore(fmap='xgb.fmap')
importance = sorted(importance.items(), key=operator.itemgetter(1))

df = pd.DataFrame(importance, columns=['feature', 'fscore'])
# Normalise the scores so they sum to 1 (relative importance).
df['fscore'] = df['fscore'] / df['fscore'].sum()

# Only the bar chart is saved, so no other figures are created beforehand.
df.plot(kind='barh', x='feature', y='fscore', legend=False, figsize=(16, 10))
plt.title('XGBoost Feature Importance')
plt.xlabel('relative importance')
plt.gcf().savefig('feature_importance_xgb.png')
XGBoost 根據結構分數的增益來決定選擇哪個特徵的哪個分割點;某個特徵的重要性(fscore),就是它在所有樹中被用作分割點的次數之和。
參考:https://blog.csdn.net/q383700092/article/details/53698760
另外,使用 xgboost 時遇到了以下警告:

D:\Program\Python3.5\lib\site-packages\sklearn\cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
"This module will be removed in 0.20.", DeprecationWarning)
解決方法:先卸載原先版本的 xgboost,命令:pip uninstall xgboost
然後下載安裝新版本的 xgboost,地址:https://www.lfd.uci.edu/~gohlke/pythonlibs/#xgboost
命令:pip install xgboost-0.6-cp35-none-win_amd64.whl