本文主要介紹 LightGBM 中幾個常見的畫圖函數:
- plot_metric()函數(可以輔助我們判斷是否過擬合)
- plot_importance()函數(可以輔助我們進行特徵選擇)
- plot_tree()函數(可選)
- create_tree_digraph()函數(可選)
# -*- coding: utf-8 -*-
# Demo of LightGBM's plotting helpers on a synthetic binary-classification task:
#   * plot_metric          - metric curves recorded during training
#   * plot_importance      - feature-importance ranking
#   * plot_tree            - draw a single tree with matplotlib
#   * create_tree_digraph  - export a single tree through graphviz
import lightgbm as lgb
import matplotlib.pyplot as plt
import numpy as np

print('制造數據...')
# Features and labels are pure noise; the data only exists to give the
# plotting functions something to draw.
x_train = np.random.random((1000, 10))
y_train = np.random.rand(1000) > 0.5
x_test = np.random.random((100, 10))
# Use the same uniform draw as y_train so both sets share one label
# distribution (the standard-normal randn would skew the class balance).
y_test = np.random.rand(100) > 0.5

# Wrap the arrays in LightGBM datasets; the validation set references the
# training set.
lgb_train = lgb.Dataset(x_train, y_train)
lgb_test = lgb.Dataset(x_test, y_test, reference=lgb_train)

# Training parameters.
params = {
    # Labels are boolean, so train a binary classifier rather than relying on
    # the default regression objective.
    'objective': 'binary',
    'num_leaves': 5,
    # Several evaluation metrics may be requested at once. The native API
    # names binary log-loss 'binary_logloss'.
    'metric': ('auc', 'binary_logloss'),
    'verbose': 0,
}

# If this code is moved inside a function, consider declaring `evals_result`
# and `gbm` as globals so the plotting code can be placed anywhere.
evals_result = {}  # filled in by lgb.train with the per-iteration metric values

print('開始訓練...')
# NOTE(review): `evals_result=` and `verbose_eval=` match the LightGBM version
# this script was written for. LightGBM 4.0+ is expected to require
# callbacks=[lgb.record_evaluation(evals_result), lgb.log_evaluation(10)]
# instead - confirm against the installed version.
gbm = lgb.train(
    params,
    lgb_train,
    num_boost_round=100,
    valid_sets=[lgb_train, lgb_test],
    # Must be passed explicitly: this dict receives the metric history for
    # every dataset listed in valid_sets, which plot_metric reads below.
    evals_result=evals_result,
    verbose_eval=10,
)
print(evals_result)

print('畫出訓練結果...')
# `metric` must be one of the metrics requested in params['metric'].
ax = lgb.plot_metric(evals_result, metric='auc')
plt.show()

print('畫特征重要性排序...')
# max_num_features limits the chart to the 10 most important features;
# adjust as needed.
lgb.plot_importance(gbm, max_num_features=10)
plt.show()

print('Plot 3th tree...')
# tree_index is zero-based, so index 3 selects the fourth tree of the model.
lgb.plot_tree(gbm, tree_index=3, figsize=(20, 8), show_info=['split_gain'])
plt.show()

print('導出決策樹的pdf圖像到本地')
# Requires both the graphviz application and the graphviz Python package.
graph = lgb.create_tree_digraph(gbm, tree_index=3, name='Tree3')
graph.render(view=True)