https://blog.csdn.net/qq_34739497/article/details/80508262
Yellowbrick 是一套名為「Visualizers」的視覺診斷工具,它擴展了 Scikit-Learn API 以允許我們監督模型的選擇過程。簡而言之,Yellowbrick 將 Scikit-Learn 與 Matplotlib 結合在一起,並以傳統 Scikit-Learn 的方式對模型進行可視化。
-
可視化器
可視化器(Visualizers)是一種從數據中學習的估計器,其主要目標是創建有助於理解模型選擇過程的可視化。用 Scikit-Learn 的術語來說,它們在可視化數據空間時類似於轉換器(transformer),在包裝模型估計器時則類似於「ModelCV」(例如 RidgeCV 和 LassoCV)方法。Yellowbrick 的主要目標是創建一個類似於 Scikit-Learn 的 API,其中一些流行的可視化器包括:
特征可視化
- Rank Features:單個或成對特征排序以檢測關系
- Radial Visualization:圍繞圓形圖分離實例
- PCA Projection:基於主成分分析映射實例
- Manifold Visualization:通過流形學習實現高維可視化
- Feature Importances:基於模型性能對特征進行排序
- Recursive Feature Elimination:按重要性搜索最佳特征子集
- Scatter and Joint Plots:通過特征選擇直接進行數據可視化
分類可視化
- Class Balance:了解類別分布如何影響模型
- Class Prediction Error:展示分類的誤差與主要來源
- Classification Report:可視化精度、召回率和 F1 分數的表征
- ROC/AUC Curves:受試者工作特征曲線(ROC)和曲線下面積(AUC)
- Confusion Matrices:類別決策制定的視覺描述
- Discrimination Threshold:搜索最佳分離二元類別的閾值
回歸可視化
- Prediction Error Plots:沿着目標值域找出模型失效的位置
- Residuals Plot:展示訓練數據與測試數據在殘差上的差異
- Alpha Selection:展示 alpha 的選擇如何影響正則化
聚類可視化
- K-Elbow Plot:使用肘法(elbow method)和多個指標來選擇 k
- Silhouette Plot:通過可視化輪廓系數值來選擇 k
模型選擇可視化
- Validation Curve:對模型的單個超參數進行調整
- Learning Curve:展示模型是否能從更多的數據或更低的復雜性中受益
文本可視化
- Term Frequency:可視化語料庫中詞項的頻率分布
- t-SNE Corpus Visualization:使用 t 分布隨機近鄰嵌入(t-SNE)來投影文檔
實例
# Visualize the pairwise covariance between features of the iris dataset.
from sklearn.datasets import load_iris
from yellowbrick.features import Rank2D

data = load_iris()

visualizer = Rank2D(features=data['feature_names'], algorithm='covariance')
visualizer.fit(data['data'], data['target'])  # Fit the data to the visualizer
visualizer.transform(data['data'])  # Transform the data
# NOTE(review): Yellowbrick 1.0+ renames poof() to show(); switch if you are
# on a newer release.
visualizer.poof()  # Draw/show/poof the data
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8

# Visualize feature importances from a gradient-boosted tree classifier.
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.ensemble import GradientBoostingClassifier
from yellowbrick.features import FeatureImportances

data = load_iris()

# Create the axes explicitly and hand them to the visualizer so the chart is
# drawn on this figure (the axes were previously created but never used).
fig = plt.figure()
ax = fig.add_subplot()

# relative=False is passed through unchanged from the original example.
viz = FeatureImportances(GradientBoostingClassifier(), ax=ax, relative=False)
viz.fit(data['data'], data['target'])  # Fit the data to the visualizer
viz.poof()  # Draw/show/poof the data
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11

# ROC curve visualization for a linear support vector machine.
# `data` is the iris dataset loaded with load_iris() in the earlier examples.
from sklearn.svm import LinearSVC
from yellowbrick.classifier import ROCAUC

model = LinearSVC()
visualizer = ROCAUC(model)

# Fit through the visualizer (which also fits the wrapped model) instead of
# fitting the bare model, so the visualizer is fitted before score() is called.
visualizer.fit(data['data'], data['target'])
visualizer.score(data['data'], data['target'])  # Evaluated on the training data
visualizer.poof()
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9

# Two-dimensional projection of the data using principal component analysis.
# `data` is the iris dataset loaded with load_iris() in the earlier examples.
from yellowbrick.features.pca import PCADecomposition

visualizer = PCADecomposition(scale=True, center=False, color="g", proj_dim=2)
visualizer.fit_transform(data['data'], data['target'])
visualizer.poof()
- 1
- 2
- 3
- 4
- 5
- 6

# Visualize precision, recall and F1 score for a linear support vector machine.
# `data` is the iris dataset loaded with load_iris() in the earlier examples.
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from yellowbrick.classifier import ClassificationReport

model = LinearSVC()

# Hold out 20% of the samples for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    data['data'], data['target'], test_size=0.2
)

visualizer = ClassificationReport(model, classes=data['target_names'])
visualizer.fit(X_train, y_train)  # Fit the visualizer and the model
visualizer.score(X_test, y_test)  # Evaluate the model on the test data
g = visualizer.poof()  # Draw/show/poof the data
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10

# Visualize how the choice of alpha affects regularization.
# `data` is the iris dataset loaded with load_iris() in the earlier examples.
import numpy as np
from sklearn.linear_model import LassoCV
from yellowbrick.regressor import AlphaSelection

# Create a list of alphas to cross-validate against:
# 400 values spaced evenly on a base-10 log scale from 1e-10 to 1e1.
alphas = np.logspace(-10, 1, 400)

# Instantiate the linear model and visualizer
model = LassoCV(alphas=alphas)
visualizer = AlphaSelection(model)

visualizer.fit(data['data'], data['target'])
g = visualizer.poof()
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15

# Choose the best number of clusters k with the elbow method.
# `data` is the iris dataset loaded with load_iris() in the earlier examples.
from sklearn.cluster import MiniBatchKMeans
from yellowbrick.cluster import KElbowVisualizer

# Instantiate the clustering model and visualizer
visualizer = KElbowVisualizer(MiniBatchKMeans(), k=(4, 12))

visualizer.fit(data["data"])  # Fit the training data to the visualizer
visualizer.poof()  # Draw/show/poof the data
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10

# Visualize how the amount of training data affects model performance.
# `data` is the iris dataset loaded with load_iris() in the earlier examples.
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.naive_bayes import MultinomialNB
from yellowbrick.model_selection import LearningCurve

# Create the learning curve visualizer
cv = StratifiedKFold(12)  # 12-fold stratified cross-validation split
sizes = np.linspace(0.3, 1.0, 10)  # 10 training-set fractions from 30% to 100%

viz = LearningCurve(
    MultinomialNB(),
    cv=cv,
    train_sizes=sizes,
    scoring='f1_weighted',
    n_jobs=4,
)

# Fit and poof the visualizer
viz.fit(data['data'], data['target'])
viz.poof()
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19

