Introduction and Usage of pgmpy, a Python Library for Learning Bayesian Networks
pgmpy
Parameter learning: Given a set of data samples and a DAG that captures the dependencies between the variables, estimate the (conditional) probability distributions of the individual variables.
Structure learning: Given a set of data samples, estimate a DAG that captures the dependencies between the variables.
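Both tasks map onto a small pgmpy API surface. Below is a minimal sketch, assuming an older pgmpy release matching the imports used later in this post (the toy data, column names, and DAG are purely illustrative; newer pgmpy versions pass the scoring method to estimate() rather than to the HillClimbSearch constructor):

import pandas as pd
import numpy as np
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator, HillClimbSearch, BicScore

# Toy data: C depends on A and B (illustrative only)
data = pd.DataFrame(np.random.randint(0, 2, size=(2000, 2)), columns=['A', 'B'])
data['C'] = data['A'] | data['B']

# Parameter learning: the DAG is given, its CPDs are estimated from the data
model = BayesianModel([('A', 'C'), ('B', 'C')])  # A -> C <- B
model.fit(data, estimator=MaximumLikelihoodEstimator)
print(model.get_cpds('C'))

# Structure learning: the DAG itself is estimated from the data
hc = HillClimbSearch(data, scoring_method=BicScore(data))
print(hc.estimate().edges())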
pgmpy.org
github.com/pgmpy/pgmpy_notebook/blob/master/notebooks
Code notes
Packages
""" 學習鏈接 : http://pgmpy.org/ https://github.com/pgmpy/pgmpy_notebook/blob/master/notebooks/9.%20Learning%20Bayesian%20Networks%20from%20Data.ipynb """ # ====================BN模型========================= # 貝葉斯模型 from pgmpy.models import BayesianModel # ====================參數學習========================= # 參數估計 from pgmpy.estimators import ParameterEstimator # MLE參數估計 from pgmpy.estimators import MaximumLikelihoodEstimator # Bayesian參數估計 from pgmpy.estimators import BayesianEstimator # ====================結構學習========================= # ========評分搜索========================= # 評分 from pgmpy.estimators import BdeuScore, K2Score, BicScore # 窮舉搜索 from pgmpy.estimators import ExhaustiveSearch # 爬山搜索 from pgmpy.estimators import HillClimbSearch # ======== 約束 ========================= from pgmpy.estimators import ConstraintBasedEstimator # 獨立性 from pgmpy.independencies import Independencies # ======== 混合 ========================= from pgmpy.estimators import MmhcEstimator # ==================== 通用庫 ========================= import pandas as pd import numpy as np
Parameter learning
def parameterLearning():
    data = pd.DataFrame(data={'fruit': ["banana", "apple", "banana", "apple", "banana", "apple", "banana",
                                        "apple", "apple", "apple", "banana", "banana", "apple", "banana"],
                              'tasty': ["yes", "no", "yes", "yes", "yes", "yes", "yes",
                                        "yes", "yes", "yes", "yes", "no", "no", "no"],
                              'size': ["large", "large", "large", "small", "large", "large", "large",
                                       "small", "large", "large", "large", "large", "small", "small"]})
    model = BayesianModel([('fruit', 'tasty'), ('size', 'tasty')])  # fruit -> tasty <- size

    print("========================================================")
    pe = ParameterEstimator(model, data)
    print("\n", pe.state_counts('fruit'))  # unconditional
    print("\n", pe.state_counts('size'))   # unconditional
    print("\n", pe.state_counts('tasty'))  # conditional on fruit and size

    print("========================================================")
    mle = MaximumLikelihoodEstimator(model, data)
    print(mle.estimate_cpd('fruit'))  # unconditional
    print(mle.estimate_cpd('tasty'))  # conditional

    print("========================================================")
    est = BayesianEstimator(model, data)
    print(est.estimate_cpd('tasty', prior_type='BDeu', equivalent_sample_size=10))
    # Setting equivalent_sample_size to 10 means that for each parent configuration,
    # we add the equivalent of 10 uniform samples
    # (here: +5 small bananas that are tasty and +5 that aren't).

    print("========================================================")
    # Calibrate all CPDs of `model` using MLE:
    model.fit(data, estimator=MaximumLikelihoodEstimator)

    print("========================================================")
    # generate data
    data = pd.DataFrame(np.random.randint(low=0, high=2, size=(5000, 4)))
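The function stops after generating a random 0/1 DataFrame. One thing that can be done with such data, sketched below under assumed column names and an arbitrary DAG (not necessarily what the linked notebook uses), is to fit all CPDs at once with a BDeu prior and read them back from the model:

import pandas as pd
import numpy as np
from pgmpy.models import BayesianModel
from pgmpy.estimators import BayesianEstimator

# 5000 samples of 4 binary variables; the column names are illustrative
data = pd.DataFrame(np.random.randint(low=0, high=2, size=(5000, 4)), columns=['A', 'B', 'C', 'D'])

# An arbitrary DAG over those columns, chosen for demonstration
model = BayesianModel([('A', 'B'), ('A', 'C'), ('D', 'C'), ('B', 'D')])

# Bayesian estimation with a BDeu prior smooths the raw counts of each CPD
model.fit(data, estimator=BayesianEstimator, prior_type='BDeu', equivalent_sample_size=10)
for cpd in model.get_cpds():
    print(cpd)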