Introduction and Usage of pgmpy, a Python Library for Bayesian Network Learning
pgmpy
Parameter learning: Given a set of data samples and a DAG that captures the dependencies between the variables, estimate the (conditional) probability distributions of the individual variables.
Structure learning: Given a set of data samples, estimate a DAG that captures the dependencies between the variables.
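To make these two tasks concrete, here is a minimal sketch. The toy variables 'rain' and 'wet_grass' are illustrative assumptions, and the HillClimbSearch call shown uses the older pgmpy API that the rest of these notes rely on (newer releases pass scoring_method to estimate() instead):

import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator, HillClimbSearch, BicScore

# Toy data over two binary variables (illustrative only)
data = pd.DataFrame({'rain':      [1, 0, 1, 0, 1, 1, 0, 0],
                     'wet_grass': [1, 0, 1, 1, 1, 1, 0, 0]})

# Parameter learning: the DAG is given, estimate its CPDs from the data
model = BayesianModel([('rain', 'wet_grass')])
model.fit(data, estimator=MaximumLikelihoodEstimator)
print(model.get_cpds('wet_grass'))

# Structure learning: only the data is given, search for a well-scoring DAG
hc = HillClimbSearch(data, scoring_method=BicScore(data))
print(hc.estimate().edges())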
pgmpy.org
github.com/pgmpy/pgmpy_notebook/blob/master/notebooks
Code notes
Imports
""" 学习链接 : http://pgmpy.org/ https://github.com/pgmpy/pgmpy_notebook/blob/master/notebooks/9.%20Learning%20Bayesian%20Networks%20from%20Data.ipynb """ # ====================BN模型========================= # 贝叶斯模型 from pgmpy.models import BayesianModel # ====================参数学习========================= # 参数估计 from pgmpy.estimators import ParameterEstimator # MLE参数估计 from pgmpy.estimators import MaximumLikelihoodEstimator # Bayesian参数估计 from pgmpy.estimators import BayesianEstimator # ====================结构学习========================= # ========评分搜索========================= # 评分 from pgmpy.estimators import BdeuScore, K2Score, BicScore # 穷举搜索 from pgmpy.estimators import ExhaustiveSearch # 爬山搜索 from pgmpy.estimators import HillClimbSearch # ======== 约束 ========================= from pgmpy.estimators import ConstraintBasedEstimator # 独立性 from pgmpy.independencies import Independencies # ======== 混合 ========================= from pgmpy.estimators import MmhcEstimator # ==================== 通用库 ========================= import pandas as pd import numpy as np
Parameter Learning
def parameterLearning():
    data = pd.DataFrame(data={'fruit': ["banana", "apple", "banana", "apple", "banana", "apple", "banana",
                                        "apple", "apple", "apple", "banana", "banana", "apple", "banana"],
                              'tasty': ["yes", "no", "yes", "yes", "yes", "yes", "yes",
                                        "yes", "yes", "yes", "yes", "no", "no", "no"],
                              'size': ["large", "large", "large", "small", "large", "large", "large",
                                       "small", "large", "large", "large", "large", "small", "small"]})
    model = BayesianModel([('fruit', 'tasty'), ('size', 'tasty')])  # fruit -> tasty <- size

    print("========================================================")
    # Raw state counts: how often each state (combination) occurs in the data
    pe = ParameterEstimator(model, data)
    print("\n", pe.state_counts('fruit'))  # unconditional
    print("\n", pe.state_counts('size'))   # unconditional
    print("\n", pe.state_counts('tasty'))  # conditional on fruit and size

    print("========================================================")
    # Maximum likelihood estimation of the CPDs
    mle = MaximumLikelihoodEstimator(model, data)
    print(mle.estimate_cpd('fruit'))  # unconditional
    print(mle.estimate_cpd('tasty'))  # conditional

    print("========================================================")
    # Bayesian estimation with a BDeu prior
    est = BayesianEstimator(model, data)
    print(est.estimate_cpd('tasty', prior_type='BDeu', equivalent_sample_size=10))
    # Setting equivalent_sample_size to 10 means that for each parent configuration
    # we add the equivalent of 10 uniform samples
    # (here: +5 small bananas that are tasty and +5 that aren't).

    print("========================================================")
    # Calibrate all CPDs of `model` using MLE:
    model.fit(data, estimator=MaximumLikelihoodEstimator)

    print("========================================================")
    # Generate random data
    data = pd.DataFrame(np.random.randint(low=0, high=2, size=(5000, 4)))
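The notes break off in the middle of the generated-data example. A hedged sketch of how that example typically continues, following the referenced notebook 9, is given below; the column names 'A'..'D', the DAG, and the BDeu prior are taken from that notebook and should be treated as assumptions rather than as part of the original notes:

# Continuation sketch (assumed, following the referenced pgmpy notebook 9):
# give the random data column names, define a DAG over them, and fit with a BDeu prior.
import numpy as np
import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.estimators import BayesianEstimator

data = pd.DataFrame(np.random.randint(low=0, high=2, size=(5000, 4)),
                    columns=['A', 'B', 'C', 'D'])
model = BayesianModel([('A', 'B'), ('A', 'C'), ('D', 'C'), ('B', 'D')])

# Fit all CPDs at once with Bayesian estimation (BDeu prior)
model.fit(data, estimator=BayesianEstimator, prior_type="BDeu")
for cpd in model.get_cpds():
    print(cpd)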