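# A decision tree works by applying a sequence of nested if/else tests to the features. It can be used for both classification and regression. Advantages: it places few requirements on the data itself (little preprocessing is needed) and it is intuitive and easy to interpret. Disadvantages: it overfits easily and generalizes poorly. For regression, it cannot extrapolate beyond the range of the training data.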
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
# Load the breast cancer dataset that ships with scikit-learn.
cancer = load_breast_cancer()

# Stratify on the class labels (cancer.target) so the train and test splits
# keep the same class proportions. The stratify argument must be an array of
# labels, not the dataset container object itself.
x_train, x_test, y_train, y_test = train_test_split(
    cancer.data, cancer.target, stratify=cancer.target, random_state=42
)

# Baseline: a tree with no depth limit. random_state is fixed so the scores
# are reproducible from run to run.
tree = DecisionTreeClassifier(random_state=0)
tree.fit(x_train, y_train)
# Print the scores explicitly; a bare expression is discarded when this file
# is run as a script.
print(f"tree train accuracy: {tree.score(x_train, y_train):.3f}")
print(f"tree test accuracy: {tree.score(x_test, y_test):.3f}")

# The baseline overfits (compare its train and test accuracy), so apply
# pre-pruning by capping the depth of the tree with max_depth.
tree04 = DecisionTreeClassifier(max_depth=4, random_state=0)
tree04.fit(x_train, y_train)
print(f"tree04 train accuracy: {tree04.score(x_train, y_train):.3f}")
print(f"tree04 test accuracy: {tree04.score(x_test, y_test):.3f}")
# Ensemble methods that address the weaknesses of a single decision tree