"""Compare a single decision tree against a random forest on the wine dataset.

Two evaluations are performed:
1. A single 70/30 holdout split, scoring both fitted models on the test set.
2. Ten repetitions of 10-fold cross-validation on the full dataset, with the
   mean fold accuracy of each repetition plotted to show model stability.
"""
import matplotlib.pyplot as plt
from sklearn.datasets import load_wine
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.tree import DecisionTreeClassifier

# Load the wine dataset (features in wine.data, class labels in wine.target).
wine = load_wine()

# Hold out 30% of the samples as a test set.
# NOTE(review): no random_state here, so the split differs between runs.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    wine.data, wine.target, test_size=0.3
)

# Fit a single decision tree and a random forest on the training split.
clf = DecisionTreeClassifier(random_state=0)
rfc = RandomForestClassifier(random_state=0)
clf = clf.fit(Xtrain, Ytrain)
rfc = rfc.fit(Xtrain, Ytrain)

# Accuracy of each model on the held-out test set.
score_c = clf.score(Xtest, Ytest)
score_r = rfc.score(Xtest, Ytest)
print("Single Tree:{}".format(score_c), "Random forest:{}".format(score_r))

# Cross-validation: the dataset is split into 10 folds; each fold serves once
# as the test set while the remaining 9 train the model. Repeating the whole
# procedure 10 times shows how stable each model's mean accuracy is.
rfc_scores = []
clf_scores = []
for _ in range(10):
    rfc = RandomForestClassifier(n_estimators=25)
    # cross_val_score handles the train/test splitting internally, so the
    # complete dataset is passed in; cv=10 requests 10 folds.
    rfc_scores.append(cross_val_score(rfc, wine.data, wine.target, cv=10).mean())
    clf = DecisionTreeClassifier()
    clf_scores.append(cross_val_score(clf, wine.data, wine.target, cv=10).mean())

# Plot the mean CV accuracy of each repetition for both models.
plt.plot(range(1, 11), rfc_scores, label="Random Forest")
plt.plot(range(1, 11), clf_scores, label="Decision Tree")
plt.legend()
plt.show()