Python實現天氣決策樹模型


本次作業實現了用於天氣預測的決策樹模型:建樹的框架部分已經完成,但樹的繪圖(可視化)部分尚未實現。

操作系統:win 10

編輯環境:anaconda

Python版本:3.6

先給出代碼:

from math import log
import operator

def calcShannonEnt(dataSet):
    """Return the base-2 Shannon entropy of the class labels in dataSet.

    Args:
        dataSet: a sequence of rows; the last element of each row is taken
            as that row's class label.

    Returns:
        The entropy as a float. An empty dataSet yields 0.0.
    """
    numEntries = len(dataSet)  # number of rows
    labelCounts = {}
    for featVec in dataSet:
        currentLabel = featVec[-1]  # last column holds the class label
        labelCounts[currentLabel] = labelCounts.get(currentLabel, 0) + 1
    shannonEnt = 0.0
    for count in labelCounts.values():
        prob = float(count) / numEntries  # relative frequency of this class
        shannonEnt -= prob * log(prob, 2)  # accumulate -p * log2(p)
    # The return must sit outside the loop: returning from inside it would
    # report only the first class's contribution instead of the full sum.
    return shannonEnt
    
def createDataSet1():
    """Build the sample weather data set.

    Returns:
        A (dataSet, labels) pair. dataSet is a list of 14 rows, each a list
        of four feature values followed by the class label; labels is the
        list of the four feature names, in column order.
    """
    # Feature names, one per feature column.
    labels = ['天氣', '溫度', '濕度', '風況']
    samples = (
        ('晴天', '高溫', '中濕', '無風', '不宜'),
        ('晴天', '高溫', '中濕', '有風', '不宜'),
        ('多雲', '高溫', '低濕', '無風', '適宜'),
        ('雨天', '低溫', '高濕', '無風', '適宜'),
        ('雨天', '低溫', '低濕', '無風', '適宜'),
        ('雨天', '低溫', '低濕', '有風', '不宜'),
        ('多雲', '低溫', '低濕', '有風', '適宜'),
        ('晴天', '中溫', '高濕', '無風', '不宜'),
        ('晴天', '低溫', '低濕', '無風', '適宜'),
        ('雨天', '中溫', '低濕', '無風', '適宜'),
        ('晴天', '中溫', '低濕', '有風', '適宜'),
        ('多雲', '中溫', '中濕', '有風', '適宜'),
        ('多雲', '高溫', '低濕', '無風', '適宜'),
        ('雨天', '中溫', '低濕', '有風', '不宜'),
    )
    # Hand back fresh mutable lists on every call.
    dataSet = [list(sample) for sample in samples]
    return dataSet, labels

def splitDataSet(dataSet, axis, value):
    """Select the rows whose column `axis` equals `value`, dropping that column.

    Args:
        dataSet: a sequence of list rows.
        axis: index of the column to test and remove.
        value: the value the column must equal for a row to be kept.

    Returns:
        A new list of new rows; the input rows are not modified.
    """
    return [
        row[:axis] + row[axis + 1:]
        for row in dataSet
        if row[axis] == value
    ]

def chooseBestFeatureToSplit(dataSet):
    """Pick the feature column whose split gives the largest information gain.

    The gain of a column is the value calcShannonEnt reports for the whole
    data set minus the size-weighted sum of the values it reports for each
    subset produced by splitDataSet on that column.

    Args:
        dataSet: a non-empty sequence of rows; every column except the last
            is treated as a feature.

    Returns:
        The index of the first column with the strictly largest positive
        gain, or -1 when no column has a gain above zero.
    """
    featureCount = len(dataSet[0]) - 1
    totalRows = float(len(dataSet))
    parentEntropy = calcShannonEnt(dataSet)

    winner, winnerGain = -1, 0
    for col in range(featureCount):
        # Weighted entropy of the partitions produced by this column.
        splitEntropy = 0
        for val in set([row[col] for row in dataSet]):
            branch = splitDataSet(dataSet, col, val)
            weight = len(branch) / totalRows
            splitEntropy += weight * calcShannonEnt(branch)
        gain = parentEntropy - splitEntropy
        # Strict comparison: on ties the earlier column is kept.
        if gain > winnerGain:
            winner, winnerGain = col, gain
    return winner
 
def majorityCnt(classList):
    """Return the most frequent label in classList.

    When several labels share the highest count, the one that appears first
    in classList wins (the sort is stable). An empty classList raises
    IndexError.
    """
    tally = {}
    for label in classList:
        tally[label] = tally.get(label, 0) + 1
    ranked = list(tally.items())
    ranked.sort(key=operator.itemgetter(1), reverse=True)  # highest count first
    topLabel = ranked[0][0]
    return topLabel

def createTree(dataSet, labels):
    """Recursively build a decision tree represented as nested dicts.

    Args:
        dataSet: a non-empty sequence of rows; the last element of each row
            is the class label, the preceding elements are feature values.
        labels: feature names, one per feature column of dataSet. This
            sequence is not modified.

    Returns:
        A class label when the node is a leaf, otherwise a dict of the form
        {featureName: {featureValue: subtree, ...}}.
    """
    classList = [example[-1] for example in dataSet]
    # Every row has the same class: this is a pure leaf.
    if classList.count(classList[0]) == len(classList):
        return classList[0]
    # Only the class column is left: no feature to split on, vote instead.
    if len(dataSet[0]) == 1:
        return majorityCnt(classList)
    bestFeat = chooseBestFeatureToSplit(dataSet)
    # -1 means no feature gave a positive gain. Using it as an index would
    # select the class column itself, so fall back to a majority vote.
    if bestFeat < 0:
        return majorityCnt(classList)
    bestFeatLabel = labels[bestFeat]
    # Work on a copy so the caller's label list survives the call intact.
    subLabels = list(labels)
    del subLabels[bestFeat]
    myTree = {bestFeatLabel: {}}
    for value in set([example[bestFeat] for example in dataSet]):
        myTree[bestFeatLabel][value] = createTree(
            splitDataSet(dataSet, bestFeat, value), subLabels)
    return myTree

if __name__ == '__main__':
    # Build the sample data, grow the tree from it, and print the result.
    dataSet, labels = createDataSet1()
    tree = createTree(dataSet, labels)
    print(tree)

其實現結果為:

手動畫出模型為:

另外,看到一種利用庫自帶函數的寫法,筆者還沒有實現,希望大家集思廣益:

https://zhuanlan.zhihu.com/p/25428390

本文參考鏈接:

http://blog.csdn.net/csqazwsxedc/article/details/65697652

http://blog.csdn.net/liz_zhong/article/details/51448218

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM