AdaBoost Algorithm Implementation


July has already described the steps for implementing AdaBoost very clearly, so I won't repeat them here; below is my Python implementation.
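For reference, the two update rules the code implements are the standard AdaBoost ones: the m-th weak classifier receives the weight alpha_m = (1/2) * ln((1 - e_m) / e_m), where e_m is its weighted error rate on the training set, and each sample weight is then updated as w_i <- w_i * exp(-alpha_m * y_i * G_m(x_i)) and renormalized to sum to 1, so that misclassified samples count for more in the next round.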

# coding=utf-8
__author__ = "orisun"

import numpy as np
import sys


class OneDimClassifier(object):

    def __init__(self, x, y):
        self.x = x
        self.y = y
        self.split = 0
        # direct = True: points below the split are classified +1, points above are -1
        self.direct = True

    def train(self, w):
        '''Given each sample's label and weight, choose the split line
        that minimizes the total weighted error.
        '''
        # bind x, y and w together, one row per sample
        sx = np.concatenate((self.x, self.y.reshape(
            (self.y.shape[0], 1)), w.reshape((w.shape[0], 1))), axis=1)
        # sort the samples by feature value
        sx = [sx[i] for i in np.argsort(sx, axis=0)[:, 0]]

        prevY = sx[0][1]
        minErr = sys.float_info.max
        for i in range(1, len(sx)):
            # try a split at every point where the y value changes
            if sx[i][1] != prevY:
                err = 0.0
                d = True
                # weighted error, assuming points below the split are +1
                # and points above are -1
                for j in range(i):
                    if sx[j][1] != 1:
                        err += sx[j][2]
                for j in range(i, len(sx)):
                    if sx[j][1] != -1:
                        err += sx[j][2]
                if err > 0.5:
                    err = 1.0 - err
                    d = False
                if err < minErr:
                    self.split = (sx[i][0] + sx[i - 1][0]) / 2
                    minErr = err
                    self.direct = d
            prevY = sx[i][1]

        print('split =', self.split)
        return minErr

    def predict(self, x):
        pre_y = np.zeros(x.shape[0])
        factor = 1 if self.direct else -1
        for i in range(x.shape[0]):
            if x[i][0] <= self.split:
                pre_y[i] = 1 * factor
            else:
                pre_y[i] = -1 * factor
        return pre_y
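
# Illustrative aside (an assumed trace, not in the original post): on the toy
# data in __main__ below, a single stump trained with uniform weights
# w_i = 0.1 picks split = 2.5 with weighted error 0.3 (it misclassifies
# x = 6, 7, 8).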


class AdaBoost(object):

    def __init__(self, x, y, WeakClassifier, M):
        # feature vectors of the training samples
        self.x = np.array(x)
        # class labels of the samples, encoded as 1 or -1
        self.y = np.array(y)
        # initialize every sample's weight uniformly
        self.w = np.array([1.0 / self.x.shape[0]
                           for i in range(self.x.shape[0])])
        # constructor of the weak classifiers
        self.WeakClassifier = WeakClassifier
        # upper bound on the number of weak classifiers
        self.M = M
        # number of weak classifiers actually used
        self.Q = 0
        # the set of weak classifiers
        self.G = []
        # the weight of each weak classifier
        self.alpha = []

    def predict(self, x):
        '''Predict class labels by the weighted vote of the weak classifiers.
        '''
        if self.Q <= 0:
            raise Exception("must call train() before predict()")
        pre_y = np.zeros(x.shape[0])
        for i in range(self.Q):
            pre_y += self.G[i].predict(x) * self.alpha[i]
        return np.sign(pre_y)

    def train(self):
        '''Train the weak classifiers and their weights.
        '''
        for i in range(self.M):
            # instantiate a new weak classifier for this round
            self.G.append(self.WeakClassifier(self.x, self.y))
            # train it on the current sample weights; returns the weighted error rate
            e = self.G[i].train(self.w)
            # e must not equal 0.5 (alpha would be 0), so perturb it randomly
            while e == 0.5:
                e += np.random.uniform(-0.1, 0.1)
            # compute the weight of the current weak classifier
            a = 1.0 / 2.0 * np.log((1 - e) / e)
            self.alpha.append(a)
            # predict every sample's class with the current weak classifier
            pre_y = self.G[i].predict(self.x)
            # compute each sample's weight for the next round
            self.w *= np.exp(-a * self.y * pre_y)
            # normalize the weights so they form a probability distribution
            self.w /= self.w.sum()
            self.Q = i + 1
            errnum = (self.y != self.predict(self.x)).sum()
            if errnum == 0:
                print(self.Q, "weak classifiers are enough to bring the training error to zero")
                break
        # return the error rate on the training set
        return 1.0 * (self.y != self.predict(self.x)).sum() / self.x.shape[0]

if __name__ == '__main__':
    x = [[0], [2], [4], [6], [8], [1],  [3],  [5],  [7],  [9]]
    y = [1,    1,   -1,  1,   1,   1,    -1,   -1,   1,    -1]
    boost = AdaBoost(x, y, OneDimClassifier, 5)
    errratio = boost.train()
    print('weak classifier weights:', boost.alpha)
    print('error ratio:', errratio)
    print('tag of 4.3 is', boost.predict(np.array([[4.3]])))
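
As a sanity check (a minimal sketch, not part of the original post), the same toy problem can be fed to scikit-learn's AdaBoostClassifier with depth-1 decision trees, i.e. the same decision-stump weak learner. Note that the keyword is estimator in scikit-learn 1.2 and later (base_estimator in older releases):

import numpy as np
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

X = np.array([[0], [2], [4], [6], [8], [1], [3], [5], [7], [9]])
y = np.array([1, 1, -1, 1, 1, 1, -1, -1, 1, -1])

# max_depth=1 yields decision stumps, mirroring OneDimClassifier above
clf = AdaBoostClassifier(estimator=DecisionTreeClassifier(max_depth=1),
                         n_estimators=5)
clf.fit(X, y)
print('train accuracy:', clf.score(X, y))
print('tag of 4.3 is', clf.predict(np.array([[4.3]])))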

 

