Implementing a Support Vector Machine in Python: The Concrete Implementation


Code source: https://github.com/eriklindernoren/ML-From-Scratch

Support vector machine code:

from __future__ import division, print_function
import numpy as np
import cvxopt
from mlfromscratch.utils import train_test_split, normalize, accuracy_score
from mlfromscratch.utils.kernels import *
from mlfromscratch.utils import Plot

# Hide cvxopt output
cvxopt.solvers.options['show_progress'] = False

class SupportVectorMachine(object):
    """The Support Vector Machine classifier.
    Uses cvxopt to solve the quadratic optimization problem.

    Parameters:
    -----------
    C: float
        Penalty term.
    kernel: function
        Kernel function. Can be either polynomial, rbf or linear.
    power: int
        The degree of the polynomial kernel. Will be ignored by the other
        kernel functions.
    gamma: float
        Used in the rbf kernel function.
    coef: float
        Bias term used in the polynomial kernel function.
    """
    def __init__(self, C=1, kernel=rbf_kernel, power=4, gamma=None, coef=4):
        # Penalty coefficient C
        self.C = C
        # Kernel function
        self.kernel = kernel
        self.power = power
        self.gamma = gamma
        self.coef = coef
        self.lagr_multipliers = None
        self.support_vectors = None
        self.support_vector_labels = None
        self.intercept = None

    def fit(self, X, y):
        # number of samples, number of features
        n_samples, n_features = np.shape(X)
        
        # Set gamma to 1/n_features by default
        if not self.gamma:
            self.gamma = 1 / n_features

        # Initialize the kernel function with its parameters
        self.kernel = self.kernel(
            power=self.power,
            gamma=self.gamma,
            coef=self.coef)

        # Calculate the kernel (Gram) matrix
        kernel_matrix = np.zeros((n_samples, n_samples))
        for i in range(n_samples):
            for j in range(n_samples):
                kernel_matrix[i, j] = self.kernel(X[i], X[j])

        # Define the quadratic optimization problem
        # cvxopt is a convex optimization library
        # cvxopt.matrix(array, dims) rearranges array into the given dimensions
        # np.outer computes the outer product of two vectors
        P = cvxopt.matrix(np.outer(y, y) * kernel_matrix, tc='d')
        q = cvxopt.matrix(np.ones(n_samples) * -1)
        A = cvxopt.matrix(y, (1, n_samples), tc='d')
        b = cvxopt.matrix(0, tc='d')

        if not self.C:
            # np.identity(n) returns an n*n identity matrix (ones on the
            # diagonal, zeros elsewhere); dtype is optional, float by default.
            G = cvxopt.matrix(np.identity(n_samples) * -1)
            h = cvxopt.matrix(np.zeros(n_samples))
        else:
            G_max = np.identity(n_samples) * -1
            G_min = np.identity(n_samples)
            G = cvxopt.matrix(np.vstack((G_max, G_min)))
            h_max = cvxopt.matrix(np.zeros(n_samples))
            h_min = cvxopt.matrix(np.ones(n_samples) * self.C)
            h = cvxopt.matrix(np.vstack((h_max, h_min)))

        # Solve the quadratic optimization problem using cvxopt
        minimization = cvxopt.solvers.qp(P, q, G, h, A, b)

        # Lagrange multipliers
        # np.ravel flattens the matrix into a 1-D array
        lagr_mult = np.ravel(minimization['x'])

        # Extract support vectors
        # Get indices of non-zero Lagrange multipliers
        idx = lagr_mult > 1e-7
        # Get the corresponding lagr. multipliers
        self.lagr_multipliers = lagr_mult[idx]
        # Get the samples that will act as support vectors
        self.support_vectors = X[idx]
        # Get the corresponding labels
        self.support_vector_labels = y[idx]

        # Calculate intercept with first support vector
        self.intercept = self.support_vector_labels[0]
        for i in range(len(self.lagr_multipliers)):
            self.intercept -= self.lagr_multipliers[i] * self.support_vector_labels[
                i] * self.kernel(self.support_vectors[i], self.support_vectors[0])

    def predict(self, X):
        y_pred = []
        # Iterate through list of samples and make predictions
        for sample in X:
            prediction = 0
            # Determine the label of the sample by the support vectors
            for i in range(len(self.lagr_multipliers)):
                prediction += self.lagr_multipliers[i] * self.support_vector_labels[
                    i] * self.kernel(self.support_vectors[i], sample)
            prediction += self.intercept
            y_pred.append(np.sign(prediction))
        return np.array(y_pred)
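
For reference, predict() evaluates the standard kernel SVM decision function over the support vectors:

$$\hat{y}(x) = \operatorname{sign}\Big(\sum_{i \in SV} \alpha_i y_i K(x_i, x) + b\Big)$$

where the $\alpha_i$ are the Lagrange multipliers, the $y_i$ the support vector labels, and $b$ the intercept.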

Some of the functions used:

cvxopt.solvers.qp(P, q, G, h, A, b)

Standard form:

$$\min_{x}\ \frac{1}{2} x^{T} P x + q^{T} x \quad \text{s.t.}\quad Gx \preceq h,\ Ax = b$$
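
As a quick sanity check (a minimal sketch of my own, not from the original post), here is cvxopt.solvers.qp on a two-variable QP: minimize x1^2 + x2^2 subject to x1 + x2 = 1 and x >= 0, whose optimum is (0.5, 0.5):

import numpy as np
import cvxopt

cvxopt.solvers.options['show_progress'] = False

# Objective (1/2) x^T P x + q^T x with P = 2I, q = 0, i.e. x1^2 + x2^2
P = cvxopt.matrix(2.0 * np.identity(2))
q = cvxopt.matrix(np.zeros(2))
# G x <= h encodes x >= 0 as -x <= 0
G = cvxopt.matrix(-np.identity(2))
h = cvxopt.matrix(np.zeros(2))
# A x = b encodes x1 + x2 = 1
A = cvxopt.matrix(np.ones((1, 2)))
b = cvxopt.matrix(1.0)

sol = cvxopt.solvers.qp(P, q, G, h, A, b)
print(np.ravel(sol['x']))  # approximately [0.5 0.5]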

Kernel function definitions:

import numpy as np


def linear_kernel(**kwargs):
    def f(x1, x2):
        return np.inner(x1, x2)
    return f


def polynomial_kernel(power, coef, **kwargs):
    def f(x1, x2):
        return (np.inner(x1, x2) + coef)**power
    return f


def rbf_kernel(gamma, **kwargs):
    def f(x1, x2):
        distance = np.linalg.norm(x1 - x2) ** 2
        return np.exp(-gamma * distance)
    return f

np.inner() returns the inner product of two vectors. np.linalg.norm() computes a norm; the ord parameter selects which one, and when it is omitted the result is the square root of the sum of squared elements (the 2-norm for vectors, the Frobenius norm for matrices).
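
A small illustration (my own sketch, not in the original post) of the three kernels on two toy vectors:

import numpy as np

x1 = np.array([1.0, 2.0])
x2 = np.array([3.0, 4.0])

print(np.inner(x1, x2))                             # linear kernel: 1*3 + 2*4 = 11.0
print((np.inner(x1, x2) + 1) ** 4)                  # polynomial, coef=1, power=4: 12**4 = 20736.0
print(np.exp(-0.5 * np.linalg.norm(x1 - x2) ** 2))  # rbf, gamma=0.5: exp(-4) ~ 0.0183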

Main driver code:

from __future__ import division, print_function
import numpy as np
from sklearn import datasets

# Import helper functions
import sys
sys.path.append("/content/drive/My Drive/learn/ML-From-Scratch/")
from mlfromscratch.utils import train_test_split, normalize, accuracy_score, Plot
from mlfromscratch.utils.kernels import *
from mlfromscratch.supervised_learning import SupportVectorMachine

def main():
    data = datasets.load_iris()
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = -1
    y[y == 2] = 1
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    clf = SupportVectorMachine(kernel=polynomial_kernel, power=4, coef=1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Support Vector Machine", accuracy=accuracy)


if __name__ == "__main__":
    main()

The dataset used is the Iris dataset bundled with sklearn. After the split, the training set has shape:

(67, 4) (67,)

Note that the Iris dataset contains three classes, labeled 0, 1 and 2. Only classes 1 and 2 are extracted here; the samples are normalized and the labels are re-encoded as +1 and -1.

A few samples and their corresponding labels:

[[0.69198788 0.34599394 0.58626751 0.24027357]
 [0.70779525 0.31850786 0.60162596 0.1887454 ]
 [0.73239618 0.38547167 0.53966034 0.15418867]
 [0.69299099 0.34199555 0.60299216 0.19799743]
 [0.72712585 0.26661281 0.60593821 0.18178146]]
[ 1 -1 -1  1  1]

The kernel used here is the polynomial kernel, whose code was given above, with power=4 and coef=1.
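
As a tiny check (a sketch of my own, reusing the polynomial_kernel definition from above on the first two printed samples):

import numpy as np

def polynomial_kernel(power, coef, **kwargs):
    # repeated from the kernel definitions above
    def f(x1, x2):
        return (np.inner(x1, x2) + coef) ** power
    return f

kernel = polynomial_kernel(power=4, coef=1)
x1 = np.array([0.69198788, 0.34599394, 0.58626751, 0.24027357])
x2 = np.array([0.70779525, 0.31850786, 0.60162596, 0.1887454])
print(kernel(x1, x2))  # one entry of the kernel matrix computed in fit()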

Using this input as an example, let's step through the whole SVM fitting process:

(1) fit() receives X_train and y_train; n_samples=67, n_features=4.

(2) gamma is not used by the polynomial kernel (it is still set to 1/n_features by default); the kernel is instantiated as (x1·x2 + 1)**4.

(3) The kernel matrix is computed via the double loop in fit(); its size is (67, 67). A vectorized alternative is sketched just below.
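
As an aside (a sketch of my own, not from the repo), for the polynomial kernel the whole (67, 67) Gram matrix can be computed in one vectorized expression instead of the O(n^2) Python double loop:

import numpy as np

def polynomial_gram(X, power=4, coef=1):
    # (X X^T + coef) ** power applied element-wise; entry (i, j) equals
    # polynomial_kernel(power, coef)(X[i], X[j]) from the loop in fit()
    return (X @ X.T + coef) ** power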

(4) P, q, A, b are computed, with sizes:

P: (67, 67)
q: (67, 1)
A: (1, 67)
b: (1, 1)

(5) G and h are computed. G stacks -I (enforcing α_i ≥ 0) on top of I (enforcing α_i ≤ C), which is why it has 2 × 67 = 134 rows; the full mapping to the SVM dual is sketched after the shapes:

G: (134, 67)
h: (134, 1)
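
For context, the QP being solved here is the dual of the soft-margin SVM (standard theory, stated for convenience):

$$\min_{\alpha}\ \frac{1}{2}\sum_{i,j}\alpha_i\alpha_j y_i y_j K(x_i, x_j) - \sum_i \alpha_i \quad \text{s.t.}\quad \sum_i \alpha_i y_i = 0,\ \ 0 \le \alpha_i \le C$$

which maps onto the cvxopt standard form with $P_{ij} = y_i y_j K(x_i, x_j)$, $q = -\mathbf{1}$, $A = y^{T}$, $b = 0$, $G = [-I;\ I]$ and $h = [\mathbf{0};\ C\mathbf{1}]$, matching the shapes listed above.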

(6) The QP is solved; the returned minimization dictionary is:

{
'x': <67x1 matrix, tc='d'>, 
'y': <1x1 matrix, tc='d'>, 
's': <134x1 matrix, tc='d'>, 
'z': <134x1 matrix, tc='d'>, 
'status': 'optimal', 
'gap': 6.025067170026107e-06, 
'relative gap': 2.658291994898124e-07, 
'primal objective': -22.66518193482733, 
'dual objective': -22.6651879598945, 
'primal infeasibility': 5.244963534670188e-16, 
'dual infeasibility': 1.4242235222101717e-14, 
'primal slack': 1.992950796227594e-08, 
'dual slack': 2.0187091588623967e-08, 
'iterations': 8
}

Next, the entry under key 'x' (the <67x1 matrix, tc='d'>; status 'optimal' and the tiny duality gap show the solver converged) is extracted, and the following quantities are computed. Note that most multipliers end up essentially at the upper bound C = 1 (samples on or inside the margin), while a few fall strictly between 0 and C (support vectors lying exactly on the margin):

lagr_mult: [9.99999993e-01 3.11335122e-07 5.76626923e-08 9.99999986e-01
 9.99999960e-01 8.40647046e-09 1.62550393e-08 9.99999990e-01
 8.80788872e-09 7.47600294e-09 9.99999993e-01 1.38140431e-08
 9.99999841e-01 1.84408877e-08 4.45365428e-08 9.99999993e-01
 9.99999994e-01 7.31294539e-09 9.99999981e-01 2.81302345e-01
 4.29885349e-08 9.99999993e-01 6.50674166e-08 1.53345935e-08
 9.99999997e-01 9.99999980e-01 1.60572220e-08 4.62959213e-09
 1.74974625e-08 9.99999994e-01 6.71173485e-09 1.57489685e-08
 9.99999965e-01 9.99999988e-01 1.01875501e-08 3.19854125e-01
 1.93465392e-08 9.99999991e-01 9.99999969e-01 1.58210493e-08
 9.99999992e-01 6.53854743e-08 9.49484746e-09 9.99999991e-01
 9.99999975e-01 1.33182181e-08 2.74386270e-07 9.99999985e-01
 9.58193356e-09 1.42607893e-08 1.44826324e-08 9.99999988e-01
 9.99999991e-01 1.05018808e-08 9.99999983e-01 8.02961322e-09
 6.01155709e-01 3.54164542e-07 9.99999994e-01 9.99999972e-01
 3.49899863e-08 1.56995796e-08 9.99999986e-01 9.99999949e-01
 1.95124926e-08 1.13750234e-07 3.24069998e-09]
idx: [ True  True False  True  True False False  True False False  True False
  True False False  True  True False  True  True False  True False False
  True  True False False False  True False False  True  True False  True
 False  True  True False  True False False  True  True False  True  True
 False False False  True  True False  True False  True  True  True  True
 False False  True  True False  True False]
self.lagr_multipliers: [9.99999993e-01 3.11335122e-07 9.99999986e-01 9.99999960e-01
 9.99999990e-01 9.99999993e-01 9.99999841e-01 9.99999993e-01
 9.99999994e-01 9.99999981e-01 2.81302345e-01 9.99999993e-01
 9.99999997e-01 9.99999980e-01 9.99999994e-01 9.99999965e-01
 9.99999988e-01 3.19854125e-01 9.99999991e-01 9.99999969e-01
 9.99999992e-01 9.99999991e-01 9.99999975e-01 2.74386270e-07
 9.99999985e-01 9.99999988e-01 9.99999991e-01 9.99999983e-01
 6.01155709e-01 3.54164542e-07 9.99999994e-01 9.99999972e-01
 9.99999986e-01 9.99999949e-01 1.13750234e-07]
self.support_vectors: [[0.72785195 0.32870733 0.56349829 0.21131186]
 [0.71491405 0.30207636 0.59408351 0.21145345]
 [0.72460233 0.37623583 0.54345175 0.19508524]
 [0.76467269 0.31486523 0.53976896 0.15743261]
 [0.73122464 0.31338199 0.56873028 0.20892133]
 [0.71562645 0.3523084  0.56149152 0.22019275]
 [0.71366557 0.28351098 0.61590317 0.17597233]
 [0.71066905 0.35533453 0.56853524 0.21320072]
 [0.73089855 0.30454106 0.58877939 0.1624219 ]
 [0.73544284 0.35458851 0.55158213 0.1707278 ]
 [0.71524936 0.40530797 0.53643702 0.19073316]
 [0.71171214 0.35002236 0.57170319 0.21001342]
 [0.70779525 0.31850786 0.60162596 0.1887454 ]
 [0.73659895 0.33811099 0.56754345 0.14490471]
 [0.72366005 0.32162669 0.58582004 0.17230001]
 [0.72634846 0.38046824 0.54187901 0.18446945]
 [0.73081412 0.34743622 0.56308629 0.16772783]
 [0.75519285 0.33928954 0.53629637 0.16417236]
 [0.75384916 0.31524601 0.54825394 0.17818253]
 [0.72155725 0.32308533 0.56001458 0.24769876]
 [0.70631892 0.37838513 0.5675777  0.18919257]
 [0.71529453 0.31790868 0.59607878 0.17882363]
 [0.73260391 0.36029701 0.55245541 0.1681386 ]
 [0.70558934 0.32722984 0.58287815 0.23519645]
 [0.69299099 0.34199555 0.60299216 0.19799743]
 [0.73350949 0.35452959 0.55013212 0.18337737]
 [0.73337886 0.32948905 0.54206264 0.24445962]
 [0.74714194 0.33960997 0.54337595 0.17659719]
 [0.71576546 0.30196356 0.59274328 0.21249287]
 [0.69589887 0.34794944 0.57629125 0.25008866]
 [0.69333409 0.38518561 0.57777841 0.1925928 ]
 [0.69595601 0.3427843  0.59208198 0.21813547]
 [0.70610474 0.3258945  0.59747324 0.1955367 ]
 [0.76923077 0.30769231 0.53846154 0.15384615]
 [0.73923462 0.37588201 0.52623481 0.187941  ]]
self.support_vector_labels: [ 1  1 -1 -1  1  1  1  1  1 -1 -1  1 -1 -1  1 -1 -1 -1 -1  1 -1  1 -1  1
  1 -1  1 -1  1  1 -1  1  1 -1 -1]

The final step: of the 67 multipliers, 35 exceed the 1e-7 threshold, so there are 35 support vectors, and the intercept is computed from the first of them:

# Calculate intercept with first support vector
self.intercept = self.support_vector_labels[0]
for i in range(len(self.lagr_multipliers)):
    self.intercept -= self.lagr_multipliers[i] * self.support_vector_labels[
        i] * self.kernel(self.support_vectors[i], self.support_vectors[0])
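
This formula follows from the KKT conditions (a standard derivation, added here as a note): any support vector $x_s$ lying exactly on the margin satisfies $y_s\big(\sum_i \alpha_i y_i K(x_i, x_s) + b\big) = 1$, and since $y_s \in \{+1, -1\}$ this gives

$$b = y_s - \sum_i \alpha_i y_i K(x_i, x_s)$$

The code takes $x_s$ to be the first support vector; strictly speaking this is exact only when that vector has $0 < \alpha_s < C$, i.e. sits exactly on the margin.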

This involves a good amount of math and matrix manipulation, and there are still several steps whose derivation I do not fully understand, so I will keep working through them.

Result:

Accuracy: 0.8787878787878788

 

