Code source: https://github.com/eriklindernoren/ML-From-Scratch
Support vector machine code:
from __future__ import division, print_function
import numpy as np
import cvxopt
from mlfromscratch.utils import train_test_split, normalize, accuracy_score
from mlfromscratch.utils.kernels import *
from mlfromscratch.utils import Plot

# Hide cvxopt output
cvxopt.solvers.options['show_progress'] = False

class SupportVectorMachine(object):
    """The Support Vector Machine classifier.
    Uses cvxopt to solve the quadratic optimization problem.

    Parameters:
    -----------
    C: float
        Penalty term.
    kernel: function
        Kernel function. Can be either polynomial, rbf or linear.
    power: int
        The degree of the polynomial kernel. Will be ignored by the other
        kernel functions.
    gamma: float
        Used in the rbf kernel function.
    coef: float
        Bias term used in the polynomial kernel function.
    """
    def __init__(self, C=1, kernel=rbf_kernel, power=4, gamma=None, coef=4):
        # Penalty coefficient C
        self.C = C
        # Kernel function
        self.kernel = kernel
        self.power = power
        self.gamma = gamma
        self.coef = coef
        self.lagr_multipliers = None
        self.support_vectors = None
        self.support_vector_labels = None
        self.intercept = None

    def fit(self, X, y):
        # Number of samples, number of features
        n_samples, n_features = np.shape(X)

        # Set gamma to 1/n_features by default
        if not self.gamma:
            self.gamma = 1 / n_features

        # Initialize the kernel with its hyperparameters
        self.kernel = self.kernel(
            power=self.power,
            gamma=self.gamma,
            coef=self.coef)

        # Calculate the kernel matrix
        kernel_matrix = np.zeros((n_samples, n_samples))
        for i in range(n_samples):
            for j in range(n_samples):
                kernel_matrix[i, j] = self.kernel(X[i], X[j])

        # Define the quadratic optimization problem.
        # cvxopt is a convex optimization library;
        # cvxopt.matrix(array, dims) rearranges array according to dims,
        # and np.outer computes the outer product of two vectors.
        P = cvxopt.matrix(np.outer(y, y) * kernel_matrix, tc='d')
        q = cvxopt.matrix(np.ones(n_samples) * -1)
        A = cvxopt.matrix(y, (1, n_samples), tc='d')
        b = cvxopt.matrix(0, tc='d')

        if not self.C:
            # np.identity(n) returns an n*n identity matrix: ones on the
            # diagonal, zeros elsewhere (dtype defaults to float).
            G = cvxopt.matrix(np.identity(n_samples) * -1)
            h = cvxopt.matrix(np.zeros(n_samples))
        else:
            G_max = np.identity(n_samples) * -1
            G_min = np.identity(n_samples)
            G = cvxopt.matrix(np.vstack((G_max, G_min)))
            h_max = cvxopt.matrix(np.zeros(n_samples))
            h_min = cvxopt.matrix(np.ones(n_samples) * self.C)
            h = cvxopt.matrix(np.vstack((h_max, h_min)))

        # Solve the quadratic optimization problem using cvxopt
        minimization = cvxopt.solvers.qp(P, q, G, h, A, b)

        # Lagrange multipliers (np.ravel flattens to 1-D)
        lagr_mult = np.ravel(minimization['x'])

        # Extract support vectors
        # Get indexes of non-zero lagr. multipliers
        idx = lagr_mult > 1e-7
        # Get the corresponding lagr. multipliers
        self.lagr_multipliers = lagr_mult[idx]
        # Get the samples that will act as support vectors
        self.support_vectors = X[idx]
        # Get the corresponding labels
        self.support_vector_labels = y[idx]

        # Calculate intercept with first support vector
        self.intercept = self.support_vector_labels[0]
        for i in range(len(self.lagr_multipliers)):
            self.intercept -= self.lagr_multipliers[i] * self.support_vector_labels[
                i] * self.kernel(self.support_vectors[i], self.support_vectors[0])

    def predict(self, X):
        y_pred = []
        # Iterate through list of samples and make predictions
        for sample in X:
            prediction = 0
            # Determine the label of the sample by the support vectors
            for i in range(len(self.lagr_multipliers)):
                prediction += self.lagr_multipliers[i] * self.support_vector_labels[
                    i] * self.kernel(self.support_vectors[i], sample)
            prediction += self.intercept
            y_pred.append(np.sign(prediction))
        return np.array(y_pred)
Some of the functions used:
cvxopt.solvers.qp(P, q, G, h, A, b)
Standard form:

    minimize    (1/2) x^T P x + q^T x
    subject to  G x <= h
                A x = b
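To make the mapping concrete, here is a minimal sketch of calling cvxopt.solvers.qp on a hypothetical toy problem (not from the original post):

import numpy as np
import cvxopt

cvxopt.solvers.options['show_progress'] = False

# Toy QP: minimize (1/2)(x1^2 + x2^2) - x1 - x2
# subject to x1 + x2 = 1 and x >= 0 (written as -x <= 0)
P = cvxopt.matrix(np.eye(2))
q = cvxopt.matrix(-np.ones(2))
G = cvxopt.matrix(-np.eye(2))
h = cvxopt.matrix(np.zeros(2))
A = cvxopt.matrix(np.ones((1, 2)))
b = cvxopt.matrix(1.0)

sol = cvxopt.solvers.qp(P, q, G, h, A, b)
print(sol['status'])           # 'optimal'
print(np.ravel(sol['x']))      # approximately [0.5 0.5]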
Kernel function definitions:
import numpy as np

def linear_kernel(**kwargs):
    def f(x1, x2):
        return np.inner(x1, x2)
    return f

def polynomial_kernel(power, coef, **kwargs):
    def f(x1, x2):
        return (np.inner(x1, x2) + coef)**power
    return f

def rbf_kernel(gamma, **kwargs):
    def f(x1, x2):
        distance = np.linalg.norm(x1 - x2) ** 2
        return np.exp(-gamma * distance)
    return f
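Each factory returns a closure over its hyperparameters, so the SVM can later call the kernel with just two samples. A quick sketch using the same settings as the experiment below (power=4, coef=1):

import numpy as np
# Assumes polynomial_kernel from the definitions above
kernel = polynomial_kernel(power=4, coef=1)
x1, x2 = np.array([1.0, 2.0]), np.array([3.0, 4.0])
print(kernel(x1, x2))   # (1*3 + 2*4 + 1)**4 = 12**4 = 20736.0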
np.inner() returns the inner product of two vectors. np.linalg.norm() computes a norm; the ord parameter selects which norm, and when it is omitted the result is the 2-norm for vectors (for matrices, the Frobenius norm: the square root of the sum of squared elements).
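A quick sanity check of both helpers (a hypothetical snippet, not from the original post):

import numpy as np

v = np.array([3.0, -4.0])
print(np.inner(v, v))            # 3*3 + (-4)*(-4) = 25.0
print(np.linalg.norm(v))         # default 2-norm: sqrt(25) = 5.0
print(np.linalg.norm(v, ord=1))  # 1-norm: |3| + |-4| = 7.0

M = np.array([[1.0, 2.0], [3.0, 4.0]])
print(np.linalg.norm(M))         # Frobenius norm: sqrt(1+4+9+16) ~ 5.4772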
The main script:
from __future__ import division, print_function
import numpy as np
from sklearn import datasets

# Import helper functions
import sys
sys.path.append("/content/drive/My Drive/learn/ML-From-Scratch/")
from mlfromscratch.utils import train_test_split, normalize, accuracy_score, Plot
from mlfromscratch.utils.kernels import *
from mlfromscratch.supervised_learning import SupportVectorMachine

def main():
    data = datasets.load_iris()
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = -1
    y[y == 2] = 1
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    clf = SupportVectorMachine(kernel=polynomial_kernel, power=4, coef=1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Support Vector Machine", accuracy=accuracy)

if __name__ == "__main__":
    main()
The dataset is the iris dataset that ships with sklearn. Classes 1 and 2 contribute 100 samples in total, and with test_size=0.33 the training set has shape:
(67, 4) (67,)
Note that the iris dataset contains three classes, labelled 0, 1 and 2. Only classes 1 and 2 are extracted here; the samples are normalized to unit length and the labels are re-encoded as +1 and -1 (a standalone sketch of this preprocessing follows).
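A minimal sketch of that preprocessing, assuming the repo's normalize() performs row-wise L2 normalization (np.where is used here instead of the in-place relabelling):

import numpy as np
from sklearn import datasets

data = datasets.load_iris()
mask = data.target != 0                            # keep only classes 1 and 2
X = data.data[mask]
y = np.where(data.target[mask] == 1, -1, 1)        # relabel: 1 -> -1, 2 -> +1
X = X / np.linalg.norm(X, axis=1, keepdims=True)   # unit-length rows
print(X.shape, np.unique(y))                       # (100, 4) [-1  1]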
A few samples and their corresponding labels:
[[0.69198788 0.34599394 0.58626751 0.24027357]
 [0.70779525 0.31850786 0.60162596 0.1887454 ]
 [0.73239618 0.38547167 0.53966034 0.15418867]
 [0.69299099 0.34199555 0.60299216 0.19799743]
 [0.72712585 0.26661281 0.60593821 0.18178146]]
[ 1 -1 -1  1  1]
The kernel used here is the polynomial kernel whose code was given above, with power=4 and coef=1.
Using this input as an example, let us step through the whole SVM fitting process:
(1) fit() receives X_train and y_train; n_samples=67, n_features=4.
(2) gamma is not used by the polynomial kernel (it is absorbed by **kwargs); the kernel f(x1, x2) = (x1·x2 + 1)**4 is instantiated.
(3) The kernel matrix is computed; its shape is (67, 67). (A vectorized equivalent of the double loop is sketched below.)
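As an aside (not part of the original code), for the polynomial kernel the O(n^2) double loop can be replaced by a single matrix product; a small sketch with random stand-in data:

import numpy as np

rng = np.random.default_rng(0)
X = rng.random((5, 4))                 # stand-in for X_train

# Double loop, as in fit()
K_loop = np.empty((5, 5))
for i in range(5):
    for j in range(5):
        K_loop[i, j] = (np.inner(X[i], X[j]) + 1) ** 4

# Vectorized equivalent for the polynomial kernel
K_vec = (X @ X.T + 1) ** 4
print(np.allclose(K_loop, K_vec))      # True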
(4) P, q, A, b are built, with shapes:
P: (67, 67) q: (67, 1) A: (1, 67) b: (1, 1)
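These shapes come from rewriting the soft-margin SVM dual in the standard QP form above. The dual problem is

    max_α  Σ_i α_i - (1/2) Σ_i Σ_j α_i α_j y_i y_j K(x_i, x_j)
    s.t.   Σ_i α_i y_i = 0,   0 <= α_i <= C

Negating the objective to obtain a minimization yields min (1/2) α^T P α + q^T α with P_ij = y_i y_j K(x_i, x_j), which is exactly np.outer(y, y) * kernel_matrix, and q a vector of -1s. The equality constraint supplies A = y^T with shape (1, 67) and b = 0.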
(5) G and h are built:
G: (134, 67)
h: (134, 1)
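The 134 rows come from stacking the two identity blocks: G_max = -I encodes -α_i <= 0 (that is, α_i >= 0) and G_min = I encodes α_i <= C, so together Gα <= h expresses the box constraint 0 <= α_i <= C for all 67 multipliers, giving 2 × 67 = 134 inequality rows.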
(6) The QP is solved and minimization is returned:
{
  'x': <67x1 matrix, tc='d'>,
  'y': <1x1 matrix, tc='d'>,
  's': <134x1 matrix, tc='d'>,
  'z': <134x1 matrix, tc='d'>,
  'status': 'optimal',
  'gap': 6.025067170026107e-06,
  'relative gap': 2.658291994898124e-07,
  'primal objective': -22.66518193482733,
  'dual objective': -22.6651879598945,
  'primal infeasibility': 5.244963534670188e-16,
  'dual infeasibility': 1.4242235222101717e-14,
  'primal slack': 1.992950796227594e-08,
  'dual slack': 2.0187091588623967e-08,
  'iterations': 8
}
Next, the entry under key 'x' (<67x1 matrix, tc='d'>) is taken out and flattened with np.ravel, and the following quantities are computed in turn; multipliers above the 1e-7 threshold mark the support vectors:
lagr_mult:
[9.99999993e-01 3.11335122e-07 5.76626923e-08 9.99999986e-01 9.99999960e-01
 8.40647046e-09 1.62550393e-08 9.99999990e-01 8.80788872e-09 7.47600294e-09
 9.99999993e-01 1.38140431e-08 9.99999841e-01 1.84408877e-08 4.45365428e-08
 9.99999993e-01 9.99999994e-01 7.31294539e-09 9.99999981e-01 2.81302345e-01
 4.29885349e-08 9.99999993e-01 6.50674166e-08 1.53345935e-08 9.99999997e-01
 9.99999980e-01 1.60572220e-08 4.62959213e-09 1.74974625e-08 9.99999994e-01
 6.71173485e-09 1.57489685e-08 9.99999965e-01 9.99999988e-01 1.01875501e-08
 3.19854125e-01 1.93465392e-08 9.99999991e-01 9.99999969e-01 1.58210493e-08
 9.99999992e-01 6.53854743e-08 9.49484746e-09 9.99999991e-01 9.99999975e-01
 1.33182181e-08 2.74386270e-07 9.99999985e-01 9.58193356e-09 1.42607893e-08
 1.44826324e-08 9.99999988e-01 9.99999991e-01 1.05018808e-08 9.99999983e-01
 8.02961322e-09 6.01155709e-01 3.54164542e-07 9.99999994e-01 9.99999972e-01
 3.49899863e-08 1.56995796e-08 9.99999986e-01 9.99999949e-01 1.95124926e-08
 1.13750234e-07 3.24069998e-09]

idx:
[ True  True False  True  True False False  True False False  True False
  True False False  True  True False  True  True False  True False False
  True  True False False False  True False False  True  True False  True
 False  True  True False  True False False  True  True False  True  True
 False False False  True  True False  True False  True  True  True  True
 False False  True  True False  True False]

self.lagr_multipliers:
[9.99999993e-01 3.11335122e-07 9.99999986e-01 9.99999960e-01 9.99999990e-01
 9.99999993e-01 9.99999841e-01 9.99999993e-01 9.99999994e-01 9.99999981e-01
 2.81302345e-01 9.99999993e-01 9.99999997e-01 9.99999980e-01 9.99999994e-01
 9.99999965e-01 9.99999988e-01 3.19854125e-01 9.99999991e-01 9.99999969e-01
 9.99999992e-01 9.99999991e-01 9.99999975e-01 2.74386270e-07 9.99999985e-01
 9.99999988e-01 9.99999991e-01 9.99999983e-01 6.01155709e-01 3.54164542e-07
 9.99999994e-01 9.99999972e-01 9.99999986e-01 9.99999949e-01 1.13750234e-07]

self.support_vectors:
[[0.72785195 0.32870733 0.56349829 0.21131186]
 [0.71491405 0.30207636 0.59408351 0.21145345]
 [0.72460233 0.37623583 0.54345175 0.19508524]
 [0.76467269 0.31486523 0.53976896 0.15743261]
 [0.73122464 0.31338199 0.56873028 0.20892133]
 [0.71562645 0.3523084  0.56149152 0.22019275]
 [0.71366557 0.28351098 0.61590317 0.17597233]
 [0.71066905 0.35533453 0.56853524 0.21320072]
 [0.73089855 0.30454106 0.58877939 0.1624219 ]
 [0.73544284 0.35458851 0.55158213 0.1707278 ]
 [0.71524936 0.40530797 0.53643702 0.19073316]
 [0.71171214 0.35002236 0.57170319 0.21001342]
 [0.70779525 0.31850786 0.60162596 0.1887454 ]
 [0.73659895 0.33811099 0.56754345 0.14490471]
 [0.72366005 0.32162669 0.58582004 0.17230001]
 [0.72634846 0.38046824 0.54187901 0.18446945]
 [0.73081412 0.34743622 0.56308629 0.16772783]
 [0.75519285 0.33928954 0.53629637 0.16417236]
 [0.75384916 0.31524601 0.54825394 0.17818253]
 [0.72155725 0.32308533 0.56001458 0.24769876]
 [0.70631892 0.37838513 0.5675777  0.18919257]
 [0.71529453 0.31790868 0.59607878 0.17882363]
 [0.73260391 0.36029701 0.55245541 0.1681386 ]
 [0.70558934 0.32722984 0.58287815 0.23519645]
 [0.69299099 0.34199555 0.60299216 0.19799743]
 [0.73350949 0.35452959 0.55013212 0.18337737]
 [0.73337886 0.32948905 0.54206264 0.24445962]
 [0.74714194 0.33960997 0.54337595 0.17659719]
 [0.71576546 0.30196356 0.59274328 0.21249287]
 [0.69589887 0.34794944 0.57629125 0.25008866]
 [0.69333409 0.38518561 0.57777841 0.1925928 ]
 [0.69595601 0.3427843  0.59208198 0.21813547]
 [0.70610474 0.3258945  0.59747324 0.1955367 ]
 [0.76923077 0.30769231 0.53846154 0.15384615]
 [0.73923462 0.37588201 0.52623481 0.187941  ]]

self.support_vector_labels:
[ 1  1 -1 -1  1  1  1  1  1 -1 -1  1 -1 -1  1 -1 -1 -1 -1  1 -1  1 -1  1
  1 -1  1 -1  1  1 -1  1  1 -1 -1]
The final step computes the intercept:
# Calculate intercept with first support vector
self.intercept = self.support_vector_labels[0]
for i in range(len(self.lagr_multipliers)):
    self.intercept -= self.lagr_multipliers[i] * self.support_vector_labels[
        i] * self.kernel(self.support_vectors[i], self.support_vectors[0])
This takes quite a bit of math and matrix manipulation, and there are still parts of the derivation I do not fully understand, so I will keep working through it slowly.
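For reference, the intercept formula follows from the KKT conditions: a support vector x_s with 0 < α_s < C lies exactly on the margin, so

    y_s (Σ_i α_i y_i K(x_i, x_s) + b) = 1

Since y_s ∈ {+1, -1}, 1/y_s = y_s, and solving for b gives

    b = y_s - Σ_i α_i y_i K(x_i, x_s)

which is exactly the loop above, with x_s taken to be the first support vector. Strictly, this requires 0 < α_s < C; the code simply uses the first support vector, and averaging b over all support vectors is a common, more robust variant. predict() then evaluates sign(Σ_i α_i y_i K(x_i, x) + b) for each test sample.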
Result:
Accuracy: 0.8787878787878788