This article applies a pre-trained VGG16 model to a multi-label image classification problem via transfer learning. The dataset comes from Kaggle, and each image can carry several labels at the same time. Model performance is measured with the F score, which is built from the confusion-matrix counts shown in the table below:
| Label | Predicted Positive (1) | Predicted Negative (0) |
| --- | --- | --- |
| Actual Positive (1) | TP | FN |
| Actual Negative (0) | FP | TN |
For example, if the true labels are (1,0,1,1,0,0) and the predicted labels are (1,1,0,1,1,0), then TP=2, FN=1, FP=2, TN=1. $$Precision=\frac{TP}{TP+FP},\quad Recall=\frac{TP}{TP+FN},\quad F\_score=\frac{(1+\beta^2)\cdot Precision\cdot Recall}{Recall+\beta^2\cdot Precision}$$The smaller $\beta$ is, the more weight the F score places on Precision; when $\beta=0$ the F score reduces to Precision. The larger $\beta$ is, the more weight it places on Recall; as $\beta\to\infty$ the F score approaches Recall. The metric can be defined as a custom function in Keras (y_pred denotes the predicted probabilities):
```python
from tensorflow.keras import backend

# calculate fbeta score for multi-label classification
def fbeta(y_true, y_pred, beta=2):
    # clip predictions
    y_pred = backend.clip(y_pred, 0, 1)
    # calculate elements for each sample
    tp = backend.sum(backend.round(backend.clip(y_true * y_pred, 0, 1)), axis=1)
    fp = backend.sum(backend.round(backend.clip(y_pred - y_true, 0, 1)), axis=1)
    fn = backend.sum(backend.round(backend.clip(y_true - y_pred, 0, 1)), axis=1)
    # calculate precision
    p = tp / (tp + fp + backend.epsilon())
    # calculate recall
    r = tp / (tp + fn + backend.epsilon())
    # calculate fbeta, averaged across samples
    bb = beta ** 2
    fbeta_score = backend.mean((1 + bb) * (p * r) / (bb * p + r + backend.epsilon()))
    return fbeta_score
```
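As a quick sanity check (not part of the original pipeline), the metric can be evaluated on the worked example above; this sketch assumes TensorFlow 2.x eager execution, so the tensor result converts directly to a Python float:

```python
import numpy as np

# the example above: TP=2, FP=2, FN=1 -> precision=0.5, recall=2/3, F2=0.625
y_true = np.array([[1, 0, 1, 1, 0, 0]], dtype='float32')
y_pred = np.array([[1, 1, 0, 1, 1, 0]], dtype='float32')
print(float(fbeta(y_true, y_pred)))  # ~0.625
```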
The choice of loss function also differs between multi-label and multi-class classification: multi-label classification uses binary_crossentropy. Suppose a sample's true labels are (1,0,1,1,0,0) and the predicted probabilities are (0.2, 0.3, 0.4, 0.7, 0.9, 0.2); then $$\text{binary\_crossentropy loss}=-(\ln 0.2 + \ln 0.7 + \ln 0.4 + \ln 0.7 + \ln 0.1 + \ln 0.8)/6=0.96$$In addition, the output layer of a multi-label classifier uses a sigmoid rather than a softmax activation. The model architecture is defined below, after a quick numerical check of the loss value above.
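This check simply reproduces the hand calculation in a few lines of NumPy; it is not part of the training pipeline:

```python
import numpy as np

# reproduce the binary cross-entropy hand calculation above
y_true = np.array([1, 0, 1, 1, 0, 0])
y_pred = np.array([0.2, 0.3, 0.4, 0.7, 0.9, 0.2])
loss = -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
print(round(loss, 2))  # 0.96
```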
```python
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.models import Model

def define_model(in_shape=(128, 128, 3), out_shape=17):
    # load model
    base_model = VGG16(weights='imagenet', include_top=False, input_shape=in_shape)
    # mark loaded layers as not trainable
    for layer in base_model.layers:
        layer.trainable = False
    # make the last block trainable
    tune_layers = [layer.name for layer in base_model.layers if layer.name.startswith('block5_')]
    for layer_name in tune_layers:
        base_model.get_layer(layer_name).trainable = True
    # add new classifier layers
    flat1 = Flatten()(base_model.layers[-1].output)
    class1 = Dense(128, activation='relu', kernel_initializer='he_uniform')(flat1)
    output = Dense(out_shape, activation='sigmoid')(class1)
    # define new model
    model = Model(inputs=base_model.input, outputs=output)
    # compile model
    opt = Adam(learning_rate=1e-3)
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=[fbeta])
    model.summary()
    return model
```
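As a brief check (a sketch, not part of the original code), the model can be built and the layers that will actually be updated during training listed; this assumes the fbeta metric defined earlier is available, since define_model compiles with it:

```python
# build the model and print the layers that remain trainable; with the settings above
# this should be the three block5 conv layers, block5_pool, and the new Flatten/Dense head
model = define_model()
for layer in model.layers:
    if layer.trainable:
        print(layer.name)
```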
Download the data from the Kaggle website, unzip it, and process it into a format the model can read.

```python
from os import listdir
from numpy import zeros, asarray, savez_compressed
from pandas import read_csv
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# create a mapping of tags to integers given the loaded mapping file
def create_tag_mapping(mapping_csv):
    # create a set of all known tags
    labels = set()
    for i in range(len(mapping_csv)):
        # convert space-separated tags into an array of tags
        tags = mapping_csv['tags'][i].split(' ')
        # add tags to the set of known labels
        labels.update(tags)
    # convert set of labels to a sorted list
    labels = sorted(list(labels))
    # dict that maps labels to integers, and the reverse
    labels_map = {labels[i]: i for i in range(len(labels))}
    inv_labels_map = {i: labels[i] for i in range(len(labels))}
    return labels_map, inv_labels_map

# create a mapping of filename to a list of tags
def create_file_mapping(mapping_csv):
    mapping = dict()
    for i in range(len(mapping_csv)):
        name, tags = mapping_csv['image_name'][i], mapping_csv['tags'][i]
        mapping[name] = tags.split(' ')
    return mapping

# create a one hot encoding for one list of tags
def one_hot_encode(tags, mapping):
    # create empty vector
    encoding = zeros(len(mapping), dtype='uint8')
    # mark 1 for each tag in the vector
    for tag in tags:
        encoding[mapping[tag]] = 1
    return encoding

# load all images into memory
def load_dataset(path, file_mapping, tag_mapping):
    photos, targets = list(), list()
    # enumerate files in the directory
    for filename in listdir(path):
        # load image
        photo = load_img(path + filename, target_size=(128, 128))
        # convert to numpy array
        photo = img_to_array(photo, dtype='uint8')
        # get tags
        tags = file_mapping[filename[:-4]]
        # one hot encode tags
        target = one_hot_encode(tags, tag_mapping)
        photos.append(photo)
        targets.append(target)
    X = asarray(photos, dtype='uint8')
    y = asarray(targets, dtype='uint8')
    return X, y

# load the target file
filename = 'train_v2.csv'
mapping_csv = read_csv(filename)
# create a mapping of tags to integers
tag_mapping, _ = create_tag_mapping(mapping_csv)
# create a mapping of filenames to tag lists
file_mapping = create_file_mapping(mapping_csv)
# load the jpeg images
folder = 'train-jpg/'
X, y = load_dataset(folder, file_mapping, tag_mapping)
print(X.shape, y.shape)
# save both arrays to one file in compressed format
savez_compressed('planet_data.npz', X, y)
```
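To make the helper functions concrete, here is a hypothetical two-row mapping file; the tag names are only for illustration and do not reflect the actual Kaggle vocabulary:

```python
from pandas import DataFrame

# a tiny made-up mapping file, just to illustrate the helpers above
demo = DataFrame({'image_name': ['train_0', 'train_1'],
                  'tags': ['clear primary', 'haze primary water']})
tag_map, inv_tag_map = create_tag_mapping(demo)
print(tag_map)                    # {'clear': 0, 'haze': 1, 'primary': 2, 'water': 3}
print(create_file_mapping(demo))  # {'train_0': ['clear', 'primary'], 'train_1': ['haze', 'primary', 'water']}
print(one_hot_encode(['haze', 'primary', 'water'], tag_map))  # [0 1 1 1]
```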
Next, build two helper functions: the first splits the data into training and validation sets, and the second plots the model's learning curves during training.

```python
import numpy as np
from matplotlib import pyplot
from sklearn.model_selection import train_test_split

# load train and test dataset
def load_dataset():
    # load dataset
    data = np.load('planet_data.npz')
    X, y = data['arr_0'], data['arr_1']
    # separate into train and test datasets
    trainX, testX, trainY, testY = train_test_split(X, y, test_size=0.3, random_state=1)
    print(trainX.shape, trainY.shape, testX.shape, testY.shape)
    return trainX, trainY, testX, testY

# plot diagnostic learning curves
def summarize_diagnostics(history):
    # plot loss
    pyplot.subplot(121)
    pyplot.title('Cross Entropy Loss')
    pyplot.plot(history.history['loss'], color='blue', label='train')
    pyplot.plot(history.history['val_loss'], color='orange', label='test')
    # plot fbeta
    pyplot.subplot(122)
    pyplot.title('Fbeta')
    pyplot.plot(history.history['fbeta'], color='blue', label='train')
    pyplot.plot(history.history['val_fbeta'], color='orange', label='test')
    pyplot.show()
```
Train the model using data augmentation:
```python
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.callbacks import ModelCheckpoint

# load dataset
trainX, trainY, testX, testY = load_dataset()
# create data generators using augmentation
# vertical flip is reasonable since the pictures are satellite images
train_datagen = ImageDataGenerator(horizontal_flip=True, vertical_flip=True, rotation_range=90,
                                   preprocessing_function=preprocess_input)
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
# prepare generators
train_it = train_datagen.flow(trainX, trainY, batch_size=128)
test_it = test_datagen.flow(testX, testY, batch_size=128)
# define model
model = define_model()
# fit model
# when an epoch ends, the validation generator yields validation_steps batches,
# and the evaluation results of all batches are averaged
checkpointer = ModelCheckpoint(filepath='./weights.best.vgg16.hdf5', verbose=1, save_best_only=True)
history = model.fit_generator(train_it, steps_per_epoch=len(train_it),
                              validation_data=test_it, validation_steps=len(test_it),
                              epochs=15, callbacks=[checkpointer], verbose=0)
# evaluate the best model
# for simplicity the validation set is used to test the model here;
# in practice an entirely new test set should be used
model.load_weights('./weights.best.vgg16.hdf5')  # load stored optimal weights
# name the result fbeta_score so it does not shadow the fbeta metric function
loss, fbeta_score = model.evaluate_generator(test_it, steps=len(test_it), verbose=0)
print('> loss=%.3f, fbeta=%.3f' % (loss, fbeta_score))  # loss=0.108, fbeta=0.884
model.save('final_model.h5')
# learning curves
summarize_diagnostics(history)
```
In the learning curves, the blue lines correspond to the training set and the orange lines to the validation set.
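Finally, here is a sketch of how the saved model might be used to predict tags for a new image; the image path 'sample.jpg' is a placeholder, and the fbeta metric function and create_tag_mapping helper from earlier are assumed to be available. Each sigmoid output above a 0.5 threshold is mapped back to its tag name:

```python
from numpy import expand_dims
from pandas import read_csv
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.vgg16 import preprocess_input

# load the trained model; fbeta is a custom metric, so pass it via custom_objects
model = load_model('final_model.h5', custom_objects={'fbeta': fbeta})

# 'sample.jpg' is a placeholder path for a new 128x128 satellite image
img = img_to_array(load_img('sample.jpg', target_size=(128, 128)))
img = preprocess_input(expand_dims(img, axis=0))  # same preprocessing as in training

# one sigmoid probability per tag; keep tags whose probability exceeds 0.5
probs = model.predict(img)[0]
_, inv_tag_map = create_tag_mapping(read_csv('train_v2.csv'))
print([inv_tag_map[i] for i in range(len(probs)) if probs[i] >= 0.5])
```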