內容引用自https://www.kaggle.com/toregil/a-lung-u-net-in-keras?select=2d_masks.zip
#引入普通包
1 import os 2 import numpy as np 3 import pandas as pd 4 import cv2 #后面用於圖像放縮(插值) 5 import matplotlib.pyplot as plt 6 %matplotlib inline 7 from sklearn.model_selection import train_test_split #將總數據集分為訓練集和測試集
#引入深度學習包
from keras.models import Model #keras模型 from keras.layers import * #keras層 from keras.optimizers import Adam #keras優化算法 from keras.regularizers import l2 #l2正則化 from keras.preprocessing.image import ImageDataGenerator #圖像增強生成器 import keras.backend as K from keras.callbacks import LearningRateScheduler, ModelCheckpoint
#導入圖像文件並圖像設置為指定大小
IMAGE_LIB = '../input/2d_images/'  # directory holding the input images
MASK_LIB = '../input/2d_masks/'  # directory holding the matching masks
IMG_HEIGHT, IMG_WIDTH = 32, 32  # spatial size every image is resized to
SEED = 42  # random seed


def _read_unchanged(path):
    """Read *path* with cv2.IMREAD_UNCHANGED, raising if it cannot be read.

    cv2.imread returns None instead of raising when a file is missing or
    cannot be decoded; without this check the failure would only show up
    as an AttributeError on the following ``.astype`` call.
    """
    im = cv2.imread(path, cv2.IMREAD_UNCHANGED)
    if im is None:
        raise IOError('Could not read image file: ' + path)
    return im


def _load_image(name):
    """Load one image, resize it and min-max scale it."""
    im = _read_unchanged(os.path.join(IMAGE_LIB, name))
    im = im.astype('int16').astype('float32')
    # Lanczos interpolation over an 8x8 pixel neighbourhood.
    im = cv2.resize(im, dsize=(IMG_WIDTH, IMG_HEIGHT),
                    interpolation=cv2.INTER_LANCZOS4)
    lowest, highest = np.min(im), np.max(im)
    if highest == lowest:
        # A constant image would give 0/0 = NaN below; return zeros instead.
        return im - lowest
    return (im - lowest) / (highest - lowest)


def _load_mask(name):
    """Load one mask, divide it by 255 and resize it."""
    # presumably the masks are 8-bit 0/255 images - TODO confirm
    im = _read_unchanged(os.path.join(MASK_LIB, name)).astype('float32') / 255.
    # Nearest-neighbour interpolation does not blend neighbouring values.
    return cv2.resize(im, dsize=(IMG_WIDTH, IMG_HEIGHT),
                      interpolation=cv2.INTER_NEAREST)


# Names of all .tif images; the same names are used to look up the masks.
all_images = [x for x in sorted(os.listdir(IMAGE_LIB)) if x.endswith('.tif')]

# Pre-allocate the image array, then fill it one file at a time.
# assumes every file decodes to a single-channel 2-D array - TODO confirm
x_data = np.empty((len(all_images), IMG_HEIGHT, IMG_WIDTH), dtype='float32')
for i, name in enumerate(all_images):
    x_data[i] = _load_image(name)

# Same for the masks.
y_data = np.empty((len(all_images), IMG_HEIGHT, IMG_WIDTH), dtype='float32')
for i, name in enumerate(all_images):
    y_data[i] = _load_mask(name)
#顯示圖像及掩模
# Show one image next to its mask as a visual sanity check.
sample_idx = 10
fig, ax = plt.subplots(1, 2, figsize=(8, 4))  # one row, two columns
for axis, data in zip(ax, (x_data, y_data)):  # left: image, right: mask
    axis.imshow(data[sample_idx], cmap='gray')
plt.show()
# Append a channel axis: the network expects 4-D input.
x_data = x_data[:, :, :, np.newaxis]
y_data = y_data[:, :, :, np.newaxis]

# Hold out half of the samples for validation. Passing random_state=SEED
# makes the split reproducible from run to run.
x_train, x_val, y_train, y_val = train_test_split(
    x_data, y_data, test_size=0.5, random_state=SEED)
#定義標准——dice系數
def dice_coef(y_true, y_pred):
    """Return the smoothed Dice coefficient of two tensors.

    Both tensors are flattened first, so the score is computed over all
    of their elements at once: 2 * sum(A * B) / (sum(A) + sum(B)), with
    K.epsilon() added to numerator and denominator. The value is 1 when
    the two tensors are identical binary masks.
    """
    truth = K.flatten(y_true)  # collapse to 1-D
    pred = K.flatten(y_pred)
    overlap = K.sum(truth * pred)  # element-wise product: 1 * 1 = 1
    numerator = 2. * overlap + K.epsilon()
    denominator = K.sum(truth) + K.sum(pred) + K.epsilon()
    return numerator / denominator
#模型
def _same_conv(filters, size):
    """Return a ReLU Conv2D layer with a size x size kernel and 'same' padding."""
    return Conv2D(filters=filters, kernel_size=(size, size),
                  activation='relu', padding='same')


# Small U-Net. The shape notes below assume a 32x32x1 input.
input_layer = Input(shape=x_train.shape[1:])

# Encoder: 3x3 convolution, remember the result for the skip connection,
# then halve the resolution (32 -> 16 -> 8 -> 4).
skips = []
x = input_layer
for n_filters in (8, 16, 32):
    x = _same_conv(n_filters, 3)(x)
    skips.append(x)
    x = MaxPool2D(strides=(2, 2))(x)

# Bottleneck: 1x1 convolution at the lowest resolution (4x4x32).
x = _same_conv(32, 1)(x)

# Decoder: upsample, concatenate with the matching encoder output
# (deepest first), then a 2x2 convolution (8 -> 16 -> 32).
for n_filters in (32, 24, 16):
    upsampled = UpSampling2D(size=(2, 2))(x)
    x = concatenate([upsampled, skips.pop()], axis=-1)
    x = _same_conv(n_filters, 2)(x)

# Head: 1x1 convolutions, dropout, and a sigmoid output (32x32x1).
x = Conv2D(filters=64, kernel_size=(1, 1), activation='relu')(x)
x = Dropout(0.5)(x)
output_layer = Conv2D(filters=1, kernel_size=(1, 1), activation='sigmoid')(x)

model = Model(input_layer, output_layer)
#模型參數數量
#數據增強器
def my_generator(x_train, y_train, batch_size):
    """Yield augmented (image_batch, mask_batch) pairs forever.

    Two ImageDataGenerator flows are built with identical augmentation
    settings and the same seed (SEED), one over the images and one over
    the masks; the intent is that each image and its mask receive the
    same random transform.

    Args:
        x_train: array of training images.
        y_train: array of training masks, aligned with ``x_train``.
        batch_size: number of samples per yielded batch.

    Yields:
        Tuples ``(x_batch, y_batch)`` of augmented images and masks.
    """
    augmentation = dict(
        width_shift_range=0.1,
        height_shift_range=0.1,
        rotation_range=10,
        zoom_range=0.1)
    image_flow = ImageDataGenerator(**augmentation).flow(
        x_train, x_train, batch_size, seed=SEED)
    mask_flow = ImageDataGenerator(**augmentation).flow(
        y_train, y_train, batch_size, seed=SEED)
    while True:
        # Each flow was given its data twice (as x and as y), so next()
        # returns a pair; only the first, augmented element is used.
        x_batch = image_flow.next()[0]
        y_batch = mask_flow.next()[0]
        yield x_batch, y_batch
#使用相同的隨機種子得到增強的圖像對應增強的掩模,顯示一個小批量增強后的圖像及掩模
# Preview one augmented mini-batch: images on the left, masks on the right.
n_preview = 8
image_batch, mask_batch = next(my_generator(x_train, y_train, n_preview))
preview_fig, ax = plt.subplots(n_preview, 2, figsize=(8, 20))
for row in range(n_preview):
    ax[row, 0].imshow(image_batch[row, :, :, 0])
    ax[row, 1].imshow(mask_batch[row, :, :, 0])
plt.show()
#編譯模型
# Compile the model: Adam optimizer, binary cross-entropy as the loss,
# and the Dice coefficient reported as an evaluation metric.
optimizer = Adam(2e-4)
model.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=[dice_coef],
)
#為模型添加檢查點
# Save only the weights to 'lung.h5', and only when the validation Dice
# coefficient improves. mode='max' is required: a higher Dice score is
# better, but the default mode='auto' treats a monitor with this name as a
# quantity to minimise, which would keep the weights of the worst epoch.
weight_saver = ModelCheckpoint(
    'lung.h5',
    monitor='val_dice_coef',
    mode='max',
    save_best_only=True,
    save_weights_only=True)
#文件名,monitor:監視的值,save_best_only:當設置為True時,將只保存在驗證集上性能最好的模型,save_weights_only:若設置為True,只保存模型權重,否則將保存整個模型
#自動調整學習率
def _annealed_lr(epoch):
    """Return the learning rate for *epoch*: 1e-3 scaled by 0.8 per epoch."""
    return 1e-3 * 0.8 ** epoch


# NOTE(review): the scheduler presumably overrides the learning rate given
# to the optimizer at compile time - confirm that 1e-3 is the intended start.
annealer = LearningRateScheduler(_annealed_lr)
#訓練
# Train on augmented batches of 8; validate on the un-augmented hold-out set.
train_batches = my_generator(x_train, y_train, 8)
hist = model.fit_generator(
    train_batches,
    steps_per_epoch=200,  # generator batches that make up one epoch
    epochs=10,
    validation_data=(x_val, y_val),
    callbacks=[weight_saver, annealer],
    verbose=2)  # one log line per epoch
#generator:生成器函數
#steps_per_epoch:整數,當生成器返回steps_per_epoch次數據時計一個epoch結束,執行下一個epoch
#epochs:整數,數據迭代的輪數
#verbose:日志顯示,0為不在標准輸出流輸出日志信息,1為輸出進度條記錄,2為每個epoch輸出一行記錄
#結果
#評價
#model.load_weights('lung.h5') #使用最佳參數
# Plot the training (blue) and validation (red) curve of each tracked
# quantity, one figure per quantity.
for metric in ('loss', 'dice_coef'):
    plt.plot(hist.history[metric], color='b')
    plt.plot(hist.history['val_' + metric], color='r')
    plt.legend(['train_' + metric, 'val_' + metric])
    plt.show()
#測試
# Predict the mask of one training sample and show image, true mask and
# prediction side by side.
pre = model.predict(x_train[10].reshape(1, IMG_HEIGHT, IMG_WIDTH, 1))[0, :, :, 0]
fig, ax = plt.subplots(1, 3, figsize=(12, 6))
# Drop the trailing channel axis before plotting, as the other plots in this
# file do: (H, W, 1) is not one of the array shapes imshow documents.
ax[0].imshow(x_train[10, :, :, 0], cmap='gray')
ax[1].imshow(y_train[10, :, :, 0], cmap='gray')
ax[2].imshow(pre)

# Predict the whole validation set and show the first rows as
# (image, true mask, predicted mask) triples.
y_hat = model.predict(x_val)
n_rows = 10
fig, ax = plt.subplots(n_rows, 3, figsize=(12, 30))
# Only the input image is drawn in grey; cmap=None is imshow's default.
panels = ((x_val, 'gray'), (y_val, None), (y_hat, None))
for row in range(n_rows):
    for col, (stack, colormap) in enumerate(panels):
        ax[row, col].imshow(stack[row, :, :, 0], cmap=colormap)
#討論
深度學習得到的圖像並非二值圖像:因為網絡的最后一層是sigmoid函數,每個像素點的值都在0到1之間,而且再小的數也大於0。因此這里dice系數是用連續的預測值計算的,並不是想象中二值掩模的交並比。
生成真正的預測掩模還需要一個閾值。
倒數第二幅圖的分割明顯有問題。
為什么測試集的dice系數總好於訓練集的dice系數? 答:測試集的數據未經過增強
#數據分享:鏈接: https://pan.baidu.com/s/1xXlHwn7Ek4mjJlJ4OFkgaw 提取碼: rd5y
歡迎探討、指教。