深度學習與醫學圖像處理 案例學習2——CNN肺炎檢測(CXR圖像)


文章來源:https://www.kaggle.com/paultimothymooney/chest-xray-pneumonia/notebooks

什麼是肺炎?

肺炎是肺部的一種炎症狀態,主要影響肺泡(肺內的小氣囊)。典型的症狀包括乾咳、胸痛、發熱和呼吸困難。病情的嚴重程度因人而異。肺炎通常由病毒或細菌感染引起,少數情況下由其他微生物、某些藥物或疾病(如自身免疫性疾病)引起。危險因素包括囊性纖維化、慢性阻塞性肺疾病(COPD)、哮喘、糖尿病、心力衰竭、吸煙史、咳嗽能力差(如中風後)和免疫系統弱。診斷通常基於症狀和身體檢查。胸部X光檢查、血液檢查和痰液培養可以幫助確診。該病可根據感染來源進行分類,如社區獲得性肺炎、醫院獲得性肺炎或衛生保健相關肺炎。

數據集中共有訓練、驗證、測試三個文件夾,每個文件夾又包含正常與肺炎兩個子文件夾,圖片格式為jpeg。

 

#導入包

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import keras
from keras.models import Sequential
from keras.layers import Dense, Conv2D , MaxPool2D , Flatten , Dropout , BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix
from keras.callbacks import ReduceLROnPlateau
import cv2
import os

#查看樣本基本情況

# Root folder of the Kaggle chest X-ray dataset (note the trailing slash,
# sub-folders are appended by plain string concatenation).
PATH = '/kaggle/input/chest-xray-pneumonia/chest_xray/chest_xray/'

# (message, sub-folder) pairs: one entry per class folder of every split.
folder_reports = (
    ('number of normal in training set:', 'train/NORMAL'),
    ('number of pneumonia in training set:', 'train/PNEUMONIA'),
    ('number of normal in validation set:', 'val/NORMAL'),
    ('number of pneumonia in validation set:', 'val/PNEUMONIA'),
    ('number of normal in test set:', 'test/NORMAL'),
    ('number of pneumonia in test set:', 'test/PNEUMONIA'),
)

# Print how many directory entries each class folder contains.
for message, subfolder in folder_reports:
    print(message, len(os.listdir(PATH + subfolder)))

樣本不均衡,訓練集中肺炎約為正常的三倍,驗證集數據數量過小,只有9個。

#數據讀取

# Class names; a label's position in this list is its integer class id
# (PNEUMONIA -> 0, NORMAL -> 1).
labels = ['PNEUMONIA', 'NORMAL']
# Side length, in pixels, of the square images produced by the loader.
img_size = 150


def get_training_data(data_dir):
    """Load every image below ``data_dir/<label>`` as a resized grayscale array.

    Args:
        data_dir: Folder that contains one sub-folder per entry in ``labels``.

    Returns:
        A tuple ``(x_data, y_data)`` of NumPy arrays: the images, each resized
        to ``img_size`` x ``img_size``, and the matching class ids (the index
        of the folder name in ``labels``).

    Files that OpenCV cannot decode or resize are reported on stdout and
    skipped, so one bad file does not abort the whole load.
    """
    x_data = []
    y_data = []
    for class_num, label in enumerate(labels):
        path = os.path.join(data_dir, label)
        for img in os.listdir(path):
            img_path = os.path.join(path, img)
            img_arr = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)  # load as grayscale
            if img_arr is None:
                # cv2.imread reports an unreadable / non-image file by
                # returning None instead of raising.
                print('skipping unreadable image file: {}'.format(img_path))
                continue
            try:
                resized_arr = cv2.resize(img_arr, (img_size, img_size))
            except cv2.error as e:
                print('{}: {}'.format(img_path, e))
                continue
            x_data.append(resized_arr)  # n,150,150
            y_data.append(class_num)  # n
    return np.array(x_data), np.array(y_data)
# Load the train, test and validation splits (in that order) from the
# Kaggle input folder; each call returns an (images, class ids) pair.
(x_train, y_train), (x_test, y_test), (x_val, y_val) = [
    get_training_data(split_dir)
    for split_dir in (
        '/kaggle/input/chest-xray-pneumonia/chest_xray/chest_xray/train',
        '/kaggle/input/chest-xray-pneumonia/chest_xray/chest_xray/test',
        '/kaggle/input/chest-xray-pneumonia/chest_xray/chest_xray/val',
    )
]

 #顯示類別數量對比

sns.set_style('darkgrid')
# Pass the class ids by keyword: recent seaborn releases reserve the first
# positional argument for `data`, so a bare vector is no longer read as `x`.
sns.countplot(x=y_train)

#顯示圖像

# Show the first and the last training image, each in its own 5x5 inch
# figure, titled with the class name looked up from its class id.
for sample_idx in (0, -1):
    plt.figure(figsize=(5, 5))
    plt.imshow(x_train[sample_idx], cmap='gray')
    plt.title(labels[y_train[sample_idx]])

#歸一化,增加通道維度,準備餵入網絡

# Divide the pixel values by 255, then append a trailing channel axis of
# size 1 so every split has shape (n, img_size, img_size, 1).
x_train = (np.array(x_train) / 255).reshape(-1, img_size, img_size, 1)
x_val = (np.array(x_val) / 255).reshape(-1, img_size, img_size, 1)
x_test = (np.array(x_test) / 255).reshape(-1, img_size, img_size, 1)

#數據增強器

# Augmentation settings. All centring / std-normalisation / ZCA switches are
# explicitly turned off; only geometric transforms are enabled.
augmentation_settings = dict(
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    rotation_range=30,        # random rotation, in degrees
    zoom_range=0.2,           # random zoom
    width_shift_range=0.1,    # random horizontal shift (fraction of total width)
    height_shift_range=0.1,   # random vertical shift (fraction of total height)
    horizontal_flip=True,     # random left-right flips
    vertical_flip=False,      # no up-down flips
)
datagen = ImageDataGenerator(**augmentation_settings)


datagen.fit(x_train)

#卷積神經網絡CNN

                          

# Five Conv -> (Dropout) -> BatchNorm -> MaxPool stages followed by a small
# dense head ending in a single sigmoid unit (binary classification).
# The input shape is derived from img_size so it stays consistent with the
# reshape applied to the image arrays instead of repeating the literal 150.
# Trailing comments give each layer's output shape for img_size = 150.
model = Sequential([
    Conv2D(32, (3, 3), strides=1, padding='same', activation='relu',
           input_shape=(img_size, img_size, 1)),  # 150,150,32
    BatchNormalization(),  # 150,150,32
    MaxPool2D((2, 2), strides=2, padding='same'),  # 75,75,32

    Conv2D(64, (3, 3), strides=1, padding='same', activation='relu'),  # 75,75,64
    Dropout(0.1),  # 75,75,64
    BatchNormalization(),  # 75,75,64
    MaxPool2D((2, 2), strides=2, padding='same'),  # 38,38,64

    Conv2D(64, (3, 3), strides=1, padding='same', activation='relu'),  # 38,38,64
    BatchNormalization(),  # 38,38,64
    MaxPool2D((2, 2), strides=2, padding='same'),  # 19,19,64

    Conv2D(128, (3, 3), strides=1, padding='same', activation='relu'),  # 19,19,128
    Dropout(0.2),  # 19,19,128
    BatchNormalization(),  # 19,19,128
    MaxPool2D((2, 2), strides=2, padding='same'),  # 10,10,128

    Conv2D(256, (3, 3), strides=1, padding='same', activation='relu'),  # 10,10,256
    Dropout(0.2),  # 10,10,256
    BatchNormalization(),  # 10,10,256
    MaxPool2D((2, 2), strides=2, padding='same'),  # 5,5,256

    Flatten(),  # 5*5*256=6400
    Dense(units=128, activation='relu'),  # 128
    Dropout(0.2),  # 128
    Dense(units=1, activation='sigmoid'),  # 1
])
model.compile(optimizer="rmsprop", loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

 #智能學習率函數

# Multiply the learning rate by 0.3 whenever val_accuracy has not improved
# for 2 consecutive epochs, never going below 1e-6.
learning_rate_reduction = ReduceLROnPlateau(monitor='val_accuracy', patience = 2, verbose=1,factor=0.3, min_lr=0.000001)

# Training

# Only the training batches go through the augmenting generator. The
# validation arrays are passed as-is: randomly transformed validation images
# would make val_accuracy (which drives the callback above) vary from epoch
# to epoch for reasons unrelated to the model.
history = model.fit(
    datagen.flow(x_train, y_train, batch_size=32),
    epochs=12,
    validation_data=(x_val, y_val),
    callbacks=[learning_rate_reduction],
)

沒有GPU的情況下訓練花費了將近50分鐘,訓練集上的結果逐漸優化,驗證集上結果一般。

# Plot the per-epoch accuracy (left) and loss (right) recorded by model.fit
# for both the training and the validation data.
train_acc = history.history['accuracy']
train_loss = history.history['loss']
val_acc = history.history['val_accuracy']
val_loss = history.history['val_loss']
# Derive the x axis from the recorded history rather than repeating the
# epoch count, so the plot stays valid if the number of epochs changes.
epochs = list(range(len(train_acc)))
fig , ax = plt.subplots(1,2)
fig.set_size_inches(20,10)

ax[0].plot(epochs , train_acc , 'go-' , label = 'Training Accuracy')
ax[0].plot(epochs , val_acc , 'ro-' , label = 'Validation Accuracy')
ax[0].set_title('Training & Validation Accuracy')
ax[0].legend()
ax[0].set_xlabel("Epochs")
ax[0].set_ylabel("Accuracy")

ax[1].plot(epochs , train_loss , 'g-o' , label = 'Training Loss')
ax[1].plot(epochs , val_loss , 'r-o' , label = 'Validation Loss')
# This panel shows training and validation loss; no test data is involved.
ax[1].set_title('Training & Validation Loss')
ax[1].legend()
ax[1].set_xlabel("Epochs")
ax[1].set_ylabel("Training & Validation Loss")
plt.show()

#測試集結果

# Sequential.predict_classes is no longer available in current
# TensorFlow/Keras releases; for a single sigmoid output the equivalent is
# to threshold the predicted probability at 0.5.
probabilities = model.predict(x_test)
# Flatten the (n, 1) output to a 1-D vector of 0/1 class ids so it lines up
# with y_test (ids are positions in `labels`).
predictions = (probabilities > 0.5).astype("int32").reshape(-1)
cm = confusion_matrix(y_test,predictions)
cm
# sklearn.metrics.confusion_matrix(y_true, y_pred, *, labels=None, sample_weight=None, normalize=None)

#顯示預測正確圖像

# Indices of the test samples whose predicted class id equals the true one.
correct = np.nonzero(predictions == y_test)[0]
# Draw up to six of them on a 3x2 grid, without axis ticks, titled with the
# predicted and the actual class id.
for cell, sample_idx in enumerate(correct[:6], start=1):
    plt.subplot(3, 2, cell)
    plt.xticks([])
    plt.yticks([])
    plt.imshow(x_test[sample_idx].reshape(150, 150), cmap="gray", interpolation='none')
    plt.title("Predicted {},Actual {}".format(predictions[sample_idx], y_test[sample_idx]))

#顯示錯誤預測圖像

# Indices of the test samples whose predicted class id differs from the
# true one.
incorrect = np.nonzero(predictions != y_test)[0]
# Draw up to six of them on a 3x2 grid, without axis ticks, titled with the
# predicted and the actual class id.
for cell, sample_idx in enumerate(incorrect[:6], start=1):
    plt.subplot(3, 2, cell)
    plt.xticks([])
    plt.yticks([])
    plt.imshow(x_test[sample_idx].reshape(150, 150), cmap="gray", interpolation='none')
    plt.title("Predicted {},Actual {}".format(predictions[sample_idx], y_test[sample_idx]))

#保存模型

# Write the trained model to 'model.h5' in the current working directory.
# NOTE(review): the .h5 suffix presumably selects Keras' legacy HDF5 format;
# confirm against the installed Keras version.
model.save('model.h5')

歡迎討論!

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM