Keras之 cifar10數據集使用keras generator讀取、模型訓練、預測


  本文將介紹:

  使用keras的ImageDataGenerator讀取cifar10數據並做數據增強

  構建CNN卷積模型進行訓練,並使用測試集預測、生成kaggle提交文件

  一,下載kaggle-cifar10數據

  下載dataset到本地目錄cifar10中

  二,實現tensorflow動態按需分配GPU

  import matplotlib as mpl

  import matplotlib.pyplot as plt

  import numpy as np

  import os

  import pandas as pd

  import sklearn

  import sys

  import tensorflow as tf

  import time

  from tensorflow import keras

  print(tf.__version__)

  print(sys.version_info)

  for module in mpl, np, pd, sklearn, tf, keras:

  print(module.__name__, module.__version__)

# 1. Enable TensorFlow dynamic (on-demand) GPU memory allocation, so the
#    process does not reserve all GPU memory up front.
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
config = ConfigProto()
# Grow the GPU memory allocation gradually as needed instead of
# pre-allocating the whole device.
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

  三,讀取訓練集、測試集的csv文件數據和訓練集、測試集數據對應關系

  # 三,讀取訓練集、測試集的csv文件數據和訓練集、測試集數據對應關系

  class_names = [

  'airplane',

  'automobile',

  'bird',

  'cat',

  'deer',

  'dog',

  'frog',

  'horse',

  'ship',

  'truck',

  ]

  train_lables_file = './cifar10/trainLabels.csv'

  test_csv_file = './cifar10/sampleSubmission.csv'

  train_folder = './cifar10/train/'

  test_folder = './cifar10/test'

  def parse_csv_file(filepath, folder):

  """Parses csv files into (filename(path), label) format"""

  results = []

  with open(filepath, 'r') as f:

  lines = f.readlines()[1:]

  for line in lines:

  image_id, label_str = line.strip('\n').split(',')

  image_full_path = os.path.join(folder, image_id + '.png')

  results.append((image_full_path, label_str))

  return results

  train_labels_info = parse_csv_file(train_lables_file, train_folder)

  test_csv_info = parse_csv_file(test_csv_file, test_folder)

  import pprint

  pprint.pprint(train_labels_info[0:5])

  pprint.pprint(test_csv_info[0:5])

  print(len(train_labels_info), len(test_csv_info))

  四,將對應關系轉換為dataframe類型

  # 四,將對應關系轉換為dataframe類型

  # train_df = pd.DataFrame(train_labels_info)

  train_df = pd.DataFrame(train_labels_info[0:45000])

  valid_df = pd.DataFrame(train_labels_info[45000:])

  test_df = pd.DataFrame(test_csv_info)

  train_df.columns = ['filepath', 'class']

  valid_df.columns = ['filepath', 'class']

  test_df.columns = ['filepath', 'class']

  print(train_df.head())

  print(valid_df.head())

  print(test_df.head())

  五,使用ImageDataGenerator加載數據並做數據增強

  # 五,使用ImageDataGenerator加載數據並做數據增強

  height = 32

  width = 32

  channels = 3

  batch_size = 32

  num_classes = 10

  train_datagen = keras.preprocessing.image.ImageDataGenerator(

  rescale = 1./255,

  rotation_range = 40,

  width_shift_range = 0.2,

  height_shift_range = 0.2,

  shear_range = 0.2,

  zoom_range = 0.2,

  horizontal_flip = True,

  fill_mode = 'nearest',

  )

  train_generator = train_datagen.flow_from_dataframe(

  train_df,

  directory = './',

  x_col = 'filepath',

  y_col = 'class',

  classes = class_names,

  target_size = (height, width),

  batch_size = batch_size,

  seed = 7,

  shuffle = True,

  class_mode = 'sparse',

  )

  valid_datagen = keras.preprocessing.image.ImageDataGenerator(

  rescale = 1./255)

  valid_generator = valid_datagen.flow_from_dataframe(

  valid_df,

  directory = './',

  x_col = 'filepath',

  y_col = 'class',

  classes = class_names,

  target_size = (height, width),

  batch_size = batch_size,

  seed = 7,

  shuffle = False,

  class_mode = "sparse")

  train_num = train_generator.samples

  valid_num = valid_generator.samples

  print(train_num, valid_num)

  六,查看generator數據

  for i in range(2):

  x, y = train_generator.next()

  print(x.shape, y.shape)

  print(y)

  七,構建模型

  # 七,構建模型

  model = keras.models.Sequential([

  keras.layers.Conv2D(filters=128, kernel_size=3, padding='same',

  activation='relu',

  input_shape=[width, height, channels]),

  keras.layers.BatchNormalization(),

  keras.layers.Conv2D(filters=128, kernel_size=3, padding='same',

  activation='relu'),

  keras.layers.BatchNormalization(),

  keras.layers.MaxPool2D(pool_size=2),

  keras.layers.Conv2D(filters=256, kernel_size=3, padding='same',

  activation='relu'),

  keras.layers.BatchNormalization(),

  keras.layers.Conv2D(filters=256, kernel_size=3, padding='same',

  activation='relu'),

  keras.layers.BatchNormalization(),

  keras.layers.MaxPool2D(pool_size=2),

  keras.layers.Conv2D(filters=512, kernel_size=3, padding='same',

  activation='relu'),

  keras.layers.BatchNormalization(),

  keras.layers.Conv2D(filters=512, kernel_size=3, padding='same',

  activation='relu'),

  keras.layers.BatchNormalization(),

  keras.layers.MaxPool2D(pool_size=2),

  keras.layers.Flatten(),

  keras.layers.Dense(512, activation='relu'),

  keras.layers.Dense(num_classes, activation='softmax'),

  ])

  model.compile(loss="sparse_categorical_crossentropy",

  optimizer="adam", metrics=['accuracy'])

  model.summary()

  八,訓練模型

  # 八,訓練模型

  epochs = 20

  history = model.fit_generator(train_generator,

  steps_per_epoch = train_num // batch_size,

  epochs = epochs,

  validation_data = valid_generator,

  validation_steps = valid_num // batch_size)

  九,打印模型訓練曲線

  # 九,打印模型訓練曲線

  def plot_learning_curves(history, label, epcohs, min_value, max_value):

  data = {}

  data[label] = history.history[label]

  data['val_'+label] = history.history['val_'+label]

  pd.DataFrame(data).plot(figsize=(8, 5))

  plt.grid(True)

  plt.axis([0, epochs, min_value, max_value])

  plt.show()

  plot_learning_curves(history, 'accuracy', epochs, 0, 1)

  plot_learning_curves(history, 'loss', epochs, 0, 2)

  十,使用keras.ImageDataGenerator加載測試集數據

  # 十,使用keras.ImageDataGenerator加載測試集數據

  test_datagen = keras.preprocessing.image.ImageDataGenerator(

  rescale = 1./255)

  test_generator = valid_datagen.flow_from_dataframe(

  test_df,

  directory = './',

  x_col = 'filepath',

  y_col = 'class',

  classes = class_names,

  target_size = (height, width),

  batch_size = batch_size,

  seed = 7,

  shuffle = False,

  class_mode = "sparse")

  test_num = test_generator.samples

  print(test_num)

  十一,使用測試集預測模型結果

  # 十一,使用測試集預測模型結果

  test_predict = model.predict_generator(test_generator,

  workers = 10,

  use_multiprocessing = True)

  1,測試集預測模型結果維度形狀

  print(test_predict.shape)

  2,抽取前5條數據查看

  print(test_predict[0:5])

  3,取結果數值為最大的為預測結果

  test_predict_class_indices = np.argmax(test_predict, axis = 1)

  4,取前5條結果查看

  print(test_predict_class_indices[0:5])

  5,將結果轉化為特征名稱

  test_predict_class = [class_names[index]

  for index in test_predict_class_indices]

  查看前五條結果

  print(test_predict_class[0:5])

  十二,將預測結果寫入到submission.csv文件中,並在kaggle上提交

  # 十二,將預測結果寫入到submission.csv文件中,並在kaggle上提交

  def generate_submissions(filename, predict_class):

  with open(filename, 'w') as f:

  f.write('id,label\n')

  for i in range(len(predict_class)):

  f.write('%d,%s\n' % (i+1, predict_class[i]))

  output_file = "./cifar10/submission.csv"

  generate_submissions(output_file, test_predict_class)

  十三,總結代碼

  #!/usr/bin/env python3

  # -*- coding: utf-8 -*-

  import matplotlib as mpl

  import matplotlib.pyplot as plt

  import numpy as np

  import os

  import pandas as pd

  import sklearn

  import sys

  import tensorflow as tf

  import time

  from tensorflow import keras

  print(tf.__version__)

  print(sys.version_info)

  for module in mpl, np, pd, sklearn, tf, keras:

  print(module.__name__, module.__version__)

# 1. Enable TensorFlow dynamic (on-demand) GPU memory allocation, so the
#    process does not reserve all GPU memory up front.
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
config = ConfigProto()
# Grow the GPU memory allocation gradually as needed instead of
# pre-allocating the whole device.
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

  # 二,讀取訓練集、測試集的csv文件數據和訓練集、測試集數據對應關系

  class_names = [

  'airplane',

  'automobile',

  'bird',

  'cat',

  'deer',

  'dog',

  'frog',

  'horse',

  'ship',

  'truck',

  ]

  train_lables_file = './cifar10/trainLabels.csv'

  test_csv_file = './cifar10/sampleSubmission.csv'

  train_folder = './cifar10/train/'

  test_folder = './cifar10/test'

  def parse_csv_file(filepath, folder):

  """Parses csv files into (filename(path), label) format"""

  results = []

  with open(filepath, 'r') as f:

  lines = f.readlines()[1:]

  for line in lines:

  image_id, label_str = line.strip('\n').split(',')

  image_full_path = os.path.join(folder, image_id + '.png')

  results.append((image_full_path, label_str))

  return results

  train_labels_info = parse_csv_file(train_lables_file, train_folder)

  test_csv_info = parse_csv_file(test_csv_file, test_folder)

  import pprint

  pprint.pprint(train_labels_info[0:5])

  pprint.pprint(test_csv_info[0:5])

  print(len(train_labels_info), len(test_csv_info))

  # 四,將對應關系轉換為dataframe類型

  # train_df = pd.DataFrame(train_labels_info)

  train_df = pd.DataFrame(train_labels_info[0:45000])

  valid_df = pd.DataFrame(train_labels_info[45000:])

  test_df = pd.DataFrame(test_csv_info)

  train_df.columns = ['filepath', 'class']

  valid_df.columns = ['filepath', 'class']

  test_df.columns = ['filepath', 'class']

  print(train_df.head())

  print(valid_df.head())

  print(test_df.head())

  # 五,使用ImageDataGenerator加載數據並做數據增強

  height = 32

  width = 32

  channels = 3

  batch_size = 32

  num_classes = 10

  train_datagen = keras.preprocessing.image.ImageDataGenerator(

  rescale = 1./255,

  rotation_range = 40,

  width_shift_range = 0.2,

  height_shift_range = 0.2,

  shear_range = 0.2,

  zoom_range = 0.2,

  horizontal_flip = True,

  fill_mode = 'nearest',

  )

  train_generator = train_datagen.flow_from_dataframe(

  train_df,

  directory = './',

  x_col = 'filepath',

  y_col = 'class',

  classes = class_names,

  target_size = (height, width),

  batch_size = batch_size,

  seed = 7,

  shuffle = True,

  class_mode = 'sparse',

  )棗庄婦科醫院 http://mobile.zzdffkyy.com/

  valid_datagen = keras.preprocessing.image.ImageDataGenerator(

  rescale = 1./255)

  valid_generator = valid_datagen.flow_from_dataframe(

  valid_df,

  directory = './',

  x_col = 'filepath',

  y_col = 'class',

  classes = class_names,

  target_size = (height, width),

  batch_size = batch_size,

  seed = 7,

  shuffle = False,

  class_mode = "sparse")

  train_num = train_generator.samples

  valid_num = valid_generator.samples

  print(train_num, valid_num)

  # 六,查看generator數據

  for i in range(2):

  x, y = train_generator.next()

  print(x.shape, y.shape)

  print(y)

  # 七,構建模型

  model = keras.models.Sequential([

  keras.layers.Conv2D(filters=128, kernel_size=3, padding='same',

  activation='relu',

  input_shape=[width, height, channels]),

  keras.layers.BatchNormalization(),

  keras.layers.Conv2D(filters=128, kernel_size=3, padding='same',

  activation='relu'),

  keras.layers.BatchNormalization(),

  keras.layers.MaxPool2D(pool_size=2),

  keras.layers.Conv2D(filters=256, kernel_size=3, padding='same',

  activation='relu'),

  keras.layers.BatchNormalization(),

  keras.layers.Conv2D(filters=256, kernel_size=3, padding='same',

  activation='relu'),

  keras.layers.BatchNormalization(),

  keras.layers.MaxPool2D(pool_size=2),

  keras.layers.Conv2D(filters=512, kernel_size=3, padding='same',

  activation='relu'),

  keras.layers.BatchNormalization(),

  keras.layers.Conv2D(filters=512, kernel_size=3, padding='same',

  activation='relu'),

  keras.layers.BatchNormalization(),

  keras.layers.MaxPool2D(pool_size=2),

  keras.layers.Flatten(),

  keras.layers.Dense(512, activation='relu'),

  keras.layers.Dense(num_classes, activation='softmax'),

  ])

  model.compile(loss="sparse_categorical_crossentropy",

  optimizer="adam", metrics=['accuracy'])

  model.summary()

  # 八,訓練模型

  epochs = 20

  history = model.fit_generator(train_generator,

  steps_per_epoch = train_num // batch_size,

  epochs = epochs,

  validation_data = valid_generator,

  validation_steps = valid_num // batch_size)

  # 九,打印模型訓練曲線

  def plot_learning_curves(history, label, epcohs, min_value, max_value):

  data = {}

  data[label] = history.history[label]

  data['val_'+label] = history.history['val_'+label]

  pd.DataFrame(data).plot(figsize=(8, 5))

  plt.grid(True)

  plt.axis([0, epochs, min_value, max_value])

  plt.show()

  plot_learning_curves(history, 'accuracy', epochs, 0, 1)

  plot_learning_curves(history, 'loss', epochs, 0, 2)

  # 十,使用keras.ImageDataGenerator加載測試集數據

  test_datagen = keras.preprocessing.image.ImageDataGenerator(

  rescale = 1./255)

  test_generator = valid_datagen.flow_from_dataframe(

  test_df,

  directory = './',

  x_col = 'filepath',

  y_col = 'class',

  classes = class_names,

  target_size = (height, width),

  batch_size = batch_size,

  seed = 7,

  shuffle = False,

  class_mode = "sparse")

  test_num = test_generator.samples

  print(test_num)

  # 十一,使用測試集預測模型結果

  test_predict = model.predict_generator(test_generator,

  workers = 10,

  use_multiprocessing = True)

  # 1,測試集預測模型結果維度形狀

  print(test_predict.shape)

  # 2,抽取前5條數據查看

  print(test_predict[0:5])

  # 3,取結果數值為最大的為預測結果

  test_predict_class_indices = np.argmax(test_predict, axis = 1)

  # 4,取前5條結果查看

  print(test_predict_class_indices[0:5])

  # 5,將結果轉化為特征名稱

  test_predict_class = [class_names[index]

  for index in test_predict_class_indices]

  # 查看前五條結果

  print(test_predict_class[0:5])

  # 十二,將預測結果寫入到submission.csv文件中,並在kaggle上提交

  def generate_submissions(filename, predict_class):

  with open(filename, 'w') as f:

  f.write('id,label\n')

  for i in range(len(predict_class)):

  f.write('%d,%s\n' % (i+1, predict_class[i]))

  output_file = "./cifar10/submission.csv"

  generate_submissions(output_file, test_predict_class)


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM