在利用深度神經網絡模型進行圖像分類時，數據集的處理至關重要。為了訓練模型並評價其好壞，一般需要將數據集劃分為訓練集和測試集。
這裡以遙感圖像場景分類數據集 NWPU-RESISC45 為例，按照預先設置好的比例劃分訓練集和測試集，Python 代碼如下：
# -*- coding: utf-8 -*-
# Author --LiMing--
"""Split an image-classification dataset into a training and a test set.

The source dataset is expected to contain one sub-directory per class, each
holding the images of that class (the layout used by NWPU-RESISC45).  For
every class a random fraction ``train_rate`` of the images is copied into the
training directory and the remaining images into the test directory, keeping
the per-class sub-directory structure.
"""

import os
import random
import shutil
import time

# Root directory of the original dataset (one sub-directory per class).
origion_path = '/home/room/lm_other/NWPU-RESISC45/'

# Output roots; the order must stay [train, test].
save_train_dir = '/home/room/lm_other/RS_45/2_8/train/'
save_test_dir = '/home/room/lm_other/RS_45/2_8/test/'
save_dir = [save_train_dir, save_test_dir]

# Fraction of each class that goes into the training set (0.2 -> 2:8 split).
train_rate = 0.2


def copyFile(fileDir, class_name, rate=None, dest_dirs=None):
    """Copy the images of one class into the train and test directories.

    Args:
        fileDir: Directory holding the images of a single class.
        class_name: Name of the sub-directory created under each output root.
        rate: Fraction (0..1) of the images sampled into the training set.
            Defaults to the module-level ``train_rate``.
        dest_dirs: Two-element sequence ``[train_root, test_root]``.
            Defaults to the module-level ``save_dir``.

    Raises:
        ValueError: If ``rate`` is outside the range [0, 1].
    """
    if rate is None:
        rate = train_rate
    if dest_dirs is None:
        dest_dirs = save_dir
    if not 0 <= rate <= 1:
        raise ValueError('train rate must be within [0, 1], got %r' % (rate,))

    # Keep regular files only (a nested directory cannot be copied with
    # shutil.copy) and sort them, because os.listdir order is arbitrary and
    # would make a seeded run non-reproducible.
    image_list = sorted(
        name for name in os.listdir(fileDir)
        if os.path.isfile(os.path.join(fileDir, name))
    )

    # Randomly pick `rate` of the images for training; the rest is the test set.
    train_number = int(len(image_list) * rate)
    train_sample = random.sample(image_list, train_number)
    chosen = set(train_sample)
    test_sample = [name for name in image_list if name not in chosen]

    # Copy each subset below its output root, creating the class folder on
    # demand.  os.path.join makes a trailing slash on the roots optional.
    for dest_root, names in zip(dest_dirs, (train_sample, test_sample)):
        class_dir = os.path.join(dest_root, class_name)
        os.makedirs(class_dir, exist_ok=True)
        for name in names:
            shutil.copy(os.path.join(fileDir, name),
                        os.path.join(class_dir, name))


def main():
    """Split every class directory found under ``origion_path``."""
    time_start = time.time()

    for class_name in sorted(os.listdir(origion_path)):
        image_Dir = os.path.join(origion_path, class_name)
        # Skip stray files (readme, archives, ...) lying next to the classes.
        if not os.path.isdir(image_Dir):
            continue
        copyFile(image_Dir, class_name)
        print('%s划分完畢!' % class_name)

    time_end = time.time()
    print('---------------')
    print('訓練集和測試集划分共耗時%s!' % (time_end - time_start))


if __name__ == '__main__':
    main()