TensorFlow筆記五：將cifar10和Mnist數據集文件復原成圖片格式

本文轉載自查看原文 2019-05-07 10:49 575 Python/ TensorFlow

一、cifar10數據集

（http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz）源格式是數據文件，因為訓練需要轉換成圖片格式

轉換代碼：

注意：請將文件路徑改成自己的文件路徑；train 和 test 文件夾需要事先自己建好，然後運行腳本，等待轉換完成

from scipy.misc import imsave
import numpy as np

# Unpickle a batch file and return the resulting dictionary.
def unpickle(file):
    """Load a pickled file and return the object stored in it.

    Args:
        file: Path of the pickle file to read.

    Returns:
        The unpickled object (for the batch files used below, a dictionary
        that is indexed with the 'data' and 'labels' keys).

    Raises:
        OSError: If the file cannot be opened.
        pickle.UnpicklingError, EOFError: If the content is not a valid pickle.
    """
    import pickle as pk

    # NOTE: pickle can execute arbitrary code while loading; only use this on
    # files obtained from a trusted source.
    # The context manager closes the handle even when loading fails.
    with open(file, 'rb') as fo:
        # 'iso-8859-1' maps every byte to a character, so byte strings pickled
        # by Python 2 are decoded without errors.
        return pk.load(fo, encoding='iso-8859-1')

# Export the training set: every sample of data_batch_1 .. data_batch_5 is
# written as one JPEG into the 'train' directory, which must already exist
# next to this script.
# NOTE(review): imsave is imported from scipy.misc; recent SciPy releases may
# no longer ship it - confirm against the installed SciPy version.
for batch_no in range(1, 6):
    batch_path = "cifar-10-python/cifar-10-batches-py/data_batch_" + str(batch_no)
    batch = unpickle(batch_path)
    print(batch_path + " is loading...")

    # Running image number so that file names stay unique across batches.
    first_index = (batch_no - 1) * 10000
    for row in range(10000):
        # Each record is reshaped to (3, 32, 32) and then reordered to
        # (32, 32, 3) before it is handed to imsave.
        pixels = np.reshape(batch['data'][row], (3, 32, 32)).transpose(1, 2, 0)
        # File name pattern: <label>_<running index>.jpg
        target = ''.join(
            ['train/', str(batch['labels'][row]), '_', str(row + first_index), '.jpg']
        )
        imsave(target, pixels)
    print(batch_path + " loaded.")

print("test_batch is loading...")

# Export the test set: every sample of test_batch is written as one JPEG into
# the 'test' directory.
test_batch = unpickle("test_batch")
for row in range(10000):
    # Reshape the record to (3, 32, 32), then reorder the axes to (32, 32, 3).
    pixels = np.reshape(test_batch['data'][row], (3, 32, 32)).transpose(1, 2, 0)
    # File name pattern: <label>_<index>.jpg
    target = ''.join(['test/', str(test_batch['labels'][row]), '_', str(row), '.jpg'])
    imsave(target, pixels)
print("test_batch loaded.")

二、mnist數據集的轉化

1、先解壓出二進制文件，再運行

import numpy as np  
import struct  

from PIL import Image  
import os  

# Convert the MNIST training set (IDX files) into PNG images, sorted into one
# sub-directory per label below datas_root.

data_file = 'MNIST_data/train-images.idx3-ubyte'  # adjust this path as needed

# Read the whole image file; the context manager closes the handle right away.
with open(data_file, 'rb') as data_fp:
    data_buf = data_fp.read()

# The image file starts with four big-endian unsigned 32-bit integers.
image_header = '>IIII'
magic, numImages, numRows, numColumns = struct.unpack_from(
    image_header, data_buf, 0)
# The payload size comes from the header instead of a hard-coded file size,
# and the bytes are viewed in place instead of being unpacked into a Python
# tuple with one element per pixel.
datas = np.frombuffer(
    data_buf,
    dtype=np.uint8,
    count=numImages * numRows * numColumns,
    offset=struct.calcsize(image_header),
).reshape(numImages, 1, numRows, numColumns)

label_file = 'MNIST_data/train-labels.idx1-ubyte'  # adjust this path as needed

with open(label_file, 'rb') as label_fp:
    label_buf = label_fp.read()

# The label file starts with two big-endian unsigned 32-bit integers, followed
# by one byte per label.
label_header = '>II'
magic, numLabels = struct.unpack_from(label_header, label_buf, 0)
labels = np.frombuffer(
    label_buf,
    dtype=np.uint8,
    count=numLabels,
    offset=struct.calcsize(label_header),
).astype(np.int64)

datas_root = 'MNIST_data/mnist_train'  # adjust this path as needed

# One output directory per label value 0-9; existing directories are reused.
for i in range(10):
    os.makedirs(datas_root + os.sep + str(i), exist_ok=True)

for ii in range(numLabels):
    # Use the full numRows x numColumns image rather than a fixed 28x28 slice.
    img = Image.fromarray(datas[ii, 0])
    label = labels[ii]
    file_name = datas_root + os.sep + str(label) + os.sep + 'mnist_train_' + str(ii) + '.png'
    img.save(file_name)

import numpy as np  
import struct  

from PIL import Image  
import os  

# Convert the MNIST test set (IDX files) into PNG images, sorted into one
# sub-directory per label below datas_root.

data_file = 'MNIST_data/t10k-images.idx3-ubyte'  # adjust this path as needed

# Read the whole image file; the context manager closes the handle right away.
with open(data_file, 'rb') as data_fp:
    data_buf = data_fp.read()

# The image file starts with four big-endian unsigned 32-bit integers.
image_header = '>IIII'
magic, numImages, numRows, numColumns = struct.unpack_from(
    image_header, data_buf, 0)
# The payload size comes from the header instead of a hard-coded file size,
# and the bytes are viewed in place instead of being unpacked into a Python
# tuple with one element per pixel.
datas = np.frombuffer(
    data_buf,
    dtype=np.uint8,
    count=numImages * numRows * numColumns,
    offset=struct.calcsize(image_header),
).reshape(numImages, 1, numRows, numColumns)

label_file = 'MNIST_data/t10k-labels.idx1-ubyte'  # adjust this path as needed

with open(label_file, 'rb') as label_fp:
    label_buf = label_fp.read()

# The label file starts with two big-endian unsigned 32-bit integers, followed
# by one byte per label.
label_header = '>II'
magic, numLabels = struct.unpack_from(label_header, label_buf, 0)
labels = np.frombuffer(
    label_buf,
    dtype=np.uint8,
    count=numLabels,
    offset=struct.calcsize(label_header),
).astype(np.int64)

datas_root = 'MNIST_data/mnist_test'  # adjust this path as needed

# One output directory per label value 0-9; existing directories are reused.
for i in range(10):
    os.makedirs(datas_root + os.sep + str(i), exist_ok=True)

for ii in range(numLabels):
    # Use the full numRows x numColumns image rather than a fixed 28x28 slice.
    img = Image.fromarray(datas[ii, 0])
    label = labels[ii]
    file_name = datas_root + os.sep + str(label) + os.sep + 'mnist_test_' + str(ii) + '.png'
    img.save(file_name)

2、接着構造出圖片集noisy_test和noisy_train

這兩個圖片集是加了高斯噪聲的集合（代碼中用 np.random.normal 生成噪聲，再把像素值裁剪到 [0, 1]；可用作圖像去噪）

import numpy as np  
import struct  
import numpy as np
from PIL import Image  
import os  

# Build the noisy MNIST training set: every image gets additive Gaussian noise
# and is saved as PNG into one sub-directory per label below datas_root.

data_file = 'MNIST_data/train-images.idx3-ubyte'  # adjust this path as needed

# Read the whole image file; the context manager closes the handle right away.
with open(data_file, 'rb') as data_fp:
    data_buf = data_fp.read()

# The image file starts with four big-endian unsigned 32-bit integers.
image_header = '>IIII'
magic, numImages, numRows, numColumns = struct.unpack_from(
    image_header, data_buf, 0)
# The payload size comes from the header instead of a hard-coded file size,
# and the bytes are viewed in place instead of being unpacked into a Python
# tuple with one element per pixel.
datas = np.frombuffer(
    data_buf,
    dtype=np.uint8,
    count=numImages * numRows * numColumns,
    offset=struct.calcsize(image_header),
).reshape(numImages, 1, numRows, numColumns)

label_file = 'MNIST_data/train-labels.idx1-ubyte'  # adjust this path as needed

with open(label_file, 'rb') as label_fp:
    label_buf = label_fp.read()

# The label file starts with two big-endian unsigned 32-bit integers, followed
# by one byte per label.
label_header = '>II'
magic, numLabels = struct.unpack_from(label_header, label_buf, 0)
labels = np.frombuffer(
    label_buf,
    dtype=np.uint8,
    count=numLabels,
    offset=struct.calcsize(label_header),
).astype(np.int64)

datas_root = 'MNIST_data/noisy_train'  # adjust this path as needed

# One output directory per label value 0-9; existing directories are reused.
for i in range(10):
    os.makedirs(datas_root + os.sep + str(i), exist_ok=True)

# Weight of the standard-normal noise added to the [0, 1]-scaled pixels.
noise_factor = 0.5

for ii in range(numLabels):
    label = labels[ii]
    file_name = datas_root + os.sep + str(label) + os.sep + 'mnist_train_' + str(ii) + '.png'
    # Scale the pixels to [0, 1], add the noise, then clip back into range.
    x_train_noisy = datas[ii, 0].astype('float32') / 255.
    x_train_noisy = x_train_noisy + noise_factor * np.random.normal(
        loc=0.0, scale=1.0, size=x_train_noisy.shape)
    x_train_noisy = np.clip(x_train_noisy, 0., 1.)
    # Back to 8-bit pixel values. The former intermediate cast through
    # np.float was redundant, and that alias no longer exists in current
    # NumPy releases, so it is dropped.
    x_train_noisy = (x_train_noisy.astype('float32') * 255).astype(np.uint8)
    img = Image.fromarray(x_train_noisy)
    img.save(file_name)

import numpy as np  
import struct  

from PIL import Image  
import os  

# Build the noisy MNIST test set: every image gets additive Gaussian noise and
# is saved as PNG into one sub-directory per label below datas_root.

data_file = 'MNIST_data/t10k-images.idx3-ubyte'  # adjust this path as needed

# Read the whole image file; the context manager closes the handle right away.
with open(data_file, 'rb') as data_fp:
    data_buf = data_fp.read()

# The image file starts with four big-endian unsigned 32-bit integers.
image_header = '>IIII'
magic, numImages, numRows, numColumns = struct.unpack_from(
    image_header, data_buf, 0)
# The payload size comes from the header instead of a hard-coded file size,
# and the bytes are viewed in place instead of being unpacked into a Python
# tuple with one element per pixel.
datas = np.frombuffer(
    data_buf,
    dtype=np.uint8,
    count=numImages * numRows * numColumns,
    offset=struct.calcsize(image_header),
).reshape(numImages, 1, numRows, numColumns)

label_file = 'MNIST_data/t10k-labels.idx1-ubyte'  # adjust this path as needed

with open(label_file, 'rb') as label_fp:
    label_buf = label_fp.read()

# The label file starts with two big-endian unsigned 32-bit integers, followed
# by one byte per label.
label_header = '>II'
magic, numLabels = struct.unpack_from(label_header, label_buf, 0)
labels = np.frombuffer(
    label_buf,
    dtype=np.uint8,
    count=numLabels,
    offset=struct.calcsize(label_header),
).astype(np.int64)

datas_root = 'MNIST_data/noisy_test'  # adjust this path as needed

# One output directory per label value 0-9; existing directories are reused.
for i in range(10):
    os.makedirs(datas_root + os.sep + str(i), exist_ok=True)

# Weight of the standard-normal noise added to the [0, 1]-scaled pixels.
noise_factor = 0.5

for ii in range(numLabels):
    label = labels[ii]
    file_name = datas_root + os.sep + str(label) + os.sep + 'mnist_test_' + str(ii) + '.png'
    # Scale the pixels to [0, 1], add the noise, then clip back into range.
    x_train_noisy = datas[ii, 0].astype('float32') / 255.
    x_train_noisy = x_train_noisy + noise_factor * np.random.normal(
        loc=0.0, scale=1.0, size=x_train_noisy.shape)
    x_train_noisy = np.clip(x_train_noisy, 0., 1.)
    # Back to 8-bit pixel values. The former intermediate cast through
    # np.float was redundant, and that alias no longer exists in current
    # NumPy releases, so it is dropped.
    x_train_noisy = (x_train_noisy.astype('float32') * 255).astype(np.uint8)
    img = Image.fromarray(x_train_noisy)
    img.save(file_name)

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 基於cifar10數據集的cnn圖片分類模型 CIFAR10 數據集分類 CIFAR10/CIFAR100數據集介紹 Tensorflow機器學習入門——cifar10數據集的讀取、展示與保存將cifar10數據集保存為可見圖片 resnet實現cifar10數據集分類 LeNet網絡實現cifar10數據集分類 AlexNet實現cifar10數據集分類 PyTorch學習筆記7--案例3：基於CNN的CIFAR10數據集的圖像分類 VGGNet實現cifar10數據集分類