將圖片數據寫入TFRecord文件

# Helpers that wrap raw Python values into tf.train.Feature protos.
def _int64_feature(value):
    """Wrap a single integer in a ``tf.train.Feature`` backed by an ``Int64List``."""
    int_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int_list)

def _bytes_feature(value):
    """Wrap a single bytes value in a ``tf.train.Feature`` backed by a ``BytesList``."""
    byte_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=byte_list)

# Load the MNIST data (pixels requested as uint8, labels one-hot encoded).
mnist = input_data.read_data_sets("F:/data_of_zengjie/a_minist",dtype=tf.uint8, one_hot=True)
images = mnist.train.images
labels = mnist.train.labels
# Size of the second axis of the image array; stored with every example.
pixels = images.shape[1]
num_examples = mnist.train.num_examples

# Destination of the TFRecord file.
output_dir = './TFRecord_Output/'
filename = "./TFRecord_Output/output.tfrecords"
if not os.path.exists(output_dir):
    os.makedirs(output_dir)
print (num_examples)
# The context manager closes the writer even if building or writing an
# example raises part-way through the loop.
with tf.python_io.TFRecordWriter(filename) as writer:
    # The range bound controls how many examples end up in the file
    # (use num_examples to write the whole training set).
    for index in range(101):
        # tobytes() returns the same bytes as the deprecated ndarray.tostring().
        image_raw = images[index].tobytes()

        example = tf.train.Example(features=tf.train.Features(feature={
            'pixels': _int64_feature(pixels),
            # argmax turns the one-hot row back into a class index; int()
            # converts the NumPy scalar to a plain Python int for the proto.
            'label': _int64_feature(int(np.argmax(labels[index]))),
            'image_raw': _bytes_feature(image_raw)
        }))
        writer.write(example.SerializeToString())
print ("TFRecord文件已保存。")

在上面的代碼中，通過修改 for index in range(101): 中的數值，可以控制寫入文件的Example數量。

一次讀取一個樣例

# Read the TFRecord file.
reader = tf.TFRecordReader()
filename_queue = tf.train.string_input_producer(["./TFRecord_Output/output.tfrecords"])
# Read one serialized example per call.
_,serialized_example = reader.read(filename_queue)

# Parse the single serialized example back into its three features.
features = tf.parse_single_example(
    serialized_example,
    features={
        'image_raw':tf.FixedLenFeature([],tf.string),
        'pixels':tf.FixedLenFeature([],tf.int64),
        'label':tf.FixedLenFeature([],tf.int64)
    })

# Decode the raw image bytes to uint8 and narrow the integer features to int32.
images = tf.decode_raw(features['image_raw'],tf.uint8)
labels = tf.cast(features['label'],tf.int32)
pixels = tf.cast(features['pixels'],tf.int32)

sess = tf.Session()

# Start the queue-runner threads that feed the input pipeline.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess,coord=coord)

try:
    for i in range(10):
        image, label, pixel = sess.run([images, labels, pixels])
        print (image.shape,label,pixel)  # image is a 1-D array of length 784
finally:
    # Stop and join the queue-runner threads, then release the session,
    # even if one of the reads above fails.
    coord.request_stop()
    coord.join(threads)
    sess.close()

一次讀取多個樣例

使用read_up_to（注意：不要把當前方格的代碼和前面的程序放在同一個jupyter notebook會話中接連運行，否則前面的定義會影響當前方格代碼的執行。最好重啟一下kernel，然後只選擇這一個方格運行。）

注意：從

_,serialized_example = reader.read(filename_queue)

# 解析讀取的樣例。

features = tf.parse_single_example(

改為

_,serialized_example = reader.read_up_to(filename_queue,10)

# 解析讀取的樣例。

features = tf.parse_example(

# Read the TFRecord file.
reader = tf.TFRecordReader()
filename_queue = tf.train.string_input_producer(["./TFRecord_Output/output.tfrecords"])

# Read up to 10 serialized examples per call.
_,serialized_example = reader.read_up_to(filename_queue,10)
# Parse the batch of serialized examples.
features = tf.parse_example(
    serialized_example,
    features={
        'image_raw':tf.FixedLenFeature([],tf.string),
        'pixels':tf.FixedLenFeature([],tf.int64),
        'label':tf.FixedLenFeature([],tf.int64)
    })

# Decode the raw image bytes to uint8 and narrow the integer features to int32.
images = tf.decode_raw(features['image_raw'],tf.uint8)
labels = tf.cast(features['label'],tf.int32)
pixels = tf.cast(features['pixels'],tf.int32)
print(images.get_shape())  # prints (?, ?): the static shape cannot be determined

sess = tf.Session()
# Start the queue-runner threads that feed the input pipeline.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess,coord=coord)

try:
    for i in range(100):
        image, label, pixel = sess.run([images, labels, pixels])
        print(image.shape)
        print(label.shape)
finally:
    # Stop and join the queue-runner threads, then release the session,
    # even if one of the reads above fails.
    coord.request_stop()
    coord.join(threads)
    sess.close()

但是有一個問題就是：

第一：假設在一開始寫入TFRecord的時候，使用的是for index in range(101):

即寫入的是101個example，而這裡一次讀取10個，那麼總是會出現某一次為（1，784）的情況。即read_up_to沒有為了單次達標10個而循環讀取的功能。當文件讀到最後的時候，它不會從文件的開頭再重新讀取，而是直接讀1個作為那次read_up_to(10)的結果。但是，還保留了一點比較好的地方就是，這裡for i in range(100):100次讀取，每次讀取10個example（不考慮那種只讀1個的情況），明顯超出了文件的101個example。但是，read_up_to為了滿足全部讀取次數，此時會循環讀取（這是因為string_input_producer默認num_epochs=None，會不斷重複提供文件名）。也就是說，read_up_to不會為了單次達標多少個example而循環讀取，但是會為了讀取次數達標，而循環讀取。

讀取TFRecord文件，每次讀取多個，使用的是batch

# Read the TFRecord file.
reader = tf.TFRecordReader()
filename_queue = tf.train.string_input_producer(["./TFRecord_Output/output.tfrecords"])

# Read one serialized example per call; grouping into batches is done by
# tf.train.batch further down.
_,serialized_example = reader.read(filename_queue)
# Parse the single serialized example.
features = tf.parse_single_example(
    serialized_example,
    features={
        'image_raw':tf.FixedLenFeature([],tf.string),
        'pixels':tf.FixedLenFeature([],tf.int64),
        'label':tf.FixedLenFeature([],tf.int64)
    })

# Decode the raw image bytes to uint8 and narrow the integer features to int32.
images = tf.decode_raw(features['image_raw'],tf.uint8)
labels = tf.cast(features['label'],tf.int32)
pixels = tf.cast(features['pixels'],tf.int32)
print(images.get_shape())  # the static shape is not fully defined at this point

batch_size = 10
# Maximum number of elements held in the batching queue.
capacity = 1000 + 3 * batch_size

# tf.train.batch requires fully defined static shapes, so pin them explicitly.
images.set_shape([784])
labels.set_shape([])
pixels.set_shape([])
image_batch, label_batch, pixel_batch = tf.train.batch(
    [images, labels, pixels], batch_size=batch_size, capacity=capacity)

sess = tf.Session()
# Start the queue-runner threads that feed the input pipeline.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess,coord=coord)

try:
    for i in range(100):
        image, label, pixel = sess.run([image_batch, label_batch, pixel_batch ])
        print(image.shape)
        print(label.shape)
finally:
    # Stop and join the queue-runner threads, then release the session,
    # even if one of the reads above fails.
    coord.request_stop()
    coord.join(threads)
    sess.close()

注意：首先，使用batch必須做到一點：

即用set_shape為各個tensor指定完整的shape，否則會報出如下錯誤：

ValueError: All shapes must be fully defined: [TensorShape([Dimension(None)]), TensorShape([]), TensorShape([])]

其次，batch不同於read_up_to：它不僅會不斷地在文件中循環讀取，而且一定會湊滿batch_size個樣例才輸出。所以，不會像使用read_up_to那樣出現(1,784)的情況，而是全部為(10,784)。

此外，batch還有一個好處是：

會建立一個最大容量為capacity的隊列，即如下圖所示：

即最後一部分標出的batch。所以，訓練神經網絡從Example Queue中取batch的時候，另外的線程可以同步向隊列中添加樣例。這樣的話，就可以避免IO瓶頸。而如果只使用read_up_to，則不能像tf.train.batch一樣構建一個隊列，也就無法讓訓練過程和數據處理過程並行進行。

同時使用batch和read_up_to

# Read the TFRecord file.
# NOTE: this snippet is a deliberate demonstration of a failure; the text
# that follows it explains the resulting "expected [10,784], got [1,784]" error.
reader = tf.TFRecordReader()
filename_queue = tf.train.string_input_producer(["./TFRecord_Output/output.tfrecords"])

# Read up to 10 serialized examples per call.
_,serialized_example = reader.read_up_to(filename_queue,10)
# Parse the batch of serialized examples.
features = tf.parse_example(
    serialized_example,
    features={
        'image_raw':tf.FixedLenFeature([],tf.string),
        'pixels':tf.FixedLenFeature([],tf.int64),
        'label':tf.FixedLenFeature([],tf.int64)
    })

# Decode the raw image bytes to uint8 and narrow the integer features to int32.
images = tf.decode_raw(features['image_raw'],tf.uint8)
labels = tf.cast(features['label'],tf.int32)
pixels = tf.cast(features['pixels'],tf.int32)
print(images.get_shape()) # prints (?, ?): the static shape cannot be determined

batch_size = 10
capacity = 1000 + 3 * batch_size

# Shapes are pinned assuming every read_up_to call returns exactly 10 records.

images.set_shape([10,784])
labels.set_shape([10])
pixels.set_shape([10])
image_batch, label_batch, pixel_batch = tf.train.batch(
    [images, labels, pixels], batch_size=batch_size, capacity=capacity)
sess = tf.Session()
# Start the queue-runner threads that feed the input pipeline.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess,coord=coord)

for i in range(100):

    # Fetch the batched tensors rather than the raw read_up_to outputs.
    image, label, pixel = sess.run([image_batch, label_batch, pixel_batch ])
    print(image.shape)
# NOTE(review): this print sits outside the loop, unlike print(image.shape) — confirm intent.
print(label.shape)

可以看出101個example。

一個batch是10次，1次又是read_up_to(10)，所以，一個batch就讀了100個。

接著到下一個batch的時候，再次調用read_up_to時，文件只剩下1個example了，因此報出expected [10,784], got [1,784]的錯誤。

但是，我們試探性地直接將read_up_to的數量改為102，也就是一次read_up_to就超出整個文件包含的example數量，發現：

所以，read_up_to是文件剩多少，就讀多少，然後再一直循環下去，不會為了湊夠102個而跨過文件末尾繼續讀取。改為100，輸出是：

那麼上述報錯的原因究竟是什麼？

正是因為read_up_to是文件剩余多少，就讀多少。但是同時使用batch的情況下，需要set_shape。

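此時，文件中有101個example，一次batch以後讀出10*10個example。下一次batch時，調用read_up_to(10)，但是只剩下一個了。於是得到的是[1,784]，和set_shape指定的[10,784]有矛盾，故而報錯。如果確實要同時使用兩者，可以在tf.train.batch中設置enqueue_many=True，並且只固定第一維之外的shape（例如images.set_shape([None,784])），這樣每次read_up_to返回的樣例數不同也不會衝突。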

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 完整神經網絡樣例程序詳解 Flink實戰(1) - Apache Flink安裝和示例程序的執行用python + hadoop streaming 編寫分布式程序（一） -- 原理介紹，樣例程序與本地調試 Blink示例程序 ASP.NET MVC 企業級實戰 —— 創建用戶權限管理范例程序（二） ASP.NET MVC 企業級實戰 —— 創建用戶權限管理范例程序（三） ASP.NET MVC 企業級實戰 —— 創建用戶權限管理范例程序（一） Kaldi樣例實戰 DPDK實例程序：testpmd 那個不管“萬一”的程序員后來怎么樣了？