【TensorFlow】神經網絡:自製數據集


在實際應用中,我們常常需要自製數據集,以解決本領域的具體問題;而原始數據通常是圖片或文字,需要先做格式轉換,才能在訓練時使用。

 

代碼:

import tensorflow as tf from PIL import Image import numpy as np import os # 訓練用的輸入特征和標簽
# Common directory prefix shared by every dataset path below; kept with its
# trailing slash so the concatenated values match the original literals exactly.
_DATASET_DIR = "class4/MNIST_FC/mnist_image_label/"

# Training split: image directory, label index file, and the .npy files the
# features / labels are saved to.
x_train_readpath = _DATASET_DIR + "mnist_train_jpg_60000/"
y_train_readpath = _DATASET_DIR + "mnist_train_jpg_60000.txt"
x_train_savapath = _DATASET_DIR + "mnist_x_train.npy"
y_train_savapath = _DATASET_DIR + "mnist_y_train.npy"

# Test split: same layout as the training split.
x_test_readpath = _DATASET_DIR + "mnist_test_jpg_10000/"
y_test_readpath = _DATASET_DIR + "mnist_test_jpg_10000.txt"
x_test_savapath = _DATASET_DIR + "mnist_x_test.npy"
y_test_savapath = _DATASET_DIR + "mnist_y_test.npy"

# 讀取輸入特征和標簽
def generateData(x_path, y_path):
    """Load grayscale images and their integer labels listed in an index file.

    Every non-blank line of the file at ``y_path`` is split on whitespace: the
    first field is an image file name relative to ``x_path`` and the second
    field is that image's label.

    Args:
        x_path: Directory that contains the image files.
        y_path: Text file with one "<file name> <label>" record per line.

    Returns:
        A tuple ``(x, y)``. ``x`` is a NumPy array built from the images after
        conversion to 8-bit grayscale ("L" mode) and division by 255.0; ``y``
        is a NumPy array of the labels cast to ``np.int64``.
    """
    # The context manager closes the label file even if reading raises.
    with open(y_path, "r") as f:
        contents = f.readlines()

    x, y = [], []
    for content in contents:
        # Record layout: value[0] = file name, value[1] = label.
        value = content.split()
        if not value:
            # A blank line (for example a trailing empty line) holds no
            # record; indexing it would raise IndexError.
            continue

        # os.path.join also works when x_path lacks a trailing separator.
        img_path = os.path.join(x_path, value[0])
        with Image.open(img_path) as raw:
            pixels = np.array(raw.convert("L"))  # 8-bit grayscale values
        x.append(pixels / 255.0)                 # normalise to [0, 1]
        y.append(value[1])
        print("loding:" + content)               # progress message

    x = np.array(x)
    y = np.array(y).astype(np.int64)  # labels are read as strings; store as int64
    return x, y


_cache_paths = (x_train_savapath, y_train_savapath, x_test_savapath, y_test_savapath)

if all(os.path.exists(path) for path in _cache_paths):
    # All cached arrays exist: load them instead of decoding the images again.
    # Features were saved flattened, so restore the (N, 28, 28) image shape.
    x_train_save = np.load(x_train_savapath)
    x_train = np.reshape(x_train_save, (len(x_train_save), 28, 28))
    y_train = np.load(y_train_savapath)
    x_test_save = np.load(x_test_savapath)
    x_test = np.reshape(x_test_save, (len(x_test_save), 28, 28))
    y_test = np.load(y_test_savapath)
else:
    # At least one cached array is missing: build both splits from the raw
    # images and write all four cache files.
    x_train, y_train = generateData(x_train_readpath, y_train_readpath)
    x_test, y_test = generateData(x_test_readpath, y_test_readpath)

    # Flatten each image to one row before saving.
    x_train_save = np.reshape(x_train, (len(x_train), -1))
    x_test_save = np.reshape(x_test, (len(x_test), -1))
    np.save(x_train_savapath, x_train_save)
    np.save(y_train_savapath, y_train)
    np.save(x_test_savapath, x_test_save)
    np.save(y_test_savapath, y_test)

# Declare the neural network structure
model = tf.keras.Sequential()
model.add(tf.keras.layers.Flatten())                        # flatten each input sample to a vector
model.add(tf.keras.layers.Dense(128, activation="relu"))    # hidden layer, 128 units
model.add(tf.keras.layers.Dense(10, activation="softmax"))  # output layer, 10 units

# Configure training: optimizer, loss function, and evaluation metric.
# from_logits=False because the last layer already applies softmax.
model.compile(
    metrics=[tf.keras.metrics.sparse_categorical_accuracy],
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    optimizer=tf.keras.optimizers.Adam(),
)

# Run training, validating on the test split after every epoch.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=5,
    validation_data=(x_test, y_test),
    validation_freq=1,
)

# Print the network structure and parameter counts.
model.summary()

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM