import tensorflow as tf
from tensorflow.keras import optimizers, layers


# Preprocessing: cast features to float32 and scale to [0, 1];
# cast labels to int32 and one-hot encode them.
def preprocess(x, y):
    x = tf.cast(x, dtype=tf.float32) / 255.0  # scale pixel values into [0, 1]
    y = tf.cast(y, dtype=tf.int32)            # cast labels to int32
    y = tf.one_hot(y, depth=10)               # one-hot encode labels (10 classes)
    return x, y


def get_data():
    # Load the MNIST handwritten-digit dataset
    mnist = tf.keras.datasets.mnist
    (train_x, train_y), (test_x, test_y) = mnist.load_data()

    # Training data: pair features with labels, apply the preprocessing
    # function, shuffle with a buffer of 60000, and batch 100 samples
    # at a time for stochastic gradient descent.
    db = tf.data.Dataset.from_tensor_slices((train_x, train_y))
    db = db.map(preprocess)
    db = db.shuffle(60000).batch(100)

    # Test data: same pipeline
    db_test = tf.data.Dataset.from_tensor_slices((test_x, test_y))
    db_test = db_test.map(preprocess)
    db_test = db_test.shuffle(10000).batch(100)
    return db, db_test


# Test code
db, db_test = get_data()  # fetch the training and test datasets

# Hyperparameters (`epochs` replaces the original name `iter`,
# which shadows a Python builtin)
epochs = 100
learn_rate = 0.01

# Define the model and optimizer
model = tf.keras.Sequential([
    layers.Dense(512, activation='relu'),
    layers.Dense(256, activation='relu'),  # fully connected layers
    layers.Dense(10)
])
optimizer = optimizers.SGD(learning_rate=learn_rate)  # optimizer

# Training loop
for i in range(epochs):
    for step, (x, y) in enumerate(db):  # compute gradients per batch
        # print('x.shape:{}, y.shape:{}'.format(x.shape, y.shape))
        with tf.GradientTape() as tape:
            x = tf.reshape(x, (-1, 28 * 28))  # flatten each 28*28 image into a 784-vector
            out = model(x)
            loss = tf.reduce_mean(tf.square(out - y))  # MSE between logits and one-hot labels
        grads = tape.gradient(loss, model.trainable_variables)  # compute gradients
        grads, _ = tf.clip_by_global_norm(grads, 15)  # clip gradient norm to guard against NaN / inf
        optimizer.apply_gradients(zip(grads, model.trainable_variables))  # apply the parameter update
        if step % 100 == 0:
            print('i:{}, step:{}, loss:{}'.format(i, step, loss.numpy()))
    # Accuracy, measured on the last training batch of this epoch
    acc = tf.equal(tf.argmax(out, axis=1), tf.argmax(y, axis=1))
    acc = tf.reduce_mean(tf.cast(acc, tf.float32))
    print('acc:', acc.numpy())
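
# A minimal sketch of evaluating the trained `model` on the held-out test set.
# The accuracy printed inside the training loop is measured on the last
# *training* batch only, so a separate pass over db_test gives a fairer number.
# This loop and the `test_accuracy` name are illustrative additions, not part
# of the original script.
total_correct = 0
total_samples = 0
for x, y in db_test:
    x = tf.reshape(x, (-1, 28 * 28))  # flatten, same as in training
    out = model(x)                    # forward pass only, no gradient tape needed
    pred = tf.argmax(out, axis=1)
    true = tf.argmax(y, axis=1)       # labels are one-hot, so recover the class index
    total_correct += int(tf.reduce_sum(tf.cast(tf.equal(pred, true), tf.int32)))
    total_samples += x.shape[0]
test_accuracy = total_correct / total_samples
print('test accuracy:', test_accuracy)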