Bidirectional LSTM


 

1. Theory

  The basic idea of a bidirectional recurrent neural network (BRNN) is to process each training sequence with two separate recurrent neural networks (RNNs), one running forward and one running backward in time, both connected to the same output layer.

  This structure gives the output layer the complete past and future context for every point in the input sequence.

  Six distinct weight matrices are reused at every time step: input to the forward and backward hidden layers (w1, w3), each hidden layer to itself (w2, w5), and the forward and backward hidden layers to the output layer (w4, w6).

  Note that there is no information flow between the forward and backward hidden layers, which guarantees that the unrolled graph is acyclic. A minimal sketch of this computation follows below.
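
  As a concrete illustration (this sketch is not from the original post), the per-step computation with the six weights w1..w6 can be written out in NumPy; the tanh activation and the shapes below are assumptions made only for this example:

import numpy as np

n_in, n_hid, n_out = 4, 8, 3
w1 = np.random.randn(n_in, n_hid)    # input -> forward hidden
w2 = np.random.randn(n_hid, n_hid)   # forward hidden -> forward hidden
w3 = np.random.randn(n_in, n_hid)    # input -> backward hidden
w5 = np.random.randn(n_hid, n_hid)   # backward hidden -> backward hidden
w4 = np.random.randn(n_hid, n_out)   # forward hidden -> output
w6 = np.random.randn(n_hid, n_out)   # backward hidden -> output

def brnn(xs):                         # xs: [T, n_in]
    T = len(xs)
    h_f = np.zeros((T, n_hid))
    h_b = np.zeros((T, n_hid))
    h = np.zeros(n_hid)
    for t in range(T):                # forward layer accumulates the past
        h = np.tanh(xs[t] @ w1 + h @ w2)
        h_f[t] = h
    h = np.zeros(n_hid)
    for t in reversed(range(T)):      # backward layer accumulates the future
        h = np.tanh(xs[t] @ w3 + h @ w5)
        h_b[t] = h
    # each output sees the full past (h_f) and full future (h_b) context
    return h_f @ w4 + h_b @ w6

ys = brnn(np.random.randn(10, n_in))  # 10 time steps -> shape (10, n_out)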

    

 

2. Code

 

#!/usr/bin/env python3
# encoding: utf-8

'''
@author: bigcome
@desc:
@time: 2018/12/5 9:04
'''

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Prepare the dataset
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Build the model
# Hyperparameters
# Learning rate
learning_rate = 0.001
# Network parameters
# n_steps * n_input corresponds to one 28x28 image: each row becomes one time step
n_input = 28
n_steps = 28
# Hidden layer size
n_hidden = 512
n_classes = 10
# Number of samples per training step
batch_size = 100
n_batch = mnist.train.num_examples // batch_size
display_step = 10

# tf Graph input
# [None, n_steps, n_input]: None means this dimension is not fixed in advance
x = tf.placeholder(tf.float32, [None, n_steps, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])

# Define weights
# Note the 2 * n_hidden: the forward and backward outputs are concatenated
weights = tf.get_variable("weights", [2 * n_hidden, n_classes], dtype=tf.float32,
                          initializer=tf.random_normal_initializer(mean=0, stddev=1))
biases = tf.get_variable("biases", [n_classes], dtype=tf.float32,
                         initializer=tf.random_normal_initializer(mean=0, stddev=1))

def BiRNN(x, weights, biases):
    # x is [batch_size, 28, 28]
    # after transposing it is [28, batch_size, 28] (time-major)
    x = tf.transpose(x, [1, 0, 2])
    # flatten to [-1, 28]
    x = tf.reshape(x, [-1, n_input])
    # split into a list of n_steps tensors, each [batch_size, n_input]
    x = tf.split(x, n_steps)

    lstm_fw_cell = tf.contrib.rnn.BasicLSTMCell(n_hidden, forget_bias=0.8)
    lstm_bw_cell = tf.contrib.rnn.BasicLSTMCell(n_hidden, forget_bias=0.8)

    outputs, _, _ = tf.contrib.rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x, dtype=tf.float32)
    # outputs[-1] is the last time step, shape [batch_size, 2 * n_hidden]
    return tf.matmul(outputs[-1], weights) + biases
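
# Shape flow through BiRNN (added illustration, assuming batch_size = 100):
#   x placeholder:                     [100, 28, 28]
#   after tf.transpose:                [28, 100, 28]
#   after tf.reshape:                  [2800, 28]
#   after tf.split:                    list of 28 tensors, each [100, 28]
#   static_bidirectional_rnn outputs:  list of 28 tensors, each [100, 1024]
#                                      (forward and backward states concatenated)
#   tf.matmul(outputs[-1], weights):   [100, 10]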

# Define Bi-LSTM with the dynamic API
def Bilstm(x, weights, biases):
    lstm_fw_cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    lstm_bw_cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # zero_state ties the batch dimension to batch_size
    init_fw = lstm_fw_cell.zero_state(batch_size, dtype=tf.float32)
    init_bw = lstm_bw_cell.zero_state(batch_size, dtype=tf.float32)
    # outputs is a tuple (output_fw, output_bw), each [batch_size, n_steps, n_hidden]
    outputs, final_states = tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell,
                                                            lstm_bw_cell,
                                                            x,
                                                            initial_state_fw=init_fw,
                                                            initial_state_bw=init_bw)
    # Concatenate the forward and backward outputs: [batch_size, n_steps, 2 * n_hidden]
    outputs = tf.concat(outputs, 2)
    # Make it time-major and take the last time step: [batch_size, 2 * n_hidden]
    outputs = tf.transpose(outputs, (1, 0, 2))
    # Note the dimensions: weights is [2 * n_hidden, n_classes]
    output = tf.add(tf.matmul(outputs[-1], weights), biases)
    return output

prediction = BiRNN(x, weights, biases)
#prediction = Bilstm(x,weights,biases)
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction,labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(prediction,1),tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
init = tf.global_variables_initializer()


with tf.Session() as sess:
    sess.run(init)
    step = 0
    while step < n_batch:
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        # reshape so batch_x matches the shape of the placeholder x
        batch_x = batch_x.reshape((batch_size, n_steps, n_input))
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
        if step % display_step == 0:
            acc = sess.run(accuracy, feed_dict={x: batch_x, y: batch_y})
            loss = sess.run(cross_entropy, feed_dict={x: batch_x, y: batch_y})
            print("Iter " + str(step * batch_size) + ", Minibatch Loss= " +
                  "{:.6f}".format(loss) + ", Training Accuracy= " +
                  "{:.5f}".format(acc))
        step += 1
    print("Optimization Finished!")

    test_len = 10000
    test_data = mnist.test.images[:test_len].reshape((-1, n_steps, n_input))
    test_label = mnist.test.labels[:test_len]
    print("Testing Accuracy:", sess.run(accuracy, feed_dict={x: test_data, y: test_label}))

