Application scenarios for RNNs
Recurrent neural networks (RNNs) are well suited to processing and predicting time-series (sequential) data.
Characteristics of RNNs
The hidden-layer nodes of an RNN are connected across time steps: the input to the recurrent cell is the output vector of the input layer concatenated with the hidden-state vector from the previous time step.
Demo: an RNN whose recurrent cell is a single fully connected layer
Input dimension: x
Hidden dimension: h
Input size of each recurrent cell: x + h
Output size of each recurrent cell: h
The output of the recurrent cell serves two purposes:
- it forms part of the input to the recurrent cell at the next time step
- it is passed through another fully connected network to produce the output at the current time step (see the sketch below)
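A minimal NumPy sketch of one step of such a cell (function and variable names are illustrative, not part of the demo further down): the current input is concatenated with the previous hidden state, a fully connected layer with tanh produces the new state, and a second fully connected layer maps that state to the current output.

import numpy as np

def rnn_cell_step(x_t, h_prev, W, b, W_out, b_out):
    # cell input: current input concatenated with the previous hidden state, size x + h
    cell_input = np.concatenate([x_t, h_prev])
    # new hidden state, size h; it is reused as part of the next step's input
    h_t = np.tanh(cell_input @ W + b)    # W: (x + h, h)
    # a second fully connected layer produces the output for the current time step
    y_t = h_t @ W_out + b_out            # W_out: (h, output_dim)
    return h_t, y_t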
Sequence length
In theory an RNN supports sequences of arbitrary length, but overly long sequences cause vanishing gradients during optimization, so in practice a maximum length is set and longer sequences are truncated (a short padding/truncation sketch appears below).
Original paper: On the difficulty of training Recurrent Neural Networks
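As a simple illustration of truncation (the demo below delegates this to VocabularyProcessor; the helper here is hypothetical), index sequences longer than the maximum are cut off and shorter ones are padded with zeros:

def pad_or_truncate(seq, max_len, pad_value=0):
    # keep at most max_len tokens, then pad with pad_value up to max_len
    seq = list(seq)[:max_len]
    return seq + [pad_value] * (max_len - len(seq))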
Long short-term memory networks (LSTM)
Original paper: Long Short-Term Memory
Recurrent cell: a special network structure with an input gate, a forget gate, and an output gate
Forget gate: decides which parts of the current input, the previous state, and the previous output are forgotten
Input gate: decides which parts of the current input, the previous state, and the previous output enter the state at the current time step (see the sketch below)
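A minimal NumPy sketch of one LSTM step, following the standard formulation (weight names are illustrative; the output gate controls what part of the cell state is exposed as the output):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lstm_cell_step(x_t, h_prev, c_prev, W_f, b_f, W_i, b_i, W_c, b_c, W_o, b_o):
    z = np.concatenate([x_t, h_prev])   # current input + previous output
    f_t = sigmoid(z @ W_f + b_f)        # forget gate: how much of the old state to keep
    i_t = sigmoid(z @ W_i + b_i)        # input gate: how much new information to write
    c_tilde = np.tanh(z @ W_c + b_c)    # candidate state from the current input
    c_t = f_t * c_prev + i_t * c_tilde  # new cell state
    o_t = sigmoid(z @ W_o + b_o)        # output gate: what part of the state is exposed
    h_t = o_t * np.tanh(c_t)            # new output / hidden state
    return h_t, c_t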
RNN variants
- Bidirectional RNN
- Deep (stacked) RNN
Dropout in RNNs
Dropout is applied between the recurrent cells of different layers, but not between time steps within the same layer (see the DropoutWrapper sketch below).
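In the TensorFlow 1.x contrib API this is typically done by wrapping each layer's cell in a DropoutWrapper before stacking them with MultiRNNCell; the sizes and keep probability below are illustrative, not taken from the demo:

import tensorflow as tf

rnn_size = 10                           # illustrative cell size
num_layers = 2                          # illustrative number of stacked layers
keep_prob = tf.placeholder(tf.float32)  # dropout keep probability, fed at run time

def make_cell():
    cell = tf.contrib.rnn.BasicLSTMCell(num_units=rnn_size)
    # dropout is applied to the cell's output, i.e. between stacked layers;
    # the recurrent connections across time steps inside a layer are untouched
    return tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=keep_prob)

stacked_cell = tf.contrib.rnn.MultiRNNCell([make_cell() for _ in range(num_layers)])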
Demo: SMS spam classification with an RNN (TensorFlow 1.x)
import os
import re
import io
import requests
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from zipfile import ZipFile
from tensorflow.python.framework import ops
ops.reset_default_graph()
1. Start a graph session and set RNN parameters
sess = tf.Session()
epochs = 20 # run 20 epochs; one epoch is a full pass over all batches of the training set
batch_size = 250
max_sequence_length = 25
rnn_size = 10 # The RNN will be of size 10 units.
embedding_size = 50 # every word will be embedded in a trainable vector of size 50
min_word_frequency = 10 # only consider words that appear at least 10 times in the corpus
learning_rate = 0.0005
dropout_keep_prob = tf.placeholder(tf.float32)
2. Download or open data
Check if the data was already downloaded and, if so, read in the file.
Otherwise, download the data and save it.
# Download or open data
data_dir = 'data'
data_file = 'text_data.txt'
if not os.path.exists(data_dir):
    os.makedirs(data_dir)
if not os.path.isfile(os.path.join(data_dir, data_file)):
    zip_url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/00228/smsspamcollection.zip'
    r = requests.get(zip_url)
    z = ZipFile(io.BytesIO(r.content))
    file = z.read('SMSSpamCollection')
    # Format Data
    text_data = file.decode()
    text_data = text_data.encode('ascii', errors='ignore')
    text_data = text_data.decode().split('\n')
    # Save data to text file
    with open(os.path.join(data_dir, data_file), 'w') as file_conn:
        for text in text_data:
            file_conn.write("{}\n".format(text))  # append "\n" to each row (str.format, not from the re lib)
else:
    # Open data from text file
    text_data = []
    with open(os.path.join(data_dir, data_file), 'r') as file_conn:
        for row in file_conn:
            text_data.append(row)
    text_data = text_data[:-1]

text_data = [x.split('\t') for x in text_data if len(x) >= 1]
[text_data_target, text_data_train] = [list(x) for x in zip(*text_data)]
3. Create a text cleaning function, then clean the data
def clean_text(text_string):
    # strip punctuation, underscores and digits: \w matches any word character (including "_"),
    # [^\s\w] matches any character that is neither whitespace nor a word character
    text_string = re.sub(r'([^\s\w]|_|[0-9])+', '', text_string)
    text_string = " ".join(text_string.split())
    text_string = text_string.lower()
    return text_string
# Clean texts
text_data_train = [clean_text(x) for x in text_data_train]
4. Change texts into numeric vectors
This will convert each text to a fixed-length list of word indices.
# Build the vocabulary and map each text to a sequence of word indices
# (this step is reconstructed from the parameters defined in step 1; it was missing from the listing)
vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(max_sequence_length,
                                                                     min_frequency=min_word_frequency)
text_processed = np.array(list(vocab_processor.fit_transform(text_data_train)))
# Encode the labels as integers (spam -> 1, ham -> 0) and shuffle the data
text_data_target = np.array([1 if x == 'spam' else 0 for x in text_data_target])
shuffled_ix = np.random.permutation(np.arange(len(text_data_target)))
x_shuffled = text_processed[shuffled_ix]
y_shuffled = text_data_target[shuffled_ix]
# Split train/test set
ix_cutoff = int(len(y_shuffled)*0.80)
x_train, x_test = x_shuffled[:ix_cutoff], x_shuffled[ix_cutoff:]
y_train, y_test = y_shuffled[:ix_cutoff], y_shuffled[ix_cutoff:]
vocab_size = len(vocab_processor.vocabulary_)
print("Vocabulary Size: {:d}".format(vocab_size))
print("80-20 Train Test split: {:d} -- {:d}".format(len(y_train), len(y_test)))
# Create placeholders
x_data = tf.placeholder(tf.int32, [None, max_sequence_length])
y_output = tf.placeholder(tf.int32, [None])
# Create embedding
embedding_mat = tf.Variable(tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0))
embedding_output = tf.nn.embedding_lookup(embedding_mat, x_data)
#embedding_output_expanded = tf.expand_dims(embedding_output, -1)
# Define the RNN cell
# In TensorFlow >= 1.0 the RNN cell classes moved into tf.contrib.rnn; earlier versions were not tested
if tf.__version__[0] >= '1':
    cell = tf.contrib.rnn.BasicRNNCell(num_units=rnn_size)
else:
    cell = tf.nn.rnn_cell.BasicRNNCell(num_units=rnn_size)
output, state = tf.nn.dynamic_rnn(cell, embedding_output, dtype=tf.float32)
output = tf.nn.dropout(output, dropout_keep_prob)
# Get the output of the RNN sequence: transpose to [time, batch, units] and take the last time step
output = tf.transpose(output, [1, 0, 2])
last = tf.gather(output, int(output.get_shape()[0]) - 1)
weight = tf.Variable(tf.truncated_normal([rnn_size, 2], stddev=0.1))
bias = tf.Variable(tf.constant(0.1, shape=[2]))
logits_out = tf.matmul(last, weight) + bias
# Loss function
losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_out, labels=y_output) # logits=float32, labels=int32
loss = tf.reduce_mean(losses)
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(logits_out, 1), tf.cast(y_output, tf.int64)), tf.float32))
optimizer = tf.train.RMSPropOptimizer(learning_rate)
train_step = optimizer.minimize(loss)
init = tf.global_variables_initializer()
sess.run(init)
train_loss = []
test_loss = []
train_accuracy = []
test_accuracy = []
# Start training
for epoch in range(epochs):
    # Shuffle training data
    shuffled_ix = np.random.permutation(np.arange(len(x_train)))
    x_train = x_train[shuffled_ix]
    y_train = y_train[shuffled_ix]
    num_batches = int(len(x_train)/batch_size) + 1
    # TODO: calculate the number of batches exactly
    for i in range(num_batches):
        # Select train data
        min_ix = i * batch_size
        max_ix = np.min([len(x_train), ((i+1) * batch_size)])
        x_train_batch = x_train[min_ix:max_ix]
        y_train_batch = y_train[min_ix:max_ix]
        # Run train step
        train_dict = {x_data: x_train_batch, y_output: y_train_batch, dropout_keep_prob: 0.5}
        sess.run(train_step, feed_dict=train_dict)
    # Run loss and accuracy for training
    temp_train_loss, temp_train_acc = sess.run([loss, accuracy], feed_dict=train_dict)
    train_loss.append(temp_train_loss)
    train_accuracy.append(temp_train_acc)
    # Run Eval Step
    test_dict = {x_data: x_test, y_output: y_test, dropout_keep_prob: 1.0}
    temp_test_loss, temp_test_acc = sess.run([loss, accuracy], feed_dict=test_dict)
    test_loss.append(temp_test_loss)
    test_accuracy.append(temp_test_acc)
    print('Epoch: {}, Test Loss: {:.2}, Test Acc: {:.2}'.format(epoch+1, temp_test_loss, temp_test_acc))
# Plot loss over time
epoch_seq = np.arange(1, epochs+1)
plt.plot(epoch_seq, train_loss, 'k--', label='Train Set')
plt.plot(epoch_seq, test_loss, 'r-', label='Test Set')
plt.title('Softmax Loss')
plt.xlabel('Epochs')
plt.ylabel('Softmax Loss')
plt.legend(loc='upper left')
plt.show()
# Plot accuracy over time
plt.plot(epoch_seq, train_accuracy, 'k--', label='Train Set')
plt.plot(epoch_seq, test_accuracy, 'r-', label='Test Set')
plt.title('Test Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend(loc='upper left')
plt.show()
Vocabulary Size: 1124
80-20 Train Test split: 4459 -- 1115
C:\Users\Diane\Anaconda3\lib\site-packages\tensorflow\python\ops\gradients_impl.py:100: UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
"Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
Epoch: 1, Test Loss: 0.53, Test Acc: 0.84
Epoch: 2, Test Loss: 0.52, Test Acc: 0.84
Epoch: 3, Test Loss: 0.49, Test Acc: 0.84
Epoch: 4, Test Loss: 0.46, Test Acc: 0.84
Epoch: 5, Test Loss: 0.43, Test Acc: 0.84
Epoch: 6, Test Loss: 0.4, Test Acc: 0.85
Epoch: 7, Test Loss: 0.38, Test Acc: 0.85
Epoch: 8, Test Loss: 0.37, Test Acc: 0.85
Epoch: 9, Test Loss: 0.36, Test Acc: 0.85
Epoch: 10, Test Loss: 0.36, Test Acc: 0.86
Epoch: 11, Test Loss: 0.35, Test Acc: 0.86
Epoch: 12, Test Loss: 0.35, Test Acc: 0.86
Epoch: 13, Test Loss: 0.34, Test Acc: 0.87
Epoch: 14, Test Loss: 0.34, Test Acc: 0.87
Epoch: 15, Test Loss: 0.33, Test Acc: 0.87
Epoch: 16, Test Loss: 0.33, Test Acc: 0.87
Epoch: 17, Test Loss: 0.32, Test Acc: 0.87
Epoch: 18, Test Loss: 0.32, Test Acc: 0.88
Epoch: 19, Test Loss: 0.31, Test Acc: 0.88
Epoch: 20, Test Loss: 0.3, Test Acc: 0.88
