TensorFlow [LSTM]
0. Background
While reading Chapter 9, Section 3 ("implementing_lstm") of the *TensorFlow Machine Learning Cookbook*, I found that the following pattern makes both training and prediction very convenient: the model is defined as a class, and TensorFlow's variable-reuse mechanism lets the variables created during training (the weights, for example) be used directly in the prediction phase, with no need to copy weights around by hand. This post simply records a few conceptual points from that section's source code; a short usage sketch after the code shows how the reuse is wired up.
import numpy as np
import tensorflow as tf

# Define the LSTM model
class LSTM_Model():
    def __init__(self, embedding_size, rnn_size, batch_size, learning_rate,
                 training_seq_len, vocab_size, infer_sample=False):
        self.embedding_size = embedding_size
        self.rnn_size = rnn_size  # number of hidden units in the LSTM cell
        self.vocab_size = vocab_size
        self.infer_sample = infer_sample
        self.learning_rate = learning_rate  # learning rate

        if infer_sample:  # at inference time, batch size and sequence length are both 1
            self.batch_size = 1
            self.training_seq_len = 1
        else:
            self.batch_size = batch_size
            self.training_seq_len = training_seq_len

        # Build the LSTM cell and its zero initial state
        self.lstm_cell = tf.contrib.rnn.BasicLSTMCell(self.rnn_size)
        self.initial_state = self.lstm_cell.zero_state(self.batch_size, tf.float32)

        # Placeholders for the input and target sequences
        self.x_data = tf.placeholder(tf.int32, [self.batch_size, self.training_seq_len])
        self.y_output = tf.placeholder(tf.int32, [self.batch_size, self.training_seq_len])

        with tf.variable_scope('lstm_vars'):
            # Weights for the softmax output layer
            W = tf.get_variable('W', [self.rnn_size, self.vocab_size], tf.float32,
                                tf.random_normal_initializer())
            b = tf.get_variable('b', [self.vocab_size], tf.float32,
                                tf.constant_initializer(0.0))

            # Define the embedding matrix and look up the inputs
            embedding_mat = tf.get_variable('embedding_mat',
                                            [self.vocab_size, self.embedding_size],
                                            tf.float32, tf.random_normal_initializer())
            embedding_output = tf.nn.embedding_lookup(embedding_mat, self.x_data)
            # Split [batch, seq_len, embed] into seq_len tensors of [batch, embed]
            rnn_inputs = tf.split(axis=1, num_or_size_splits=self.training_seq_len,
                                  value=embedding_output)
            rnn_inputs_trimmed = [tf.squeeze(x, [1]) for x in rnn_inputs]

        # If we are inferring (generating text), we add a 'loop' function that
        # turns the i-th output into the (i+1)-th input
        def inferred_loop(prev, count):
            # Apply the output layer
            prev_transformed = tf.matmul(prev, W) + b
            # Get the index of the predicted word (don't run the gradient through it)
            prev_symbol = tf.stop_gradient(tf.argmax(prev_transformed, 1))
            # Look up its embedding to feed as the next input
            output = tf.nn.embedding_lookup(embedding_mat, prev_symbol)
            return output

        decoder = tf.contrib.legacy_seq2seq.rnn_decoder
        outputs, last_state = decoder(rnn_inputs_trimmed,
                                      self.initial_state,
                                      self.lstm_cell,
                                      loop_function=inferred_loop if infer_sample else None)

        # Flatten the per-timestep outputs to [batch * seq_len, rnn_size]
        output = tf.reshape(tf.concat(axis=1, values=outputs), [-1, self.rnn_size])
        # Logits and softmax output
        self.logit_output = tf.matmul(output, W) + b
        self.model_output = tf.nn.softmax(self.logit_output)

        # Per-timestep cross-entropy, uniformly weighted over all positions
        loss_fun = tf.contrib.legacy_seq2seq.sequence_loss_by_example
        loss = loss_fun([self.logit_output],
                        [tf.reshape(self.y_output, [-1])],
                        [tf.ones([self.batch_size * self.training_seq_len])])
        self.cost = tf.reduce_sum(loss) / (self.batch_size * self.training_seq_len)
        self.final_state = last_state

        # Clip gradients globally and apply them with Adam
        gradients, _ = tf.clip_by_global_norm(
            tf.gradients(self.cost, tf.trainable_variables()), 4.5)
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self.train_op = optimizer.apply_gradients(zip(gradients, tf.trainable_variables()))

    # ix2vocab / vocab2ix are the index<->word maps built earlier in the script
    def sample(self, sess, words=ix2vocab, vocab=vocab2ix, num=10, prime_text='thou art'):
        # Warm up the state on all but the last word of the priming text
        state = sess.run(self.lstm_cell.zero_state(1, tf.float32))
        word_list = prime_text.split()
        for word in word_list[:-1]:
            x = np.zeros((1, 1))
            x[0, 0] = vocab[word]
            feed_dict = {self.x_data: x, self.initial_state: state}
            [state] = sess.run([self.final_state], feed_dict=feed_dict)

        out_sentence = prime_text
        word = word_list[-1]
        for n in range(num):
            # Feed the last word, then take the most likely next word
            x = np.zeros((1, 1))
            x[0, 0] = vocab[word]
            feed_dict = {self.x_data: x, self.initial_state: state}
            [model_output, state] = sess.run([self.model_output, self.final_state],
                                             feed_dict=feed_dict)
            sample = np.argmax(model_output[0])
            if sample == 0:  # stop if the model predicts index 0 (the unknown token)
                break
            word = words[sample]
            out_sentence = out_sentence + ' ' + word
        return out_sentence
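What makes this pattern convenient is that the class is instantiated twice under a single variable scope. Below is a minimal sketch of that wiring, close to what the cookbook's driver script does: the training model is built first, then scope.reuse_variables() is called so that the inference instance (infer_sample=True) binds to the exact same tf.get_variable tensors instead of creating new ones. The hyperparameter values here are illustrative, not prescribed by the book.

# Illustrative hyperparameters -- the real values come from the script's setup
embedding_size = 100      # dimension of each word embedding
rnn_size = 128            # hidden units in the LSTM cell
batch_size = 100
learning_rate = 0.001
training_seq_len = 50
vocab_size = 10000        # vocabulary size built from the corpus

with tf.variable_scope('lstm_model') as scope:
    # Training graph: creates W, b, embedding_mat (and the LSTM kernel)
    lstm_model = LSTM_Model(embedding_size, rnn_size, batch_size, learning_rate,
                            training_seq_len, vocab_size)
    # From here on, tf.get_variable returns the existing variables
    scope.reuse_variables()
    # Inference graph (batch_size = seq_len = 1) shares every weight above
    test_lstm_model = LSTM_Model(embedding_size, rnn_size, batch_size, learning_rate,
                                 training_seq_len, vocab_size, infer_sample=True)

Because both instances call tf.get_variable under the same scope, test_lstm_model always sees the current values of W, b, and embedding_mat: whatever training writes into them is immediately available for sampling, with no explicit weight copying.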
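For completeness, here is a hedged sketch of the train-then-sample loop. It assumes batches is a list of (x, y) pairs of integer arrays with shape [batch_size, training_seq_len]; the cookbook builds these from the Shakespeare corpus, and that batching code is omitted here.

# Minimal training/sampling loop; `batches` is an assumed, pre-built list
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(10):
        state = sess.run(lstm_model.initial_state)
        for x, y in batches:
            feed_dict = {lstm_model.x_data: x,
                         lstm_model.y_output: y,
                         lstm_model.initial_state: state}
            # Carry the LSTM state across consecutive batches
            state, loss, _ = sess.run([lstm_model.final_state,
                                       lstm_model.cost,
                                       lstm_model.train_op],
                                      feed_dict=feed_dict)
        # Sampling goes through the inference instance; same weights, no copying
        print(test_lstm_model.sample(sess, ix2vocab, vocab2ix,
                                     num=10, prime_text='thou art'))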