This post teaches an LSTM the alphabet so that it predicts the next letter. For more detail, see:
https://blog.csdn.net/zwqjoy/article/details/80493341
1. Predicting the next letter in stateful mode
# Stateful LSTM to learn one-char to one-char mapping
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.utils import np_utils

# fix random seed for reproducibility
numpy.random.seed(7)

# define the raw dataset
alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
# create mapping of characters to integers (0-25) and the reverse
char_to_int = dict((c, i) for i, c in enumerate(alphabet))
int_to_char = dict((i, c) for i, c in enumerate(alphabet))

# prepare the dataset of input to output pairs encoded as integers
seq_length = 1
dataX = []
dataY = []
for i in range(0, len(alphabet) - seq_length, 1):
    seq_in = alphabet[i:i + seq_length]
    seq_out = alphabet[i + seq_length]
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])
    print(seq_in, '->', seq_out)

# reshape X to be [samples, time steps, features]
X = numpy.reshape(dataX, (len(dataX), seq_length, 1))
# normalize
X = X / float(len(alphabet))
# one hot encode the output variable
y = np_utils.to_categorical(dataY)

# create and fit the model
batch_size = 1
model = Sequential()
model.add(LSTM(16, batch_input_shape=(batch_size, X.shape[1], X.shape[2]), stateful=True))
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
for i in range(300):
    model.fit(X, y, epochs=1, batch_size=batch_size, verbose=2, shuffle=False)
    model.reset_states()

# summarize performance of the model
scores = model.evaluate(X, y, batch_size=batch_size, verbose=0)
model.reset_states()
print("Model Accuracy: %.2f%%" % (scores[1]*100))
OUT:
Model Accuracy: 100.00%
Once the model is trained, let's run a prediction:
# reset the state here, so generation starts from the beginning of the alphabet
model.reset_states()
# demonstrate some model predictions
seed = [char_to_int[alphabet[0]]]
for i in range(0, len(alphabet)-1):
    x = numpy.reshape(seed, (1, len(seed), 1))
    x = x / float(len(alphabet))
    prediction = model.predict(x, verbose=0)
    index = numpy.argmax(prediction)
    print(int_to_char[seed[0]], "->", int_to_char[index])
    seed = [index]
OUT:
A -> B
B -> C
C -> D
D -> E
E -> F
F -> G
G -> H
H -> I
I -> J
J -> K
K -> L
L -> M
M -> N
N -> O
O -> P
P -> Q
Q -> R
R -> S
S -> T
T -> U
U -> V
V -> W
W -> X
X -> Y
Y -> Z
What if we start predicting from a letter in the middle of the alphabet?
# again reset the state first
model.reset_states()
# demonstrate a random starting point
letter = "K"
seed = [char_to_int[letter]]
print("New start: ", letter)
for i in range(0, 5):
    x = numpy.reshape(seed, (1, len(seed), 1))
    x = x / float(len(alphabet))
    prediction = model.predict(x, verbose=0)
    index = numpy.argmax(prediction)
    print(int_to_char[seed[0]], "->", int_to_char[index])
    seed = [index]
OUT:
New start: K
K -> B
B -> C
C -> D
D -> E
E -> F
We can see that after resetting the state, even when we start from the middle letter K, the model still outputs B, exactly as if it had started from the beginning of the alphabet. This shows that the carried-over state C_{t-1} has a stronger influence on the prediction than the current input x_t.
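As a quick sanity check on that claim, we can peek at the carried-over state directly. A minimal sketch, assuming Keras 2.x, where a stateful layer exposes its state variables through `.states` (reading them does not disturb the demo that follows):

from keras import backend as K

# a stateful LSTM keeps its carried state as backend variables [h, c]
h_state, c_state = model.layers[0].states
print(K.get_value(c_state))   # non-zero: this is where the "position" information lives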
What if we start from a middle letter without resetting the state?
# demonstrate a random starting point
letter = "K"
seed = [char_to_int[letter]]
print("New start: ", letter)
for i in range(0, 5):
    x = numpy.reshape(seed, (1, len(seed), 1))
    x = x / float(len(alphabet))
    prediction = model.predict(x, verbose=0)
    index = numpy.argmax(prediction)
    print(int_to_char[seed[0]], "->", int_to_char[index])
    seed = [index]
OUT:
New start: K
K -> Z
Z -> Z
Z -> Z
Z -> Z
Z -> Z
We can see that without a reset, the prediction picks up from whatever state the previous run left behind, so every step comes out as Z. This again shows that the carried-over state has more influence than the current input.
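If we do want a correct continuation from a mid-alphabet letter, one option is to reset and then "warm up" the state by replaying the prefix. A minimal sketch, assuming the trained stateful model above is still in scope:

model.reset_states()
# replay the prefix A..J one step at a time, discarding the outputs;
# we only care about the state this builds up
for ch in "ABCDEFGHIJ":
    x = numpy.reshape([char_to_int[ch]], (1, 1, 1)) / float(len(alphabet))
    model.predict(x, verbose=0)
# the state now corresponds to position K, so this should print K -> L
x = numpy.reshape([char_to_int["K"]], (1, 1, 1)) / float(len(alphabet))
print("K ->", int_to_char[numpy.argmax(model.predict(x, verbose=0))])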
2. Predicting the next letter in stateless mode
The stateful examples above show that the state has to be reset frequently, or it keeps carrying over from the previous run, which is often unwanted. Stateless mode resets the state automatically, and it can handle most tasks: consecutive timesteps usually live inside a single sample and are iterated over there, while the samples themselves stay independent. So don't overuse stateful mode.
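To make the contrast concrete, here is a minimal sketch of what stateless mode looks like in code (the `X_demo`/`y_demo` arrays are made-up placeholders, not data from this post): `stateful` defaults to False, every sample starts from a zero initial state, and shuffling between epochs is harmless.

import numpy as np
from keras.models import Sequential
from keras.layers import Dense, LSTM

# placeholder data: 100 samples, 5 timesteps, 1 feature, 26 classes
X_demo = np.random.rand(100, 5, 1)
y_demo = np.eye(26)[np.random.randint(0, 26, 100)]

model_demo = Sequential()
model_demo.add(LSTM(16, input_shape=(5, 1)))   # stateful=False is the default
model_demo.add(Dense(26, activation='softmax'))
model_demo.compile(loss='categorical_crossentropy', optimizer='adam')
# every sample starts from a zero state, so no reset_states() calls are needed
model_demo.fit(X_demo, y_demo, epochs=2, shuffle=True, verbose=0)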
Below we use stateless mode to teach the model random subsequences of the alphabet and predict the letter that follows. The maximum sequence length is 5; shorter sequences are zero-padded, and the subsequences are independent of one another. This setup is probably the most common in practice.
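For instance, `pad_sequences` left-pads shorter sequences with zeros by default, so "KLM" (encoded as [10, 11, 12]) becomes a length-5 row:

from keras.preprocessing.sequence import pad_sequences

print(pad_sequences([[10, 11, 12]], maxlen=5))
# [[ 0  0 10 11 12]]

With that in mind, here is the full stateless example: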
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.utils import np_utils
from keras.preprocessing.sequence import pad_sequences
from keras import callbacks

# build the mapping dictionaries between letters and integers
np.random.seed(7)
alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
# create mapping of characters to integers (0-25) and the reverse
char_to_int = dict((c, i) for i, c in enumerate(alphabet))
int_to_char = dict((i, c) for i, c in enumerate(alphabet))

# build the training samples: 1000 sequences with length at most 5
num_inputs = 1000
max_len = 5
dataX = []
dataY = []
for i in range(num_inputs):
    start = np.random.randint(len(alphabet)-2)
    end = np.random.randint(start, min(start+max_len, len(alphabet)-1))
    sequence_in = alphabet[start:end+1]
    sequence_out = alphabet[end + 1]
    dataX.append([char_to_int[char] for char in sequence_in])
    dataY.append(char_to_int[sequence_out])
    print(sequence_in, '->', sequence_out)

# convert list of lists to array and pad sequences if needed
X = pad_sequences(dataX, maxlen=max_len, dtype='float32')
# reshape X to be [samples, time steps, features]
X = np.reshape(X, (X.shape[0], max_len, 1))
# normalize
X = X / float(len(alphabet))
# one hot encode the output variable
y = np_utils.to_categorical(dataY, 26)

# build and run the model
batch_size = 1
model = Sequential()
model.add(LSTM(32, input_shape=(X.shape[1], 1), return_sequences=True))
model.add(LSTM(32))
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# RemoteMonitor streams training metrics to a monitoring server listening on
# localhost:9000; if no server is running, it can be dropped from the callbacks
remote = callbacks.RemoteMonitor(root='http://localhost:9000')
model.fit(X, y, epochs=200, batch_size=batch_size, verbose=2, callbacks=[remote])

# summarize performance of the model
scores = model.evaluate(X, y, verbose=0)
print("Model Accuracy: %.2f%%" % (scores[1]*100))
OUT:
Model Accuracy: 100.00%
Model predictions:
# model prediction
for i in range(20):
    pattern_index = np.random.randint(len(dataX))
    pattern = dataX[pattern_index]
    x = pad_sequences([pattern], maxlen=max_len, dtype='float32')
    x = np.reshape(x, (1, max_len, 1))
    x = x / float(len(alphabet))
    prediction = model.predict(x, verbose=0)
    index = np.argmax(prediction)
    result = int_to_char[index]
    seq_in = [int_to_char[value] for value in pattern]
    print(seq_in, "->", result)
OUT:
['J'] -> K
['H', 'I', 'J'] -> K
['E', 'F'] -> G
['K', 'L', 'M'] -> N
['B'] -> C
['C'] -> D
['R', 'S'] -> T
['A', 'B', 'C'] -> D
['C', 'D', 'E'] -> F
['N', 'O', 'P'] -> Q
['C', 'D'] -> E
['L', 'M'] -> N
['F', 'G', 'H', 'I', 'J'] -> K
['N', 'O', 'P', 'Q'] -> R
['C', 'D', 'E', 'F', 'G'] -> H
['A', 'B', 'C'] -> D
['R', 'S', 'T', 'U', 'V'] -> W
['B', 'C', 'D'] -> E
['F', 'G'] -> H
['K'] -> L
As we can see, for any input sequence of up to 5 letters, the model correctly predicts the next letter, with no need to keep resetting the state by hand. We can wrap the prediction in a small helper:
import pandas as pd

def predict(seq='A'):
    seq_in = [s for s in seq if 'A' <= s <= 'Z']
    x = pd.Series(seq_in).map(char_to_int)
    if len(x) == 0:
        return ''
    else:
        x = pad_sequences([x], maxlen=max_len, dtype='float32')
        x = np.reshape(x, (1, max_len, 1))
        x = x / float(len(alphabet))
        prediction = model.predict(x, verbose=0)
        index = np.argmax(prediction)
        result = int_to_char[index]
        print(seq_in, "->", result)

predict('OP')
OUT:
['O', 'P'] -> Q
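Since the helper silently drops anything outside A-Z, a couple of hypothetical calls behave as follows:

predict('xyzKL')   # lowercase letters are filtered out; should print ['K', 'L'] -> M
predict('123')     # nothing survives the filter, so the function just returns ''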