This implements an encoder-decoder architecture on top of recurrent networks. The code follows Dr. Jason Brownlee's blog, and the doctor in turn appears to have followed the official Keras documentation.
1. I have added some comments of my own.
2. The architecture itself is not unique to recurrent networks.
3. Multi-step prediction of a sequence follows the principle of recursive generation: each predicted step is fed back as the next input (see the small sketch after this list).
4. The hidden state and cell state here, however, do depend on the specific LSTM model.
5. As for the structure in the figure above, the RNN-based encoder-decoder: many write-ups make a bloody mess of it, so I would rather keep it simple and stick to the key points.
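Before the full listing, a minimal sketch of point 3. Here one_step_model is a hypothetical stand-in for any one-step-ahead predictor (the LSTM decoder below, an AR model, whatever); the only thing that matters is the feedback loop.

def multi_step_predict(one_step_model, state, start_token, n_steps):
    outputs = []
    x = start_token
    for _ in range(n_steps):
        x, state = one_step_model(x, state)  # predict one step ahead...
        outputs.append(x)                    # ...and feed the prediction back in as input
    return outputs

toy = lambda x, s: (x + s, s)  # a toy one-step model, just to exercise the loop
print(multi_step_predict(toy, 1, 0, 3))  # [1, 2, 3]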
Follow the usual working principle: find the important interfaces (entries, exits, turning points and the like), and carve out the repeating unit.
a. The important interface is the intermediate state.
b. The repeating unit is the recurrent network's time step.
c. Anyone who has done time-series forecasting will see it readily: the encoder maps the space of variable-length sequences into the state space, and a state has no notion of time steps (it lives at a single timestamp); the decoder is then just a first-order time-series predictor (prediction offset by one step; see the small example after this list).
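To make the "offset by one step" in point c concrete, a tiny worked example (the values are made up for illustration): during training the decoder receives the target shifted right by one position and learns to emit the unshifted target, which is exactly what get_dataset constructs below.

target = [7, 4, 2]             # desired decoder output
target_in = [0] + target[:-1]  # decoder input [0, 7, 4]: the same sequence, offset by one step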
Since this is a recursion, it needs an initial condition, much as a difference equation (or differential equation) does, and the encoder supplies it. The time-series game is then played just as in classical statistics: multi-step prediction with a one-step model goes as follows.
from random import randint
from numpy import array, argmax, array_equal
from keras.utils import to_categorical
from keras.models import Model
from keras.layers import Input, LSTM, Dense

# generate a sequence of random integers
def generate_sequence(length, n_unique):
    return [randint(1, n_unique-1) for _ in range(length)]

# prepare data for the LSTM
def get_dataset(n_in, n_out, cardinality, n_samples):
    X1, X2, y = list(), list(), list()
    for _ in range(n_samples):
        # generate source sequence
        source = generate_sequence(n_in, cardinality)
        # define target sequence: the first n_out source elements, reversed
        target = source[:n_out]
        target.reverse()
        # create the padded input target sequence (offset by one step)
        target_in = [0] + target[:-1]
        # one hot encode
        src_encoded = to_categorical(source, num_classes=cardinality)
        tar_encoded = to_categorical(target, num_classes=cardinality)
        tar2_encoded = to_categorical(target_in, num_classes=cardinality)
        # store
        X1.append(src_encoded)
        X2.append(tar2_encoded)
        y.append(tar_encoded)
    return array(X1), array(X2), array(y)

# returns train, inference_encoder and inference_decoder models
def define_models(n_input, n_output, n_units):
    # define the training encoder
    encoder_inputs = Input(shape=(None, n_input))  # n_input is the feature dimension (its size is the number of features, e.g. an image's feature maps)
    encoder = LSTM(n_units, return_state=True)  # n_units is the size of the encoder's feature dimension, i.e. the number of units
    encoder_outputs, state_h, state_c = encoder(encoder_inputs)  # grab the hidden and cell states produced from the input; they initialize the decoder's hidden and cell states
    # define the training decoder
    decoder_inputs = Input(shape=(None, n_output))  # n_output: size of the feature dimension of the output (response) sequence
    decoder_lstm = LSTM(n_units, return_sequences=True, return_state=True)  # the decoder consumes the encoder's hidden and cell states, so n_units must match
    decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=[state_h, state_c])  # this decoder layer is shared with inference below!!
    decoder_dense = Dense(n_output, activation='softmax')  # this dense layer is shared with inference below!!
    decoder_outputs = decoder_dense(decoder_outputs)
    model = Model([encoder_inputs, decoder_inputs], decoder_outputs)  # the training model: source and target sequences in, shifted target sequence out

    # define the inference encoder: the graph from the input sequence to the hidden and cell states;
    # it reuses the weights of every layer between input and output, much like a TF serving signature
    encoder_model = Model(encoder_inputs, [state_h, state_c])  # the layer-programming style is simple: just wrap the inputs and outputs in Model
    # encoder_outputs, state_h, state_c = encoder(encoder_inputs)  # ? seems to be the same call as above

    # define the inference decoder: by the nature of a recurrent network, the output (later) state is inferred from the input (earlier) state
    decoder_state_input_h = Input(shape=(n_units,))
    decoder_state_input_c = Input(shape=(n_units,))
    decoder_outputs, state_h, state_c = decoder_lstm(decoder_inputs, initial_state=[decoder_state_input_h, decoder_state_input_c])
    decoder_outputs = decoder_dense(decoder_outputs)
    # the old state is updated into the new state
    decoder_model = Model([decoder_inputs, decoder_state_input_h, decoder_state_input_c], [decoder_outputs, state_h, state_c])
    # return all models
    return model, encoder_model, decoder_model

# generate target given source sequence
def predict_sequence(infenc, infdec, source, n_steps, cardinality):
    # encode: compute the source input's value in state space
    h_state, c_state = infenc.predict(source)
    # start-of-sequence input, shape (1, 1, cardinality), i.e. [[[0, 0, ...]]]: a single step
    target_seq = array([0.0 for _ in range(cardinality)]).reshape(1, 1, cardinality)
    # collect predictions
    output = list()
    for t in range(n_steps):
        # predict the next step: this is the recurrent network's sequence-prediction loop;
        # starting from the initial target_seq, keep iterating to produce new values
        yhat, h_state, c_state = infdec.predict([target_seq, h_state, c_state])
        # store prediction
        output.append(yhat[0, 0, :])
        # update state and target sequence
        target_seq = yhat
    return array(output)

# decode a one hot encoded string
def one_hot_decode(encoded_seq):
    return [argmax(vector) for vector in encoded_seq]

# configure problem
n_features = 50 + 1
n_steps_in = 6
n_steps_out = 3
# define model
train, infenc, infdec = define_models(n_features, n_features, 128)
train.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])  # the training model is the one that gets compiled
# generate training dataset
X1, X2, y = get_dataset(n_steps_in, n_steps_out, n_features, 100000)
print(X1.shape, X2.shape, y.shape)
# train model
train.fit([X1, X2], y, epochs=1)
# evaluate LSTM
total, correct = 100, 0
# for _ in range(total):
#     X1, X2, y = get_dataset(n_steps_in, n_steps_out, n_features, 1)
#     target = predict_sequence(infenc, infdec, X1, n_steps_out, n_features)
#     if array_equal(one_hot_decode(y[0]), one_hot_decode(target)):
#         correct += 1
# print('Accuracy: %.2f%%' % (float(correct)/float(total)*100.0))
# spot check some examples
for _ in range(10):
    X1, X2, y = get_dataset(n_steps_in, n_steps_out, n_features, 1)
    target = predict_sequence(infenc, infdec, X1, n_steps_out, n_features)
    print('X=%s y=%s, yhat=%s' % (one_hot_decode(X1[0]), one_hot_decode(y[0]), one_hot_decode(target)))
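To close the loop on the difference-equation analogy above, here is the same recursive rollout in its barest classical form (a sketch with an arbitrary coefficient): the one-step model x[t+1] = a * x[t], unrolled for n steps from an initial condition x0. Here x0 plays the role that the encoder's final state plays in the seq2seq model.

def ar1_forecast(x0, a, n_steps):
    xs, x = [], x0
    for _ in range(n_steps):
        x = a * x  # apply the one-step model recursively
        xs.append(x)
    return xs

print(ar1_forecast(1.0, 0.9, 3))  # roughly [0.9, 0.81, 0.729]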