The previous sections implemented the cell-level versions of GRU and LSTM, which are lower-level and useful for understanding how the recurrence actually works.
The Sequential version below does not require you to define the shape of the state tensors yourself, so it is simpler to use (a short sketch contrasting the two approaches follows the complete listing below):
import os

import tensorflow as tf

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
assert tf.__version__.startswith('2.')

# Basic GPU configuration: fail if no GPU is visible, and let memory grow on demand
physical_devices = tf.config.experimental.list_physical_devices('GPU')
assert len(physical_devices) > 0, "Not enough GPU hardware devices available"
tf.config.experimental.set_memory_growth(physical_devices[0], True)

total_words = 10000          # keep only the 10000 most frequent words; the rest are treated as rare words
max_sentencelength = 121     # maximum length of each sentence
batchsize = 2000
embedding_len = 100          # expand each word from a single integer into a 100-dimensional vector

# num_words is the vocabulary size
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(num_words=total_words)
print('x_train.shape:', x_train.shape)
print('x_train[3]:', len(x_train[3]), x_train[3])

# Pad / truncate every sentence to a fixed length
x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_sentencelength)
print('x_train[3]:', len(x_train[3]), x_train[3])
x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_sentencelength)
# x_train: [b, max_sentencelength]  b sentences, each max_sentencelength words long
# x_test:  [b, max_sentencelength]
print('x_train.shape:', x_train.shape)
# print('x_train[3]:', x_train[3].shape, x_train[3])
print('y_train.shape:', y_train.shape, tf.reduce_max(y_train), tf.reduce_min(y_train))

db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
# drop_remainder drops the last batch if it is smaller than the others
db_train = db_train.shuffle(1000).batch(batch_size=batchsize, drop_remainder=True)
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.batch(batch_size=batchsize, drop_remainder=True)


class MyRnn(tf.keras.Model):
    def __init__(self, units):
        super(MyRnn, self).__init__()
        # Unlike the cell version, no manually created initial state is needed:
        # self.state0 = [tf.zeros([batchsize, units])]

        # [b, max_sentencelength] => [b, max_sentencelength, embedding_len]
        self.embedding = tf.keras.layers.Embedding(total_words, embedding_len,
                                                   input_length=max_sentencelength)

        # [b, max_sentencelength, embedding_len] -> h_dim: units (e.g. 64)
        self.rnn = tf.keras.Sequential([
            tf.keras.layers.LSTM(units, return_sequences=True, unroll=True),
            tf.keras.layers.LSTM(units, unroll=True),
            # tf.keras.layers.GRU(units, return_sequences=True, unroll=True),
            # tf.keras.layers.GRU(units, unroll=True),
        ])

        # fc: [b, units] => [b, 1]
        self.outlayer = tf.keras.layers.Dense(1)

    def call(self, inputs, training=None):
        """
        :param inputs: [b, max_sentencelength]
        :param training: training flag passed in by Keras
        """
        # [b, max_sentencelength]
        x = inputs
        print('x.shape:', x.shape)
        # embedding: [b, max_sentencelength] => [b, max_sentencelength, embedding_len]
        x = self.embedding(x)
        # rnn: [b, max_sentencelength, embedding_len] => [b, units]
        x = self.rnn(x)
        # out: [b, units] => [b, 1]
        x = self.outlayer(x)
        prob = tf.sigmoid(x)
        return prob


if __name__ == '__main__':
    units = 64
    epochs = 40
    lr = 1e-2

    model = MyRnn(units)
    model.compile(optimizer=tf.keras.optimizers.Adam(lr),
                  loss=tf.keras.losses.BinaryCrossentropy(),  # loss for binary classification
                  metrics=['accuracy'])
    model.fit(db_train, epochs=epochs, validation_data=db_test)
    model.evaluate(db_test)
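To make the point in the introduction concrete, here is a minimal comparison sketch (not part of the original script; the shapes and the names state, word, out are illustrative): with the cell-level LSTMCell you must build the initial state yourself and step through the time axis manually, whereas the layer-level LSTM handles the state internally.

import tensorflow as tf

units, b, seq_len, emb = 64, 4, 121, 100
x = tf.random.normal([b, seq_len, emb])        # [b, max_sentencelength, embedding_len]

# Cell-level version: initial state [h, c] is created by hand and threaded through the loop
cell = tf.keras.layers.LSTMCell(units)
state = [tf.zeros([b, units]), tf.zeros([b, units])]
for word in tf.unstack(x, axis=1):             # iterate over the time axis manually
    out, state = cell(word, state)             # out: [b, units]

# Layer-level version: the state is managed internally
layer = tf.keras.layers.LSTM(units)
out2 = layer(x)                                # out2: [b, units]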
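After training, the model can be applied to a test batch directly; a small usage sketch under the same variable names as the script above (the 0.5 threshold is an assumption for turning probabilities into labels):

# Take one batch from the test set and inspect the predicted probabilities
for x_batch, y_batch in db_test.take(1):
    probs = model(x_batch)                     # [batchsize, 1], values in (0, 1)
    preds = tf.cast(probs > 0.5, tf.int32)     # threshold at 0.5 for the binary label
    print(preds[:10, 0].numpy(), y_batch[:10].numpy())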