TensorFlow 2 (Prerequisite Course) --- 11.1 Stock Price Prediction with a Recurrent Neural Network
I. Summary
Summary in one sentence:
Two SimpleRNN layers are used, each followed by Dropout, with a final Dense layer producing the output:
model = tf.keras.Sequential([
    SimpleRNN(80, return_sequences=True),
    Dropout(0.2),
    SimpleRNN(100),
    Dropout(0.2),
    Dense(1)
])
model.compile(optimizer=tf.keras.optimizers.Adam(0.001),
              loss='mean_squared_error')  # use mean squared error as the loss function
1. What does the input data to SimpleRNN look like?
In order: number of samples, number of time steps the recurrent cell is unrolled over, and number of input features per time step: x_train = np.reshape(x_train, (x_train.shape[0], 60, 1))
# Reshape x_train to match the RNN input requirement: [number of samples, time steps per sequence, input features per time step].
# The whole training set is fed in at once, so the number of samples is x_train.shape[0], i.e. 2066 sequences;
# 60 opening prices are fed in to predict the opening price of day 61, so the number of time steps is 60;
# each time step carries a single day's opening price, so the number of input features per time step is 1.
x_train = np.reshape(x_train, (x_train.shape[0], 60, 1))
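To sanity-check these shapes, here is a minimal sketch (my own example on random dummy data, not part of the course code) showing how a [samples, 60, 1] batch flows through the two stacked SimpleRNN layers and the Dense head used in this post:

import numpy as np
from tensorflow.keras.layers import Dense, SimpleRNN

# dummy batch with the same layout as x_train: [samples, time steps, features per step]
dummy = np.random.rand(4, 60, 1).astype(np.float32)

rnn1 = SimpleRNN(80, return_sequences=True)  # returns the hidden state at every time step
rnn2 = SimpleRNN(100)                        # returns only the last hidden state
dense = Dense(1)

h1 = rnn1(dummy)   # shape (4, 60, 80)
h2 = rnn2(h1)      # shape (4, 100)
out = dense(h2)    # shape (4, 1): one predicted opening price per sample
print(h1.shape, h2.shape, out.shape)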
II. Stock Price Prediction with a Recurrent Neural Network
Location of this post's content in the course video:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dropout, Dense, SimpleRNN
import matplotlib.pyplot as plt
import os
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
import math
In [2]:
maotai = pd.read_csv('./SH600519.csv')  # read the stock data file
print(maotai)
In [3]:
training_set = maotai.iloc[0:2426 - 300, 2:3].values  # opening prices of the first 2426-300=2126 days as the training set; rows are 0-indexed, and 2:3 selects the half-open column range [2, 3), i.e. column C, the opening price
test_set = maotai.iloc[2426 - 300:, 2:3].values  # opening prices of the last 300 days as the test set
print(training_set.shape)
print(test_set.shape)
In [4]:
# Normalization
sc = MinMaxScaler(feature_range=(0, 1))  # define the scaler: scale values into the range (0, 1)
print(sc)
In [5]:
training_set_scaled = sc.fit_transform(training_set)  # fit on the training set (learning its min and max) and scale it
test_set = sc.transform(test_set)  # scale the test set with the statistics learned from the training set
print(training_set_scaled[:5,])
print(test_set[:5,])
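With feature_range=(0, 1), MinMaxScaler computes (x - min) / (max - min) from the data it was fitted on. A minimal sketch with made-up prices (not the Maotai data) illustrating why the test set must reuse the training-set statistics rather than being fitted separately:

import numpy as np
from sklearn.preprocessing import MinMaxScaler

train = np.array([[100.0], [150.0], [200.0]])  # made-up training prices
test = np.array([[180.0], [220.0]])            # made-up test prices

demo_sc = MinMaxScaler(feature_range=(0, 1))
train_scaled = demo_sc.fit_transform(train)    # learns min=100 and max=200 from the training data
test_scaled = demo_sc.transform(test)          # reuses min=100 and max=200; values may fall outside [0, 1]

# equivalent manual computation: (x - train_min) / (train_max - train_min)
assert np.allclose(test_scaled, (test - 100.0) / (200.0 - 100.0))
print(train_scaled.ravel())  # [0.  0.5 1. ]
print(test_scaled.ravel())   # [0.8 1.2]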
In [6]:
x_train = []
y_train = []
x_test = []
y_test = []
In [7]:
# Training set: the first 2426-300=2126 days of data in the csv file
# Loop over the training set, taking every 60 consecutive days of opening prices as the input features x_train
# and the opening price of day 61 as the label; the loop builds 2426-300-60=2066 sequences in total
# (a toy illustration of this sliding-window construction follows right after this cell).
for i in range(60, len(training_set_scaled)):
    x_train.append(training_set_scaled[i - 60:i, 0])
    y_train.append(training_set_scaled[i, 0])
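A toy version of this sliding-window construction, using a made-up 10-element series and a window of 3 instead of 60:

import numpy as np

series = np.arange(10, dtype=float)  # made-up series: 0, 1, ..., 9
window = 3                           # stands in for the 60-day window above

xs, ys = [], []
for i in range(window, len(series)):
    xs.append(series[i - window:i])  # the `window` values before day i become the features
    ys.append(series[i])             # day i itself becomes the label

print(np.array(xs))  # shape (7, 3): [[0. 1. 2.], [1. 2. 3.], ..., [6. 7. 8.]]
print(np.array(ys))  # shape (7,):   [3. 4. 5. 6. 7. 8. 9.]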
In [8]:
print(x_train[:2])
print(y_train[:2])
In [9]:
# Shuffle the training set
np.random.seed(7)           # seed NumPy's RNG before shuffling the features...
np.random.shuffle(x_train)
np.random.seed(7)           # ...and reset the same seed before shuffling the labels,
np.random.shuffle(y_train)  # so both shuffles apply the identical permutation and pairs stay aligned
tf.random.set_seed(7)       # also fix TensorFlow's global seed for reproducibility
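Re-seeding with the same value before each shuffle replays the same permutation, which is what keeps every 60-day window paired with its label. A minimal sketch with made-up lists demonstrating the effect:

import numpy as np

# made-up paired data: label i belongs to feature window i
xs = [[0, 0], [1, 1], [2, 2], [3, 3]]
ys = [0, 1, 2, 3]

np.random.seed(7)
np.random.shuffle(xs)
np.random.seed(7)      # resetting the seed replays the same permutation
np.random.shuffle(ys)

print(xs, ys)
assert [row[0] for row in xs] == ys  # every window still matches its label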
In [10]:
# Convert the training set from Python lists to NumPy arrays
x_train, y_train = np.array(x_train), np.array(y_train)
In [11]:
print(x_train.shape)
print(y_train.shape)
In [12]:
# Reshape x_train to match the RNN input requirement: [number of samples, time steps per sequence, input features per time step].
# The whole training set is fed in at once, so the number of samples is x_train.shape[0], i.e. 2066 sequences;
# 60 opening prices are fed in to predict the opening price of day 61, so the number of time steps is 60;
# each time step carries a single day's opening price, so the number of input features per time step is 1.
x_train = np.reshape(x_train, (x_train.shape[0], 60, 1))
In [13]:
# Test set: the last 300 days of data in the csv file
# Loop over the test set, taking every 60 consecutive days of opening prices as the input features x_test
# and the opening price of day 61 as the label; the loop builds 300-60=240 sequences in total.
for i in range(60, len(test_set)):
    x_test.append(test_set[i - 60:i, 0])
    y_test.append(test_set[i, 0])

# Convert the test set to arrays and reshape to match the RNN input requirement:
# [number of samples, time steps per sequence, input features per time step]
x_test, y_test = np.array(x_test), np.array(y_test)
x_test = np.reshape(x_test, (x_test.shape[0], 60, 1))
In [14]:
print(x_train.shape)
print(y_train.shape)
In [15]:
print(x_test.shape)
print(y_test.shape)
In [16]:
model = tf.keras.Sequential([
    SimpleRNN(80, return_sequences=True),
    Dropout(0.2),
    SimpleRNN(100),
    Dropout(0.2),
    Dense(1)
])
model.compile(optimizer=tf.keras.optimizers.Adam(0.001),
              loss='mean_squared_error')  # use mean squared error as the loss function
In [17]:
# This application only monitors the loss value, not accuracy, so the metrics option is omitted;
# only the loss will be shown while the epochs run
checkpoint_save_path = "./checkpoint/rnn_stock.ckpt"

if os.path.exists(checkpoint_save_path + '.index'):
    print('-------------load the model-----------------')
    model.load_weights(checkpoint_save_path)

cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_save_path,
                                                 save_weights_only=True,
                                                 save_best_only=True,
                                                 monitor='val_loss')
In [18]:
history = model.fit(x_train, y_train,
                    batch_size=64,
                    epochs=50,
                    validation_data=(x_test, y_test),
                    validation_freq=1,
                    callbacks=[cp_callback])

model.summary()
In [19]:
file = open('./weights.txt', 'w')  # save the trained parameters to a text file
for v in model.trainable_variables:
    file.write(str(v.name) + '\n')
    file.write(str(v.shape) + '\n')
    file.write(str(v.numpy()) + '\n')
file.close()

loss = history.history['loss']
val_loss = history.history['val_loss']

plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.title('Training and Validation Loss')
plt.legend()
plt.show()
In [20]:
################## predict ######################
# Feed the test set into the model to get predictions
predicted_stock_price = model.predict(x_test)
# Undo the normalization of the predictions: map back from (0, 1) to the original price range
predicted_stock_price = sc.inverse_transform(predicted_stock_price)
# Undo the normalization of the ground truth: map back from (0, 1) to the original price range
real_stock_price = sc.inverse_transform(test_set[60:])
# Plot the real prices against the predicted prices
plt.plot(real_stock_price, color='red', label='MaoTai Stock Price')
plt.plot(predicted_stock_price, color='blue', label='Predicted MaoTai Stock Price')
plt.title('MaoTai Stock Price Prediction')
plt.xlabel('Time')
plt.ylabel('MaoTai Stock Price')
plt.legend()
plt.show()
In [21]:
##########evaluate##############
# Calculate MSE, the mean squared error ---> E[(predicted - actual)^2] (square the errors, then average)
mse = mean_squared_error(predicted_stock_price, real_stock_price)
# Calculate RMSE, the root mean squared error ---> sqrt(MSE) (the square root of the MSE)
rmse = math.sqrt(mean_squared_error(predicted_stock_price, real_stock_price))
# Calculate MAE, the mean absolute error ---> E[|predicted - actual|] (take the absolute errors, then average)
mae = mean_absolute_error(predicted_stock_price, real_stock_price)
print('Mean squared error: %.6f' % mse)
print('Root mean squared error: %.6f' % rmse)
print('Mean absolute error: %.6f' % mae)
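For reference, a small worked example with made-up numbers showing that these three metrics are just different ways of averaging the prediction errors:

import math
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error

pred = np.array([1.0, 2.0, 4.0])  # made-up predictions
real = np.array([1.0, 3.0, 2.0])  # made-up ground truth

errors = pred - real               # [0, -1, 2]
mse = np.mean(errors ** 2)         # (0 + 1 + 4) / 3 ≈ 1.667
rmse = math.sqrt(mse)              # ≈ 1.291
mae = np.mean(np.abs(errors))      # (0 + 1 + 2) / 3 = 1.0

# the sklearn helpers used above compute the same quantities
assert math.isclose(mse, mean_squared_error(pred, real))
assert math.isclose(mae, mean_absolute_error(pred, real))
print(mse, rmse, mae)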
In [ ]: