TENSORFLOW2.0+BLSTM+ATTENTION 基於深度學習的股票趨勢預測

本文轉載自查看原文 2020-06-27 21:46 1628 tensorflow2學習之路
這里給出的模型是我沒有進行參數調優的，所以效果不是最終的理想狀態。


#%%

# Notebook shell escape: install the tushare package, which is imported below as `ts`.
! pip install tushare

#%% md

https://mp.weixin.qq.com/s?__biz=Mzg4MDE3OTA5NA==&mid=2247491137&idx=1&sn=9506137b0ba2f1b59fadae117aaa97dd&source=41#wechat_redirect


x = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(1, activation= 'sigmoid'))(x)

#%%

# 導入包
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import tushare as ts
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
# Parameter settings -- adjust these to match the size of your feature input.
seq_length = 20  # window length
timesteps = seq_length  # second name for the same window length
data_dim, output_dim = 7, 1  # dimension of input data / dimension of output data

# Instrument code and date range of the data to work with
# (dates look like YYYYMMDD strings -- confirm against the data provider).
ts_code, start_date, end_date = '000001.SZ', '20120101', '20200301'

# A1

# Show every column and every row when printing DataFrames, so that pandas
# does not abbreviate the printed output with ellipses.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Data preparation: fetch the daily bars (open, high, low, close, volume, ...).
# The token below is a placeholder; it can be found on the new tushare website.
pro = ts.pro_api('XXXXXXXXXXXXXXXXXXXXXXXXXXXX')
# Query with the ts_code / start_date / end_date settings defined earlier
# instead of repeating the literals, so that changing the settings actually
# changes which data is downloaded.
stock_data = pro.query('daily', ts_code=ts_code, start_date=start_date, end_date=end_date)
# Reverse the row order so that earlier dates come first
# (assumes the API returns the newest rows first -- TODO confirm).
stock_data = stock_data[::-1]
print(stock_data.head(5))
stock_data.reset_index(drop=True, inplace=True)  # renumber the rows 0, 1, 2, ...
# Select the feature columns. 'close' is at column index 1, which is the
# column the label extraction and the inverse scaling index later on.
xy = stock_data[['open','close','high','low','vol','pct_chg','amount']]
xy = np.array(xy.values)  # convert to a NumPy array


#%% md

開始切分

#%%

def _make_windows(scaled, window, label_col=1, label_scale=10):
    """Slice a scaled 2-D array into sliding input windows and next-step labels.

    Args:
        scaled: 2-D array of shape (n_rows, n_features).
        window: number of consecutive rows in each input window.
        label_col: index of the column whose value on the row immediately
            after each window is used as that window's label.
        label_scale: factor the label column is multiplied by (the labels are
            enlarged a little to make training easier).

    Returns:
        A tuple ``(inputs, labels)`` with shapes
        ``(n_rows - window, window, n_features)`` and ``(n_rows - window, 1)``.

    Raises:
        ValueError: if ``scaled`` does not have more rows than ``window``,
            i.e. not even one window with a following label can be built.
    """
    n_windows = len(scaled) - window
    if n_windows <= 0:
        raise ValueError(
            'need more than {} rows to build a window, got {}'.format(window, len(scaled))
        )
    inputs = np.stack([scaled[i:i + window] for i in range(n_windows)])
    # The label of window i is row i + window, so the labels are simply the
    # label column with its first `window` entries dropped.
    labels = (scaled[window:, label_col] * label_scale).reshape(-1, 1)
    return inputs, labels


# Split into training and testing sets: the first 70% of the rows are used
# for training and the remainder for testing.
train_size = int(len(xy) * 0.7)  # length of the training set
test_size = len(xy) - train_size  # length of the testing set
xy_train, xy_test = np.array(xy[:train_size]), np.array(xy[train_size:])

# Fit the scaler on the training rows only; it works column by column and
# maps each column's minimum to 0 and maximum to 1.
scaler = MinMaxScaler()
xy_train_new = scaler.fit_transform(xy_train)

# Every feature column is an input; the label is column 1 (the close price,
# given the column order chosen when `xy` was built) on the row after each window.
trainX, trainY = _make_windows(xy_train_new, seq_length)
print(trainX.shape)
print(trainY.shape)

# Scale the test rows with the scaler fitted on the training set.
xy_test_new = scaler.transform(xy_test)
testX, testY = _make_windows(xy_test_new, seq_length)
print(testX.shape)
print(testY.shape)

#%% md

建立模型

#%%


# B1


import os
import tensorflow as tf

# Network: two stacked bidirectional LSTM layers followed by two dense layers
# that end in a single output value per input window.
inputs = tf.keras.Input(shape=(seq_length, data_dim))

# The first bidirectional LSTM returns the whole sequence so that the second
# one can consume it.
first_bilstm = tf.keras.layers.Bidirectional(
    tf.keras.layers.LSTM(units=50, return_sequences=True)
)
x = first_bilstm(inputs)

# The second bidirectional LSTM does not set return_sequences.
second_bilstm = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(units=50))
x = second_bilstm(x)

flatten = tf.keras.layers.Flatten()
x = flatten(x)

hidden_dense = tf.keras.layers.Dense(20)
x = hidden_dense(x)

output_dense = tf.keras.layers.Dense(1)
output = output_dense(x)

model = tf.keras.Model(inputs=inputs, outputs=output)



#%% md

模型訓練

#%%

import os
import time

# Directory that receives the TensorBoard logs.
log_dir = './drive/My Drive/callsbacks_b1_chun'
# makedirs creates any missing parent directories and, with exist_ok=True,
# does not fail when the directory already exists. The previous
# exists()/mkdir() pair raised when the parent directory was missing.
os.makedirs(log_dir, exist_ok=True)
# NOTE(review): this path is built but not passed to any callback below, so
# no model file is written to it during training.
output_model_file = os.path.join(log_dir, "best2.h5")

callbacks = [
    tf.keras.callbacks.TensorBoard(log_dir),
]

# Mean squared error is the training loss; MAE and MSE are reported as metrics.
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae', 'mse'])

# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
model.summary()

# Train and report the elapsed wall-clock time in seconds.
start = time.time()
history = model.fit(
    trainX,
    trainY,
    batch_size=256,
    epochs=200,
    verbose=2,
    validation_split=0.1,
    callbacks=callbacks,
)
elapsed = time.time() - start
print(elapsed)


#%% md

模型的評估

#%%


# Plot the training and validation loss recorded for each epoch.
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
# val_loss is computed on the validation_split portion of the training data,
# not on the test set, so the second curve is labelled 'Validation'.
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()



#%% md

訓練樣本的評估

#%%

# Predictions on the training windows, to inspect how well the model fits.
trainPredict2 = model.predict(trainX)

# Undo the x10 enlargement and the min-max scaling, using the statistics the
# scaler stored for column 1.
close_range = scaler.data_range_[1]
close_min = scaler.data_min_[1]
trainPredict2_2 = trainPredict2 / 10 * close_range + close_min
trainY2 = trainY / 10 * close_range + close_min

# Draw the real and the predicted close price series on one figure.
plt.subplots(figsize=(16, 6))
for series, line_color, line_label in (
    (trainY2, 'blue', 'REAL Stock Price'),
    (trainPredict2_2, 'orange', 'PREDICT Stock Price'),
):
    plt.plot(series, color=line_color, label=line_label)
plt.title(' TRAIN CLOSE PRICE')
plt.xlabel('Time')
plt.ylabel('Stock Price')
plt.legend()
plt.show()



#%%

# Scatter plot of label vs. prediction on a square canvas; ideally the points
# lie around the line with slope 1.
plt.figure(figsize=(8,8))
plt.xlim((5,30))  # x-axis range
plt.ylim((5,30))  # y-axis range
plt.scatter(trainY2, trainPredict2_2)
plt.ylabel('prediction')
plt.xlabel('label')

# Signed relative error of every training prediction, in percent.
train_pct_error = (trainPredict2_2 - trainY2) / trainY2 * 100
print('訓練平均誤差:', np.mean(train_pct_error))  # mean error (%)
print('訓練最大誤差:', np.max(train_pct_error))  # maximum error (%)
print('訓練最小誤差:', np.min(train_pct_error))  # minimum error (%)

# Percentage of predictions whose relative error is at most 5%.
within_5pct = np.abs(trainPredict2_2 - trainY2) / trainY2 * 100 <= 5
count = int(np.count_nonzero(within_5pct))
count = count / len(trainY2) * 100
print('訓練誤差小於5%的比例:',count)


#%% md

測試樣本的評估

#%%

# Predictions on the test windows.
testPredict2 = model.predict(testX)
# Undo the x10 enlargement and the min-max scaling (column 1 statistics).
testPredict2_2 = testPredict2 / 10 * scaler.data_range_[1] + scaler.data_min_[1]
testY2 = testY / 10 * scaler.data_range_[1] + scaler.data_min_[1]

# Author's note: the model predicts the trend and is not very accurate at
# turning points, so it does not perform especially well on held-out data.
# The blue and the orange lines are independent of each other: the blue line
# is the future data we obtain in hindsight, while the orange line is what
# the model fitted by itself.

# Evaluate against the true labels in the scale the model was trained on.
# Previously the model was scored against its own inverse-scaled predictions
# (testPredict2_2), which says nothing about how close it is to the real data.
print(model.evaluate(testX, testY))

# Draw the real and the predicted close price series on one figure.
fig, ax = plt.subplots(figsize=(16, 6))
plt.plot(testY2, color='blue', label='REAL Stock Price')
plt.plot(testPredict2_2, color='orange', label='PREDICT Stock Price')
plt.title('TEST CLOSE PRICE')
plt.xlabel('Time')
plt.ylabel('Stock Price')
plt.legend()
plt.show()



#%%


# Scatter plot of label vs. prediction on a square canvas; ideally the points
# lie around the line with slope 1.
plt.figure(figsize=(8,8))
plt.xlim((5,20))  # x-axis range
plt.ylim((5,20))  # y-axis range
plt.scatter(testY2, testPredict2_2)
plt.ylabel('prediction')
plt.xlabel('label')

# Signed relative error of every test prediction, in percent.
test_pct_error = (testPredict2_2 - testY2) / testY2 * 100
print('測試平均誤差:', np.mean(test_pct_error))  # mean error (%)
print('測試最大誤差:', np.max(test_pct_error))  # maximum error (%)
print('測試最小誤差:', np.min(test_pct_error))  # minimum error (%)

# Percentage of predictions whose relative error is at most 5%.
within_5pct = np.abs(testPredict2_2 - testY2) / testY2 * 100 <= 5
count = int(np.count_nonzero(within_5pct))
count = count / len(testY2) * 100
print('測試誤差小於5%的比例:',count)


#%% md

對模型整體訓練的評估

#%%

# 模型的評估loss的值越低說明模型擬合的效果越好，LSTM模型主要采用RMSE作為評價標准
from sklearn.metrics import mean_squared_error,mean_absolute_error,r2_score
from math import sqrt
 
# Regression metrics on the test set, in price units.
mse = mean_squared_error(testY2, testPredict2_2)  # mean squared error
rmse = sqrt(mse)  # root mean squared error
mae = mean_absolute_error(testY2, testPredict2_2)  # mean absolute error
r_square = r2_score(testY2, testPredict2_2)  # R squared

# Report each metric with six decimals.
for template, value in (
    ('均方誤差mse: %.6f', mse),
    ('均方根誤差rmse: %.6f', rmse),
    ('平均絕對誤差mae: %.6f', mae),
    ('R_square: %.6f', r_square),
):
    print(template % value)


#%% md

股票評價

#%%

# Directional accuracy: for each step, does the predicted series move in the
# same direction (up / flat / down, compared by sign) as the real series?
correct = np.zeros(len(testPredict2_2))
predicted_moves = np.sign(np.diff(testPredict2_2, axis=0)).ravel()
real_moves = np.sign(np.diff(testY2, axis=0)).ravel()
# Entry i is 1 when the move from step i-1 to step i was called correctly;
# entry 0 has no previous step and stays 0.
correct[1:] = predicted_moves == real_moves

# Only len - 1 moves exist, so divide by that rather than by len; dividing by
# len counted the first point as a guaranteed miss.
n_moves = len(correct) - 1
accuracy = np.sum(correct) / n_moves * 100 if n_moves > 0 else float('nan')
print('對轉折點的預測正確率:',accuracy)

# Days on which the predicted price is at least the previous real close;
# these are the days the strategies below buy on.
n_days = len(testPredict2_2)
buy_days = [day for day in range(1, n_days) if testPredict2_2[day] >= testY2[day - 1]]

# Strategy 1: on every buy day, buy at the previous close and hold for one
# day; sum the real price differences.
count = 0
for day in buy_days:
    count = count + (testY2[day] - testY2[day - 1])
print('\n\n如果對明天的預測價格高於今天的收盤價，就買進並持有一天，能掙{}元'.format(count))

# Strategy 2: same buy rule, but sell ten steps later (eleven days held);
# buy days whose exit would fall past the end of the data are skipped.
count = 0
for day in buy_days:
    exit_day = day + 10
    if exit_day < n_days:
        count = count + (testY2[exit_day] - testY2[day - 1])
print('如果對明天的預測價格高於今天的收盤價，就買進並持有十一天，能掙{}元'.format(count))

# Ideal case: collect every real day-over-day rise.
count = 0
for day in range(1, n_days):
    rise = testY2[day] - testY2[day - 1]
    if testY2[day] >= testY2[day - 1]:
        count = count + rise
print('最理想的狀況下，能掙{}元'.format(count))

#%%

# For each fixed holding period from 1 to 30 days: buy whenever the predicted
# price is at least the previous real close, sell after the holding period,
# and sum the real price differences. Track the most profitable period.
total_days = len(testPredict2_2)
print(total_days)

# The buy signal does not depend on the holding period, so find it once.
signal_days = [d for d in range(1, total_days) if testPredict2_2[d] >= testY2[d - 1]]

maxmoneyday = 0
maxmoney = 0
for extra in range(30):
    holding_days = extra + 1
    count = 0
    for d in signal_days:
        exit_day = d + extra
        if exit_day >= total_days:
            # The exit would fall past the end of the data; skip this trade.
            continue
        count = count + (testY2[exit_day] - testY2[d - 1])

    print('在所有的這一段時間中，如果明天的預測價格高於今天的收盤價，就買進一股並持有{}天后賣出，總共能掙{}元'.format(holding_days,count))
    if count > maxmoney:
        maxmoney = count
        maxmoneyday = holding_days
print('最佳的固定持有相同天數的方式投資賺了{},持有{}天'.format(maxmoney,maxmoneyday))





#%%

# Open the TensorBoard visualisation inside the notebook, reading the same
# directory the training cell passed to the TensorBoard callback.
%reload_ext tensorboard
%tensorboard --logdir './drive/My Drive/callsbacks_b1_chun'



#%%

# 保存模型
# tf.saved_model.save(model, "Bilstm七輸入0408.h5")

#%%

# 載入模型
# model = tf.saved_model.load('Bilstm七輸入0402.h5')

#%% md

日線行情輸出參數
名稱 類型 描述
ts_code str 股票代碼
trade_date str 交易日期
open float 開盤價
high float 最高價
low float 最低價
close float 收盤價
pre_close float 昨收價
change float 漲跌額
pct_chg float 漲跌幅 （未復權，如果是復權請用 通用行情接口 ）
vol float 成交量 （手）
amount float 成交額 （千元）

---
每日指標輸出參數
名稱 類型 描述
ts_code str TS股票代碼
trade_date str 交易日期
close float 當日收盤價
turnover_rate float 換手率（%）
turnover_rate_f float 換手率（自由流通股）
volume_ratio float 量比
pe float 市盈率（總市值/凈利潤， 虧損的PE為空）
pe_ttm float 市盈率（TTM，虧損的PE為空）
pb float 市凈率（總市值/凈資產）
ps float 市銷率
ps_ttm float 市銷率（TTM）
dv_ratio float 股息率 （%）
dv_ttm float 股息率（TTM）（%）
total_share float 總股本 （萬股）
float_share float 流通股本 （萬股）
free_share float 自由流通股本 （萬）
total_mv float 總市值 （萬元）
circ_mv float 流通市值（萬元）
免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。
猜您在找 利用深度學習來預測股票價格變動深度學習RNN實現股票預測實戰（附數據、代碼）【深度學習與TensorFlow 2.0】入門篇深度學習的Attention模型 python-灰色預測平均房價趨勢kera深度學習庫的介紹基於深度學習的事件驅動型股票預測[論文研讀筆記] 通過深度學習股價截面數據分析和預測股票價格 tensorflow2.0、keras實現Attention 深度學習中的Attention機制深度學習中的Attention機制