1. 問題描述

已知 [k, k+n)時刻的正弦函數，預測 [k+t, k+n+t)時刻的正弦曲線。
因為每個時刻曲線上的點是一個值，即feature_len=1
如果給出50個時刻的點，即seq_len=50
如果只提供一條曲線供輸入，即batch=1
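輸入的shape=[seq_len, batch, feature_len] = [50, 1, 1]。
（注意：下面的代碼在 nn.RNN 中設置了 batch_first=True，因此實際傳入的shape是 [batch, seq_len, feature_len]；並且50個點中前49個作為輸入、后49個作為預測目標，所以實際輸入為 [1, 49, 1]。）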

2. 代碼實現

import torch
import torch.nn as nn
import numpy as np
import torch.optim as optim
from matplotlib import pyplot as plt 

# Hyperparameters shared by the model definition and the training script.
input_size = 1     # feature_len: every time step is a single scalar
batch_size = 1     # number of curves fed to the network at once
hidden_size = 16   # number of hidden units (hidden_len)
num_layers = 1     # number of stacked RNN layers
output_size = 1    # one predicted value per time step


class Net(nn.Module):
    """RNN followed by a per-time-step linear read-out.

    Args:
        input_size: number of features per time step.
        hidden_size: number of hidden units in each RNN layer.
        num_layers: number of stacked RNN layers.
        output_size: number of values predicted per time step.

    All arguments default to the module-level hyperparameters, so ``Net()``
    builds the same network as before.
    """

    def __init__(self, input_size=input_size, hidden_size=hidden_size,
                 num_layers=num_layers, output_size=output_size):
        super().__init__()

        self.rnn = nn.RNN(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,  # inputs/outputs are [batch, seq_len, feature_len]
        )

        # Initialise every RNN weight and bias from N(0, 0.001^2).
        for p in self.rnn.parameters():
            nn.init.normal_(p, mean=0.0, std=0.001)

        # Output layer: maps each hidden vector to `output_size` values.
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden_prev=None):
        """Run the RNN over a whole sequence and project every step.

        Args:
            x: input of shape [batch, seq_len, input_size].
            hidden_prev: initial hidden state of shape
                [num_layers, batch, hidden_size]. ``batch_first`` only affects
                the input/output tensors, not the hidden state. ``None`` lets
                nn.RNN start from an all-zero state.

        Returns:
            A tuple ``(out, hidden)`` where ``out`` has shape
            [batch, seq_len, output_size] and ``hidden`` is the final hidden
            state of shape [num_layers, batch, hidden_size].
        """
        out, hidden_prev = self.rnn(x, hidden_prev)

        # nn.Linear acts on the last dimension, so it can be applied directly
        # to [batch, seq_len, hidden_size]. The previous flatten + unsqueeze(0)
        # merged the batch and time axes and was only correct for batch == 1.
        out = self.linear(out)

        return out, hidden_prev

# Training
learning_rate = 0.01

model = Net()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Initial hidden state h0. nn.RNN expects [num_layers, batch, hidden_size]
# even when batch_first=True (that flag only reorders the input/output).
hidden_prev = torch.zeros(num_layers, batch_size, hidden_size)

num_time_steps = 50    # number of points sampled in each window


for step in range(6000):
    # Random integer start time in {0, 1, 2} (randint's upper bound is exclusive).
    start = np.random.randint(3)
    # num_time_steps evenly spaced points in [start, start + 10].
    time_steps = np.linspace(start, start + 10, num_time_steps)
    # [num_time_steps] -> [num_time_steps, 1]
    data = np.sin(time_steps).reshape(num_time_steps, 1)
    # Input: the first 49 points (indices 0..48) as [batch, seq_len, feature_len].
    x = torch.tensor(data[:-1]).float().view(1, num_time_steps - 1, 1)
    # Target: the same curve shifted one step ahead (indices 1..49).
    y = torch.tensor(data[1:]).float().view(1, num_time_steps - 1, 1)

    output, hidden_prev = model(x, hidden_prev)
    # Carry the hidden state into the next iteration, but cut it out of the
    # autograd graph so backward() does not reach into earlier iterations.
    hidden_prev = hidden_prev.detach()

    loss = criterion(output, y)    # MSE between prediction and target
    optimizer.zero_grad()          # the optimizer holds every model parameter
    loss.backward()
    optimizer.step()

    if step % 1000 == 0:
        print("Iteration: {} loss {}".format(step, loss.item()))

    
# Evaluation
# Build one more (x, y) pair exactly as in training.
start = np.random.randint(3)
time_steps = np.linspace(start, start + 10, num_time_steps)
data = np.sin(time_steps).reshape(num_time_steps, 1)
x = torch.tensor(data[:-1]).float().view(1, num_time_steps - 1, 1)
y = torch.tensor(data[1:]).float().view(1, num_time_steps - 1, 1)

predictions = []

# Seed the rollout with the first ground-truth point, shaped
# [batch=1, seq_len=1, feature_len=1].
step_input = x[:, 0, :].view(1, 1, 1)

# No gradients are needed for inference. hidden_prev continues from the state
# left by the last training iteration.
with torch.no_grad():
    for _ in range(x.shape[1]):    # one prediction per input time step
        pred, hidden_prev = model(step_input, hidden_prev)
        step_input = pred          # feed each prediction back in as the next input
        predictions.append(pred.item())


x = x.numpy()
y = y.numpy()
plt.plot(time_steps[:-1], x.ravel())

plt.scatter(time_steps[:-1], x.ravel(), c='r')     # inputs x
plt.scatter(time_steps[1:], y.ravel(), c='y')      # targets y
plt.scatter(time_steps[1:], predictions, c='b')    # predicted y
plt.show()

Iteration: 0 loss 0.47239747643470764
Iteration: 1000 loss 0.0028104630764573812
Iteration: 2000 loss 0.00022502802312374115
Iteration: 3000 loss 0.00013326731277629733
Iteration: 4000 loss 0.00011971688218181953
Iteration: 5000 loss 0.00046832612133584917

3. 梯度裁剪

如果發生梯度爆炸，在上面代碼loss.backward() 與 optimizer.step() 之間要進行梯度裁剪：

    model.zero_grad()
    loss.backward()
    # Gradient clipping: rescale all gradients together so that their combined
    # L2 norm is at most 10. One call over every parameter keeps the gradient
    # direction intact; clipping each parameter separately would distort it.
    # The call returns the total norm measured before clipping, which can be
    # printed to monitor gradient magnitudes.
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)
    optimizer.step()

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯繫本站郵箱yoyou2525@163.com刪除。

猜您在找 基於 Keras 用深度學習預測時間序列在Python中使用LSTM和PyTorch進行時間序列預測（深度學習時序數據預測）深度學習與Pytorch入門實戰（十三）RNN 深度學習與Pytorch入門實戰（十五）LSTM 《深度學習與Pytorch入門實戰》2019 Pytorch-時間序列預測利用深度學習進行時間序列預測基於深度學習的時間序列預測系統——測試心得深度學習入門之PyTorch PyTorch學習筆記9--案例4,5: Pytorch LSTM 時間序列預測