Recurrent Neural Networks (RNN)
An RNN cell first updates its own hidden state and then produces an output. Its forward propagation is:
h_t = tanh(x_t · W_ih^T + b_ih + h_(t-1) · W_hh^T + b_hh)
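A minimal sketch of this update (the names xt and h_prev are illustrative, and the comparison against nn.RNNCell is only an added sanity check, not part of the original text):
import torch
import torch.nn as nn

torch.manual_seed(0)
cell = nn.RNNCell(input_size=4, hidden_size=3)   # PyTorch cell to compare against

xt = torch.randn(2, 4)        # one time step: [batch_size, input_size]
h_prev = torch.zeros(2, 3)    # previous hidden state: [batch_size, hidden_size]

# h_t = tanh(x_t @ W_ih^T + b_ih + h_{t-1} @ W_hh^T + b_hh)
h_t = torch.tanh(xt @ cell.weight_ih.T + cell.bias_ih
                 + h_prev @ cell.weight_hh.T + cell.bias_hh)

print(torch.allclose(h_t, cell(xt, h_prev)))  # True: matches nn.RNNCell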
Two ways to implement an RNN in PyTorch
nn.RNN()
Initialization: rnn = nn.RNN(input_size, hidden_size, num_layers). Usage: out, ht = rnn(x, h0), where:
- x is the whole input sequence, not a single step x(t); the entire sequence is fed in at once.
  x.shape: [seq_len, batch_size, input_size], where input_size is the word-vector dimension.
- h0 is optional; if omitted, it defaults to all zeros.
- ht: the hidden state of every layer at the last time step.
  ht.shape: [num_layers, batch_size, h_dim], where h_dim is hidden_size.
- out: the hidden state of the last layer at every time step (a quick check after the multi-layer example below verifies this).
  out.shape: [seq_len, batch_size, h_dim]
Single-layer RNN:
import torch
import torch.nn as nn

rnn = nn.RNN(input_size=100, hidden_size=20, num_layers=1)
print(rnn)
x = torch.randn(10, 3, 100)                # [seq_len, batch_size, input_size]
out, h = rnn(x, torch.zeros(1, 3, 20))     # h0: [num_layers, batch_size, hidden_size]
print(out.shape)                           # torch.Size([10, 3, 20])
print(h.shape)                             # torch.Size([1, 3, 20])
Multi-layer RNN:
import torch
import torch.nn as nn

rnn = nn.RNN(input_size=100, hidden_size=20, num_layers=4)
print(rnn)
x = torch.randn(10, 3, 100)   # [seq_len, batch_size, input_size]
out, h = rnn(x)               # h0 omitted, defaults to zeros
print(out.shape)              # torch.Size([10, 3, 20])  last layer, every step
print(h.shape)                # torch.Size([4, 3, 20])   every layer, last step
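A quick check of the out/ht relationship described above, reusing out and h from the block just shown (an added sanity check, not part of the original): the last time step of out is exactly the top-layer slice of h.
print(torch.allclose(out[-1], h[-1]))  # True: top layer's state at the last step appears in both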
nn.RNNCell()
- x is fed one time step at a time as x(t).
  x(t).shape: [batch_size, input_size]
- ht: the state of a single layer at time step t.
  ht.shape: [batch_size, hidden_size]
Single-layer RNN with RNNCell:
x = torch.randn(10, 3, 100)        # [seq_len, batch_size, input_size]
cell1 = nn.RNNCell(100, 20)
h1 = torch.zeros(3, 20)
for xt in x:                       # iterate over the seq_len dimension
    h1 = cell1(xt, h1)
print(h1.shape)                    # torch.Size([3, 20])
Two-layer RNN with RNNCell:
x = torch.randn(10, 3, 100)
cell1 = nn.RNNCell(100, 30)        # layer 1: 100 -> 30
cell2 = nn.RNNCell(30, 20)         # layer 2: 30 -> 20
h1 = torch.zeros(3, 30)
h2 = torch.zeros(3, 20)
for xt in x:
    h1 = cell1(xt, h1)
    h2 = cell2(h1, h2)             # layer 2 consumes layer 1's state
print(h2.shape)                    # torch.Size([3, 20])
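nn.RNNCell only hands back the final state, so recovering the per-step outputs that nn.RNN returns as out means stashing the top layer's state at every step yourself. A minimal sketch under that assumption (the outputs list is an illustrative name, not from the original):
x = torch.randn(10, 3, 100)
cell1 = nn.RNNCell(100, 30)
cell2 = nn.RNNCell(30, 20)
h1 = torch.zeros(3, 30)
h2 = torch.zeros(3, 20)
outputs = []
for xt in x:
    h1 = cell1(xt, h1)
    h2 = cell2(h1, h2)
    outputs.append(h2)            # top-layer state at this time step
out = torch.stack(outputs)        # [seq_len, batch_size, hidden_size] = [10, 3, 20]
print(out.shape)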
Predicting a sine wave with an RNN
The task: given sin(t) at each step, predict the value one step ahead; every training sample is a 49-point window of a sine curve starting at a random offset.
import torch
import torch.nn as nn
import numpy as np
import torch.optim as optim
from matplotlib import pyplot as plt
'''Hyperparameters'''
num_time_steps = 50
input_size = 1
hidden_size = 16
output_size = 1
num_layers = 1
lr=0.01
'''Network definition'''
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.rnn = nn.RNN(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True          # input/output shaped [batch, seq_len, feature]
        )
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden_prev):
        out, hidden_prev = self.rnn(x, hidden_prev)
        out = out.view(-1, hidden_size)   # flatten to [seq_len, hidden_size]
        out = self.linear(out)            # per-step prediction: [seq_len, output_size]
        out = out.unsqueeze(dim=0)        # restore the batch dim: [1, seq_len, output_size]
        return out, hidden_prev
'''Instantiate the network, loss, and optimizer'''
model = Net()
loss_function = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr)
hidden_prev = torch.zeros(num_layers, 1, hidden_size)   # initial hidden state
'''Training'''
for iter in range(6000):
    # Sample a 50-point sine segment starting at a random offset in [0, 3)
    start = np.random.randint(3, size=1)[0]
    time_steps = np.linspace(start, start + 10, num_time_steps)
    data = np.sin(time_steps)
    data = data.reshape(num_time_steps, 1)
    # Input is the first 49 points; the target is the same curve shifted one step ahead
    x = torch.tensor(data[:-1]).float().view(1, num_time_steps - 1, 1)
    y = torch.tensor(data[1:]).float().view(1, num_time_steps - 1, 1)

    output, hidden_prev = model(x, hidden_prev)
    hidden_prev = hidden_prev.detach()   # cut the graph so gradients do not flow across iterations

    loss = loss_function(output, y)
    model.zero_grad()
    loss.backward()
    optimizer.step()

    if iter % 100 == 0:
        print("Iteration: {} loss {}".format(iter, loss.item()))
'''Testing'''
start = np.random.randint(3, size=1)[0]
time_steps = np.linspace(start, start + 10, num_time_steps)
data = np.sin(time_steps)
data = data.reshape(num_time_steps, 1)
x = torch.tensor(data[:-1]).float().view(1, num_time_steps - 1, 1)
y = torch.tensor(data[1:]).float().view(1, num_time_steps - 1, 1)

predictions = []
input = x[:, 0, :]                 # start from the first ground-truth point
for _ in range(x.shape[1]):
    input = input.view(1, 1, 1)
    pred, hidden_prev = model(input, hidden_prev)
    input = pred                   # feed the prediction back in as the next input
    predictions.append(pred.detach().numpy().ravel()[0])
'''Plotting'''
x = x.data.numpy().ravel()
y = y.data.numpy()
plt.scatter(time_steps[:-1], x.ravel(), s=90)   # ground-truth input points
plt.plot(time_steps[:-1], x.ravel())
plt.scatter(time_steps[1:], predictions)        # model predictions, one step ahead
plt.show()