RNN input data preparation and an analysis of the recurrent computation flow (torch)
- RNN input sequence data preparation

1.1 Raw data
'''
text = [我,
        我 爱 你,
        爱,
        你
]
mapped to indices (我 -> 1, 爱 -> 2, 你 -> 3):
--> [[1], [1, 2, 3], [2], [3]]
'''
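For completeness, a minimal sketch of how such an index mapping could be produced; the vocab dict and raw_text list below are illustrative assumptions, not part of the original code:

# Hypothetical vocabulary mapping each token to an integer id (ids chosen to match the example above)
vocab = {"我": 1, "爱": 2, "你": 3}
raw_text = [["我"], ["我", "爱", "你"], ["爱"], ["你"]]
text = [[vocab[token] for token in sentence] for sentence in raw_text]
print(text)  # [[1], [1, 2, 3], [2], [3]]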
1.2 Building the dataset

import torch
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

text = [[1], [1, 2, 3], [2], [3]]
feature_size = 1

class Datasets(Dataset):
    def __init__(self, data, feature_size):
        self.data = data
        self.feature_size = feature_size
        # pad every sequence to the length of the longest one
        self.max_seq_len = max(len(i) for i in data)
        self.data_len = len(data)

    def __getitem__(self, index):
        # start from an all-zero tensor and copy the real sequence into its top rows
        seq_zero = torch.zeros(size=(self.max_seq_len, self.feature_size))
        seq_data = torch.tensor(self.data[index]).reshape(-1, self.feature_size)
        seq_zero[0:seq_data.shape[0], 0:seq_data.shape[1]] = seq_data
        return seq_zero

    def __len__(self):
        return self.data_len

data = Datasets(text, feature_size=feature_size)
print(data[0])
print(data[1])
''' As the output shows, each sequence has been padded to the length of the longest sequence
tensor([[1.],
[0.],
[0.]])
tensor([[1.],
[2.],
[3.]])
'''
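As a side note, the same fixed-length padding can also be obtained with torch's built-in pad_sequence helper; a minimal sketch (the variable names seqs and padded are mine):

from torch.nn.utils.rnn import pad_sequence

seqs = [torch.tensor(s, dtype=torch.float).reshape(-1, 1) for s in text]  # each: (seq_len, feature_size)
padded = pad_sequence(seqs, batch_first=True, padding_value=0.0)          # (batch, max_seq_len, feature_size)
print(padded.shape)  # torch.Size([4, 3, 1])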
1.3 Building batches
batch_size = 2
dataloader = DataLoader(dataset=data, batch_size=batch_size)
for i in dataloader:
    print(i)
    break
'''
tensor([[[1.],
[0.],
[0.]],
[[1.],
[2.],
[3.]]])
'''
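A possible refinement (not in the original code): a custom collate_fn can pad per batch and also return the true lengths, which the pack/pad step in section 3 needs. A sketch under that assumption; collate_pad is a hypothetical helper name:

from torch.nn.utils.rnn import pad_sequence

def collate_pad(batch_samples):
    # batch_samples is a list of raw index lists, e.g. [[1], [1, 2, 3]]
    lengths = torch.tensor([len(s) for s in batch_samples])
    seqs = [torch.tensor(s, dtype=torch.float).reshape(-1, 1) for s in batch_samples]
    return pad_sequence(seqs, batch_first=True), lengths

loader = DataLoader(text, batch_size=2, collate_fn=collate_pad)
for padded, lengths in loader:
    print(padded.shape, lengths)  # torch.Size([2, 3, 1]) tensor([1, 3])
    break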
- Verifying the RNN's internal step-by-step computation (how an RNN works)

2.1 Building the RNN model
class RNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.Rnn = nn.RNN(input_size=1
                          , hidden_size=2
                          , num_layers=1
                          , nonlinearity='tanh'
                          , bias=False
                          , batch_first=True
                          , dropout=0
                          , bidirectional=False
                          )

    def forward(self, x):
        # nn.RNN returns a tuple (output, h_n)
        x = self.Rnn(x)
        return x
2.2 RNN forward-pass output
rnn = RNN()
for i in dataloader:
    out, hn = rnn(i)
    print(out)
    print(hn)
    break
'''
tensor([[[ 0.2950, -0.1102],
[-0.1728, -0.0011],
[ 0.0981, -0.0234]],
[[ 0.2950, -0.1102],
[ 0.4083, -0.2188],
[ 0.5828, -0.3432]]], grad_fn=<TransposeBackward1>)
tensor([[[ 0.0981, -0.0234],
[ 0.5828, -0.3432]]], grad_fn=<StackBackward>)
'''
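Before reproducing these numbers by hand, it helps to look at the weight shapes; the two print calls below are my addition (weight_ih_l0 and weight_hh_l0 are the standard nn.RNN parameter names):

print(rnn.Rnn.weight_ih_l0.shape)  # torch.Size([2, 1]): input-to-hidden weights, W in the next step
print(rnn.Rnn.weight_hh_l0.shape)  # torch.Size([2, 2]): hidden-to-hidden weights, U in the next step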
2.3 Computing each RNN time step by hand
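The manual computation below follows the standard single-layer nn.RNN recurrence. Since the model was built with bias=False, it reduces to

h_t = tanh(x_t · W^T + h_{t-1} · U^T),  with h_0 = 0

where W is weight_ih_l0 (input-to-hidden, shape 2x1) and U is weight_hh_l0 (hidden-to-hidden, shape 2x2).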
W = rnn.Rnn.all_weights[0][0].data  # weight_ih_l0: input-to-hidden weights, shape (2, 1)
U = rnn.Rnn.all_weights[0][1].data  # weight_hh_l0: hidden-to-hidden weights, shape (2, 2)
h0 = torch.zeros(1, 2)              # initial hidden state
test1 = torch.tensor([[1], [0], [0]], dtype=torch.float)  # the first (padded) sample of the batch
h1 = torch.tanh(torch.mm(test1[0].unsqueeze(dim=0), torch.t(W)) + torch.mm(h0, torch.t(U)))
h2 = torch.tanh(torch.mm(test1[1].unsqueeze(dim=0), torch.t(W)) + torch.mm(h1, torch.t(U)))
h3 = torch.tanh(torch.mm(test1[2].unsqueeze(dim=0), torch.t(W)) + torch.mm(h2, torch.t(U)))
out1 = torch.stack((h1, h2, h3), dim=0)  # stack the three hidden states as the manual "output"
print(out1)
'''
tensor([[[ 0.2950, -0.1102]],
[[-0.1728, -0.0011]],
[[ 0.0981, -0.0234]]])
'''
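As a quick programmatic check (my addition; it assumes out from section 2.2 is still in scope), the hand-computed states should match the first sample of the RNN output:

# out has shape (batch=2, seq_len=3, hidden=2); out1 has shape (3, 1, 2)
print(torch.allclose(out1.squeeze(1), out[0], atol=1e-6))  # expected: True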
2.4 Verifying the result
![Comparison of the hand-computed states with the nn.RNN output](https://img-blog.csdnimg.cn/f43d0952d3d443eab37483509943d531.png?x-oss-process=image/watermark,type_d3F5LXplbmhlaQ,shadow_50,text_Q1NETiBA5oOz6ICD5Liq56CU,size_20,color_FFFFFF,t_70,g_se,x_16)
- Using torch's pack/pad to avoid wasted computation on the padded (zero) parts of short sequences

'''
As the figure above shows, the padded zero features keep taking part in the computation, yet the zeros only exist to fill out the matrix shape; computing on them is meaningless and wastes resources.
To address this, torch provides the pack/pad functions.
'''
a = torch.tensor([[[1.],
                   [0.],
                   [0.]],
                  [[1.],
                   [2.],
                   [3.]]])
print(rnn(a)[0])
'''
tensor([[[ 0.1739, -0.0812],
[-0.1102, 0.1351],
[ 0.1213, -0.1079]],
[[ 0.1739, -0.0812],
[ 0.2361, -0.0268],
[ 0.4040, -0.0814]]], grad_fn=<TransposeBackward1>)
'''
# derive each sample's true length by counting its non-zero entries, then sort by length
# in descending order, as required when enforce_sorted=True
sort = sorted([(torch.sum(a[i] > 0).item(), i) for i in range(a.shape[0])], key=lambda x: x[0], reverse=True)
sort_index = [i[1] for i in sort]
sort_len = [i[0] for i in sort]
pack_a = pack_padded_sequence(input=a[sort_index]
                              , lengths=sort_len
                              , batch_first=True
                              , enforce_sorted=True
                              )
print(pack_a)
print(pack_a)
'''
PackedSequence(data=tensor([[1.],
[1.],
[2.],
[3.]]), batch_sizes=tensor([2, 1, 1]), sorted_indices=None, unsorted_indices=None)
Interpretation: originally the batch has batch_size=2, with sample 1 [1,0,0] and sample 2 [1,2,3].
The two samples are processed together, step by step (like parallel threads): [1,1] is the pair computed at the first time step,
and [0,2], [0,3] at the remaining steps; because batch_size=2, torch effectively computes both samples at every step.
After packing, sample 1 becomes [1] while sample 2 stays [1,2,3], with batch_sizes=tensor([2, 1, 1]):
the first time step is still computed with batch size 2 ([1,1] together),
but the remaining steps run with batch size 1, computing [2] and [3] on their own, so the padded zeros are never processed.
'''
print(rnn(pack_a)[0])  # [0] selects the packed output; the second element of the tuple is h_n
'''
PackedSequence(data=tensor([[-0.4410, 0.5417],
[-0.4410, 0.5417],
[-0.7087, 0.9123],
[-0.8708, 0.9822]], grad_fn=<CatBackward>), batch_sizes=tensor([2, 1, 1]), sorted_indices=None, unsorted_indices=None)
'''
print(pad_packed_sequence(sequence=rnn(pack_a)[0]
                          , batch_first=True
                          , padding_value=0.0
                          , total_length=None))
'''
(tensor([[[-0.4410, 0.5417],
[-0.7087, 0.9123],
[-0.8708, 0.9822]],
[[-0.4410, 0.5417],
[ 0.0000, 0.0000],
[ 0.0000, 0.0000]]], grad_fn=<TransposeBackward0>), tensor([3, 1]))
'''
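As a closing note (my addition, not in the original post): the manual sorting above can be avoided by passing enforce_sorted=False, in which case the PackedSequence keeps sorted_indices/unsorted_indices and pad_packed_sequence restores the original batch order:

lengths = torch.sum(a.squeeze(-1) > 0, dim=1)  # tensor([1, 3]): true length of each sample
pack_b = pack_padded_sequence(input=a, lengths=lengths, batch_first=True, enforce_sorted=False)
padded_out, out_lens = pad_packed_sequence(rnn(pack_b)[0], batch_first=True)
print(padded_out.shape, out_lens)  # torch.Size([2, 3, 2]) tensor([1, 3])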