1. torch.permute函数
2. x.permute(0,2,1)
- 作用:将第1维和第2维进行互换;第0维保持不变
import torch

# Build a (3, 4, 5) tensor holding 0..59 so every element's origin is easy to trace.
x = torch.arange(60).reshape((3,4,5))
# Swap dims 1 and 2 (dim 0 stays put): y[i, k, j] == x[i, j, k].
y = x.permute(0,2,1)
print(f"x={x}")
print(f"x.shape={x.shape}")
print(f"y={y}")
print(f"y.shape={y.shape}")
x=tensor([[[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14],
[15, 16, 17, 18, 19]],
[[20, 21, 22, 23, 24],
[25, 26, 27, 28, 29],
[30, 31, 32, 33, 34],
[35, 36, 37, 38, 39]],
[[40, 41, 42, 43, 44],
[45, 46, 47, 48, 49],
[50, 51, 52, 53, 54],
[55, 56, 57, 58, 59]]])
x.shape=torch.Size([3, 4, 5])
y=tensor([[[ 0, 5, 10, 15],
[ 1, 6, 11, 16],
[ 2, 7, 12, 17],
[ 3, 8, 13, 18],
[ 4, 9, 14, 19]],
[[20, 25, 30, 35],
[21, 26, 31, 36],
[22, 27, 32, 37],
[23, 28, 33, 38],
[24, 29, 34, 39]],
[[40, 45, 50, 55],
[41, 46, 51, 56],
[42, 47, 52, 57],
[43, 48, 53, 58],
[44, 49, 54, 59]]])
y.shape=torch.Size([3, 5, 4])
3. x.permute(1,0,2)
- 作用:将第0维和第1维进行互换;具体操作如下图所示
x=tensor([[[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14],
[15, 16, 17, 18, 19]],
[[20, 21, 22, 23, 24],
[25, 26, 27, 28, 29],
[30, 31, 32, 33, 34],
[35, 36, 37, 38, 39]],
[[40, 41, 42, 43, 44],
[45, 46, 47, 48, 49],
[50, 51, 52, 53, 54],
[55, 56, 57, 58, 59]]])
x.shape=torch.Size([3, 4, 5])
y=tensor([[[ 0, 1, 2, 3, 4],
[20, 21, 22, 23, 24],
[40, 41, 42, 43, 44]],
[[ 5, 6, 7, 8, 9],
[25, 26, 27, 28, 29],
[45, 46, 47, 48, 49]],
[[10, 11, 12, 13, 14],
[30, 31, 32, 33, 34],
[50, 51, 52, 53, 54]],
[[15, 16, 17, 18, 19],
[35, 36, 37, 38, 39],
[55, 56, 57, 58, 59]]])
y.shape=torch.Size([4, 3, 5])
4. 方法
如果想把一个三维张量按最后一维进行分割,可以分作两步:(1)先对后两维组成的矩阵进行分割,并把新得到的“份数”这一维换到前面:(4,6) -> (4,2,3) -> (2,4,3);(2)再把最前面的两维合并成一维(维度大小相乘,2×2=4):(2,4,6) -> (2,4,2,3) -> (2,2,4,3) -> (4,4,3)
5. 测试
目标:将形状为(2,4,6)的矩阵A分割成形状为(4,4,3)的矩阵B,然后再将B复原为(2,4,6)
import torch
from torch import nn
# Demo input: the integers 0..47 laid out as a (2, 4, 6) tensor.
x = torch.arange(2 * 4 * 6).reshape((2, 4, 6))
def transpose_qvk(X, num_heads):
    """Split the last dimension of ``X`` into ``num_heads`` parts and fold them into dim 0.

    Shape walk-through for a 3-D input ``(B, T, D)``:
    ``(B, T, D)`` -> ``(B, T, num_heads, D // num_heads)``
    -> ``(B, num_heads, T, D // num_heads)``
    -> ``(B * num_heads, T, D // num_heads)``.

    The intermediate shapes are printed after the first reshape and after
    the permute.

    :param X: input tensor to split (a 3-D tensor in the accompanying demo);
        its trailing size must be divisible by ``num_heads`` or the first
        ``reshape`` fails.
    :param num_heads: number of parts to split the last dimension into.
    :return: the split tensor, with the parts stacked along dim 0.
    """
    # Carve the last axis into (num_heads, per-part width).
    X = X.reshape(X.shape[0], X.shape[1], num_heads, -1)
    print(f"X.shape={X.shape}")
    # Move the new parts axis next to dim 0 so the two can be merged.
    X = X.permute(0, 2, 1, 3)
    print(f"X.shape={X.shape}")
    # Merge dim 0 with the parts axis; the last two dims are kept as they are.
    return X.reshape(-1, X.shape[2], X.shape[3])
# Split the demo tensor into two parts, then show the input and the result.
y = transpose_qvk(X=x, num_heads=2)
print(
    f"x={x}",
    f"x.shape={x.shape}",
    f"y={y}",
    f"y.shape={y.shape}",
    sep="\n",
)
def transpose_outputs(X, num_heads):
    """Reverse the split done by ``transpose_qvk`` and restore the original layout.

    Shape walk-through for an input ``(B * num_heads, T, d)``:
    ``(B * num_heads, T, d)`` -> ``(B, num_heads, T, d)``
    -> ``(B, T, num_heads, d)`` -> ``(B, T, num_heads * d)``.

    :param X: the split tensor; its dim-0 size must be divisible by
        ``num_heads`` or the first ``reshape`` fails.
    :param num_heads: number of parts that was used for the split.
    :return: tensor in the layout it had before the split.
    """
    # Un-merge dim 0 back into (batch, parts).
    X = X.reshape(-1, num_heads, X.shape[1], X.shape[2])
    # Put the parts axis back next to the per-part width.
    X = X.permute(0, 2, 1, 3)
    # Concatenate the parts along the last axis.
    return X.reshape(X.shape[0], X.shape[1], -1)
# Undo the split and compare with the original input.
w = transpose_outputs(X=y, num_heads=2)
# `x == w` is an elementwise comparison, so the last line prints a boolean tensor.
print(
    f"w={w}",
    f"w.shape={w.shape}",
    f"x==w[判断是否还原成功,True表示成功,False表示失败]{x == w}",
    sep="\n",
)
X.shape=torch.Size([2, 4, 2, 3])
X.shape=torch.Size([2, 2, 4, 3])
x=tensor([[[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23]],
[[24, 25, 26, 27, 28, 29],
[30, 31, 32, 33, 34, 35],
[36, 37, 38, 39, 40, 41],
[42, 43, 44, 45, 46, 47]]])
x.shape=torch.Size([2, 4, 6])
y=tensor([[[ 0, 1, 2],
[ 6, 7, 8],
[12, 13, 14],
[18, 19, 20]],
[[ 3, 4, 5],
[ 9, 10, 11],
[15, 16, 17],
[21, 22, 23]],
[[24, 25, 26],
[30, 31, 32],
[36, 37, 38],
[42, 43, 44]],
[[27, 28, 29],
[33, 34, 35],
[39, 40, 41],
[45, 46, 47]]])
y.shape=torch.Size([4, 4, 3])
w=tensor([[[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23]],
[[24, 25, 26, 27, 28, 29],
[30, 31, 32, 33, 34, 35],
[36, 37, 38, 39, 40, 41],
[42, 43, 44, 45, 46, 47]]])
w.shape=torch.Size([2, 4, 6])
x==w[判断是否还原成功,True表示成功,False表示失败]tensor([[[True, True, True, True, True, True],
[True, True, True, True, True, True],
[True, True, True, True, True, True],
[True, True, True, True, True, True]],
[[True, True, True, True, True, True],
[True, True, True, True, True, True],
[True, True, True, True, True, True],
[True, True, True, True, True, True]]])
|