1. torch.permute函数

作用：按照给定的顺序，对张量（多维矩阵）的各个维度进行重新排列

2. x.permute(0,2,1)

作用：将第1维和第2维进行互换（维度从0开始编号，第0维保持不变）

# x can be read as a stack of 3 matrices, each with 4 rows and 5 columns.
x = torch.arange(3 * 4 * 5).reshape(3, 4, 5)
# Keep dim 0 in place and swap dim 1 with dim 2,
# i.e. transpose every (4, 5) matrix into a (5, 4) matrix.
y = torch.permute(x, (0, 2, 1))
print(f"x={x}")
print(f"x.shape={x.shape}")
print(f"y={y}")
print(f"y.shape={y.shape}")

x=tensor([[[ 0,  1,  2,  3,  4],
         [ 5,  6,  7,  8,  9],
         [10, 11, 12, 13, 14],
         [15, 16, 17, 18, 19]],

        [[20, 21, 22, 23, 24],
         [25, 26, 27, 28, 29],
         [30, 31, 32, 33, 34],
         [35, 36, 37, 38, 39]],

        [[40, 41, 42, 43, 44],
         [45, 46, 47, 48, 49],
         [50, 51, 52, 53, 54],
         [55, 56, 57, 58, 59]]])
x.shape=torch.Size([3, 4, 5])
y=tensor([[[ 0,  5, 10, 15],
         [ 1,  6, 11, 16],
         [ 2,  7, 12, 17],
         [ 3,  8, 13, 18],
         [ 4,  9, 14, 19]],

        [[20, 25, 30, 35],
         [21, 26, 31, 36],
         [22, 27, 32, 37],
         [23, 28, 33, 38],
         [24, 29, 34, 39]],

        [[40, 45, 50, 55],
         [41, 46, 51, 56],
         [42, 47, 52, 57],
         [43, 48, 53, 58],
         [44, 49, 54, 59]]])
y.shape=torch.Size([3, 5, 4])

（原文此处为示意图：每个 (4,5) 的矩阵被转置成 (5,4) 的矩阵，对应上面的输出结果。）

3. x.permute(1,0,2)

作用：将第0维和第1维进行互换（第2维保持不变）；具体结果如下所示

x=tensor([[[ 0,  1,  2,  3,  4],
         [ 5,  6,  7,  8,  9],
         [10, 11, 12, 13, 14],
         [15, 16, 17, 18, 19]],

        [[20, 21, 22, 23, 24],
         [25, 26, 27, 28, 29],
         [30, 31, 32, 33, 34],
         [35, 36, 37, 38, 39]],

        [[40, 41, 42, 43, 44],
         [45, 46, 47, 48, 49],
         [50, 51, 52, 53, 54],
         [55, 56, 57, 58, 59]]])
x.shape=torch.Size([3, 4, 5])
y=tensor([[[ 0,  1,  2,  3,  4],
         [20, 21, 22, 23, 24],
         [40, 41, 42, 43, 44]],

        [[ 5,  6,  7,  8,  9],
         [25, 26, 27, 28, 29],
         [45, 46, 47, 48, 49]],

        [[10, 11, 12, 13, 14],
         [30, 31, 32, 33, 34],
         [50, 51, 52, 53, 54]],

        [[15, 16, 17, 18, 19],
         [35, 36, 37, 38, 39],
         [55, 56, 57, 58, 59]]])
y.shape=torch.Size([4, 3, 5])

4. 方法

如果想把一个三维的矩阵沿最后一维分割成若干份，可以分作两步：
（1）先对后两维组成的矩阵进行分割并调换顺序：(4,6) -> (4,2,3) -> (2,4,3)
（2）再将最前面的两维合并（大小相乘）：(2,4,6) -> (2,4,2,3) -> (2,2,4,3) -> (4,4,3)

5. 测试

目标：将形状为 (2,4,6) 的矩阵 A 分割成形状为 (4,4,3) 的矩阵 B，再将 B 复原为形状 (2,4,6)

# -*- coding: utf-8 -*-
# @Project: zc
# @Author: zc
# @File name: permute_new_test
# @Create time: 2022/2/25 7:13
import torch
from torch import nn

# Sample input: the integers 0..47 laid out as a (2, 4, 6) tensor.
x = torch.arange(2 * 4 * 6).reshape((2, 4, 6))


def transpose_qvk(X, num_heads):
	"""
	Split the last dimension of X into num_heads pieces and fold the pieces into dim 0.

	The two intermediate shapes are printed along the way.

	:param X: input tensor; the walkthrough below uses shape (2, 4, 6)
	:param num_heads: number of pieces the last dimension is split into
	:return: the split tensor, e.g. (2, 4, 6) with num_heads=2 becomes (4, 4, 3)
	"""
	dim0, dim1 = X.shape[0], X.shape[1]
	# Step 1: cut the last dimension into num_heads pieces, (2,4,6) -> (2,4,2,3).
	X = X.reshape(dim0, dim1, num_heads, -1)
	print(f"X.shape={X.shape}")
	# Step 2: move the piece axis in front of dim 1, (2,4,2,3) -> (2,2,4,3),
	# so each (4,6) matrix has become two (4,3) matrices.
	X = torch.permute(X, (0, 2, 1, 3))
	print(f"X.shape={X.shape}")
	# Step 3: merge the two leading dimensions, (2,2,4,3) -> (2*2,4,3) = (4,4,3).
	_, _, rows, width = X.shape
	return X.reshape(-1, rows, width)


# Split x into 2 pieces along its last dimension (the call itself prints the
# two intermediate shapes), then show the input and the result.
y = transpose_qvk(x, 2)
print(f"x={x}")
print(f"x.shape={x.shape}")
print(f"y={y}")
print(f"y.shape={y.shape}")


def transpose_outputs(X, num_heads):
	"""
	Undo the split: restore a tensor to the layout it had before being split into num_heads pieces.

	:param X: the split tensor, e.g. shape (4, 4, 3)
	:param num_heads: number of pieces used when splitting
	:return: the tensor in its pre-split layout, e.g. (4, 4, 3) with num_heads=2 becomes (2, 4, 6)
	"""
	rows, width = X.shape[1], X.shape[2]
	# Un-merge dim 0 into (groups, num_heads), then swap dim 1 and dim 2 so the
	# pieces that belong to the same row sit next to each other again.
	regrouped = X.reshape(-1, num_heads, rows, width).permute(0, 2, 1, 3)
	# Merge dim 2 and dim 3 so the pieces of each row are joined back together.
	groups, rows = regrouped.shape[0], regrouped.shape[1]
	return regrouped.reshape(groups, rows, -1)


# Restore y to the pre-split layout and compare it with the original x.
w = transpose_outputs(y, 2)
print(f"w={w}")
print(f"w.shape={w.shape}")
# NOTE: `x == w` is an element-wise comparison, so this prints a boolean tensor
# (all True on success) rather than a single True/False; torch.equal(x, w)
# would give a single bool.
print(f"x==w[判断是否还原成功，True表示成功，False表示失败]{x == w}")

结果：

X.shape=torch.Size([2, 4, 2, 3])
X.shape=torch.Size([2, 2, 4, 3])
x=tensor([[[ 0,  1,  2,  3,  4,  5],
         [ 6,  7,  8,  9, 10, 11],
         [12, 13, 14, 15, 16, 17],
         [18, 19, 20, 21, 22, 23]],

        [[24, 25, 26, 27, 28, 29],
         [30, 31, 32, 33, 34, 35],
         [36, 37, 38, 39, 40, 41],
         [42, 43, 44, 45, 46, 47]]])
x.shape=torch.Size([2, 4, 6])
y=tensor([[[ 0,  1,  2],
         [ 6,  7,  8],
         [12, 13, 14],
         [18, 19, 20]],

        [[ 3,  4,  5],
         [ 9, 10, 11],
         [15, 16, 17],
         [21, 22, 23]],

        [[24, 25, 26],
         [30, 31, 32],
         [36, 37, 38],
         [42, 43, 44]],

        [[27, 28, 29],
         [33, 34, 35],
         [39, 40, 41],
         [45, 46, 47]]])
y.shape=torch.Size([4, 4, 3])
w=tensor([[[ 0,  1,  2,  3,  4,  5],
         [ 6,  7,  8,  9, 10, 11],
         [12, 13, 14, 15, 16, 17],
         [18, 19, 20, 21, 22, 23]],

        [[24, 25, 26, 27, 28, 29],
         [30, 31, 32, 33, 34, 35],
         [36, 37, 38, 39, 40, 41],
         [42, 43, 44, 45, 46, 47]]])
w.shape=torch.Size([2, 4, 6])
x==w[判断是否还原成功，True表示成功，False表示失败]tensor([[[True, True, True, True, True, True],
         [True, True, True, True, True, True],
         [True, True, True, True, True, True],
         [True, True, True, True, True, True]],

        [[True, True, True, True, True, True],
         [True, True, True, True, True, True],
         [True, True, True, True, True, True],
         [True, True, True, True, True, True]]])