1. Custom activation function operators
To define a custom activation function in PyTorch, you subclass torch.autograd.Function and implement two static methods (decorated with @staticmethod): forward and backward.
Examples:
import torch
from torch.autograd import Function

class Exp(Function):
    @staticmethod
    def forward(ctx, input):
        result = input.exp()
        # Save the output: d(exp(x))/dx = exp(x), so it is exactly what backward needs
        ctx.save_for_backward(result)
        return result

    @staticmethod
    def backward(ctx, grad_output):
        result, = ctx.saved_tensors
        return grad_output * result

exp = Exp.apply
input = torch.randn(10, requires_grad=True, dtype=torch.double)
output = exp(input)
# Numerically verify the backward implementation against finite differences
torch.autograd.gradcheck(exp, torch.randn(10, requires_grad=True, dtype=torch.double))
- forward: takes the input tensor(s) (there may be more than one) and returns the corresponding output tensor(s) (also possibly more than one);
- backward: takes the gradient(s) of the output tensor(s) (possibly several, in one-to-one correspondence with the outputs) and returns the gradient(s) of the input tensor(s) (possibly several, in one-to-one correspondence with the inputs). The correspondence is required because, during backpropagation, every tensor in the computation graph is bound to its own gradient: each output tensor is paired with an output gradient, and each input tensor is paired with an input gradient;
Regarding the class torch.autograd.Function:
- The ctx argument is the computation context (Context). It is a special argument used to share tensors between the forward pass and the backward pass. A common pattern is to save the input (or intermediate) tensors on ctx in forward, then read them back in backward and combine them with the upstream gradient grad_output to compute the gradient grad_input passed to the previous layer (see the sketch after this list);
- Since forward and backward are both static methods, the class never needs to be instantiated and can be used directly as a function (via its apply attribute);
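To make the one-to-one correspondence and the role of ctx concrete, here is a minimal sketch (a hypothetical element-wise multiply, not taken from the text above) with two tensor inputs and therefore two returned gradients:

import torch
from torch.autograd import Function

class Mul(Function):
    @staticmethod
    def forward(ctx, a, b):
        # each input is needed later to form the other input's gradient
        ctx.save_for_backward(a, b)
        return a * b

    @staticmethod
    def backward(ctx, grad_output):
        a, b = ctx.saved_tensors
        # return one gradient per forward input, in the same order;
        # None means "no gradient required for this input"
        grad_a = grad_output * b if ctx.needs_input_grad[0] else None
        grad_b = grad_output * a if ctx.needs_input_grad[1] else None
        return grad_a, grad_b

a = torch.randn(5, dtype=torch.double, requires_grad=True)
b = torch.randn(5, dtype=torch.double, requires_grad=True)
torch.autograd.gradcheck(Mul.apply, (a, b))

Returning None for an input that needs no gradient is the same convention the involution operator below uses for its non-tensor arguments (stride, padding, dilation).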
2. A custom involution operator (CUDA)
The same pattern applies to operators backed by hand-written CUDA kernels; below, the involution operator wraps its forward and backward kernels in a torch.autograd.Function.
import torch
from torch.autograd import Function

# load_kernel, Dtype, Stream, CUDA_NUM_THREADS, GET_BLOCKS and the
# _involution_kernel* CUDA source strings are helper utilities assumed to be
# defined elsewhere in the same module.

class _involution(Function):
    @staticmethod
    def forward(ctx, input, weight, stride, padding, dilation):
        assert input.dim() == 4 and input.is_cuda
        assert weight.dim() == 6 and weight.is_cuda
        batch_size, channels, height, width = input.size()
        kernel_h, kernel_w = weight.size()[2:4]
        # standard convolution output-size formula
        output_h = int((height + 2 * padding[0] - (dilation[0] * (kernel_h - 1) + 1)) / stride[0] + 1)
        output_w = int((width + 2 * padding[1] - (dilation[1] * (kernel_w - 1) + 1)) / stride[1] + 1)
        output = input.new(batch_size, channels, output_h, output_w)
        n = output.numel()
        with torch.cuda.device_of(input):
            # compile (or fetch from cache) the forward CUDA kernel for this configuration
            f = load_kernel('involution_forward_kernel', _involution_kernel,
                            Dtype=Dtype(input), nthreads=n,
                            num=batch_size, channels=channels, groups=weight.size()[1],
                            bottom_height=height, bottom_width=width,
                            top_height=output_h, top_width=output_w,
                            kernel_h=kernel_h, kernel_w=kernel_w,
                            stride_h=stride[0], stride_w=stride[1],
                            dilation_h=dilation[0], dilation_w=dilation[1],
                            pad_h=padding[0], pad_w=padding[1])
            # launch: one thread per output element
            f(block=(CUDA_NUM_THREADS, 1, 1), grid=(GET_BLOCKS(n), 1, 1),
              args=[input.data_ptr(), weight.data_ptr(), output.data_ptr()],
              stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))
        # stash what backward needs: tensors via save_for_backward, plain Python values on ctx
        ctx.save_for_backward(input, weight)
        ctx.stride, ctx.padding, ctx.dilation = stride, padding, dilation
        return output

    @staticmethod
    def backward(ctx, grad_output):
        assert grad_output.is_cuda and grad_output.is_contiguous()
        input, weight = ctx.saved_tensors
        stride, padding, dilation = ctx.stride, ctx.padding, ctx.dilation
        batch_size, channels, height, width = input.size()
        kernel_h, kernel_w = weight.size()[2:4]
        output_h, output_w = grad_output.size()[2:]
        grad_input, grad_weight = None, None
        opt = dict(Dtype=Dtype(grad_output),
                   num=batch_size, channels=channels, groups=weight.size()[1],
                   bottom_height=height, bottom_width=width,
                   top_height=output_h, top_width=output_w,
                   kernel_h=kernel_h, kernel_w=kernel_w,
                   stride_h=stride[0], stride_w=stride[1],
                   dilation_h=dilation[0], dilation_w=dilation[1],
                   pad_h=padding[0], pad_w=padding[1])
        with torch.cuda.device_of(input):
            # gradient w.r.t. the input feature map
            if ctx.needs_input_grad[0]:
                grad_input = input.new(input.size())
                n = grad_input.numel()
                opt['nthreads'] = n
                f = load_kernel('involution_backward_grad_input_kernel',
                                _involution_kernel_backward_grad_input, **opt)
                f(block=(CUDA_NUM_THREADS, 1, 1),
                  grid=(GET_BLOCKS(n), 1, 1),
                  args=[grad_output.data_ptr(), weight.data_ptr(), grad_input.data_ptr()],
                  stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))
            # gradient w.r.t. the position-specific involution weight
            if ctx.needs_input_grad[1]:
                grad_weight = weight.new(weight.size())
                n = grad_weight.numel()
                opt['nthreads'] = n
                f = load_kernel('involution_backward_grad_weight_kernel',
                                _involution_kernel_backward_grad_weight, **opt)
                f(block=(CUDA_NUM_THREADS, 1, 1),
                  grid=(GET_BLOCKS(n), 1, 1),
                  args=[grad_output.data_ptr(), input.data_ptr(), grad_weight.data_ptr()],
                  stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))
        # stride, padding and dilation are non-tensor inputs, so their gradients are None
        return grad_input, grad_weight, None, None, None
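For completeness, a hypothetical usage sketch. It assumes the helper utilities referenced above (load_kernel, Dtype, Stream, CUDA_NUM_THREADS, GET_BLOCKS and the CUDA kernel source strings) are available in the same module, that a CUDA device is present, and that the weight is laid out as (batch, groups, kernel_h, kernel_w, output_h, output_w), as implied by the assertions and indexing in forward:

# Hypothetical shapes, chosen so that output_h == weight.size(4) and output_w == weight.size(5)
x = torch.randn(2, 16, 32, 32, device='cuda')                            # (N, C, H, W)
w = torch.randn(2, 4, 3, 3, 32, 32, device='cuda', requires_grad=True)   # (N, groups, K_h, K_w, H_out, W_out)

involution = _involution.apply
out = involution(x, w, (1, 1), (1, 1), (1, 1))   # stride, padding, dilation
print(out.shape)                                  # torch.Size([2, 16, 32, 32])

# backward dispatches only to the grad_weight kernel here, because only w requires grad
out.sum().backward()
print(w.grad.shape)                               # torch.Size([2, 4, 3, 3, 32, 32])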