原理
FPN总体架构主要包含自下而上网络、自上而下网络、横向连接与卷积融合4个部分。
自下而上:C2到C5代表不同的ResNet卷积组,这些卷积组包含了多个Bottleneck结构,组内的特征图大小相同,组间大小递减。
自上而下:首先对C5进行1x1卷积降低通道数得到P5,然后对P5依次进行上采样得到P4、P3、P2。目的是得到与C4、C3、C2长宽相同的特征,方便下一步进行逐元素相加。论文中采用2倍最近邻上采样(直接对临近元素进行复制,而非线性插值);注意下文的代码实现使用的是双线性插值。
横向连接(Lateral Connection):目的是将上采样后的高语义特征与浅层的定位细节进行融合。高语义特征经过上采样后,其长宽与对应的浅层特征相同,而通道数固定为256。因此需要对特征C2~C4进行1x1卷积,使其通道数变为256,然后两者进行逐元素相加得到P4、P3与P2。
卷积融合:在得到相加后的特征后,利用3x3卷积对生成的P2、P3、P4进行融合,目的是消除上采样过程中带来的混叠效应(aliasing effect),以生成最终的特征图。
代码实现
# FPN 金字塔网络
# 主要包含自下而上网络、自上而下网络、横向连接与卷积融合4个部分
import torch
import torch.nn as nn
import torch.nn.functional as F
class Bottleneck(nn.Module):
    """ResNet bottleneck residual block (1x1 -> 3x3 -> 1x1 convolutions).

    The main branch reduces the input to ``planes`` channels, applies a 3x3
    convolution (optionally strided), and expands to
    ``expansion * planes`` channels. The result is added to the shortcut
    branch and passed through a final ReLU.

    Args:
        in_planes: Number of channels of the input tensor.
        planes: Number of channels inside the bottleneck; the block outputs
            ``expansion * planes`` channels.
        stride: Stride of the 3x3 convolution.
        downsample: Optional module applied to the input to produce the
            shortcut. It must be supplied by the caller whenever the input
            and output shapes of the main branch differ; when ``None`` the
            input itself is used as the shortcut.
    """

    # Ratio between the block's output channels and ``planes``.
    expansion = 4

    def __init__(self, in_planes, planes, stride=1, downsample=None):
        super().__init__()
        out_planes = self.expansion * planes
        self.bottleneck = nn.Sequential(
            # 1x1: reduce channels.
            nn.Conv2d(in_planes, planes, 1, bias=False),
            nn.BatchNorm2d(planes),
            nn.ReLU(inplace=True),
            # 3x3: spatial convolution, carries the stride.
            nn.Conv2d(planes, planes, 3, stride, 1, bias=False),
            nn.BatchNorm2d(planes),
            nn.ReLU(inplace=True),
            # 1x1: expand channels; no ReLU here, it is applied after the add.
            nn.Conv2d(planes, out_planes, 1, bias=False),
            nn.BatchNorm2d(out_planes),
        )
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(bottleneck(x) + shortcut(x))``."""
        identity = x
        out = self.bottleneck(x)
        if self.downsample is not None:
            # Project the input so it matches the main branch's output shape.
            identity = self.downsample(x)
        out += identity  # residual shortcut
        return self.relu(out)
class FPN(nn.Module):
    """Feature Pyramid Network built on a ResNet-style Bottleneck backbone.

    The network has four parts: a bottom-up backbone (C1 stem plus stages
    C2-C5), a top-down upsampling path, 1x1 lateral connections, and 3x3
    smoothing convolutions applied to the merged maps.

    Args:
        layers: Sequence whose first four entries give the number of
            Bottleneck blocks in stages C2, C3, C4 and C5.
    """

    def __init__(self, layers):
        super().__init__()
        # Channel count fed into the next stage; _make_layer updates it.
        self.inplanes = 64

        # C1: the stem convolution and pooling layers that precede the
        # residual stages of the ResNet backbone.
        self.conv1 = nn.Conv2d(3, 64, 7, 2, 3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(3, 2, 1)

        # Bottom-up stages C2, C3, C4, C5 (256, 512, 1024, 2048 channels).
        self.layer1 = self._make_layer(64, layers[0])
        self.layer2 = self._make_layer(128, layers[1], 2)
        self.layer3 = self._make_layer(256, layers[2], 2)
        self.layer4 = self._make_layer(512, layers[3], 2)

        # C5 -> 1x1 conv -> P5: reduce the channel count to 256.
        self.toplayer = nn.Conv2d(2048, 256, 1, 1, 0)

        # 3x3 convolutions that smooth the merged maps P4, P3, P2.
        self.smooth1 = nn.Conv2d(256, 256, 3, 1, 1)
        self.smooth2 = nn.Conv2d(256, 256, 3, 1, 1)
        self.smooth3 = nn.Conv2d(256, 256, 3, 1, 1)

        # Lateral 1x1 convolutions: bring C4, C3, C2 to 256 channels so they
        # can be added element-wise to the upsampled top-down features.
        self.latlayer1 = nn.Conv2d(1024, 256, 1, 1, 0)
        self.latlayer2 = nn.Conv2d(512, 256, 1, 1, 0)
        self.latlayer3 = nn.Conv2d(256, 256, 1, 1, 0)

    def _make_layer(self, planes, blocks, stride=1):
        """Build one backbone stage (C2-C5) made of ``blocks`` Bottlenecks.

        Only the first block receives ``stride`` and the projection
        shortcut; the remaining blocks keep the resolution and channels.
        ``self.inplanes`` is updated to the stage's output channel count.
        """
        out_planes = Bottleneck.expansion * planes
        downsample = None
        if stride != 1 or self.inplanes != out_planes:
            # Projection shortcut: a strided 1x1 conv + BN so the identity
            # branch matches the shape of the first block's main branch.
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes, 1, stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        stage = [Bottleneck(self.inplanes, planes, stride, downsample)]
        self.inplanes = out_planes
        for _ in range(1, blocks):
            stage.append(Bottleneck(self.inplanes, planes))
        return nn.Sequential(*stage)

    def _upsample_add(self, x, y):
        """Resize ``x`` to the spatial size of ``y`` and add them.

        This is the top-down merge step (P5 -> P4 -> P3 -> P2). An explicit
        target size is used rather than a fixed scale factor, so the sum is
        well-defined even when ``y`` is not exactly twice the size of ``x``.

        NOTE: bilinear interpolation with ``align_corners=True`` is used
        here, whereas the accompanying description mentions nearest-neighbour
        upsampling.
        """
        _, _, height, width = y.shape
        upsampled = F.interpolate(
            x, size=(height, width), mode="bilinear", align_corners=True
        )
        return upsampled + y

    def forward(self, x):
        """Return the pyramid feature maps ``(p2, p3, p4, p5)``.

        Each returned map has 256 channels; ``p2`` comes from the
        highest-resolution stage and ``p5`` from the lowest.
        """
        # Bottom-up pathway.
        c1 = self.maxpool(self.relu(self.bn1(self.conv1(x))))
        c2 = self.layer1(c1)
        c3 = self.layer2(c2)
        c4 = self.layer3(c3)
        c5 = self.layer4(c4)

        # Top-down pathway with lateral connections (1x1 conv on C4/C3/C2
        # before the element-wise add).
        p5 = self.toplayer(c5)
        p4 = self._upsample_add(p5, self.latlayer1(c4))
        p3 = self._upsample_add(p4, self.latlayer2(c3))
        p2 = self._upsample_add(p3, self.latlayer3(c2))

        # 3x3 smoothing of the merged maps; P5 is returned unsmoothed.
        p4 = self.smooth1(p4)
        p3 = self.smooth2(p3)
        p2 = self.smooth3(p2)
        return p2, p3, p4, p5
if __name__ == '__main__':
    # Smoke test: build an FPN on a ResNet-50 style backbone and print the
    # shape of every pyramid level for one random 300x300 RGB image.
    # Bottleneck counts for stages C2-C5.
    layers_50 = [3, 4, 6, 3]
    layers_101 = [3, 4, 23, 3]  # ResNet-101 uses 3 (not 2) blocks in C2
    net = FPN(layers_50)
    print(net)
    fms = net(torch.randn(1, 3, 300, 300))
    for fm in fms:
        print(fm.size())
参考文章:
FPN网络结构及Pytorch实现_乐亦亦乐的博客-CSDN博客_fpn网络pytorch实现
|