FCN Paper Close Reading
Key contributions of the paper
- Adapts classification networks into fully convolutional networks: the fully connected layers are converted into convolutional layers, and upsampling is performed with transposed convolutions (deconvolutions); a minimal sketch of the FC-to-conv conversion follows this list
- Fine-tunes the network with transfer learning from a pretrained classification model
- Uses a skip architecture to combine local and global information, producing accurate and detailed segmentations
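A minimal sketch (my own illustration, not the paper's or this project's code) of converting a fully connected layer into an equivalent convolution, which is what lets the network accept inputs of any size and output a spatial score map:

import torch
from torch import nn

# a fully connected layer over flattened 512x7x7 features...
fc = nn.Linear(512 * 7 * 7, 4096)

# ...is equivalent to a 7x7 convolution over the 512-channel feature map
conv = nn.Conv2d(512, 4096, kernel_size=7)
conv.weight.data = fc.weight.data.view(4096, 512, 7, 7)
conv.bias.data = fc.bias.data

x = torch.randn(1, 512, 7, 7)
out_fc = fc(x.flatten(1))        # shape (1, 4096)
out_conv = conv(x)               # shape (1, 4096, 1, 1)
print(torch.allclose(out_fc, out_conv.flatten(1), atol=1e-5))  # True

On a larger input the converted layer simply slides over the whole feature map and produces a coarse score map instead of a single vector.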
Background knowledge
Receptive field
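The original illustration is omitted here. For reference (a standard formula, not taken from the paper), the receptive field of layer $l$ can be computed recursively as

$$RF_l = RF_{l-1} + (k_l - 1)\prod_{i=1}^{l-1} s_i, \qquad RF_0 = 1,$$

where $k_l$ is the kernel size of layer $l$ and $s_i$ is the stride of layer $i$; deeper layers therefore see a larger region of the input image.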
Global vs. local information
Global information:
- Extracted by the deeper layers of the network
- Larger receptive field
- Helps segment larger objects
Local information:
- Extracted by the shallower layers of the network
- Smaller receptive field
- Helps segment smaller objects
Network architecture
Training tips
- Load a pretrained backbone
- Initialize the transposed-convolution (deconvolution) weights
- The model only starts to perform reasonably well after at least 175 epochs
- Adjust the learning rate after 100 epochs
- Feature maps before pool3 do not need to be fused (fusing them hurts performance)
Implementation
Configuration file: cfg.py
BATCH_SIZE = 4
EPOCH_NUMBER = 200
DATASET = ['CamVid', 12]
crop_size = (352, 480)  # chosen so the spatial size stays divisible through the successive downsampling stages
class_dict_path = './Datasets/' + DATASET[0] + '/class_dict.csv'
TRAIN_ROOT = './Datasets/' + DATASET[0] + '/train'
TRAIN_LABEL = './Datasets/' + DATASET[0] + '/train_labels'
VAL_ROOT = './Datasets/' + DATASET[0] + '/val'
VAL_LABEL = './Datasets/' + DATASET[0] + '/val_labels'
TEST_ROOT = './Datasets/' + DATASET[0] + '/test'
TEST_LABEL = './Datasets/' + DATASET[0] + '/test_labels'
Dataset handling: dataset.py
Imports
import pandas as pd
import os
import torch as t
import numpy as np
import torchvision.transforms.functional as ff
from torch.utils.data import Dataset
from PIL import Image
import torchvision.transforms as transforms
import cfg
Encoding the label images
Why encode the label images?
In image segmentation the labels are color-coded images, in which each class is painted with a fixed RGB color (the example figure is omitted here).
After the forward pass (and an argmax over the class dimension) we get a tensor of shape (4, 352, 480), where dimension 0 is the batch size and every pixel of the 2-D map holds the predicted class index; the label images therefore have to be converted into the same kind of per-pixel class-index data.
Implementation
- Build the color_map: parse the CSV file and collect the 12 RGB triplets into a list (color_map)
- From color_map build a lookup (hash) table of size 256**3, in which 12 entries map to the 12 class labels
- Finally, hash the three RGB channels of an image pixel-wise to get a map of hash indices, then use the lookup table to quickly turn every pixel into its class label
class LabelProcessor:  # corresponds to data process and load.ipynb: 1. process the colormap data in the label file
"""Encode the label images."""
def __init__(self, file_path):
self.colormap = self.read_color_map(file_path)
self.cm2lbl = self.encode_label_pix(self.colormap)
# Static-method decorator: essentially an ordinary function defined inside the class, still callable as self.<name>
# Inside a static method you cannot access instance attributes or the instance itself (no self)
# Static methods are used here mainly for program design (cleaner code, encapsulation of functionality)
@staticmethod
def read_color_map(file_path):  # data process and load.ipynb: process the colormap data in the label file
pd_label_color = pd.read_csv(file_path, sep=',')
colormap = []
for i in range(len(pd_label_color.index)):
tmp = pd_label_color.iloc[i]
color = [tmp['r'], tmp['g'], tmp['b']]
colormap.append(color)
return colormap
@staticmethod
def encode_label_pix(colormap):  # data process and load.ipynb: label encoding, returns the lookup table
cm2lbl = np.zeros(256 ** 3)  # there are 256**3 possible colors in total
# mark the 12 colors actually used with their label indices
for i, cm in enumerate(colormap):
cm2lbl[(cm[0] * 256 + cm[1]) * 256 + cm[2]] = i
return cm2lbl
def encode_label_img(self, img):
data = np.array(img, dtype='int32')
# an image as np.array has shape (h, w, 3)
idx = (data[:, :, 0] * 256 + data[:, :, 1]) * 256 + data[:, :, 2]
return np.array(self.cm2lbl[idx], dtype='int64')
Loading the dataset
Note: when loading the dataset, the original images and the label images must be loaded together.
Usage of ff.center_crop()
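A quick illustration of torchvision.transforms.functional.center_crop (the sizes here are only for demonstration): it crops the center region of a PIL image (or tensor) to the given (height, width):

from PIL import Image
import torchvision.transforms.functional as ff

img = Image.new('RGB', (480, 360))         # PIL size is (width, height)
cropped = ff.center_crop(img, (352, 480))  # output_size is (height, width)
print(cropped.size)                        # (480, 352)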
# first create a LabelProcessor instance; it is used inside the dataset-loading class
label_processor = LabelProcessor(cfg.class_dict_path)
class LoadDataset(Dataset):
def __init__(self, file_path=[], crop_size=None):
"""para:
file_path(list): 数据和标签路径,列表元素第一个为图片路径,第二个为标签路径
"""
# 1 正确读入图片和标签路径
if len(file_path) != 2:
raise ValueError("同时需要图片和标签文件夹的路径,图片路径在前")
self.img_path = file_path[0]
self.label_path = file_path[1]
# 2. collect the image and label file names into two lists (each entry is the full path of an image)
self.imgs = self.read_file(self.img_path)
self.labels = self.read_file(self.label_path)
# 3. store the data-processing settings
self.crop_size = crop_size
def __getitem__(self, index):
img = self.imgs[index]
label = self.labels[index]
# load the data from the file paths (both images and labels are PNG images)
img = Image.open(img)  # img is a PIL Image whose size is (width, height); it is a 3-channel color image
label = Image.open(label).convert('RGB')
# crop img and label together
img, label = self.center_crop(img, label, self.crop_size)
# transform img and label together
img, label = self.img_transform(img, label)
sample = {'img': img, 'label': label}
return sample
def __len__(self):
return len(self.imgs)
def read_file(self, path):
"""从文件夹中读取数据"""
files_list = os.listdir(path) # 获得一个列表,其中包含该文件夹下所有文件的名称
file_path_list = [os.path.join(path, img) for img in files_list] # 获得一个列表,其中包含所有图片的绝对路径
file_path_list.sort()
return file_path_list
def center_crop(self, data, label, crop_size):
"""裁剪输入的图片和标签大小"""
data = ff.center_crop(data, crop_size)
label = ff.center_crop(label, crop_size)
return data, label
def img_transform(self, img, label):
"""对图片和标签做一些数值处理"""
label = np.array(label) # 以免不是np格式的数据
# 处理标签图像的函数接受image,所以还得将label转回image
label = Image.fromarray(label.astype('uint8')) # 把所有像素值转为整型
transform_img = transforms.Compose(
[
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]
)
img = transform_img(img)
label = label_processor.encode_label_img(label)
label = t.from_numpy(label)
return img, label
Building the model: FCN.py
Imports
import numpy as np
import torch
from torchvision import models
from torch import nn
Custom bilinear-interpolation kernel
Initialize the transposed-convolution kernels with bilinear interpolation
Linear vs. bilinear interpolation
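For reference (a standard textbook formula, not specific to this project), interpolating at a fractional position $(x, y)$, with $x, y \in [0, 1]$ measured from the top-left of the four surrounding pixels $f_{00}, f_{10}, f_{01}, f_{11}$, gives

$$f(x, y) \approx f_{00}(1-x)(1-y) + f_{10}\,x(1-y) + f_{01}(1-x)\,y + f_{11}\,xy,$$

i.e. linear interpolation applied first along one axis and then along the other; the kernel built below bakes exactly these weights into a fixed transposed-convolution filter.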
Implementation
Usage of numpy.ogrid()
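A quick example of what np.ogrid returns (an "open" mesh grid: a column vector and a row vector that broadcast against each other):

import numpy as np

og = np.ogrid[:3, :3]
print(og[0])          # [[0] [1] [2]]  -- shape (3, 1)
print(og[1])          # [[0 1 2]]      -- shape (1, 3)
print(og[0] + og[1])  # broadcasts to a full (3, 3) grid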
Note: in this project the returned kernel must have shape [in_channels, out_channels, kernel_size, kernel_size], matching the weight parameter of ConvTranspose2d as described in the official docs (weight is one of the layer's Variables).
def bilinear_kernel(in_channels, out_channels, kernel_size):
"""Define a bilinear kernel according to in channels and out channels.
Returns:
return a bilinear filter tensor
"""
factor = (kernel_size + 1) // 2
if kernel_size % 2 == 1:
center = factor - 1
else:
center = factor - 0.5
og = np.ogrid[:kernel_size, :kernel_size]
bilinear_filter = (1 - abs(og[0] - center) / factor) * (1 - abs(og[1] - center) / factor)
weight = np.zeros((in_channels, out_channels, kernel_size, kernel_size), dtype=np.float32)
weight[range(in_channels), range(out_channels), :, :] = bilinear_filter
return torch.from_numpy(weight)
Kernel output
For example, bilinear_kernel(1, 1, 3) returns:
tensor([[[[0.2500, 0.5000, 0.2500],
[0.5000, 1.0000, 0.5000],
[0.2500, 0.5000, 0.2500]]]])
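A small sanity check (illustrative only, not part of the original code), reusing the bilinear_kernel defined above: plugging such a kernel into a ConvTranspose2d turns it into a fixed bilinear-style upsampler.

import torch
from torch import nn

up = nn.ConvTranspose2d(1, 1, kernel_size=4, stride=2, padding=1, bias=False)
up.weight.data = bilinear_kernel(1, 1, 4)

x = torch.arange(16, dtype=torch.float32).reshape(1, 1, 4, 4)
y = up(x)
print(y.shape)  # torch.Size([1, 1, 8, 8]) -- spatial size doubled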
Building the network
Output-size formulas for convolution and transposed convolution
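The original figure is omitted; for completeness, the standard formulas (as in the PyTorch docs, ignoring dilation and output_padding) are

$$o_{\text{conv}} = \left\lfloor \frac{i + 2p - k}{s} \right\rfloor + 1, \qquad o_{\text{deconv}} = (i - 1)\,s - 2p + k,$$

where $i$ and $o$ are the input and output spatial sizes, $k$ the kernel size, $s$ the stride and $p$ the padding. For example, the upsample_8x layer below ($k=16$, $s=8$, $p=4$) maps $i=44$ to $o = 43 \cdot 8 - 8 + 16 = 352$.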
Structure of the vgg16_bn feature extractor
Sequential(
(0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ReLU(inplace=True)
(6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(9): ReLU(inplace=True)
(10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(12): ReLU(inplace=True)
(13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(14): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(15): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(16): ReLU(inplace=True)
(17): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(18): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(19): ReLU(inplace=True)
(20): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(21): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(22): ReLU(inplace=True)
(23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(24): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(25): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(26): ReLU(inplace=True)
(27): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(28): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(29): ReLU(inplace=True)
(30): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(31): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(32): ReLU(inplace=True)
(33): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(34): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(35): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(36): ReLU(inplace=True)
(37): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(38): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(39): ReLU(inplace=True)
(40): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(41): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(42): ReLU(inplace=True)
(43): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
Implementation
Usage of torch.nn.ConvTranspose2d()
# load the pretrained model
pretrained_net = models.vgg16_bn(pretrained=True)
class FCN(nn.Module):
def __init__(self, num_classes):
super().__init__()
self.stage1 = pretrained_net.features[:7]  # one downsampling block (each block ends with its pooling layer)
self.stage2 = pretrained_net.features[7:14]
self.stage3 = pretrained_net.features[14:24]
self.stage4 = pretrained_net.features[24:34]
self.stage5 = pretrained_net.features[34:]
# the next two layers reduce the channel dimension
self.conv_trans1 = nn.Conv2d(512, 256, 1)
self.conv_trans2 = nn.Conv2d(256, num_classes, 1)
self.upsample_8x = nn.ConvTranspose2d(num_classes, num_classes, 16, 8, 4, bias=False)
self.upsample_8x.weight.data = bilinear_kernel(num_classes, num_classes, 16)
self.upsample_2x_1 = nn.ConvTranspose2d(512, 512, 4, 2, 1, bias=False)
self.upsample_2x_1.weight.data = bilinear_kernel(512, 512, 4)
self.upsample_2x_2 = nn.ConvTranspose2d(256, 256, 4, 2, 1, bias=False)
self.upsample_2x_2.weight.data = bilinear_kernel(256, 256, 4)
def forward(self, x):
s1 = self.stage1(x) # (64, 176, 240)
s2 = self.stage2(s1) # (128, 88, 120)
s3 = self.stage3(s2) # (256, 44, 60)
s4 = self.stage4(s3) # (512, 22, 30)
s5 = self.stage5(s4) # (512, 11, 15)
s5 = self.upsample_2x_1(s5) # (512, 22, 30)
add1 = s5 + s4 # (512, 22, 30)
add1 = self.conv_trans1(add1) # (256, 22, 30)
add1 = self.upsample_2x_2(add1) # (256, 44, 60)
add2 = add1 + s3 # (256, 44, 60)
output = self.conv_trans2(add2)  # (12, 44, 60)
output = self.upsample_8x(output) # (12, 352, 480)
return output
Computing the segmentation metrics
Common evaluation metrics for semantic segmentation
The metric definitions are taken from the documentation of the evaluation function (the original screenshot is omitted). In them, $N_{ij}$ denotes the number of pixels whose ground-truth label is $i$ and whose predicted label is $j$. Among these metrics, mIoU is the most important.
Computing mIoU
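Written out in the $N_{ij}$ notation (matching the code below), the per-class IoU and mIoU are

$$\mathrm{IoU}_i = \frac{N_{ii}}{\sum_{j} N_{ij} + \sum_{j} N_{ji} - N_{ii}}, \qquad \mathrm{mIoU} = \frac{1}{k} \sum_{i=1}^{k} \mathrm{IoU}_i,$$

where $k$ is the number of classes; pixel accuracy is $\sum_i N_{ii} / \sum_{i,j} N_{ij}$ and the accuracy of class $i$ is $N_{ii} / \sum_j N_{ij}$.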
Implementation
Imports
Purpose of from __future__ import division: under Python 2 it makes the / operator perform true (float) division, as in Python 3.
from __future__ import division
import numpy as np
import six
import cfg
Computing the confusion matrix
Usage of numpy.bincount() and next()
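Two quick illustrations (values are arbitrary): np.bincount counts the occurrences of each non-negative integer, and next(iterator, default) returns the default once the iterator is exhausted:

import numpy as np

print(np.bincount(np.array([0, 1, 1, 3]), minlength=6))
# [1 2 0 1 0 0] -- counts of the values 0..5

it = iter([1, 2])
next(it); next(it)
print(next(it, None))  # None -- iterator exhausted, so the default is returned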
def calc_semantic_segmentation_confusion(pred_labels, gt_labels):
pred_labels = iter(pred_labels) # (352, 480)
gt_labels = iter(gt_labels) # (352, 480)
n_class = cfg.DATASET[1]
confusion = np.zeros((n_class, n_class), dtype=np.int64) # (12, 12)
for pred_label, gt_label in six.moves.zip(pred_labels, gt_labels):
if pred_label.ndim != 2 or gt_label.ndim != 2:
raise ValueError('ndim of labels should be two.')
if pred_label.shape != gt_label.shape:
raise ValueError('Shape of ground truth and prediction should'
' be same.')
pred_label = pred_label.flatten() # (168960, )
gt_label = gt_label.flatten() # (168960, )
# Dynamically expand the confusion matrix if necessary.
# (the label values encountered may exceed the preset number of classes; if so, enlarge the matrix accordingly)
lb_max = np.max((pred_label, gt_label))
if lb_max >= n_class:
expanded_confusion = np.zeros(
(lb_max + 1, lb_max + 1), dtype=np.int64)
expanded_confusion[0:n_class, 0:n_class] = confusion
n_class = lb_max + 1
confusion = expanded_confusion
# Count statistics from valid pixels. The trick: multiplying the ground truth by n_class maps each (gt, pred) pair (i, j) to the unique index i * n_class + j.
mask = gt_label >= 0  # mask out pixels whose label is negative (ignored)
confusion += np.bincount(
n_class * gt_label[mask].astype(int) + pred_label[mask],  # from this line: rows of the confusion matrix are ground-truth labels, columns are predictions
minlength=n_class ** 2)\
.reshape((n_class, n_class))
for iter_ in (pred_labels, gt_labels):
# This code assumes any iterator does not contain None as its items.
if next(iter_, None) is not None:
raise ValueError('Length of input iterables need to be same')
return confusion
Computing the metrics
Usage of numpy.nanmean()
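np.nanmean averages while ignoring NaN entries (which show up in the per-class IoU/accuracy when a class has no pixels in the batch); a tiny example:

import numpy as np

a = np.array([1.0, np.nan, 3.0])
print(np.mean(a))     # nan
print(np.nanmean(a))  # 2.0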
Note on ndarray.sum(axis=...), an example:
a = np.array([[1, 2, 3],
[4, 5, 6],
[7, 8, 9]])
print(a.shape)
b = a.sum(axis=0)
print(b)
print(b.shape)
c = a.sum(axis=1)
print(c)
print(c.shape)
d = a.sum()
print(d)
print(d.shape)
Output:
(3, 3)
[12 15 18]
(3,)
[ 6 15 24]
(3,)
45
()
def calc_semantic_segmentation_iou(confusion):
# denominator = row sums + column sums - diagonal of the confusion matrix (the union of prediction and ground truth for each class)
iou_denominator = (confusion.sum(axis=1) + confusion.sum(axis=0)
- np.diag(confusion))
iou = np.diag(confusion) / iou_denominator
return iou
def eval_semantic_segmentation(pred_labels, gt_labels):
confusion = calc_semantic_segmentation_confusion(
pred_labels, gt_labels)
iou = calc_semantic_segmentation_iou(confusion) # (12, )
pixel_accuracy = np.diag(confusion).sum() / confusion.sum()
class_accuracy = np.diag(confusion) / (np.sum(confusion, axis=1) + 1e-10) # (12, )
return {'iou': iou, 'miou': np.nanmean(iou),
'pixel_accuracy': pixel_accuracy,
'class_accuracy': class_accuracy,
'mean_class_accuracy': np.nanmean(class_accuracy)}
train
Imports
import torch as t
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from datetime import datetime
from dataset import LoadDataset
from evalution_segmentaion import eval_semantic_segmentation
from Models import FCN
import cfg
Load the data and instantiate the network
device = t.device('cuda') if t.cuda.is_available() else t.device('cpu')
num_class = cfg.DATASET[1]
Load_train = LoadDataset([cfg.TRAIN_ROOT, cfg.TRAIN_LABEL], cfg.crop_size)
Load_val = LoadDataset([cfg.VAL_ROOT, cfg.VAL_LABEL], cfg.crop_size)
train_data = DataLoader(Load_train, batch_size=cfg.BATCH_SIZE, shuffle=True, num_workers=1)
val_data = DataLoader(Load_val, batch_size=cfg.BATCH_SIZE, shuffle=True, num_workers=1)
fcn = FCN.FCN(num_class)
fcn = fcn.to(device)
criterion = nn.NLLLoss().to(device)
optimizer = optim.Adam(fcn.parameters(), lr=1e-4)
train()
Note: the optimizer's parameter settings live in optimizer.param_groups, a list of dicts (the screenshot from the official docs is omitted). On Variable: since PyTorch 0.4 Variable has been merged into Tensor, so the Variable(...) wrappers below are kept for compatibility but are no longer required.
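A minimal illustration (toy model and values of my own choosing) of reading and modifying the learning rate through param_groups, which is exactly what the training loop below does every 50 epochs:

from torch import nn, optim

model = nn.Linear(4, 2)
opt = optim.Adam(model.parameters(), lr=1e-4)

print(opt.param_groups[0]['lr'])  # 0.0001
for group in opt.param_groups:    # halve the learning rate
    group['lr'] *= 0.5
print(opt.param_groups[0]['lr'])  # 5e-05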
def train(model):
best = [0]
net = model.train()
# loop over epochs
for epoch in range(cfg.EPOCH_NUMBER):
print('Epoch is [{}/{}]'.format(epoch + 1, cfg.EPOCH_NUMBER))
# learning-rate schedule: halve the learning rate every 50 epochs
if epoch % 50 == 0 and epoch != 0:
for group in optimizer.param_groups:
group['lr'] *= 0.5
train_loss = 0
train_acc = 0
train_miou = 0
train_class_acc = 0
# loop over batches
for i, sample in enumerate(train_data):
# move the batch to the device
img_data = Variable(sample['img'].to(device))
img_label = Variable(sample['label'].to(device))
# forward, backward and parameter update
out = net(img_data)
out = F.log_softmax(out, dim=1)
loss = criterion(out, img_label)
optimizer.zero_grad()
loss.backward()
optimizer.step()
train_loss += loss.item()
# evaluation metrics for this batch
pre_label = out.max(dim=1)[1].data.cpu().numpy()
pre_label = [i for i in pre_label]  # split the batch into a list of per-image arrays
true_label = img_label.data.cpu().numpy()
true_label = [i for i in true_label]
eval_metrix = eval_semantic_segmentation(pre_label, true_label)
train_acc += eval_metrix['mean_class_accuracy']
train_miou += eval_metrix['miou']
train_class_acc += eval_metrix['class_accuracy']
print('|batch[{}/{}]|batch_loss {: .8f}|'.format(i + 1, len(train_data), loss.item()))  # len(train_data) is the number of batches
metric_description = '|Train Acc|: {:.5f}|Train Mean IU|: {:.5f}\n|Train_class_acc|:{:}'.format(
train_acc / len(train_data),
train_miou / len(train_data),
train_class_acc / len(train_data),
)
print(metric_description)
# if this epoch achieves the best mIoU so far, record it and save the model weights
if max(best) <= train_miou / len(train_data):
best.append(train_miou / len(train_data))
t.save(net.state_dict(), './Results/weights/FCN_weight/{}.pth'.format(epoch))
evaluate()
def evaluate(model):
net = model.eval()
eval_loss = 0
eval_acc = 0
eval_miou = 0
eval_class_acc = 0
prec_time = datetime.now()
for j, sample in enumerate(val_data):
valImg = Variable(sample['img'].to(device))
valLabel = Variable(sample['label'].long().to(device))
out = net(valImg)
out = F.log_softmax(out, dim=1)
loss = criterion(out, valLabel)
eval_loss = loss.item() + eval_loss
pre_label = out.max(dim=1)[1].data.cpu().numpy()
pre_label = [i for i in pre_label]
true_label = valLabel.data.cpu().numpy()
true_label = [i for i in true_label]
eval_metrics = eval_semantic_segmentation(pre_label, true_label)
eval_acc = eval_metrics['mean_class_accuracy'] + eval_acc
eval_miou = eval_metrics['miou'] + eval_miou
eval_class_acc = eval_metrics['class_accuracy'] + eval_class_acc
cur_time = datetime.now()
h, remainder = divmod((cur_time - prec_time).seconds, 3600)
m, s = divmod(remainder, 60)
time_str = 'Time: {:.0f}:{:.0f}:{:.0f}'.format(h, m, s)
val_str = ('|Valid Loss|: {:.5f} \n|Valid Acc|: {:.5f} \n|Valid Mean IU|: {:.5f} \n|Valid Class Acc|:{:}'.format(
eval_loss / len(val_data),
eval_acc / len(val_data),
eval_miou / len(val_data),
eval_class_acc / len(val_data)))
print(val_str)
print(time_str)
Summary of which variables live on the GPU and which on the CPU during training
GPU:
- The network (FCN)
- The loss computation (nn.NLLLoss)
- The training images and label tensors
CPU:
- When computing the metrics, the predictions and labels are moved to the CPU and converted to numpy arrays
test
Imports
import torch as t
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import DataLoader
from evalution_segmentaion import eval_semantic_segmentation
from dataset import LoadDataset
from Models import FCN
import cfg
Implementation
Very similar to the training loop.
device = t.device('cuda') if t.cuda.is_available() else t.device('cpu')
num_class = cfg.DATASET[1]
BATCH_SIZE = 4
miou_list = [0]
Load_test = LoadDataset([cfg.TEST_ROOT, cfg.TEST_LABEL], cfg.crop_size)
test_data = DataLoader(Load_test, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
net = FCN.FCN(num_class)
net.eval()
net.to(device)
net.load_state_dict(t.load("./Results/weights/xxx.pth"))
train_acc = 0
train_miou = 0
train_class_acc = 0
train_mpa = 0
error = 0
for i, sample in enumerate(test_data):
data = Variable(sample['img']).to(device)
label = Variable(sample['label']).to(device)
out = net(data)
out = F.log_softmax(out, dim=1)
pre_label = out.max(dim=1)[1].data.cpu().numpy()
pre_label = [i for i in pre_label]
true_label = label.data.cpu().numpy()
true_label = [i for i in true_label]
eval_metrix = eval_semantic_segmentation(pre_label, true_label)
train_acc = eval_metrix['mean_class_accuracy'] + train_acc
train_miou = eval_metrix['miou'] + train_miou
train_mpa = eval_metrix['pixel_accuracy'] + train_mpa
if len(eval_metrix['class_accuracy']) < 12:
eval_metrix['class_accuracy'] = 0
train_class_acc = train_class_acc + eval_metrix['class_accuracy']
error += 1
else:
train_class_acc = train_class_acc + eval_metrix['class_accuracy']
print(eval_metrix['class_accuracy'], '================', i)
epoch_str = ('test_acc :{:.5f} ,test_miou:{:.5f}, test_mpa:{:.5f}, test_class_acc :{:}'.format(train_acc /(len(test_data)-error),
train_miou/(len(test_data)-error), train_mpa/(len(test_data)-error),
train_class_acc/(len(test_data)-error)))
# print only when a new best mIoU is reached
if train_miou/(len(test_data)-error) > max(miou_list):
miou_list.append(train_miou/(len(test_data)-error))
print(epoch_str+'==========last')
Visualizing the segmentation results: predict
The numerical metrics alone are not enough to judge the network; we also need to look at the actual predicted segmentation maps.
Imports
import pandas as pd
import numpy as np
import torch as t
import torch.nn.functional as F
from torch.utils.data import DataLoader
from PIL import Image
from dataset import LoadDataset
from Models import FCN
import cfg
Implementation
Note: a tensor's max(dim) method returns two values, the maxima and their indices (the screenshot from the official docs is omitted).
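A tiny illustration (arbitrary values) of the two return values:

import torch

x = torch.tensor([[1.0, 5.0, 3.0],
                  [7.0, 2.0, 4.0]])
values, indices = x.max(dim=1)
print(values)   # tensor([5., 7.])
print(indices)  # tensor([1, 0])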
On indexing an ndarray with another ndarray, an example:
colormap = np.array([[1, 2, 3],
[4, 5, 6],
[7, 8, 9]])
print(colormap.shape)
indices = np.array([[0, 1],
[2, 2]])
print(indices.shape)
result = colormap[indices]
print(result.shape)
print(result)
Output:
(3, 3)
(2, 2)
(2, 2, 3)
[[[1 2 3]
[4 5 6]]
[[7 8 9]
[7 8 9]]]
Prediction code:
device = t.device('cuda') if t.cuda.is_available() else t.device('cpu')
num_class = cfg.DATASET[1]
Load_test = LoadDataset([cfg.TEST_ROOT, cfg.TEST_LABEL], cfg.crop_size)
test_data = DataLoader(Load_test, batch_size=1, shuffle=True, num_workers=4)
net = FCN.FCN(num_class).to(device)
### load the trained weights
net.load_state_dict(t.load("./Results/weights/xxx.pth"))
net.eval()
# build the colormap, exactly as when encoding the label images
pd_label_color = pd.read_csv(cfg.class_dict_path, sep=',')
name_value = pd_label_color['name'].values
num_class = len(name_value)
colormap = []
for i in range(num_class):
tmp = pd_label_color.iloc[i]
color = [tmp['r'], tmp['g'], tmp['b']]
colormap.append(color)
cm = np.array(colormap).astype('uint8')
# output directory for the predicted images
dir = "./Results/result_pics/"
for i, sample in enumerate(test_data):
valImg = sample['img'].to(device)
valLabel = sample['label'].long().to(device)
out = net(valImg)
out = F.log_softmax(out, dim=1)  # out from the FCN: (1, 12, 352, 480)
pre_label = out.max(1)[1].squeeze().cpu().data.numpy()  # squeeze(): remove dimensions of size 1 (a specific dim can also be given)
pre = cm[pre_label]
pre1 = Image.fromarray(pre)
pre1.save(dir + str(i) + '.png')
print('Done')