Camvid数据集
官网 :http://mi.eng.cam.ac.uk/research/projects/VideoRec/CamVid/
类别数:Object Recognition in Video Dataset (cam.ac.uk)
1. 简介
1.1 下载
CamVid (Cambridge-Driving Labeled Video Database) | Kaggle
1.1 简介
CamVid全称:The Cambridge-driving Labeled Video Database,该数据集由剑桥大学工程系于 2008 年发布,相关论文有《Segmentation and Recognition Using Structure from Motion Point Clouds》,是第一个具有目标类别语义标签的视频集合。数据库提供32个ground truth语义标签,将每个像素与语义类别之一相关联。该数据库解决了对实验数据的需求,以定量评估新兴算法。数据是从驾驶汽车的角度拍摄的,驾驶场景增加了观察目标的数量和异质性。
1.2 格式介绍
class_dict.csv内容如下
Camvid 数据一共11个类。加上背景一共12类
class_11 0表示数据中没有这个类,1表示有这个类
name | r | g | b | class_11 |
---|
Animal | 64 | 128 | 64 | 0 | Archway | 192 | 0 | 128 | 0 | Bicyclist | 0 | 128 | 192 | 1 | Bridge | 0 | 128 | 64 | 0 | Building | 128 | 0 | 0 | 1 | Car | 64 | 0 | 128 | 1 | CartLuggagePram | 64 | 0 | 192 | 0 | Child | 192 | 128 | 64 | 0 | Column_Pole | 192 | 192 | 128 | 1 | Fence | 64 | 64 | 128 | 1 | LaneMkgsDriv | 128 | 0 | 192 | 0 | LaneMkgsNonDriv | 192 | 0 | 64 | 0 | Misc_Text | 128 | 128 | 64 | 0 | MotorcycleScooter | 192 | 0 | 192 | 0 | OtherMoving | 128 | 64 | 64 | 0 | ParkingBlock | 64 | 192 | 128 | 0 | Pedestrian | 64 | 64 | 0 | 1 | Road | 128 | 64 | 128 | 1 | RoadShoulder | 128 | 128 | 192 | 0 | Sidewalk | 0 | 0 | 192 | 1 | SignSymbol | 192 | 128 | 128 | 1 | Sky | 128 | 128 | 128 | 1 | SUVPickupTruck | 64 | 128 | 192 | 0 | TrafficCone | 0 | 0 | 64 | 0 | TrafficLight | 0 | 64 | 64 | 0 | Train | 192 | 64 | 128 | 0 | Tree | 128 | 128 | 0 | 1 | Truck_Bus | 192 | 128 | 192 | 0 | Tunnel | 64 | 0 | 64 | 0 | VegetationMisc | 192 | 192 | 0 | 0 | Void | 0 | 0 | 0 | 0 | Wall | 64 | 192 | 0 | 0 |
2. 制作dataloader
import glob
import os
from torchvision import transforms
import torch.nn as nn
import torch
from torch.nn import functional as F
from PIL import Image
import numpy as np
import pandas as pd
import random
import numbers
import torchvision
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import InterpolationMode
def bool_to_num(num):
number = np.zeros(num.shape)
height = num.shape[0]
width = num.shape[1]
for h in range(height):
for w in range(width):
if num[h][w] == True:
number[h][w] = 1
else:
number[h][w] = 0
return number
def poly_lr_scheduler(optimizer, init_lr, iter, lr_decay_iter=1,
max_iter=300, power=0.9):
"""Polynomial decay of learning rate
:param init_lr is base learning rate
:param iter is a current iteration
:param lr_decay_iter how frequently decay occurs, default is 1
:param max_iter is number of maximum iterations
:param power is a polymomial power
"""
lr = init_lr * (1 - iter / max_iter) ** power
optimizer.param_groups[0]['lr'] = lr
return lr
def get_label_info(csv_path):
ann = pd.read_csv(csv_path)
label = {}
for iter, row in ann.iterrows():
label_name = row['name']
r = row['r']
g = row['g']
b = row['b']
class_11 = row['class_11']
label[label_name] = [int(r), int(g), int(b), class_11]
return label
def get_label_info_2():
return {
'Animal': [64, 128, 64, 0],
'Archway': [192, 0, 128, 0],
'Bicyclist': [0, 128, 192, 1],
'Bridge': [0, 128, 64, 0],
'Building': [128, 0, 0, 1],
'Car': [64, 0, 128, 1],
'CartLuggagePram': [64, 0, 192, 0],
'Child': [192, 128, 64, 0],
'Column_Pole': [192, 192, 128, 1],
'Fence': [64, 64, 128, 1],
'LaneMkgsDriv': [128, 0, 192, 0],
'LaneMkgsNonDriv': [192, 0, 64, 0],
'Misc_Text': [128, 128, 64, 0],
'MotorcycleScooter': [192, 0, 192, 0],
'OtherMoving': [128, 64, 64, 0],
'ParkingBlock': [64, 192, 128, 0],
'Pedestrian': [64, 64, 0, 1],
'Road': [128, 64, 128, 1],
'RoadShoulder': [128, 128, 192, 0],
'Sidewalk': [0, 0, 192, 1],
'SignSymbol': [192, 128, 128, 1],
'Sky': [128, 128, 128, 1],
'SUVPickupTruck': [64, 128, 192, 0],
'TrafficCone': [0, 0, 64, 0],
'TrafficLight': [0, 64, 64, 0],
'Train': [192, 64, 128, 0],
'Tree': [128, 128, 0, 1],
'Truck_Bus': [192, 128, 192, 0],
'Tunnel': [64, 0, 64, 0],
'VegetationMisc': [192, 192, 0, 0],
'Void': [0, 0, 0, 0],
'Wall': [64, 192, 0, 0]
}
def one_hot_it(label, label_info):
"""
Args:
label:
label_info:
Returns:
"""
semantic_map = np.zeros(label.shape[:-1])
for index, info in enumerate(label_info):
color = label_info[info]
equality = np.equal(label, color)
class_map = np.all(equality, axis=-1)
semantic_map[class_map] = index
return semantic_map
def one_hot_it_v11(label, label_info):
"""
返回图片信息 -> 转换成one-hot 编码
Args:
label:
label_info:
Returns:
"""
semantic_map = np.zeros(label.shape[:-1])
class_index = 0
for index, info in enumerate(label_info):
color = label_info[info][:3]
class_11 = label_info[info][3]
if class_11 == 1:
equality = np.equal(label, color)
class_map = np.all(equality, axis=-1)
semantic_map[class_map] = class_index
class_index += 1
else:
equality = np.equal(label, color)
class_map = np.all(equality, axis=-1)
semantic_map[class_map] = 11
return semantic_map
def one_hot_it_v11_dice(label, label_info):
semantic_map = []
void = np.zeros(label.shape[:2])
dis_all = np.zeros(label.shape[:2])
for index, info in enumerate(label_info):
color = label_info[info][:3]
class_11 = label_info[info][3]
if class_11 == 1:
equality = np.equal(label, color)
class_map = np.all(equality, axis=-1)
dis_all[class_map] = 1
semantic_map.append(class_map)
else:
equality = np.equal(label, color)
class_map = np.all(equality, axis=-1)
void[class_map] = 1
semantic_map.append(void)
semantic_map = np.stack(semantic_map, axis=-1).astype(np.float32)
return semantic_map
def reverse_one_hot(image):
"""
转换一个2D数组为一热格式(depth为num_classes),
到一个只有一个通道的2D数组,其中每个像素值是
分类密钥。
# Arguments
image: The one-hot format image
# Returns
A 2D array with the same width and height as the input, but
with a depth size of 1, where each pixel value is the classified
class key.
"""
image = image.permute(1, 2, 0)
x = torch.argmax(image, dim=-1)
return x
def colour_code_segmentation(image, label_values):
"""
Given a 1-channel array of class keys, colour code the segmentation results.
# Arguments
image: single channel array where each value represents the class key.
label_values
# Returns
Colour coded image for segmentation visualization
"""
label_values = [label_values[key][:3] for key in label_values if label_values[key][3] == 1]
label_values.append([0, 0, 0])
colour_codes = np.array(label_values)
x = colour_codes[image.astype(int)]
return x
def compute_global_accuracy(pred, label):
pred = pred.flatten()
label = label.flatten()
total = len(label)
count = 0.0
for i in range(total):
if pred[i] == label[i]:
count = count + 1.0
return float(count) / float(total)
def fast_hist(a, b, n):
'''
a and b are predict and mask respectively
n is the number of classes
'''
k = (a >= 0) & (a < n)
return np.bincount(n * a[k].astype(int) + b[k], minlength=n ** 2).reshape(n, n)
def per_class_iu(hist):
epsilon = 1e-5
return (np.diag(hist) + epsilon) / (hist.sum(1) + hist.sum(0) - np.diag(hist) + epsilon)
class RandomCrop(object):
"""Crop the given PIL Image at a random location.
Args:
size (sequence or int): Desired output size of the crop. If size is an
int instead of sequence like (h, w), a square crop (size, size) is
made.
padding (int or sequence, optional): Optional padding on each border
of the image. Default is 0, i.e no padding. If a sequence of length
4 is provided, it is used to pad left, top, right, bottom borders
respectively.
pad_if_needed (boolean): It will pad the image if smaller than the
desired size to avoid raising an exception.
"""
def __init__(self, size, seed, padding=0, pad_if_needed=False):
if isinstance(size, numbers.Number):
self.size = (int(size), int(size))
else:
self.size = size
self.padding = padding
self.pad_if_needed = pad_if_needed
self.seed = seed
@staticmethod
def get_params(img, output_size, seed):
"""Get parameters for ``crop`` for a random crop.
Args:
img (PIL Image): Image to be cropped.
output_size (tuple): Expected output size of the crop.
Returns:
tuple: params (i, j, h, w) to be passed to ``crop`` for random crop.
"""
random.seed(seed)
w, h = img.size
th, tw = output_size
if w == tw and h == th:
return 0, 0, h, w
i = random.randint(0, h - th)
j = random.randint(0, w - tw)
return i, j, th, tw
def __call__(self, img):
"""
Args:
img (PIL Image): Image to be cropped.
Returns:
PIL Image: Cropped image.
"""
if self.padding > 0:
img = torchvision.transforms.functional.pad(img, self.padding)
if self.pad_if_needed and img.size[0] < self.size[1]:
img = torchvision.transforms.functional.pad(img, (int((1 + self.size[1] - img.size[0]) / 2), 0))
if self.pad_if_needed and img.size[1] < self.size[0]:
img = torchvision.transforms.functional.pad(img, (0, int((1 + self.size[0] - img.size[1]) / 2)))
i, j, h, w = self.get_params(img, self.size, self.seed)
return torchvision.transforms.functional.crop(img, i, j, h, w)
def __repr__(self):
return self.__class__.__name__ + '(size={0}, padding={1})'.format(self.size, self.padding)
def cal_miou(miou_list, csv_path):
ann = pd.read_csv(csv_path)
miou_dict = {}
cnt = 0
for iter, row in ann.iterrows():
label_name = row['name']
class_11 = int(row['class_11'])
if class_11 == 1:
miou_dict[label_name] = miou_list[cnt]
cnt += 1
return miou_dict, np.mean(miou_list)
class OHEM_CrossEntroy_Loss(nn.Module):
def __init__(self, threshold, keep_num):
super(OHEM_CrossEntroy_Loss, self).__init__()
self.threshold = threshold
self.keep_num = keep_num
self.loss_function = nn.CrossEntropyLoss(reduction='none')
def forward(self, output, target):
loss = self.loss_function(output, target).view(-1)
loss, loss_index = torch.sort(loss, descending=True)
threshold_in_keep_num = loss[self.keep_num]
if threshold_in_keep_num > self.threshold:
loss = loss[loss > self.threshold]
else:
loss = loss[:self.keep_num]
return torch.mean(loss)
def group_weight(weight_group, module, norm_layer, lr):
group_decay = []
group_no_decay = []
for m in module.modules():
if isinstance(m, nn.Linear):
group_decay.append(m.weight)
if m.bias is not None:
group_no_decay.append(m.bias)
elif isinstance(m, (nn.Conv2d, nn.Conv3d)):
group_decay.append(m.weight)
if m.bias is not None:
group_no_decay.append(m.bias)
elif isinstance(m, norm_layer) or isinstance(m, nn.GroupNorm):
if m.weight is not None:
group_no_decay.append(m.weight)
if m.bias is not None:
group_no_decay.append(m.bias)
assert len(list(module.parameters())) == len(group_decay) + len(
group_no_decay)
weight_group.append(dict(params=group_decay, lr=lr))
weight_group.append(dict(params=group_no_decay, weight_decay=.0, lr=lr))
return weight_group
def augmentation():
pass
def augmentation_pixel():
pass
class CamVid(Dataset):
"""
因为数据集较少,需要进行数据加强
"""
def __init__(self, image_path, label_path, csv_path, scale, loss='crossentropy', mode='train'):
super().__init__()
self.mode = mode
self.image_list = []
if not isinstance(image_path, list):
image_path = [image_path]
for image_path_ in image_path:
self.image_list.extend(glob.glob(os.path.join(image_path_, '*.png')))
self.image_list.sort()
self.label_list = []
if not isinstance(label_path, list):
label_path = [label_path]
for label_path_ in label_path:
self.label_list.extend(glob.glob(os.path.join(label_path_, '*.png')))
self.label_list.sort()
self.label_info = get_label_info(csv_path)
self.to_tensor = transforms.Compose([
transforms.ToTensor(),
])
self.image_size = scale
self.scale = [0.5, 1, 1.25, 1.5, 1.75, 2]
self.loss = loss
def __getitem__(self, index):
seed = random.random()
img = Image.open(self.image_list[index])
scale = random.choice(self.scale)
scale = (int(self.image_size[0] * scale), int(self.image_size[1] * scale))
if self.mode == 'train':
img = transforms.Resize(scale, InterpolationMode.BICUBIC)(img)
img = RandomCrop(self.image_size, seed, pad_if_needed=True)(img)
img = np.array(img)
img = Image.fromarray(img)
img = self.to_tensor(img).float()
label = Image.open(self.label_list[index])
if self.mode == 'train':
label = transforms.Resize(scale, InterpolationMode.NEAREST)(label)
label = RandomCrop(self.image_size, seed, pad_if_needed=True)(label)
label = np.array(label)
if self.loss == 'dice':
label = one_hot_it_v11_dice(label, self.label_info).astype(np.uint8)
label = np.transpose(label, [2, 0, 1]).astype(np.float32)
label = torch.from_numpy(label)
return img, label
elif self.loss == 'crossentropy':
label = one_hot_it_v11(label, self.label_info)
label = torch.from_numpy(label).long()
return img, label
def __len__(self):
return len(self.image_list)
def get_dataloader(mode=True, batch_size=2):
path = r"E:\note\cv\data\CamVid"
train_path = os.path.join(path, "train")
val_path = os.path.join(path, "val")
train_labels_path = os.path.join(path, "train_labels")
val_labels_path = os.path.join(path, "val_labels")
class_dict_path = os.path.join(path, "class_dict.csv")
test_path = os.path.join(path, "test")
test_labels_path = os.path.join(path, "test_labels")
if mode:
train_dataset = CamVid([train_path, val_path],
[train_labels_path, val_labels_path], class_dict_path,
(720, 960), loss='crossentropy', mode='train')
loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True, )
else:
train_dataset = CamVid([test_path],
[test_labels_path], class_dict_path,
(720, 960), loss='crossentropy', mode='val')
loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True, )
return loader
def show_image(image):
plt.figure()
image = image.numpy().transpose(1, 2, 0)
print(image)
plt.imshow(image)
plt.show()
def show_label(mask):
plt.figure()
image = mask.numpy()
print(np.unique(np.array(image)))
plt.imshow(image, cmap="gray")
plt.title("label")
plt.show()
if __name__ == '__main__':
train_dataloader = get_dataloader(True, 2)
for images, labels in train_dataloader:
print(images.shape)
print(labels.shape)
show_image(images[0])
show_label(labels[0].argmax(dim=0))
break
参考资料
CamVid数据集介绍及读取,附代码(Pytorch版本)_牛哥说的博客-CSDN博客_camvid
|