无论是做检测还是分割,大多数数据集都会以VOC格式或COCO格式来准备。本文将立足于目标检测,对VOC数据集进行分析。
VOC数据集的格式为:
$ tree -L 1 VOCdevkit/VOC2007
VOCdevkit/VOC2007
├── Annotations
├── ImageSets
├── JPEGImages
├── SegmentationClass
└── SegmentationObject
5 directories, 0 files
1 检查数据集
检查数据集的目的是对数据质量、数据的整体指标做一个分析。主要检查有:
- 检查有图片但没有标注文件、以及有标注文件但没有图片的数据,不区分训练、验证和测试。对于正确的直接跳过,对于错误的在对应文件夹下生成result文件夹,把有图没标注和有标注没图的文件全部保存进去
- 检查标注文件中没有图片尺寸的情况
- 检查标注文件中有框但没有类名称的情况
- 获取数据集的统计信息:
- 图片总数、标注框总数及每张图的平均框数
- 按类进行图片数和标注框数量的统计
- 统计标注文件中所有的标注类别(有错误类别如车标注成car和carr,不论对错全部统计)
- 单图标注框数量的分布(已画图)
- 图片高度分布(未画图)
- 图片宽度分布(未画图)
- 图片面积分布(未画图)
- 图片高宽比分布(已画图)
- 按类别进行框与图面积比
这些统计值的应用主要是加深对数据的理解,有助于针对性的提升检测精度
import os
import shutil
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
from collections import defaultdict
import xml.etree.ElementTree as ET
def _move_to_result(src, result_path):
    """Move ``src`` into the ``result_path`` folder, creating the folder first.

    Nothing happens when ``src`` does not exist (for example because it was
    already moved away).
    """
    if not os.path.exists(src):
        return
    # If the target folder were missing, shutil.move() would rename ``src`` to
    # ``result_path`` itself instead of placing the file inside it.
    os.makedirs(result_path, exist_ok=True)
    shutil.move(src, result_path)


def _read_image_size(root):
    """Return ``(width, height)`` from the <size> node, or ``(0, 0)`` if unreadable."""
    size = root.find('size')
    try:
        width = int(float(size.find('width').text))
        height = int(float(size.find('height').text))
    except (AttributeError, TypeError, ValueError):
        # Missing <size>, missing child node, empty text or non-numeric text.
        return 0, 0
    return width, height


def _collect_boxes(objects, ann):
    """Validate the <object> nodes of one annotation file.

    Returns ``(boxes, ok)``: ``boxes`` is a list of
    ``(class_name, box_width, box_height)`` for every valid object and ``ok``
    is False as soon as one object has a bad class name or a bad box. Every
    problem is printed together with the annotation path ``ann``.
    """
    boxes = []
    ok = True
    for obj in objects:
        name_node = obj.find('name')
        cls_name = name_node.text if name_node is not None else None
        if cls_name is None or len(cls_name) < 2:
            print(f"标注框类名有问题,标注文件将被处理,类名:{cls_name},标注文件:{ann}")
            ok = False
            continue
        xmlbox = obj.find('bndbox')
        try:
            # Same ordering as before: (xmin, xmax, ymin, ymax).
            b = tuple(float(xmlbox.find(tag).text) for tag in ('xmin', 'xmax', 'ymin', 'ymax'))
        except (AttributeError, TypeError, ValueError):
            print(f"标注框坐标有问题,标注文件将被处理,标注文件:{ann},类名称:{cls_name}")
            ok = False
            continue
        box_w = b[1] - b[0]
        box_h = b[3] - b[2]
        # "< 1" also rejects inverted boxes (xmax < xmin), not only points/lines.
        if int(box_w) < 1 or int(box_h) < 1:
            print(f"有零存在,框为点或直线,将被处理,边框:{b},标注文件:{ann},类名称:{cls_name}")
            ok = False
            continue
        boxes.append((cls_name, box_w, box_h))
    return boxes, ok


def _plot_histogram(values, step, xlabel, ylabel, title):
    """Draw one annotated histogram of ``values`` on the current axes.

    ``step`` is the desired bin width; at least one bin is always used so a
    narrow value range cannot produce ``bins=0``.
    """
    lowest, highest = min(values), max(values)
    group_num = max(1, int((highest - lowest) / step))
    n, bins, _ = plt.hist(x=values, bins=group_num, color='c', edgecolor='red', density=False, rwidth=0.8)
    for k, count in enumerate(n):
        plt.text(bins[k], count * 1.02, int(count), fontsize=12, horizontalalignment="center")
    distance = max(1, int((highest - lowest) / group_num))
    plt.xticks(range(int(lowest), int(highest) + 2, distance), fontsize=8)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.xticks(rotation=45)
    plt.title(title)
    plt.grid(True)
    plt.tight_layout()


def check(year='VOC2007', show=False):
    """Validate the dataset ``VOCdevkit/<year>`` and print its statistics.

    Steps:
      1. Images without an annotation and annotations without an image are
         moved to ``VOCdevkit/<year>/<year>_result``.
      2. Every remaining annotation is parsed. A file whose image size is
         missing (area < 100), that has no object, or that contains an object
         with a bad class name or a degenerate box is moved to the result
         folder together with its image and is excluded from all statistics.
         A file that cannot be parsed is reported and skipped.
      3. Overall and per-class statistics are printed; the per-class table is
         printed as a pandas DataFrame.

    Args:
        year: dataset folder name below ``VOCdevkit``.
        show: when True, also draw the bar chart and the histograms with
            matplotlib.

    Returns:
        None. All results are printed (and optionally plotted).
    """
    data_path = os.path.join("VOCdevkit", year)
    imgs_path = os.path.join(data_path, 'JPEGImages')
    anns_path = os.path.join(data_path, 'Annotations')
    # stem -> real file name, so images that are not ".jpg" can still be located.
    img_files = {os.path.splitext(f)[0]: f for f in os.listdir(imgs_path)}
    ann_files_by_stem = {os.path.splitext(f)[0]: f for f in os.listdir(anns_path)}
    img_names = set(img_files)
    ann_names = set(ann_files_by_stem)
    print("########################################################################################数据集{}检验结果如下:######################################################################################################".format(year))
    if not img_names:
        print(' 该数据集没有图片')
        return
    img_ann = img_names - ann_names
    ann_img = ann_names - img_names
    if img_ann:
        print(" 有图片没标注文件的图片是:{} 等(只列前50个) 注意检查这些图片是否是背景图片".format({v for k, v in enumerate(img_ann) if k < 50}))
    else:
        print(" 所有图片都有对应标注文件")
    if ann_img:
        print(" 有标注文件没有图片的标注文件是:{}(只列前50个)".format({v for k, v in enumerate(ann_img) if k < 50}))
    else:
        print(" 所有标注文件都有对应图片")
    result_path = os.path.join(data_path, year + '_result')
    if os.path.exists(result_path):
        print(' 结果文件{}已经存在,请检查'.format(result_path))
    if img_ann or ann_img:
        if not os.path.exists(result_path):
            os.makedirs(result_path)
        else:
            print(' 存在有图无标注或有标注无图的文件,另结果文件{}已经存在,请检查'.format(result_path))
        if img_ann:
            for stem in img_ann:
                _move_to_result(os.path.join(imgs_path, img_files[stem]), result_path)
            print(' 移动只有图无标注文件完成')
        if ann_img:
            for stem in ann_img:
                _move_to_result(os.path.join(anns_path, ann_files_by_stem[stem]), result_path)
            print(' 移动只有标注文件无图完成')

    ann_names_new = sorted(os.path.join(anns_path, f) for f in os.listdir(anns_path))
    if not ann_names_new:
        print(' 该数据集没有合格的标注图片')
        return

    img_boxes = []                      # number of boxes per valid image
    hw_percents = []                    # height / width, one entry per valid image
    h_imgs, w_imgs, area_imgs = [], [], []
    num_imgs = defaultdict(int)         # class -> number of images containing it
    num_boxes = defaultdict(int)        # class -> number of boxes
    h_boxes = defaultdict(list)
    w_boxes = defaultdict(list)
    area_boxes = defaultdict(list)
    area_percents = defaultdict(list)   # class -> sqrt(box area / image area) * 100

    for ann in tqdm(ann_names_new):
        try:
            root = ET.parse(ann).getroot()
        except (ET.ParseError, OSError):
            print("打开标注文件失败:", ann)
            continue
        stem = os.path.splitext(os.path.basename(ann))[0]
        img_file = os.path.join(imgs_path, img_files.get(stem, stem + '.jpg'))

        w, h = _read_image_size(root)
        img_area = w * h
        if img_area < 100:
            print(f"有标注文件{ann}无图片尺寸,将被处理")
            _move_to_result(ann, result_path)
            _move_to_result(img_file, result_path)
            continue

        objects = root.findall('object')
        if not objects:
            print(f"有标注文件{ann}但没有标注框,将被处理")
            _move_to_result(ann, result_path)
            _move_to_result(img_file, result_path)
            continue

        boxes, ok_flag = _collect_boxes(objects, ann)
        if not ok_flag:
            # Move the pair exactly once, however many objects were broken.
            _move_to_result(ann, result_path)
            _move_to_result(img_file, result_path)
            continue

        img_boxes.append(len(boxes))
        h_imgs.append(h)
        w_imgs.append(w)
        area_imgs.append(img_area)
        hw_percents.append(float(h / w))
        for cls_name, box_w, box_h in boxes:
            box_area = box_w * box_h
            num_boxes[cls_name] += 1
            h_boxes[cls_name].append(int(box_h))
            w_boxes[cls_name].append(int(box_w))
            area_boxes[cls_name].append(int(box_area))
            area_percents[cls_name].append(round(np.sqrt(box_area / float(img_area)), 3) * 100)
        for cls_name in {box[0] for box in boxes}:
            num_imgs[cls_name] += 1

    total_images_num = len(img_boxes)
    if not total_images_num:
        print(' 该数据集没有合格的标注图片')
        return

    classes = sorted(num_boxes)
    print(
        f"数据集{year}一共有{total_images_num}张合格的标注图片,{sum(img_boxes)}个标注框,"
        f"平均每张图有{round(sum(img_boxes)/total_images_num,2)}个标注框;一共有{len(classes)}个分类,"
        f"分别是{classes};图片中标注框个数最少是{min(img_boxes)}, "
        f"最多是{max(img_boxes)}.图片高度最小值是{min(h_imgs)},最大值是{max(h_imgs)};"
        f"图片宽度最小值是{min(w_imgs)},最大值是{max(w_imgs)}; "
        f"图片面积最小值是{min(area_imgs)},最大值是{max(area_imgs)} ;"
        f"图片高宽比最小值是{round(min(hw_percents),2)},图片高宽比最大值是{round(max(hw_percents),2)}"
    )

    result = {
        'cls_names': classes,
        'images': [num_imgs[name] for name in classes],
        'objects': [num_boxes[name] for name in classes],
        'min_h_bbox': [min(h_boxes[name]) for name in classes],
        'max_h_bbox': [max(h_boxes[name]) for name in classes],
        'min_w_bbox': [min(w_boxes[name]) for name in classes],
        'max_w_bbox': [max(w_boxes[name]) for name in classes],
        'min_area_bbox': [min(area_boxes[name]) for name in classes],
        'max_area_bbox': [max(area_boxes[name]) for name in classes],
        'min_area_box/img': [min(area_percents[name]) for name in classes],
        'max_area_box/img': [max(area_percents[name]) for name in classes],
    }
    pd.set_option('display.max_columns', None)
    pd.set_option('display.max_rows', None)
    pd.set_option('max_colwidth', 50)
    pd.set_option('expand_frame_repr', False)
    result_df = pd.DataFrame(result)
    print(result_df)

    if not show:
        return

    # Bar chart: boxes (blue) and images (red) per class, side by side.
    plt.figure(figsize=(15, 6.4))
    x1 = [5 * i for i in range(len(classes))]
    x2 = [x + 2 for x in x1]
    y1 = [int(num_boxes[cl]) for cl in classes]
    y2 = [int(num_imgs[cl]) for cl in classes]
    plt.bar(x1, y1, alpha=0.7, width=2, color='b', label='objects', tick_label=["" for _ in x1])
    plt.bar(x2, y2, alpha=0.7, width=2, color='r', label='images', tick_label=classes)
    plt.xticks(rotation=45)
    plt.legend()

    # Distribution of the number of boxes per image.
    plt.figure(figsize=(15, 6.4))
    _plot_histogram(
        img_boxes, 1,
        'number of bbox in each image', 'image numbers',
        f"The number of bbox min:{round(np.min(img_boxes),2)},max:{round(np.max(img_boxes),2)} \n mean:{round(np.mean(img_boxes),2)} std:{round(np.std(img_boxes),2)}",
    )

    # Distribution of the image height/width ratio.
    plt.figure(figsize=(15, 6.4))
    _plot_histogram(
        hw_percents, 0.1,
        'image height/width in each image', 'image numbers',
        f"image height/width min:{round(np.min(hw_percents),2)},max:{round(np.max(hw_percents),2)} \n mean:{round(np.mean(hw_percents),2)} std:{round(np.std(hw_percents),2)}",
    )

    # One subplot per class: box size relative to the image.
    rows = int(np.ceil(len(classes) / 3))
    plt.figure(figsize=(8 * 3, 8 * rows))
    for i, name in enumerate(classes):
        plt.subplot(rows, 3, i + 1)
        _plot_histogram(
            area_percents[name], 5,
            'area percent bbox/img', 'boxes numbers',
            f"id {i+1} class {name} area percent min:{round(np.min(area_percents[name]),2)},max:{round(np.max(area_percents[name]),2)} \n mean:{round(np.mean(area_percents[name]),2)} std:{round(np.std(area_percents[name]),2)}",
        )
以VOC2007数据集为例,查看一下处理结果
# Run the full check on VOC2007 and draw the charts as well.
check(year='VOC2007', show=True)
########################################################################################数据集VOC2007检验结果如下:######################################################################################################
所有图片都有对应标注文件
所有标注文件都有对应图片
100%|██████████| 9963/9963 [00:02<00:00, 4454.07it/s]
数据集VOC2007一共有9963张合格的标注图片,30638个标注框,平均每张图有3.08个标注框;一共有20个分类,分别是['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'];图片中标注框个数最少是1, 最多是42.图片高度最小值是96,最大值是500;图片宽度最小值是127,最大值是500; 图片面积最小值是43090,最大值是250000 ;图片高宽比最小值是0.19,图片高宽比最大值是3.21
cls_names images objects min_h_bbox max_h_bbox min_w_bbox max_w_bbox min_area_bbox max_area_bbox min_area_box/img max_area_box/img
0 aeroplane 445 642 13 465 14 499 208 213642 3.4 99.8
1 bicycle 505 807 9 499 10 499 110 186626 2.4 99.8
2 bird 622 1175 9 490 5 498 126 217070 2.6 99.5
3 boat 364 791 5 497 4 499 44 186127 1.6 99.6
4 bottle 502 1291 10 499 4 468 80 152195 2.1 99.7
5 bus 380 526 9 475 12 499 198 188325 3.2 99.7
6 car 1536 3185 4 497 6 499 48 196560 1.6 99.7
7 cat 676 759 22 499 25 499 625 241056 5.8 99.7
8 chair 1117 2806 5 499 4 499 78 212768 2.0 99.8
9 cow 273 685 7 490 7 499 56 179280 1.8 97.8
10 diningtable 510 609 10 476 21 499 567 185754 5.7 99.5
11 dog 863 1068 10 499 10 499 100 232035 2.3 99.7
12 horse 573 801 21 499 11 499 297 197691 4.0 99.3
13 motorbike 482 759 18 498 7 499 182 198000 3.1 99.4
14 person 4192 10674 8 499 4 499 48 248003 1.7 99.8
15 pottedplant 527 1217 6 499 6 498 84 226080 2.1 99.6
16 sheep 195 664 5 482 9 485 45 226980 1.5 96.2
17 sofa 727 821 31 499 24 499 1638 209237 9.4 99.8
18 train 522 630 20 499 20 499 725 186127 6.2 99.6
19 tvmonitor 534 728 11 498 11 499 176 185754 3.4 99.5
2 移动特定分类的数据
应用场景就是把某些分类数据从原始数据集中移出,比如人工标注的数据集有些标错了,如把car标成carr。该操作会改变原始数据集,并且是接着上一步操作进行的。移出原则是:只要图中有一个框属于要移出的类别,就将整张图片和它对应的标注文件一并移出。对于移出后的数据,我们可以进行进一步处理。移出后的数据保存在 VOCdevkit/数据集名/数据集名_result 目录下。
import os
import shutil
from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import defaultdict
import xml.etree.ElementTree as ET
def remove_classes(year='VOC2007', classes=None):
    """Move every image that contains one of ``classes`` out of the dataset.

    The dataset is ``VOCdevkit/<year>``. An annotation file is selected as
    soon as one of its objects has a name listed in ``classes``; the
    annotation and its ``.jpg`` image are then moved to
    ``VOCdevkit/<year>/<year>_result``.

    Args:
        year: dataset folder name below ``VOCdevkit``.
        classes: list of class names to move out; None leaves the dataset
            untouched (the result folder is still created).
    """
    dataset_dir = os.path.join("VOCdevkit", year)
    image_dir = os.path.join(dataset_dir, 'JPEGImages')
    annotation_dir = os.path.join(dataset_dir, 'Annotations')

    if len(os.listdir(image_dir)) == 0:
        print(' 该数据集没有图片')
        return

    result_path = os.path.join(dataset_dir, year + '_result')
    if not os.path.exists(result_path):
        os.makedirs(result_path)
    else:
        print(' 结果文件{}已经存在,请检查'.format(result_path))

    if classes is None:
        return

    for ann_name in tqdm(os.listdir(annotation_dir)):
        ann_file = os.path.join(annotation_dir, ann_name)
        objects = ET.parse(ann_file).getroot().findall("object")
        # One matching object is enough to move the whole image/annotation pair.
        if any(obj.find("name").text in classes for obj in objects):
            shutil.move(ann_file, result_path)
            image_file = os.path.join(dataset_dir, 'JPEGImages', os.path.splitext(ann_name)[0]) + '.jpg'
            shutil.move(image_file, result_path)
比如数据集VOC2007,20个分类中车辆类有car、bicycle、motorbike、bus四类,我们将其移出
# Move all images that contain one of the four vehicle classes out of VOC2007.
vehicle_classes = ["bus", 'bicycle', 'car', 'motorbike']
remove_classes(year='VOC2007', classes=vehicle_classes)
100%|██████████| 9963/9963 [00:00<00:00, 19330.61it/s]
查看一下移出后结果:
$ tree -L 1 VOCdevkit/VOC2007
VOCdevkit/VOC2007
├── Annotations
├── ImageSets
├── JPEGImages
├── SegmentationClass
├── SegmentationObject
└── VOC2007_result #这是移出后数据的存放位置
6 directories, 0 files
看一下VOC2007_result中的结果:
$ tree -L 1 VOCdevkit/VOC2007/VOC2007_result/
VOCdevkit/VOC2007/VOC2007_result/
├── 000004.jpg
├── 000004.xml
├── 000007.jpg
├── 000007.xml
├── 000012.jpg
├── 000012.xml
......
├── 009959.xml
├── 009963.jpg
└── 009963.xml
0 directories, 4428 files
接着再看一下VOC2007数据集,移出4个分类以后,应该还有16个分类:
# Re-check VOC2007 after the vehicle classes were moved out (no charts).
check(year='VOC2007')
########################################################################################数据集VOC2007检验结果如下:######################################################################################################
所有图片都有对应标注文件
所有标注文件都有对应图片
结果文件VOCdevkit/VOC2007/VOC2007_result已经存在,请检查
100%|██████████| 7362/7362 [00:00<00:00, 11036.42it/s]
数据集VOC2007一共有7362张合格的标注图片,21430个标注框,平均每张图有2.91个标注框;一共有16个分类,分别是['aeroplane', 'bird', 'boat', 'bottle', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'];图片中标注框个数最少是1, 最多是42.图片高度最小值是99,最大值是500;图片宽度最小值是127,最大值是500; 图片面积最小值是43750,最大值是250000 ;图片高宽比最小值是0.2,图片高宽比最大值是3.15
cls_names images objects min_h_bbox max_h_bbox min_w_bbox max_w_bbox min_area_bbox max_area_bbox min_area_box/img max_area_box/img
0 aeroplane 426 618 13 465 14 499 208 213642 3.4 99.8
1 bird 618 1171 9 490 5 498 126 217070 2.6 99.5
2 boat 342 743 5 497 4 499 44 186127 1.6 99.6
3 bottle 485 1264 10 499 4 468 80 152195 2.1 99.7
4 cat 668 751 22 499 25 499 625 241056 5.8 99.7
5 chair 1082 2740 5 499 4 499 78 212768 2.0 99.8
6 cow 264 652 8 490 7 499 56 179280 1.8 97.8
7 diningtable 506 605 10 476 21 499 567 185754 5.7 99.5
8 dog 841 1038 10 499 10 499 100 232035 2.3 99.7
9 horse 550 768 21 499 11 499 297 197691 4.0 99.3
10 person 2970 7185 8 499 6 499 48 248003 1.7 99.8
11 pottedplant 503 1153 6 499 6 498 84 226080 2.1 99.6
12 sheep 192 654 5 482 9 485 45 226980 1.5 96.2
13 sofa 703 791 33 499 24 499 1638 209237 10.0 99.8
14 train 491 594 20 499 20 499 725 185381 6.2 99.6
15 tvmonitor 516 703 11 498 13 499 176 185754 3.4 99.5
3 从VOC中提取特定分类的数据
应用场景是有的数据集比较大,但是我们需要的只是其中几个类别,此时要进行提取。该操作不改变原始数据集,但会生成新的数据集。这种情况下,我们主要关心的是被提取出来的数据。先将上一步处理过的数据还原,再接着进行本步骤实验。
import os
import xml.etree.ElementTree as ET
import shutil
from tqdm import tqdm
def get_needed_classes(source_dataset="VOCdevkit/VOC2007", dest_dataset="VOCdevkit/VOC2007_dest", classes=None):
    """Extract the images that contain the wanted classes into a new dataset.

    The source dataset is never modified. An existing ``dest_dataset`` is
    deleted first.

    Args:
        source_dataset: folder of the dataset to extract from.
        dest_dataset: folder that receives the extracted dataset.
        classes: list of class names to keep. Objects of other classes are
            dropped from the copied annotations, and annotation files left
            without any object are not copied. None copies the whole dataset.
    """
    if os.path.exists(dest_dataset):
        shutil.rmtree(dest_dataset)

    if classes is None:
        # copytree() creates the destination itself and fails when it already
        # exists, so it must not be created beforehand.
        shutil.copytree(source_dataset, dest_dataset)
        return

    img_filepath = os.path.join(source_dataset, 'JPEGImages')
    ann_filepath = os.path.join(source_dataset, 'Annotations')
    img_savepath = os.path.join(dest_dataset, 'JPEGImages')
    ann_savepath = os.path.join(dest_dataset, 'Annotations')
    main_path = os.path.join(dest_dataset, "ImageSets/Main")
    for folder in (img_savepath, ann_savepath, main_path):
        os.makedirs(folder, exist_ok=True)

    for source_ann in tqdm(os.listdir(ann_filepath)):
        tree = ET.parse(os.path.join(ann_filepath, source_ann))
        root = tree.getroot()
        kept = 0
        for obj in root.findall("object"):
            name_node = obj.find("name")
            if name_node is None or name_node.text not in classes:
                root.remove(obj)
            else:
                kept += 1
        if not kept:
            continue
        tree.write(os.path.join(ann_savepath, source_ann))
        name_img = os.path.splitext(source_ann)[0] + '.jpg'
        shutil.copy(os.path.join(img_filepath, name_img), os.path.join(img_savepath, name_img))
获取四个类别的车辆类数据
# Extract the four vehicle classes with the default source/destination folders.
wanted_classes = ['bicycle', 'car', 'motorbike', 'bus']
get_needed_classes(classes=wanted_classes)
100%|██████████| 9963/9963 [00:02<00:00, 4188.22it/s]
接着查看提取后数据集VOC2007_dest:
tl@aiot:~/VOC/VOCdevkit$ tree -L 1 VOC2007_dest/
VOC2007_dest/
├── Annotations
├── ImageSets
└── JPEGImages
3 directories, 0 files
# Inspect the extracted dataset.
check(year="VOC2007_dest")
########################################################################################数据集VOC2007_dest检验结果如下:######################################################################################################
所有图片都有对应标注文件
所有标注文件都有对应图片
100%|██████████| 2601/2601 [00:00<00:00, 13031.95it/s]
数据集VOC2007_dest一共有2601张合格的标注图片,5277个标注框,平均每张图有2.03个标注框;一共有4个分类,分别是['bicycle', 'bus', 'car', 'motorbike'];图片中标注框个数最少是1, 最多是15.图片高度最小值是96,最大值是500;图片宽度最小值是156,最大值是500; 图片面积最小值是43090,最大值是250000 ;图片高宽比最小值是0.19,图片高宽比最大值是3.21
cls_names images objects min_h_bbox max_h_bbox min_w_bbox max_w_bbox min_area_bbox max_area_bbox min_area_box/img max_area_box/img
0 bicycle 505 807 9 499 10 499 110 186626 2.4 99.8
1 bus 380 526 9 475 12 499 198 188325 3.2 99.7
2 car 1536 3185 4 497 6 499 48 196560 1.6 99.7
3 motorbike 482 759 18 498 7 499 182 198000 3.1 99.4
4 提取特定分类并修改名称
应用场景是从多个原始数据集中提取特定分类,但同一个类别在不同数据集中使用不同的名称,这时需要统一名称,不改变原始数据集。比如VOC中叫motorbike,COCO数据集叫motorcycle
import os
import xml.etree.ElementTree as ET
import shutil
from tqdm import tqdm
def get_needed_classes_change_name(source_dataset="VOCdevkit/VOC2007", dest_dataset="VOCdevkit/VOC2007_dest", classes=None, new_classes=None):
    """Extract the wanted classes into a new dataset and optionally rename them.

    The source dataset is never modified. An existing ``dest_dataset`` is
    deleted first, but only after the arguments have been validated.

    Args:
        source_dataset: folder of the dataset to extract from.
        dest_dataset: folder that receives the extracted dataset.
        classes: list of class names to keep. Objects of other classes are
            dropped from the copied annotations, and annotation files left
            without any object are not copied. None copies the whole dataset
            (``new_classes`` is ignored in that case).
        new_classes: dict mapping some or all names in ``classes`` to their
            new names; None (default) keeps every name. Every key must be
            listed in ``classes``, otherwise a message is printed and nothing
            is changed.
    """
    # Validate before touching the destination so that a bad mapping cannot
    # wipe a previously extracted dataset.
    if classes is not None and new_classes:
        for name in new_classes:
            if name not in classes:
                print("要改的名称必须要在所提取的类别中")
                return

    if os.path.exists(dest_dataset):
        shutil.rmtree(dest_dataset)

    if classes is None:
        # copytree() creates the destination itself and fails when it already
        # exists, so it must not be created beforehand.
        shutil.copytree(source_dataset, dest_dataset)
        return

    img_filepath = os.path.join(source_dataset, 'JPEGImages')
    ann_filepath = os.path.join(source_dataset, 'Annotations')
    img_savepath = os.path.join(dest_dataset, 'JPEGImages')
    ann_savepath = os.path.join(dest_dataset, 'Annotations')
    main_path = os.path.join(dest_dataset, "ImageSets/Main")
    for folder in (img_savepath, ann_savepath, main_path):
        os.makedirs(folder, exist_ok=True)

    renames = new_classes or {}
    for source_ann in tqdm(os.listdir(ann_filepath)):
        tree = ET.parse(os.path.join(ann_filepath, source_ann))
        root = tree.getroot()
        kept = 0
        for obj in root.findall("object"):
            name_node = obj.find("name")
            if name_node is None or name_node.text not in classes:
                root.remove(obj)
                continue
            name_node.text = renames.get(name_node.text, name_node.text)
            kept += 1
        if not kept:
            continue
        tree.write(os.path.join(ann_savepath, source_ann), encoding='utf-8')
        name_img = os.path.splitext(source_ann)[0] + '.jpg'
        shutil.copy(os.path.join(img_filepath, name_img), os.path.join(img_savepath, name_img))
如我们提取VOC2007中机动车数据,但是要修改motorbike为motorcycle,car改为汽车的拼音
# Extract the vehicle classes and rename car -> qiche, motorbike -> motorcycle.
get_needed_classes_change_name(
    source_dataset="VOCdevkit/VOC2007",
    dest_dataset="VOCdevkit/VOC2007_dest",
    classes=['bicycle', 'car', 'motorbike', 'bus'],
    new_classes={"car": "qiche", "motorbike": "motorcycle"},
)
100%|██████████| 9963/9963 [00:00<00:00, 10228.99it/s]
查看一下提取后的数据集
# Inspect the extracted dataset after renaming.
check(year="VOC2007_dest")
########################################################################################数据集VOC2007_dest检验结果如下:######################################################################################################
所有图片都有对应标注文件
所有标注文件都有对应图片
100%|██████████| 2601/2601 [00:00<00:00, 12146.91it/s]
数据集VOC2007_dest一共有2601张合格的标注图片,5277个标注框,平均每张图有2.03个标注框;一共有4个分类,分别是['bicycle', 'bus', 'motorcycle', 'qiche'];图片中标注框个数最少是1, 最多是15.图片高度最小值是96,最大值是500;图片宽度最小值是156,最大值是500; 图片面积最小值是43090,最大值是250000 ;图片高宽比最小值是0.19,图片高宽比最大值是3.21
cls_names images objects min_h_bbox max_h_bbox min_w_bbox max_w_bbox min_area_bbox max_area_bbox min_area_box/img max_area_box/img
0 bicycle 505 807 9 499 10 499 110 186626 2.4 99.8
1 bus 380 526 9 475 12 499 198 188325 3.2 99.7
2 motorcycle 482 759 18 498 7 499 182 198000 3.1 99.4
3 qiche 1536 3185 4 497 6 499 48 196560 1.6 99.7
5 VOC数据集可视化
将数据集画图后进行保存,可以进行查看,可视化主要是要将标注框画到图中,从而查看训练数据的标注情况。以下将提供两种画图方法,一种是对数据集整体画图,结果自动保存在数据集所在文件夹下的draw_results中;另一种是对单张图片进行查看。
import os
import xml.etree.ElementTree as ET
import shutil
from tqdm import tqdm
import cv2
def draw(source_dataset="VOCdevkit/VOC2007_dest"):
    """Draw the annotated boxes of every image and save the results.

    For each file in ``<source_dataset>/Annotations`` the matching image
    (``.jpg``, then ``.JPG``) is read from ``<source_dataset>/JPEGImages``,
    every box is drawn in red with its class name in green, and the picture
    is written to ``<source_dataset>/draw_results``. That folder is recreated
    on every call. Annotations whose image is missing, empty or unreadable
    are skipped.

    Args:
        source_dataset: folder of the dataset to visualise.
    """
    draw_path = os.path.join(source_dataset, "draw_results")
    if os.path.exists(draw_path):
        shutil.rmtree(draw_path)
    os.makedirs(draw_path)

    ann_filepath = os.path.join(source_dataset, 'Annotations')
    img_filepath = os.path.join(source_dataset, 'JPEGImages')
    for source_ann in tqdm(os.listdir(ann_filepath)):
        stem = os.path.splitext(source_ann)[0]
        source_img = os.path.join(img_filepath, stem + '.jpg')
        if not os.path.exists(source_img):
            source_img = os.path.join(img_filepath, stem + '.JPG')
        if not os.path.exists(source_img):
            continue
        save_img = os.path.join(draw_path, stem + '.jpg')
        # fromfile + imdecode instead of imread so non-ASCII paths work.
        raw = np.fromfile(source_img, dtype=np.uint8)
        if raw.size == 0:
            continue
        img = cv2.imdecode(raw, -1)
        if img is None or not img.any():
            continue
        root = ET.parse(os.path.join(ann_filepath, source_ann)).getroot()
        for obj in root.findall("object"):
            name = obj.find("name").text
            bndbox = obj.find('bndbox')
            # int(float(...)) also accepts coordinates written as "12.0".
            x1, y1, x2, y2 = (int(float(bndbox.find(tag).text)) for tag in ('xmin', 'ymin', 'xmax', 'ymax'))
            cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 2)
            # Colour and thickness are separate arguments of putText.
            cv2.putText(img, name, (max(x1, 15), max(y1, 15)), cv2.FONT_ITALIC, 1, (0, 255, 0), 2)
        cv2.imencode('.jpg', img)[1].tofile(save_img)
def draw_single_image(ann_path, img_path, save_path=None):
    """Draw the boxes of one annotation file onto one image.

    Args:
        ann_path: path of the annotation XML file.
        img_path: path of the image file.
        save_path: when not None, the drawn picture is written to this path
            as JPEG; otherwise it is shown with matplotlib.

    Raises:
        ValueError: if the image cannot be decoded or is completely black.
    """
    # fromfile + imdecode instead of imread so non-ASCII paths work.
    img = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), -1)
    if img is None or not img.any():
        # A bare string cannot be raised in Python 3; use a real exception.
        raise ValueError('有空图')
    root = ET.parse(ann_path).getroot()
    for obj in root.findall("object"):
        name = obj.find("name").text
        bndbox = obj.find('bndbox')
        # int(float(...)) also accepts coordinates written as "12.0".
        x1, y1, x2, y2 = (int(float(bndbox.find(tag).text)) for tag in ('xmin', 'ymin', 'xmax', 'ymax'))
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 2)
        # Colour and thickness are separate arguments of putText.
        cv2.putText(img, name, (max(x1, 15), max(y1, 15)), cv2.FONT_ITALIC, 1, (0, 255, 0), 2)
    if save_path is None:
        imgrgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        plt.figure(figsize=(20, 10))
        plt.imshow(imgrgb)
    else:
        cv2.imencode('.jpg', img)[1].tofile(save_path)
# Render every annotated image of the extracted dataset into draw_results.
draw("VOCdevkit/VOC2007_dest")
100%|██████████| 2601/2601 [00:12<00:00, 215.68it/s]
对于画好的多张图片,可以在本机直接查看;在jupyter中也可以对批量图片(最少一张)进行查看:
from IPython.display import clear_output, display, HTML
from PIL import Image
import matplotlib.pyplot as plt
import time
import cv2
import base64
import glob
# Timestamp of the previous processImg() call; 0 means "not called yet".
current_time = 0


def processImg(img):
    """Stamp the frame rate onto ``img`` and return it converted BGR -> RGB.

    The frame rate is derived from the time elapsed since the previous call,
    kept in the module-level ``current_time``. The very first call only
    records the time and draws nothing.
    """
    global current_time
    previous_time = current_time
    current_time = time.time()
    if previous_time != 0:
        fps = 1. / (current_time - previous_time)
        cv2.putText(img, "FPS: %d" % int(fps), (0,100), cv2.FONT_HERSHEY_TRIPLEX, 3.65, (255, 0, 0), 2)
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
def arrayShow(imageArray):
    """Return ``imageArray`` wrapped as a PIL image via ``Image.fromarray``."""
    pil_image = Image.fromarray(imageArray)
    return pil_image
# Play the drawn result images one after another inside the notebook.
# The loop iterates the collected image paths (there is no video source in
# this notebook) and stops cleanly on Ctrl-C / kernel interrupt.
img_paths = glob.glob('VOCdevkit/VOC2007_dest/draw_results/*.jpg')
small=1
try:
    for img_path in img_paths:
        # IMREAD_COLOR guarantees a 3-channel BGR frame for processImg().
        frame = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), cv2.IMREAD_COLOR)
        if frame is None:
            continue
        clear_output(wait=True)
        lines, columns, _ = frame.shape
        frame = processImg(frame)
        frame = cv2.resize(frame, (int(columns / small), int(lines / small)))
        img = arrayShow(frame)
        display(img)
        time.sleep(0.02)
except KeyboardInterrupt:
    pass
import os
import time
from tqdm import tqdm
from PIL import Image
import cv2
from IPython.display import clear_output, display, HTML
def show_images(images: list, small: int = 1, delay: float = 5) -> None:
    """Show a list of images one after another in a Jupyter notebook.

    Args:
        images: paths of the images to show.
        small: shrink factor; width and height are divided by it.
        delay: seconds to keep each image on screen (default 5).
    """
    previous_time = 0
    for img_path in tqdm(images):
        clear_output(wait=True)
        # IMREAD_COLOR always yields a 3-channel BGR image, so grayscale
        # files no longer break the shape handling or the colour conversion.
        img = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), cv2.IMREAD_COLOR)
        if img is None or not img.any():
            continue
        h, w = img.shape[:2]
        now = time.time()
        # Skip the FPS stamp on the first image and when no time has elapsed.
        if previous_time != 0 and now > previous_time:
            fps = 1. / (now - previous_time)
            text = "FPS: %d" % int(fps)
            cv2.putText(img, text, (0, 50), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 0, 0), 1)
        previous_time = now
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (int(w / small), int(h / small)))
        display(Image.fromarray(img))
        time.sleep(delay)
import glob
# Collect every drawn result picture and play them in the notebook.
result_pattern = 'VOCdevkit/VOC2007_dest/draw_results/*.jpg'
img_paths = glob.glob(result_pattern)
show_images(images=img_paths)
对于单张图可以直接可视化,也可以保存到文件中
# Annotation and image must belong to the same sample (000012), otherwise the
# boxes of one picture are drawn onto another.
draw_single_image('VOCdevkit/VOC2007_dest/Annotations/000012.xml','VOCdevkit/VOC2007_dest/JPEGImages/000012.jpg')
如果是保存成图片,想要显示在jupyter notebook中还可以这样
%%html
<img src="VOCdevkit/VOC2007_dest/JPEGImages/000012.jpg" width="400" height="200">
|