开发: C++知识库 Java知识库 JavaScript Python PHP知识库人工智能区块链大数据移动开发嵌入式开发工具数据结构与算法开发测试游戏开发网络协议系统运维
教程: HTML教程 CSS教程 JavaScript教程 Go语言教程 JQuery教程 VUE教程 VUE3教程 Bootstrap教程 SQL数据库教程 C语言教程 C++教程 Java教程 Python教程 Python3教程 C#教程
数码: 电脑笔记本显卡显示器固态硬盘硬盘耳机手机 iphone vivo oppo 小米华为单反装机图拉丁

-> 人工智能 -> 从目标检测数据集中扣出所需类别进行分类 -> 正文阅读

[人工智能]从目标检测数据集中扣出所需类别进行分类

做分类模型时，除了ImageNet这类分类数据集，还可以把常见的目标检测数据集利用起来：把所需类别的目标框裁剪出来，再用来做分类。常用的检测数据集有VOC、COCO、openimage、object365等，对应的标注格式也是常见的几种：VOC的xml保存左上、右下两个点的坐标；COCO的json保存左上角坐标和宽高；yolo数据集保存中心点和宽高（xywh）的相对值；openimage的csv保存左上、右下两个点的相对值；object365的格式（待补充）。
如下从这几个数据集中获取这几种车辆。

1、获取VOC数据集中两轮车

import glob
import os
import shutil
import xml.etree.ElementTree as ET

import tqdm
from PIL import Image

# Crop the wanted object classes out of Pascal VOC and store every crop under
# <savepath>/<class name>/ so the crops can be used as classification data.
#
# VOC has 20 classes: 'aeroplane', 'diningtable', 'sofa', 'bus', 'car', 'train', 'bicycle', 'horse', 'cow',
# 'bird', 'sheep', 'chair',  'motorbike', 'pottedplant', 'tvmonitor', 'bottle', 'dog', 'boat', 'person', 'cat'
need_names = ["bicycle","motorbike","bottle","chair","tvmonitor","pottedplant"]  # classes to extract
imgpath =  "collect/VOC/VOCdevkit/VOC2007/JPEGImages"  # folder with the images
annpath =  "collect/VOC/VOCdevkit/VOC2007/Annotations/"  # folder with the xml annotation files
savepath = "collect/VOC"  # crops are written to one sub-folder per class below this folder
name2dir = {i: os.path.join(savepath, i) for i in need_names}

# Start from empty output folders so crops of an earlier run are not mixed in.
for need_name in need_names:
    savedir = name2dir[need_name]
    if os.path.exists(savedir):
        shutil.rmtree(savedir)
    os.makedirs(savedir)

imgs = [os.path.join(imgpath, i) for i in os.listdir(imgpath)]
anns = [os.path.join(annpath, i) for i in os.listdir(annpath)]
imgnames = [os.path.splitext(os.path.basename(i))[0] for i in imgs]
# base name -> file name with extension
imgs2name = {os.path.splitext(os.path.basename(i))[0]: os.path.basename(i) for i in imgs}
annnames = [os.path.splitext(os.path.basename(i))[0] for i in anns]

# Only samples that have both an image and an annotation file are processed.
comname = list(set(imgnames) & set(annnames))

for name in tqdm.tqdm(comname):
    imgdir = os.path.join(imgpath, imgs2name[name])
    anndir = os.path.join(annpath, name + '.xml')
    # Passing the path lets ElementTree open and close the file itself.
    root = ET.parse(anndir).getroot()
    size = root.find('size')
    img_w = int(size.find('width').text)
    img_h = int(size.find('height').text)
    if img_w < 50 or img_h < 50:
        continue

    # Collect the usable boxes first so the image is opened at most once.
    wanted_boxes = []
    for obj in root.iter('object'):
        cls_name = obj.find('name').text.lower()
        if cls_name not in name2dir:
            continue
        box = obj.find('bndbox')
        xmin = int(box.find('xmin').text)
        ymin = int(box.find('ymin').text)
        xmax = int(box.find('xmax').text)
        ymax = int(box.find('ymax').text)
        # Skip boxes narrower or lower than 30 px.
        if xmax - xmin < 30 or ymax - ymin < 30:
            continue
        wanted_boxes.append((cls_name, (xmin, ymin, xmax, ymax)))
    if not wanted_boxes:
        continue

    with Image.open(imgdir) as im:
        # i numbers the saved crops of this image: <name>_<i>.jpg
        for i, (cls_name, box) in enumerate(wanted_boxes):
            imcrop = im.crop(box)
            savename = name + '_' + str(i) + '.jpg'
            imcrop.save(os.path.join(name2dir[cls_name], savename), quality=100)

for name, namedir in name2dir.items():
    print(f"{name} img num:{len(os.listdir(namedir))}")

100%|██████████| 9963/9963 [00:16<00:00, 591.14it/s]

bicycle img num:742
motorbike img num:715
bottle img num:705
chair img num:2477
tvmonitor img num:669
pottedplant img num:1030

2 、接着做COCO数据集的分类数据获取

import numpy as np
import tqdm
from PIL import Image
from pycocotools.coco import COCO
#以下是80个分类
'''
person
bicycle
car
motorcycle
airplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
couch
potted plant
bed
dining table
toilet
tv
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
'''
import os
import shutil

# Crop the wanted COCO categories out of the val2017 images and store every
# crop under <savepath>/<category name>/.
annpath = "collect/COCO/annotations/instances_val2017.json"
imgpath = "collect/COCO/val2017/"
savepath = "collect/COCO"
need_names=["bicycle",'motorcycle']

# Start from empty output folders so crops of an earlier run are not mixed in.
for need_name in need_names:
    savedir = os.path.join(savepath,need_name)
    if os.path.exists(savedir):
        shutil.rmtree(savedir)
    os.makedirs(savedir)

data_source = COCO(annotation_file=annpath)
catIds = data_source.getCatIds()   # category ids
categories = data_source.loadCats(catIds)
categories.sort(key=lambda x: x['id'])

# Lookup tables between category name, COCO category id and a contiguous
# index (position in the id-sorted category list).
classes2Id = {}
classes2catId = {}
catId2classes = {}
catId2Id={}
Id2catId ={}
for i,cat in enumerate(categories):
    Id2catId[i]=cat['id']
    catId2Id[cat['id']]=i
    classes2Id[cat['name']]=i
    classes2catId[cat['name']]=cat['id']
    catId2classes[cat['id']]=cat['name']

img_ids = data_source.getImgIds()
print("num images:",len(img_ids))
# Iterating the list itself lets tqdm know the total and draw a progress bar.
for img_id in tqdm.tqdm(img_ids, desc='crop needed classes'):
    img_info = data_source.loadImgs(img_id)[0]
    img_name = img_info['file_name']
    img_path = os.path.join(imgpath,img_name)
    file_name = os.path.splitext(img_name)[0]
    height = img_info['height']
    width = img_info['width']
    if width<80 or height<80:
        continue
    annotation_id = data_source.getAnnIds(img_id)
    if len(annotation_id) == 0:
        continue

    # Collect the usable boxes first so the image is opened at most once.
    wanted_boxes = []
    for annotation in data_source.loadAnns(annotation_id):
        cls_name = catId2classes[annotation['category_id']]
        if cls_name not in need_names:
            continue
        box = annotation['bbox']
        # some annotations have basically no width / height, skip them
        if box[2] < 1 or box[3] < 1:
            continue
        # top_x,top_y,width,height ----> xmin,ymin,xmax,ymax, clipped to the image
        xmin = max(0,box[0])
        ymin = max(0,box[1])
        xmax = min(box[0]+box[2],width)
        ymax = min(box[1]+box[3],height)
        # Skip boxes narrower or lower than 30 px.
        if xmax-xmin<30 or ymax-ymin<30:
            continue
        wanted_boxes.append((cls_name,(xmin,ymin,xmax,ymax)))
    if not wanted_boxes:
        continue

    with Image.open(img_path) as im:
        # i numbers the saved crops of this image: <file_name>_<i>.jpg
        for i,(cls_name,box) in enumerate(wanted_boxes):
            crop = im.crop(box)
            save_path = os.path.join(savepath,cls_name,file_name+'_'+str(i)+'.jpg')
            crop.save(save_path,quality=100)

for need_name in need_names:
    print(f"{need_name} img num:{len(os.listdir(os.path.join(savepath,need_name)))}")

print('finish')

loading annotations into memory...
Done (t=0.38s)
creating index...
index created!
num images: 5000


crop needed classes: 5000it [00:02, 2112.55it/s]

bicycle img num:196
motorcycle img num:300
finish

3、YOLO 格式数据

以下的代码基本没做修改，是实际工作中使用的版本：同时处理了两个yolo文件夹，并使用了多进程。图片与标签文件的文件名对应关系是用字典处理的，没有使用glob，因为这样处理速度更快。

import os
import tqdm
from glob import glob
from multiprocessing import Process
import numpy as np
import shutil
import cv2

# Alternative input folders:
# imgpaths = ['ebike/electric_bicycle/images_all',"ebike/luping_buchong/images"]
# annpaths = ['ebike/electric_bicycle/labels_all',"ebike/luping_buchong/labels"]
imgpaths = ['ebike/electric_bicycle/images',"ebike/luping_buchong/images"]
annpaths = ['ebike/electric_bicycle/labels',"ebike/luping_buchong/labels"]

all_imgs=[]
all_labels=[]

# Pair every image with the label file that shares its base name.
for imgpath, annpath in zip(imgpaths, annpaths):
    # base name -> original extension, built from a single directory listing
    # so names and extensions cannot get out of step.
    name_add_fix = dict(os.path.splitext(name) for name in os.listdir(imgpath))
    annfiles = {os.path.splitext(name)[0] for name in os.listdir(annpath)}
    comfiles = [stem for stem in name_add_fix if stem in annfiles]
    for comfile in tqdm.tqdm(comfiles):
        all_imgs.append(os.path.join(imgpath, comfile + name_add_fix[comfile]))
        all_labels.append(os.path.join(annpath, comfile + '.txt'))
print("img num",len(all_imgs))
print("ann num",len(all_labels))

# The class list is fixed here. The original code also read classes.txt but
# overwrote the result straight away, so that read only added a file
# dependency and has been dropped.
classes=['ebike']
cls_path = 'ebike/ebike_classification_need'
# Recreate the output tree from scratch; exist_ok=False makes a leftover
# folder an error instead of silently mixing runs.
if os.path.exists(cls_path):
    shutil.rmtree(cls_path)
for class_name in classes:
    class_path = os.path.join(cls_path,class_name)
    os.makedirs(class_path,exist_ok=False)

# Label id (as a string) -> class name.
id2class = {str(class_id): class_name for class_id, class_name in enumerate(classes)}
print(id2class)

def crop_img(img_paths,label_paths,id2class,cls_path):
    """Crop the class-0 boxes of YOLO-format labels and save them as images.

    Args:
        img_paths: image file paths.
        label_paths: label file paths, aligned one-to-one with ``img_paths``.
            Each line is read as ``<label id> <x> <y> <w> <h>``; the
            coordinates are multiplied by the image size, with (x, y) used
            as the box centre.
        id2class: mapping from label id (as a string) to class name.
        cls_path: output root; crops go to ``<cls_path>/<class name>/``.

    Only label id 0 is cropped, and only boxes whose width/height ratio lies
    strictly between 0.2 and 5. Crops are named ``<image name>_<line index>``
    plus the source image's suffix.
    NOTE: the data is always JPEG-encoded, even when the suffix is not .jpg.
    """
    for img_path,label_path in tqdm.tqdm(zip(img_paths,label_paths),total=len(img_paths)):
        img_name,img_suffix = os.path.splitext(os.path.split(img_path)[-1])
        with open(label_path,'r') as f:
            lines = f.read().strip().splitlines()

        img = None  # decoded lazily, at most once per image
        for i,line in enumerate(lines):
            fields = line.split()
            if len(fields) < 5:
                # blank or truncated label line
                continue
            labelid,x,y,w,h = map(float,fields[:5])
            if int(labelid) != 0:
                continue
            label = id2class[str(int(labelid))]

            if img is None:
                # fromfile + imdecode instead of cv2.imread, as in the original code
                img = cv2.imdecode(np.fromfile(img_path,np.uint8),1)
                if img is None:
                    # unreadable image: none of its boxes can be cropped
                    break
            imgh,imgw = img.shape[:2]

            x,w = x*imgw,w*imgw
            y,h = y*imgh,h*imgh
            if h <= 0:
                # avoids a division by zero in the ratio below
                continue
            if not 0.2 < w/h < 5:
                continue

            xmin = max(int(x-w/2),0)
            ymin = max(int(y-h/2),0)
            xmax = min(int(x+w/2),int(imgw))
            ymax = min(int(y+h/2),int(imgh))
            if xmax <= xmin or ymax <= ymin:
                # the box collapsed to nothing after clipping
                continue

            imgpart = img[ymin:ymax,xmin:xmax,:]
            imgpath = os.path.join(cls_path,label,img_name+f"_{i}"+img_suffix)
            try:
                cv2.imencode('.jpg',imgpart)[1].tofile(imgpath)
            except (cv2.error,OSError) as err:
                print("non img",imgpath,err)

# Split the image/label pairs into `num` contiguous slices and crop each
# slice in its own process.
# The guard keeps child processes from starting workers again when the
# "spawn" start method re-imports this module.
# NOTE(review): under "spawn" the module-level setup above (including the
# rmtree of the output folder) is still re-executed in every child; move it
# under this guard as well if the script has to run on such platforms.
if __name__ == '__main__':
    num = 40
    parts = np.linspace(0,len(all_imgs),num+1).astype(np.int32)
    processes=[]
    for start,stop in zip(parts[:-1],parts[1:]):
        if start == stop:
            # fewer images than workers: nothing to do for this slice
            continue
        p = Process(target=crop_img,
                    args=(all_imgs[start:stop],all_labels[start:stop],id2class,cls_path))
        p.start()
        processes.append(p)
    for pp in processes:
        pp.join()

4、openimage数据获取

import pandas as pd
from PIL import Image
import os
import tqdm
import shutil
import numpy as np

从数据集中获取我们需要的分类，只是为了对我们自己的数据做扩充。openimage中有些图是没有标签的，所以我们统计出来的image_id数量会少于真实图片数量。

获取标签

# Load the label-id -> display-name table; the column names are supplied
# explicitly via `names`.
classes = pd.read_csv('openimage/labels/metadate/class-descriptions-boxable.csv',names=['labelname','displayname'])

# Peek at the first rows.
classes.head()

	labelname	displayname
0	/m/011k07	Tortoise
1	/m/011q46kg	Container
2	/m/012074	Magpie
3	/m/0120dh	Sea turtle
4	/m/01226z	Football

# Write every display name to names.txt, one per line, so the available
# classes can be looked through by hand.
names = classes['displayname'].tolist()
temp = [display + '\n' for display in names]
with open('names.txt', 'w') as names_file:
    names_file.write(''.join(temp))

根据displayname 获取 labelname 并指定我们想要的类别

可以查看names.txt，有我们想要的分类，如bicycle或含这个词的类别


# Keep every display name that contains "cycle", ignoring case.
need_names = list(filter(lambda display: 'cycle' in display.lower(), names))
print(need_names)

['Bicycle', 'Bicycle wheel', 'Stationary bicycle', 'Bicycle helmet', 'Motorcycle', 'Unicycle']

# For each wanted display name, take the first matching row of `classes`
# as a [labelname, displayname] list.
need_labelnames = [
    classes[classes['displayname'] == wanted].iloc[0, :].tolist()
    for wanted in need_names
]
print(need_labelnames)

[['/m/0199g', 'Bicycle'], ['/m/01bqk0', 'Bicycle wheel'], ['/m/03kt2w', 'Stationary bicycle'], ['/m/03p3bw', 'Bicycle helmet'], ['/m/04_sv', 'Motorcycle'], ['/m/0f6nr', 'Unicycle']]

# Label id -> display name with spaces replaced by underscores
# (the display name is used as the folder name).
labelname2displayname = {
    labelname: displayname.replace(" ", "_")
    for labelname, displayname in need_labelnames
}
print(labelname2displayname)

{'/m/0199g': 'Bicycle', '/m/01bqk0': 'Bicycle_wheel', '/m/03kt2w': 'Stationary_bicycle', '/m/03p3bw': 'Bicycle_helmet', '/m/04_sv': 'Motorcycle', '/m/0f6nr': 'Unicycle'}

根据标签名找到对应的图片名称

测试集

先获取小的数据集，从测试集到验证集最后再做训练集


# Box annotations of the test split.
annfile = 'openimage/labels/detection/test-annotations-bbox.csv'

anns = pd.read_csv(annfile)

# Peek at the first rows.
anns.head()

	ImageID	Source	LabelName	Confidence	XMin	XMax	YMin	YMax	IsOccluded	IsTruncated	IsGroupOf
0	000026e7ee790996	xclick	/m/07j7r	1	0.071875	0.145313	0.206250	0.391667	0	1	1
1	000026e7ee790996	xclick	/m/07j7r	1	0.439063	0.571875	0.264583	0.435417	0	1	1
2	000026e7ee790996	xclick	/m/07j7r	1	0.668750	1.000000	0.000000	0.552083	0	1	1
3	000062a39995e348	xclick	/m/015p6	1	0.206208	0.849224	0.154639	1.000000	0	0	0
4	000062a39995e348	xclick	/m/05s2s	1	0.137472	0.376940	0.000000	0.883652	1	1	0

# Number of box annotations and number of distinct images they refer to.
total_num = len(anns)
print("total ann num:", total_num)
total_numimg = len(set(anns['ImageID']))
print("total img num:", total_numimg)

total ann num: 937327
total img num: 112194

# Keep only the annotations whose label id is one of the wanted classes.
new_need_labelnames = [i[0] for i in need_labelnames]
# One vectorised membership test instead of OR-ing one comparison per label.
old_a = anns['LabelName'].isin(new_need_labelnames).to_numpy()
print("total num :",np.sum(old_a))
need_anns = anns[old_a]

total num : 4757

# Peek at the first rows of the filtered annotations.
need_anns.head()

	ImageID	Source	LabelName	Confidence	XMin	XMax	YMin	YMax	IsOccluded
323	0013a0927e6bbefc	xclick	/m/04_sv	1	0.192188	0.600000	0.306250	0.629167	0
522	001d1da154d00e83	xclick	/m/0199g	1	0.199413	0.439883	0.681063	0.953488	1
523	001d1da154d00e83	xclick	/m/0199g	1	0.425220	0.492669	0.807309	0.880399	1
524	001d1da154d00e83	xclick	/m/0199g	1	0.548387	0.780059	0.677741	0.943522	1
1177	003c2b6816ba9d22	xclick	/m/03p3bw	1	0.336780	0.497784	0.143488	0.348786	0

# Crop every selected test-split box into openimage/test_imgs/<Display_name>/
# and record which images were used in openimage/test_images.txt.
for i in need_names:
    save_path = os.path.join('openimage/test_imgs',i.replace(" ","_"))
    if os.path.exists(save_path):
        shutil.rmtree(save_path)
    os.makedirs(save_path)

all_imageids = []
box_columns = ['ImageID','LabelName','XMin','XMax','YMin','YMax']
rows = need_anns[box_columns].itertuples(index=False,name=None)
# i is the row position inside need_anns and keeps the crop file names unique.
for i,(image_id,labelname,x0,x1,y0,y1) in enumerate(tqdm.tqdm(rows,total=need_anns.shape[0])):
    # Always "<split>/<image id>" with a forward slash, which is the format
    # the image-list file uses, whatever the OS path separator is.
    image_dir = 'test/'+image_id
    all_imageids.append(image_dir)
    displayname = labelname2displayname[labelname]
    try:
        with Image.open(os.path.join('openimage',image_dir+'.jpg')) as img:
            w,h = img.size
            # The box columns are relative coordinates; scale to pixels.
            xmin,xmax = int(w*x0),int(w*x1)
            ymin,ymax = int(h*y0),int(h*y1)
            if xmax <= xmin or ymax <= ymin:
                # an empty crop cannot be written as JPEG
                continue
            crop = img.crop((xmin,ymin,xmax,ymax))
            save_name = image_id+"_"+str(i)+'.jpg'
            save_path = os.path.join('openimage/test_imgs',displayname,save_name)
            crop.save(save_path,quality=100)
    except Exception as e:
        # Best effort: report the image that failed and carry on.
        print("wrong image id:",image_id,e)
        continue
for i in need_names:
    name = i.replace(" ","_")
    save_path = os.path.join('openimage/test_imgs',name)
    print(f"{name} image num:{len(os.listdir(save_path))}")
# De-duplicated and sorted so the list file is the same on every run.
new_all_imageids = [i+'\n' for i in sorted(set(all_imageids))]

img_names=os.path.join('openimage','test_images.txt')
# Mode 'w' truncates an existing file, so no separate delete is needed.
with open(img_names,'w') as f:
    f.writelines(new_all_imageids)

100%|██████████| 4757/4757 [00:45<00:00, 103.85it/s]

Bicycle image num:1203
Bicycle_wheel image num:2314
Stationary_bicycle image num:47
Bicycle_helmet image num:492
Motorcycle image num:683
Unicycle image num:18

验证集

以上就完成了openimage test数据集的获取。同样的道理，val和train也用同样的方式处理即可，如下是val：

# Box annotations of the validation split.
annfile = 'openimage/labels/detection/validation-annotations-bbox.csv'

anns = pd.read_csv(annfile)
# Peek at the first rows.
anns.head()

	ImageID	Source	LabelName	Confidence	XMin	XMax	YMin	YMax	IsOccluded	IsGroupOf
0	0001eeaf4aed83f9	xclick	/m/0cmf2	1	0.022673	0.964201	0.071038	0.800546	0	0
1	000595fe6fee6369	xclick	/m/02wbm	1	0.000000	1.000000	0.000000	1.000000	0	1
2	000595fe6fee6369	xclick	/m/02xwb	1	0.141384	0.179676	0.676275	0.731707	0	0
3	000595fe6fee6369	xclick	/m/02xwb	1	0.213549	0.253314	0.299335	0.354767	1	0
4	000595fe6fee6369	xclick	/m/02xwb	1	0.232695	0.288660	0.490022	0.545455	1	0

# Number of box annotations and number of distinct images they refer to.
total_num = len(anns)
print("total ann num:", total_num)
total_numimg = len(set(anns['ImageID']))
print("total img num:", total_numimg)

total ann num: 303980
total img num: 37306

# Keep only the annotations whose label id is one of the wanted classes.
new_need_labelnames = [i[0] for i in need_labelnames]
# One vectorised membership test instead of OR-ing one comparison per label.
old_a = anns['LabelName'].isin(new_need_labelnames).to_numpy()
print("total num :",np.sum(old_a))
need_anns = anns[old_a]

total num : 1629

# Number of selected annotations and number of distinct images they refer to.
total_num = len(need_anns)
print("total ann num:", total_num)
total_numimg = len(set(need_anns['ImageID']))
print("total img num:", total_numimg)

total ann num: 1629
total img num: 445

# Peek at the first rows of the filtered annotations.
need_anns.head()

	ImageID	Source	LabelName	Confidence	XMin	XMax	YMin	YMax	IsOccluded	IsTruncated
226	001a995c1e25d892	xclick	/m/04_sv	1	0.101562	0.876563	0.114583	0.945833	0	0
433	00575b9132bb3746	xclick	/m/03p3bw	1	0.370206	0.513274	0.044248	0.307522	1	0
434	00575b9132bb3746	xclick	/m/0199g	1	0.000000	0.508850	0.000000	0.681416	0	1
457	00575b9132bb3746	xclick	/m/01bqk0	1	0.000000	0.194690	0.183628	0.676991	1	1
458	00575b9132bb3746	xclick	/m/01bqk0	1	0.129794	0.300885	0.539823	0.960177	1	0

# Crop every selected validation-split box into
# openimage/validation_imgs/<Display_name>/ and record which images were
# used in openimage/validation_images.txt.
for i in need_names:
    save_path = os.path.join('openimage/validation_imgs',i.replace(" ","_"))
    if os.path.exists(save_path):
        shutil.rmtree(save_path)
    os.makedirs(save_path)

all_imageids = []
box_columns = ['ImageID','LabelName','XMin','XMax','YMin','YMax']
rows = need_anns[box_columns].itertuples(index=False,name=None)
# i is the row position inside need_anns and keeps the crop file names unique.
for i,(image_id,labelname,x0,x1,y0,y1) in enumerate(tqdm.tqdm(rows,total=need_anns.shape[0])):
    # Always "<split>/<image id>" with a forward slash, which is the format
    # the image-list file uses, whatever the OS path separator is.
    image_dir = 'validation/'+image_id
    all_imageids.append(image_dir)
    displayname = labelname2displayname[labelname]
    try:
        with Image.open(os.path.join('openimage',image_dir+'.jpg')) as img:
            w,h = img.size
            # The box columns are relative coordinates; scale to pixels.
            xmin,xmax = int(w*x0),int(w*x1)
            ymin,ymax = int(h*y0),int(h*y1)
            if xmax <= xmin or ymax <= ymin:
                # an empty crop cannot be written as JPEG
                continue
            crop = img.crop((xmin,ymin,xmax,ymax))
            save_name = image_id+"_"+str(i)+'.jpg'
            save_path = os.path.join('openimage/validation_imgs',displayname,save_name)
            crop.save(save_path,quality=100)
    except Exception as e:
        # Best effort: report the image that failed and carry on.
        print("wrong image id:",image_id,e)
        continue
for i in need_names:
    name = i.replace(" ","_")
    save_path = os.path.join('openimage/validation_imgs',name)
    print(f"{name} image num:{len(os.listdir(save_path))}")
# De-duplicated and sorted so the list file is the same on every run.
new_all_imageids = [i+'\n' for i in sorted(set(all_imageids))]

img_names=os.path.join('openimage','validation_images.txt')
# Mode 'w' truncates an existing file, so no separate delete is needed.
with open(img_names,'w') as f:
    f.writelines(new_all_imageids)

100%|██████████| 1629/1629 [00:27<00:00, 59.82it/s] 

Bicycle image num:418
Bicycle_wheel image num:780
Stationary_bicycle image num:10
Bicycle_helmet image num:187
Motorcycle image num:232
Unicycle image num:2

从上边两个数据集也可以看出，我们从大量的图片中只取出了少量需要的图片，这个过程很慢，而且前提是我们已经把所有图片都下载了下来，大概有570G左右（下载了好几天），很费力气。事实上可以只下载标签，然后生成我们需要的image id，如上边保存的test_images.txt和validation_images.txt这两个文件，就可以配合官网提供的工具，只下载我们需要的那部分图片。因为训练集特别大，所以采用这种方法；事实上所有数据集都推荐用这种方法。这部分工作可以参考 https://www.jianshu.com/p/40b58833af22

训练集

# This takes a long time, be patient.
annfile = 'openimage/labels/detection/oidv6-train-annotations-bbox.csv'

anns = pd.read_csv(annfile)
# Peek at the first rows.
anns.head()

	ImageID	Source	LabelName	Confidence	XMin	XMax	YMin	YMax	IsOccluded	IsTruncated	...	XClick1X	XClick2X	XClick3X	XClick4X	XClick1Y	XClick2Y	XClick3Y	XClick4Y
0	000002b66c9c498e	xclick	/m/01g317	1	0.012500	0.195312	0.148438	0.587500	0	1	...	0.148438	0.012500	0.059375	0.195312	0.148438	0.357812	0.587500	0.325000
1	000002b66c9c498e	xclick	/m/01g317	1	0.025000	0.276563	0.714063	0.948438	0	1	...	0.025000	0.248438	0.276563	0.214062	0.914062	0.714063	0.782813	0.948438
2	000002b66c9c498e	xclick	/m/01g317	1	0.151562	0.310937	0.198437	0.590625	1	0	...	0.243750	0.151562	0.310937	0.262500	0.198437	0.434375	0.507812	0.590625
3	000002b66c9c498e	xclick	/m/01g317	1	0.256250	0.429688	0.651563	0.925000	1	0	...	0.315625	0.429688	0.256250	0.423438	0.651563	0.921875	0.826562	0.925000
4	000002b66c9c498e	xclick	/m/01g317	1	0.257812	0.346875	0.235938	0.385938	1	0	...	0.317188	0.257812	0.346875	0.307812	0.235938	0.289062	0.348438	0.385938

5 rows × 21 columns

# Number of box annotations and number of distinct images they refer to.
total_num = len(anns)
print("total ann num:", total_num)
total_numimg = len(set(anns['ImageID']))
print("total img num:", total_numimg)

total ann num: 14610229
total img num: 1743042

# Keep only the annotations whose label id is one of the wanted classes.
new_need_labelnames = [i[0] for i in need_labelnames]
# One vectorised membership test instead of OR-ing one comparison per label.
old_a = anns['LabelName'].isin(new_need_labelnames).to_numpy()
print("total num :",np.sum(old_a))
need_anns = anns[old_a]

total num : 129548

# Number of selected annotations and number of distinct images they refer to.
total_num = len(need_anns)
print("total ann num:", total_num)
total_numimg = len(set(need_anns['ImageID']))
print("total img num:", total_numimg)

total ann num: 129548
total img num: 26694

# Peek at the first rows of the filtered annotations.
need_anns.head()

	ImageID	Source	LabelName	Confidence	XMin	XMax	YMin	YMax	IsOccluded	IsTruncated	...	XClick1X	XClick2X	XClick3X	XClick4X	XClick1Y	XClick2Y	XClick3Y	XClick4Y
158	00002f4ff380c64c	xclick	/m/0199g	1	0.000000	0.155556	0.654867	0.876106	0	1	...	0.106667	0.000000	0.001481	0.155556	0.876106	0.803097	0.654867	0.796460
159	00002f4ff380c64c	xclick	/m/0199g	1	0.168889	0.402963	0.648230	0.873894	0	0	...	0.268148	0.168889	0.208889	0.402963	0.648230	0.803097	0.873894	0.809735
160	00002f4ff380c64c	xclick	/m/0199g	1	0.414815	0.659259	0.654867	0.887168	0	0	...	0.608889	0.414815	0.494815	0.659259	0.887168	0.796460	0.654867	0.803097
161	00002f4ff380c64c	xclick	/m/01bqk0	1	0.060741	0.151111	0.736726	0.873894	1	0	...	0.103704	0.060741	0.108148	0.151111	0.736726	0.809735	0.873894	0.805310
162	00002f4ff380c64c	xclick	/m/01bqk0	1	0.165926	0.260741	0.743363	0.878319	1	0	...	0.210370	0.165926	0.216296	0.260741	0.743363	0.816372	0.878319	0.816372

5 rows × 21 columns

# Prepare the per-class output folders and write the list of training images
# that contain a wanted class (openimage/train_images.txt). No cropping
# happens here.
for i in need_names:
    save_path = os.path.join('openimage/train_imgs',i.replace(" ","_"))
    if os.path.exists(save_path):
        shutil.rmtree(save_path)
    os.makedirs(save_path)

# One "train/<image id>" entry per annotation row. The forward slash is
# written explicitly so the entries keep the "<split>/<image id>" form on
# every OS.
all_imageids = ['train/'+image_id for image_id in need_anns['ImageID']]
# De-duplicated and sorted so the list file is the same on every run.
new_all_imageids = [i+'\n' for i in sorted(set(all_imageids))]

img_names=os.path.join('openimage','train_images.txt')
# Mode 'w' truncates an existing file, so no separate delete is needed.
with open(img_names,'w') as f:
    f.writelines(new_all_imageids)

100%|██████████| 129548/129548 [00:03<00:00, 34663.79it/s]

# Number of distinct training images in the list.
len(new_all_imageids)

下载方式命令是：

python downloader.py train_images.txt --download_folder=train --num_processes=5

下载好的图片将保存在download_folder指定的文件夹下；num_processes越大，下载用的时间越少。
下载downloader.py需要科学上网，所以这里直接贴出downloader.py的内容（20220920，对应v6版本）：

# python3
# coding=utf-8
# Copyright 2020 The Google Research Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Open Images image downloader.

This script downloads a subset of Open Images images, given a list of image ids.
Typical uses of this tool might be downloading images:
- That contain a certain category.
- That have been annotated with certain types of annotations (e.g. Localized
Narratives, Exhaustively annotated people, etc.)

The input file IMAGE_LIST should be a text file containing one image per line
with the format <SPLIT>/<IMAGE_ID>, where <SPLIT> is either "train", "test",
"validation", or "challenge2018"; and <IMAGE_ID> is the image ID that uniquely
identifies the image in Open Images. A sample file could be:
  train/f9e0434389a1d4dd
  train/1a007563ebc18664
  test/ea8bfd4e765304db

"""

import argparse
from concurrent import futures
import os
import re
import sys

import boto3
import botocore
import tqdm

BUCKET_NAME = 'open-images-dataset'
REGEX = r'(test|train|validation|challenge2018)/([a-fA-F0-9]*)'


def check_and_homogenize_one_image(image):
  """Validates one `<SPLIT>/<IMAGE_ID>` string and yields its two parts.

  Args:
    image: Image string such as "train/f9e0434389a1d4dd".

  Yields:
    A single (split, image_id) tuple.

  Raises:
    ValueError: If the whole string does not have the expected form or the
      image id is empty. Because this is a generator, the error is raised
      when it is iterated, not when it is called.
  """
  # fullmatch rejects trailing junk that a prefix match would silently drop.
  match = re.fullmatch(REGEX, image)
  if match is None or not match.group(2):
    raise ValueError(f'Unrecognized image string: "{image}"')
  yield match.group(1), match.group(2)


def check_and_homogenize_image_list(image_list):
  """Validates every entry of an image list.

  Args:
    image_list: Iterable of `<SPLIT>/<IMAGE_ID>` strings.

  Yields:
    Whatever check_and_homogenize_one_image yields for each entry.

  Raises:
    ValueError: Naming the 0-based position of the first entry that could
      not be recognized.
  """
  for position, entry in enumerate(image_list):
    try:
      for parsed in check_and_homogenize_one_image(entry):
        yield parsed
    except (ValueError, AttributeError):
      raise ValueError(
          f'ERROR in line {position} of the image list. The following image '
          f'string is not recognized: "{entry}".')


def read_image_list_file(image_list_file):
  """Yields the lines of a text file, stripped and with '.jpg' removed.

  Args:
    image_list_file: Path of the text file to read.

  Yields:
    One string per line of the file.
  """
  with open(image_list_file, 'r') as list_file:
    yield from (raw.strip().replace('.jpg', '') for raw in list_file)


def download_one_image(bucket, split, image_id, download_folder):
  """Downloads `<split>/<image_id>.jpg` to `<download_folder>/<image_id>.jpg`.

  Args:
    bucket: Object whose download_file(key, filename) method is called.
    split: First part of the object key.
    image_id: Image id; used in both the key and the local file name.
    download_folder: Folder the file is written to.

  A botocore ClientError ends the program through sys.exit with a message
  naming the image.
  """
  object_key = f'{split}/{image_id}.jpg'
  local_path = os.path.join(download_folder, f'{image_id}.jpg')
  try:
    bucket.download_file(object_key, local_path)
  except botocore.exceptions.ClientError as exception:
    sys.exit(
        f'ERROR when downloading image `{split}/{image_id}`: {str(exception)}')


def download_all_images(args):
  """Downloads all images specified in the input file.

  Args:
    args: Mapping with the keys 'image_list' (path of the list file),
      'download_folder' (target folder; the current working directory is
      used when it is falsy) and 'num_processes' (size of the thread pool).
  """
  # Unsigned requests: no credentials are attached.
  unsigned = botocore.config.Config(signature_version=botocore.UNSIGNED)
  bucket = boto3.resource('s3', config=unsigned).Bucket(BUCKET_NAME)

  download_folder = args['download_folder'] or os.getcwd()
  if not os.path.exists(download_folder):
    os.makedirs(download_folder)

  # Validate the whole list before starting any download.
  try:
    raw_entries = read_image_list_file(args['image_list'])
    image_list = list(check_and_homogenize_image_list(raw_entries))
  except ValueError as exception:
    sys.exit(exception)

  progress_bar = tqdm.tqdm(
      total=len(image_list), desc='Downloading images', leave=True)
  with futures.ThreadPoolExecutor(
      max_workers=args['num_processes']) as executor:
    pending = [
        executor.submit(download_one_image, bucket, split, image_id,
                        download_folder) for split, image_id in image_list
    ]
    for finished in futures.as_completed(pending):
      # result() re-raises whatever the worker raised.
      finished.result()
      progress_bar.update(1)
  progress_bar.close()


if __name__ == '__main__':
  # Command line: <image_list> [--num_processes N] [--download_folder DIR]
  parser = argparse.ArgumentParser(
      description=__doc__,
      formatter_class=argparse.RawDescriptionHelpFormatter)
  parser.add_argument(
      'image_list',
      type=str,
      default=None,
      help=('Filename that contains the split + image IDs of the images to '
            'download. Check the document'))
  parser.add_argument(
      '--num_processes',
      type=int,
      default=5,
      help='Number of parallel processes to use (default is 5).')
  parser.add_argument(
      '--download_folder',
      type=str,
      default=None,
      help='Folder where to download the images.')
  cli_args = vars(parser.parse_args())
  download_all_images(cli_args)

下载好以后图片保存在train中，我们可以做同样的处理了

# Crop every selected training-split box into
# openimage/train_imgs/<Display_name>/ (the images are read from
# openimage/train/).
for i in need_names:
    save_path = os.path.join('openimage/train_imgs',i.replace(" ","_"))
    if os.path.exists(save_path):
        shutil.rmtree(save_path)
    os.makedirs(save_path)

all_imageids = []
box_columns = ['ImageID','LabelName','XMin','XMax','YMin','YMax']
rows = need_anns[box_columns].itertuples(index=False,name=None)
# i is the row position inside need_anns and keeps the crop file names unique.
for i,(image_id,labelname,x0,x1,y0,y1) in enumerate(tqdm.tqdm(rows,total=need_anns.shape[0])):
    # Always "<split>/<image id>" with a forward slash, whatever the OS
    # path separator is.
    image_dir = 'train/'+image_id
    all_imageids.append(image_dir)
    displayname = labelname2displayname[labelname]
    try:
        with Image.open(os.path.join('openimage',image_dir+'.jpg')) as img:
            w,h = img.size
            # The box columns are relative coordinates; scale to pixels.
            xmin,xmax = int(w*x0),int(w*x1)
            ymin,ymax = int(h*y0),int(h*y1)
            if xmax <= xmin or ymax <= ymin:
                # an empty crop cannot be written as JPEG
                continue
            crop = img.crop((xmin,ymin,xmax,ymax))
            save_name = image_id+"_"+str(i)+'.jpg'
            save_path = os.path.join('openimage/train_imgs',displayname,save_name)
            crop.save(save_path,quality=100)
    except Exception as e:
        # Best effort: report the image that failed and carry on.
        print("wrong image id:",image_id,e)
        continue
for i in need_names:
    name = i.replace(" ","_")
    save_path = os.path.join('openimage/train_imgs',name)
    print(f"{name} image num:{len(os.listdir(save_path))}")

 11%|█         | 14409/129548 [02:09<26:42, 71.85it/s] 

wrong image id: cannot write empty image as JPEG


 24%|██▍       | 31367/129548 [04:43<17:53, 91.42it/s]  

wrong image id: cannot write empty image as JPEG


100%|██████████| 129548/129548 [20:20<00:00, 106.15it/s]


Bicycle image num:40161
Bicycle_wheel image num:59520
Stationary_bicycle image num:338
Bicycle_helmet image num:15951
Motorcycle image num:13382
Unicycle image num:194

除了以上方法还有更快的方式，上面的方法对于每个框都要重新读取一次图片，但实际上，可以读一次图把相同的框都给取出来，如下：

# Group the boxes by image so that each image only has to be read once, and
# print the first two groups as a preview.
# Grouping by the scalar key "ImageID" (not the one-element list
# ["ImageID"]) keeps the group key a plain string; with a list, recent
# pandas versions yield a 1-tuple and `key + ":"` fails.
a = need_anns[['ImageID','LabelName','XMin','XMax','YMin','YMax']].groupby("ImageID")
for i,(image_id,boxes) in enumerate(a):
    if i >= 2:
        break
    print(image_id+":")
    print(boxes)

00002f4ff380c64c:
              ImageID  LabelName      XMin      XMax      YMin      YMax
158  00002f4ff380c64c   /m/0199g  0.000000  0.155556  0.654867  0.876106
159  00002f4ff380c64c   /m/0199g  0.168889  0.402963  0.648230  0.873894
160  00002f4ff380c64c   /m/0199g  0.414815  0.659259  0.654867  0.887168
161  00002f4ff380c64c  /m/01bqk0  0.060741  0.151111  0.736726  0.873894
162  00002f4ff380c64c  /m/01bqk0  0.165926  0.260741  0.743363  0.878319
163  00002f4ff380c64c  /m/01bqk0  0.308148  0.400000  0.743363  0.878319
164  00002f4ff380c64c  /m/01bqk0  0.413333  0.505185  0.743363  0.884956
165  00002f4ff380c64c  /m/01bqk0  0.561481  0.656296  0.736726  0.880531
000091f4a275d0fb:
              ImageID LabelName      XMin      XMax      YMin      YMax
868  000091f4a275d0fb  /m/0199g  0.017143  0.998571  0.225806  0.997849