做分类模型时,除了 ImageNet 这类分类数据集,常见的目标检测数据集也可以利用起来:把所需类别的目标框裁剪出来,再用于分类。检测数据集有 VOC、COCO、OpenImage、Object365 等,对应的标注格式也就是常见的几种:VOC 的 xml(左上、右下角点的像素坐标),COCO 的 json(左上角点加宽高的像素值),YOLO 的 txt(中心点 xywh 的相对值),OpenImage 的 csv(左上、右下角点的相对值),Object365 的格式(待补充)。
如下从这几个数据集中获取这几种车辆。
1、获取VOC数据集中的两轮车(示例代码中的 need_names 同时还裁剪了 bottle、chair 等其他类别)
import glob
import os
import shutil
import xml.etree.ElementTree as ET

import tqdm
from PIL import Image
# Crop every annotated object of the wanted classes out of VOC2007 and store
# the crops in one folder per class (classification-style layout).
need_names = ["bicycle","motorbike","bottle","chair","tvmonitor","pottedplant"]
imgpath = "collect/VOC/VOCdevkit/VOC2007/JPEGImages"
annpath = "collect/VOC/VOCdevkit/VOC2007/Annotations/"
savepath = "collect/VOC"
name2dir = {i: os.path.join(savepath, i) for i in need_names}

# Start from empty class folders so crops of an earlier run do not mix in.
for need_name in need_names:
    savedir = name2dir[need_name]
    if os.path.exists(savedir):
        shutil.rmtree(savedir)
    os.makedirs(savedir)

# File stem -> image file name. Only stems that also have an annotation file
# are processed.
imgs2name = {os.path.splitext(i)[0]: i for i in os.listdir(imgpath)}
annnames = {os.path.splitext(i)[0] for i in os.listdir(annpath)}
comname = list(set(imgs2name) & annnames)

for name in tqdm.tqdm(comname):
    imgdir = os.path.join(imgpath, imgs2name[name])
    anndir = os.path.join(annpath, name + '.xml')
    # Passing the path lets ElementTree open and close the file itself.
    root = ET.parse(anndir).getroot()
    size = root.find('size')
    img_w = int(size.find('width').text)
    img_h = int(size.find('height').text)
    if img_w < 50 or img_h < 50:
        continue

    # First collect the usable boxes, then open the image once for all of them.
    crops = []
    for obj in root.iter('object'):
        cls_name = obj.find('name').text.lower()
        if cls_name not in name2dir:
            continue
        box = obj.find('bndbox')
        xmin = int(box.find('xmin').text)
        ymin = int(box.find('ymin').text)
        xmax = int(box.find('xmax').text)
        ymax = int(box.find('ymax').text)
        # Boxes narrower or lower than 30 px are skipped.
        if xmax - xmin < 30 or ymax - ymin < 30:
            continue
        crops.append((cls_name, (xmin, ymin, xmax, ymax)))
    if not crops:
        continue

    with Image.open(imgdir) as im:
        # i numbers the saved crops of this image: <stem>_<i>.jpg
        for i, (cls_name, box) in enumerate(crops):
            savename = name + '_' + str(i) + '.jpg'
            im.crop(box).save(os.path.join(name2dir[cls_name], savename), quality=100)

for name, namedir in name2dir.items():
    print(f"{name} img num:{len(os.listdir(namedir))}")
100%|██████████| 9963/9963 [00:16<00:00, 591.14it/s]
bicycle img num:742
motorbike img num:715
bottle img num:705
chair img num:2477
tvmonitor img num:669
pottedplant img num:1030
2 、接着做COCO数据集的分类数据获取
import numpy as np
import tqdm
from PIL import Image
from pycocotools.coco import COCO
'''
person
bicycle
car
motorcycle
airplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
couch
potted plant
bed
dining table
toilet
tv
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
'''
# Crop every annotated object of the wanted classes out of COCO val2017 and
# store the crops in one folder per class.
annpath = "collect/COCO/annotations/instances_val2017.json"
imgpath = "collect/COCO/val2017/"
savepath = "collect/COCO"
need_names=["bicycle",'motorcycle']

# Start from empty class folders.
for need_name in need_names:
    savedir = os.path.join(savepath, need_name)
    if os.path.exists(savedir):
        shutil.rmtree(savedir)
    os.makedirs(savedir)

data_source = COCO(annotation_file=annpath)
catIds = data_source.getCatIds()
categories = data_source.loadCats(catIds)
categories.sort(key=lambda x: x['id'])
# Only the category id -> class name lookup is needed below.
catId2classes = {cat['id']: cat['name'] for cat in categories}
wanted = set(need_names)

img_ids = data_source.getImgIds()
print("num images:",len(img_ids))
# Iterating the list directly (no enumerate) lets tqdm show total and ETA.
for img_id in tqdm.tqdm(img_ids, desc='crop needed classes'):
    img_info = data_source.loadImgs(img_id)[0]
    img_name = img_info['file_name']
    img_path = os.path.join(imgpath, img_name)
    file_name = os.path.splitext(img_name)[0]
    height = img_info['height']
    width = img_info['width']
    if width < 80 or height < 80:
        continue
    annotation_id = data_source.getAnnIds(img_id)
    if len(annotation_id) == 0:
        continue

    # First collect the usable boxes, then open the image once for all of them.
    crops = []
    for annotation in data_source.loadAnns(annotation_id):
        cls_name = catId2classes[annotation['category_id']]
        if cls_name not in wanted:
            continue
        box = annotation['bbox']  # read as [x, y, width, height]
        if box[2] < 1 or box[3] < 1:
            continue
        # Clip the box to the image before measuring it.
        xmin = max(0, box[0])
        ymin = max(0, box[1])
        xmax = min(box[0] + box[2], width)
        ymax = min(box[1] + box[3], height)
        if xmax - xmin < 30 or ymax - ymin < 30:
            continue
        crops.append((cls_name, (xmin, ymin, xmax, ymax)))
    if not crops:
        continue

    with Image.open(img_path) as im:
        # i numbers the saved crops of this image: <stem>_<i>.jpg
        for i, (cls_name, box) in enumerate(crops):
            save_path = os.path.join(savepath, cls_name, file_name + '_' + str(i) + '.jpg')
            im.crop(box).save(save_path, quality=100)

for need_name in need_names:
    print(f"{need_name} img num:{len(os.listdir(os.path.join(savepath,need_name)))}")
print('finish')
loading annotations into memory...
Done (t=0.38s)
creating index...
index created!
num images: 5000
crop needed classes: 5000it [00:02, 2112.55it/s]
bicycle img num:196
motorcycle img num:300
finish
3、YOLO 格式数据
以下的代码没做太多修改,是在实际中使用的:同时处理了两个yolo文件夹,用了多进程;图片与标签文件的对应关系是用文件名字典来匹配的,没有使用glob,因为这样处理速度更快。
import os
import tqdm
from glob import glob
from multiprocessing import Process
import numpy as np
import shutil
import cv2
# Alternative source folders, kept for reference:
# imgpaths = ['ebike/electric_bicycle/images_all',"ebike/luping_buchong/images"]
# annpaths = ['ebike/electric_bicycle/labels_all',"ebike/luping_buchong/labels"]
imgpaths = ['ebike/electric_bicycle/images',"ebike/luping_buchong/images"]
annpaths = ['ebike/electric_bicycle/labels',"ebike/luping_buchong/labels"]

# Pair every image with its label file by file stem.
all_imgs = []
all_labels = []
for imgpath, annpath in zip(imgpaths, annpaths):
    # stem -> extension, built from a single directory listing
    name_add_fix = dict(os.path.splitext(name) for name in os.listdir(imgpath))
    annfiles = {os.path.splitext(name)[0] for name in os.listdir(annpath)}
    comfiles = list(set(name_add_fix) & annfiles)
    for comfile in tqdm.tqdm(comfiles):
        all_imgs.append(os.path.join(imgpath, comfile + name_add_fix[comfile]))
        all_labels.append(os.path.join(annpath, comfile + '.txt'))
print("img num",len(all_imgs))
print("ann num",len(all_labels))

# Only the 'ebike' class is exported. The list used to be read from classes.txt
# and then overwritten on the next line; that dead read is dropped so the
# script no longer fails when classes.txt is absent.
classes = ['ebike']

# Recreate the output tree: <cls_path>/<class_name>/
cls_path = 'ebike/ebike_classification_need'
if os.path.exists(cls_path):
    shutil.rmtree(cls_path)
for class_name in classes:
    class_path = os.path.join(cls_path, class_name)
    os.makedirs(class_path, exist_ok=False)

# Label id (as a string) -> class name.
id2class = {str(class_id): class_name for class_id, class_name in enumerate(classes)}
print(id2class)
def crop_img(img_paths,label_paths,id2class,cls_path):
    """Crop the class-0 boxes of YOLO-labelled images into per-class folders.

    Args:
        img_paths: image file paths.
        label_paths: label file paths, parallel to ``img_paths``. Each line is
            read as ``label_id x_center y_center width height`` with the four
            box values relative to the image size.
        id2class: mapping from label id (as a string) to class name.
        cls_path: output root; crops go to ``<cls_path>/<class name>/``.

    Crops are JPEG-encoded and saved as ``<image stem>_<line index>.jpg``.
    Only boxes with label id 0 and a width/height ratio in (0.2, 5) are saved.
    """
    for img_path, label_path in tqdm.tqdm(zip(img_paths, label_paths), total=len(img_paths)):
        img_name = os.path.splitext(os.path.split(img_path)[-1])[0]
        with open(label_path, 'r') as f:
            lines = f.read().strip().splitlines()

        img = None  # decoded on first use, so label files without class 0 cost nothing
        for i, line in enumerate(lines):
            fields = line.split()
            if len(fields) < 5:
                # Malformed line: skip it instead of crashing the whole worker.
                continue
            labelid, x, y, w, h = map(float, fields[:5])
            if int(labelid) != 0:
                continue
            label = id2class[str(int(labelid))]

            if img is None:
                # fromfile + imdecode instead of cv2.imread, as in the original.
                img = cv2.imdecode(np.fromfile(img_path, np.uint8), 1)
                if img is None:
                    break  # undecodable image: none of its boxes can be cropped
            imgh, imgw = img.shape[:2]

            # Relative -> pixel coordinates.
            x = x * imgw
            y = y * imgh
            w = w * imgw
            h = h * imgh
            if w <= 0 or h <= 0:
                continue  # degenerate box; also avoids dividing by zero below
            if not 0.2 < w / h < 5:
                continue

            xmin = max(int(x - w / 2), 0)
            ymin = max(int(y - h / 2), 0)
            xmax = min(int(x + w / 2), int(imgw))
            ymax = min(int(y + h / 2), int(imgh))
            imgpart = img[ymin:ymax, xmin:xmax, :]
            # The data is always JPEG-encoded, so the file gets a .jpg name even
            # when the source image has another extension.
            imgpath = os.path.join(cls_path, label, img_name + f"_{i}" + '.jpg')
            if imgpart.size == 0:
                print("non img", imgpath)
                continue
            try:
                cv2.imencode('.jpg', imgpart)[1].tofile(imgpath)
            except (cv2.error, OSError):
                print("non img", imgpath)
# Split the paired file lists into `num` contiguous chunks and crop each chunk
# in its own process.
num = 40
parts = np.linspace(0, len(all_imgs), num + 1).astype(np.int32)
processes = []
for start, stop in zip(parts[:-1], parts[1:]):
    worker = Process(
        target=crop_img,
        args=(all_imgs[start:stop], all_labels[start:stop], id2class, cls_path),
    )
    worker.start()
    processes.append(worker)
# Wait until every worker has finished.
for worker in processes:
    worker.join()
4、openimage数据获取
import pandas as pd
from PIL import Image
import os
import tqdm
import shutil
import numpy as np
从数据集中获取我们需要的分类,只是为了对我们的数据做扩充。openimage中有些图是没有标签的,所以我们统计出来的image_id数量会少于真实的图片数量。
获取标签
# Load the boxable class list. The CSV is read without a header row: the first
# column is named 'labelname', the second 'displayname'.
# NOTE(review): "metadate" in the path looks like a typo of "metadata" — confirm
# against the folder name on disk before changing it.
classes = pd.read_csv('openimage/labels/metadate/class-descriptions-boxable.csv',names=['labelname','displayname'])
classes.head()
| labelname | displayname |
---|
0 | /m/011k07 | Tortoise |
---|
1 | /m/011q46kg | Container |
---|
2 | /m/012074 | Magpie |
---|
3 | /m/0120dh | Sea turtle |
---|
4 | /m/01226z | Football |
---|
# Write every display name to names.txt, one per line, for manual browsing.
names = classes['displayname'].tolist()
temp = list(map(lambda display: display + '\n', names))
with open('names.txt', 'w') as f:
    f.write(''.join(temp))
根据displayname 获取 labelname 并指定我们想要的类别
可以查看names.txt,有我们想要的分类,如bicycle或含这个词的类别
# Keep every display name that contains "cycle" (case-insensitive).
need_names = list(filter(lambda display: 'cycle' in display.lower(), names))
print(need_names)
['Bicycle', 'Bicycle wheel', 'Stationary bicycle', 'Bicycle helmet', 'Motorcycle', 'Unicycle']
# For each wanted display name take the first matching row of `classes` as a
# [labelname, displayname] pair.
need_labelnames = [
    classes.loc[classes['displayname'] == wanted].iloc[0].tolist()
    for wanted in need_names
]
print(need_labelnames)
[['/m/0199g', 'Bicycle'], ['/m/01bqk0', 'Bicycle wheel'], ['/m/03kt2w', 'Stationary bicycle'], ['/m/03p3bw', 'Bicycle helmet'], ['/m/04_sv', 'Motorcycle'], ['/m/0f6nr', 'Unicycle']]
# Label id -> display name, with spaces replaced by underscores.
labelname2displayname = {
    pair[0]: pair[1].replace(" ", "_")
    for pair in need_labelnames
}
print(labelname2displayname)
{'/m/0199g': 'Bicycle', '/m/01bqk0': 'Bicycle_wheel', '/m/03kt2w': 'Stationary_bicycle', '/m/03p3bw': 'Bicycle_helmet', '/m/04_sv': 'Motorcycle', '/m/0f6nr': 'Unicycle'}
根据标签名找到对应的图片名称
测试集
先获取小的数据集,从测试集到验证集最后再做训练集
# Load the test-split box annotation CSV and preview the first rows.
annfile = 'openimage/labels/detection/test-annotations-bbox.csv'
anns = pd.read_csv(annfile)
anns.head()
| ImageID | Source | LabelName | Confidence | XMin | XMax | YMin | YMax | IsOccluded | IsTruncated | IsGroupOf | IsDepiction | IsInside |
---|
0 | 000026e7ee790996 | xclick | /m/07j7r | 1 | 0.071875 | 0.145313 | 0.206250 | 0.391667 | 0 | 1 | 1 | 0 | 0 |
---|
1 | 000026e7ee790996 | xclick | /m/07j7r | 1 | 0.439063 | 0.571875 | 0.264583 | 0.435417 | 0 | 1 | 1 | 0 | 0 |
---|
2 | 000026e7ee790996 | xclick | /m/07j7r | 1 | 0.668750 | 1.000000 | 0.000000 | 0.552083 | 0 | 1 | 1 | 0 | 0 |
---|
3 | 000062a39995e348 | xclick | /m/015p6 | 1 | 0.206208 | 0.849224 | 0.154639 | 1.000000 | 0 | 0 | 0 | 0 | 0 |
---|
4 | 000062a39995e348 | xclick | /m/05s2s | 1 | 0.137472 | 0.376940 | 0.000000 | 0.883652 | 1 | 1 | 0 | 0 | 0 |
---|
# Report the number of annotation rows and of distinct images.
total_num = len(anns.index)
print("total ann num:", total_num)
total_numimg = len(set(anns['ImageID']))
print("total img num:", total_numimg)
total ann num: 937327
total img num: 112194
# Boolean row mask: True where the row's LabelName is one of the wanted ids.
new_need_labelnames = [pair[0] for pair in need_labelnames]
old_a = anns['LabelName'].isin(new_need_labelnames).to_numpy()
print("total num :", np.sum(old_a))
need_anns = anns[old_a]
total num : 4757
# Preview the first rows of the filtered annotations.
need_anns.head()
| ImageID | Source | LabelName | Confidence | XMin | XMax | YMin | YMax | IsOccluded | IsTruncated | IsGroupOf | IsDepiction | IsInside |
---|
323 | 0013a0927e6bbefc | xclick | /m/04_sv | 1 | 0.192188 | 0.600000 | 0.306250 | 0.629167 | 0 | 0 | 0 | 0 | 0 |
---|
522 | 001d1da154d00e83 | xclick | /m/0199g | 1 | 0.199413 | 0.439883 | 0.681063 | 0.953488 | 1 | 0 | 0 | 0 | 0 |
---|
523 | 001d1da154d00e83 | xclick | /m/0199g | 1 | 0.425220 | 0.492669 | 0.807309 | 0.880399 | 1 | 0 | 0 | 0 | 0 |
---|
524 | 001d1da154d00e83 | xclick | /m/0199g | 1 | 0.548387 | 0.780059 | 0.677741 | 0.943522 | 1 | 0 | 0 | 0 | 0 |
---|
1177 | 003c2b6816ba9d22 | xclick | /m/03p3bw | 1 | 0.336780 | 0.497784 | 0.143488 | 0.348786 | 0 | 0 | 0 | 0 | 0 |
---|
# Crop every selected test-split box into openimage/test_imgs/<Display_name>/
# and record the images that were needed in openimage/test_images.txt.
for i in need_names:
    save_path = os.path.join('openimage/test_imgs', i.replace(" ", "_"))
    if os.path.exists(save_path):
        shutil.rmtree(save_path)
    os.makedirs(save_path)

all_imageids = []
# Remember each row's position in need_anns: it is part of the crop file name.
numbered_anns = need_anns.assign(_row=np.arange(need_anns.shape[0]))
# One group per image, so every image is opened once instead of once per box.
for image_id, group in tqdm.tqdm(numbered_anns.groupby('ImageID', sort=False)):
    image_dir = os.path.join('test', image_id)
    # The download list needs "<split>/<id>" with a forward slash on every OS.
    all_imageids.append('test/' + image_id)
    boxes = group[['XMin', 'XMax', 'YMin', 'YMax']].to_numpy().astype(np.float32)
    try:
        img = Image.open(os.path.join('openimage', image_dir + '.jpg'))
    except OSError as e:
        print("wrong image id:", e)
        continue
    with img:
        w, h = img.size
        for row, labelname, bbox in zip(group['_row'], group['LabelName'], boxes):
            # Box values are fractions of the image size.
            xmin = int(w * bbox[0])
            xmax = int(w * bbox[1])
            ymin = int(h * bbox[2])
            ymax = int(h * bbox[3])
            if xmax <= xmin or ymax <= ymin:
                # A zero-area crop cannot be written as JPEG; skip it.
                continue
            displayname = labelname2displayname[labelname]
            save_name = image_id + "_" + str(row) + '.jpg'
            save_path = os.path.join('openimage/test_imgs', displayname, save_name)
            try:
                img.crop((xmin, ymin, xmax, ymax)).save(save_path, quality=100)
            except (OSError, ValueError) as e:
                print("wrong image id:", e)

for i in need_names:
    name = i.replace(" ", "_")
    save_path = os.path.join('openimage/test_imgs', name)
    print(f"{name} image num:{len(os.listdir(save_path))}")

# One "<split>/<id>" line per distinct image; mode 'w' replaces an older list.
new_all_imageids = [i + '\n' for i in set(all_imageids)]
img_names = os.path.join('openimage', 'test_images.txt')
with open(img_names, 'w') as f:
    f.writelines(new_all_imageids)
100%|██████████| 4757/4757 [00:45<00:00, 103.85it/s]
Bicycle image num:1203
Bicycle_wheel image num:2314
Stationary_bicycle image num:47
Bicycle_helmet image num:492
Motorcycle image num:683
Unicycle image num:18
验证集
以上就完成了openimage test数据集的获取,同样的道理,接下来只要对val、train做相同的处理即可,如下是val:
# Load the validation-split box annotation CSV and preview the first rows.
annfile = 'openimage/labels/detection/validation-annotations-bbox.csv'
anns = pd.read_csv(annfile)
anns.head()
| ImageID | Source | LabelName | Confidence | XMin | XMax | YMin | YMax | IsOccluded | IsTruncated | IsGroupOf | IsDepiction | IsInside |
---|
0 | 0001eeaf4aed83f9 | xclick | /m/0cmf2 | 1 | 0.022673 | 0.964201 | 0.071038 | 0.800546 | 0 | 0 | 0 | 0 | 0 |
---|
1 | 000595fe6fee6369 | xclick | /m/02wbm | 1 | 0.000000 | 1.000000 | 0.000000 | 1.000000 | 0 | 0 | 1 | 0 | 0 |
---|
2 | 000595fe6fee6369 | xclick | /m/02xwb | 1 | 0.141384 | 0.179676 | 0.676275 | 0.731707 | 0 | 0 | 0 | 0 | 0 |
---|
3 | 000595fe6fee6369 | xclick | /m/02xwb | 1 | 0.213549 | 0.253314 | 0.299335 | 0.354767 | 1 | 0 | 0 | 0 | 0 |
---|
4 | 000595fe6fee6369 | xclick | /m/02xwb | 1 | 0.232695 | 0.288660 | 0.490022 | 0.545455 | 1 | 0 | 0 | 0 | 0 |
---|
# Report the number of annotation rows and of distinct images.
total_num = len(anns.index)
print("total ann num:", total_num)
total_numimg = len(set(anns['ImageID']))
print("total img num:", total_numimg)
total ann num: 303980
total img num: 37306
# Boolean row mask: True where the row's LabelName is one of the wanted ids.
new_need_labelnames = [pair[0] for pair in need_labelnames]
old_a = anns['LabelName'].isin(new_need_labelnames).to_numpy()
print("total num :", np.sum(old_a))
need_anns = anns[old_a]
total num : 1629
# Report the number of filtered annotation rows and of distinct images in them.
total_num = len(need_anns.index)
print("total ann num:", total_num)
total_numimg = len(set(need_anns['ImageID']))
print("total img num:", total_numimg)
total ann num: 1629
total img num: 445
# Preview the first rows of the filtered annotations.
need_anns.head()
| ImageID | Source | LabelName | Confidence | XMin | XMax | YMin | YMax | IsOccluded | IsTruncated | IsGroupOf | IsDepiction | IsInside |
---|
226 | 001a995c1e25d892 | xclick | /m/04_sv | 1 | 0.101562 | 0.876563 | 0.114583 | 0.945833 | 0 | 0 | 0 | 0 | 0 |
---|
433 | 00575b9132bb3746 | xclick | /m/03p3bw | 1 | 0.370206 | 0.513274 | 0.044248 | 0.307522 | 1 | 0 | 0 | 0 | 0 |
---|
434 | 00575b9132bb3746 | xclick | /m/0199g | 1 | 0.000000 | 0.508850 | 0.000000 | 0.681416 | 0 | 1 | 0 | 0 | 0 |
---|
457 | 00575b9132bb3746 | xclick | /m/01bqk0 | 1 | 0.000000 | 0.194690 | 0.183628 | 0.676991 | 1 | 1 | 0 | 0 | 0 |
---|
458 | 00575b9132bb3746 | xclick | /m/01bqk0 | 1 | 0.129794 | 0.300885 | 0.539823 | 0.960177 | 1 | 0 | 0 | 0 | 0 |
---|
# Crop every selected validation-split box into
# openimage/validation_imgs/<Display_name>/ and record the images that were
# needed in openimage/validation_images.txt.
for i in need_names:
    save_path = os.path.join('openimage/validation_imgs', i.replace(" ", "_"))
    if os.path.exists(save_path):
        shutil.rmtree(save_path)
    os.makedirs(save_path)

all_imageids = []
# Remember each row's position in need_anns: it is part of the crop file name.
numbered_anns = need_anns.assign(_row=np.arange(need_anns.shape[0]))
# One group per image, so every image is opened once instead of once per box.
for image_id, group in tqdm.tqdm(numbered_anns.groupby('ImageID', sort=False)):
    image_dir = os.path.join('validation', image_id)
    # The download list needs "<split>/<id>" with a forward slash on every OS.
    all_imageids.append('validation/' + image_id)
    boxes = group[['XMin', 'XMax', 'YMin', 'YMax']].to_numpy().astype(np.float32)
    try:
        img = Image.open(os.path.join('openimage', image_dir + '.jpg'))
    except OSError as e:
        print("wrong image id:", e)
        continue
    with img:
        w, h = img.size
        for row, labelname, bbox in zip(group['_row'], group['LabelName'], boxes):
            # Box values are fractions of the image size.
            xmin = int(w * bbox[0])
            xmax = int(w * bbox[1])
            ymin = int(h * bbox[2])
            ymax = int(h * bbox[3])
            if xmax <= xmin or ymax <= ymin:
                # A zero-area crop cannot be written as JPEG; skip it.
                continue
            displayname = labelname2displayname[labelname]
            save_name = image_id + "_" + str(row) + '.jpg'
            save_path = os.path.join('openimage/validation_imgs', displayname, save_name)
            try:
                img.crop((xmin, ymin, xmax, ymax)).save(save_path, quality=100)
            except (OSError, ValueError) as e:
                print("wrong image id:", e)

for i in need_names:
    name = i.replace(" ", "_")
    save_path = os.path.join('openimage/validation_imgs', name)
    print(f"{name} image num:{len(os.listdir(save_path))}")

# One "<split>/<id>" line per distinct image; mode 'w' replaces an older list.
new_all_imageids = [i + '\n' for i in set(all_imageids)]
img_names = os.path.join('openimage', 'validation_images.txt')
with open(img_names, 'w') as f:
    f.writelines(new_all_imageids)
100%|██████████| 1629/1629 [00:27<00:00, 59.82it/s]
Bicycle image num:418
Bicycle_wheel image num:780
Stationary_bicycle image num:10
Bicycle_helmet image num:187
Motorcycle image num:232
Unicycle image num:2
从上边两个数据集也可以看出,我们需要的只是大量图片中的一小部分,这样处理速度很慢,而且前提是已经把所有图片都下载了下来,大概有570G左右(下载了好几天),很费力气。事实上可以只下载标签,然后生成我们需要的image id列表,如上边保存的test_images.txt和validation_images.txt,这两个文件可以配合官网提供的工具,只下载我们需要的那部分图片。因为训练集特别大,所以采用这种方法;事实上所有数据集都推荐这种方法。这个工作可以参考 https://www.jianshu.com/p/40b58833af22
训练集
# Load the train-split box annotation CSV and preview the first rows.
annfile = 'openimage/labels/detection/oidv6-train-annotations-bbox.csv'
anns = pd.read_csv(annfile)
anns.head()
| ImageID | Source | LabelName | Confidence | XMin | XMax | YMin | YMax | IsOccluded | IsTruncated | ... | IsDepiction | IsInside | XClick1X | XClick2X | XClick3X | XClick4X | XClick1Y | XClick2Y | XClick3Y | XClick4Y |
---|
0 | 000002b66c9c498e | xclick | /m/01g317 | 1 | 0.012500 | 0.195312 | 0.148438 | 0.587500 | 0 | 1 | ... | 0 | 0 | 0.148438 | 0.012500 | 0.059375 | 0.195312 | 0.148438 | 0.357812 | 0.587500 | 0.325000 |
---|
1 | 000002b66c9c498e | xclick | /m/01g317 | 1 | 0.025000 | 0.276563 | 0.714063 | 0.948438 | 0 | 1 | ... | 0 | 0 | 0.025000 | 0.248438 | 0.276563 | 0.214062 | 0.914062 | 0.714063 | 0.782813 | 0.948438 |
---|
2 | 000002b66c9c498e | xclick | /m/01g317 | 1 | 0.151562 | 0.310937 | 0.198437 | 0.590625 | 1 | 0 | ... | 0 | 0 | 0.243750 | 0.151562 | 0.310937 | 0.262500 | 0.198437 | 0.434375 | 0.507812 | 0.590625 |
---|
3 | 000002b66c9c498e | xclick | /m/01g317 | 1 | 0.256250 | 0.429688 | 0.651563 | 0.925000 | 1 | 0 | ... | 0 | 0 | 0.315625 | 0.429688 | 0.256250 | 0.423438 | 0.651563 | 0.921875 | 0.826562 | 0.925000 |
---|
4 | 000002b66c9c498e | xclick | /m/01g317 | 1 | 0.257812 | 0.346875 | 0.235938 | 0.385938 | 1 | 0 | ... | 0 | 0 | 0.317188 | 0.257812 | 0.346875 | 0.307812 | 0.235938 | 0.289062 | 0.348438 | 0.385938 |
---|
5 rows × 21 columns
# Report the number of annotation rows and of distinct images.
total_num = len(anns.index)
print("total ann num:", total_num)
total_numimg = len(set(anns['ImageID']))
print("total img num:", total_numimg)
total ann num: 14610229
total img num: 1743042
# Boolean row mask: True where the row's LabelName is one of the wanted ids.
new_need_labelnames = [pair[0] for pair in need_labelnames]
old_a = anns['LabelName'].isin(new_need_labelnames).to_numpy()
print("total num :", np.sum(old_a))
need_anns = anns[old_a]
total num : 129548
# Report the number of filtered annotation rows and of distinct images in them.
total_num = len(need_anns.index)
print("total ann num:", total_num)
total_numimg = len(set(need_anns['ImageID']))
print("total img num:", total_numimg)
total ann num: 129548
total img num: 26694
# Preview the first rows of the filtered annotations.
need_anns.head()
| ImageID | Source | LabelName | Confidence | XMin | XMax | YMin | YMax | IsOccluded | IsTruncated | ... | IsDepiction | IsInside | XClick1X | XClick2X | XClick3X | XClick4X | XClick1Y | XClick2Y | XClick3Y | XClick4Y |
---|
158 | 00002f4ff380c64c | xclick | /m/0199g | 1 | 0.000000 | 0.155556 | 0.654867 | 0.876106 | 0 | 1 | ... | 0 | 0 | 0.106667 | 0.000000 | 0.001481 | 0.155556 | 0.876106 | 0.803097 | 0.654867 | 0.796460 |
---|
159 | 00002f4ff380c64c | xclick | /m/0199g | 1 | 0.168889 | 0.402963 | 0.648230 | 0.873894 | 0 | 0 | ... | 0 | 0 | 0.268148 | 0.168889 | 0.208889 | 0.402963 | 0.648230 | 0.803097 | 0.873894 | 0.809735 |
---|
160 | 00002f4ff380c64c | xclick | /m/0199g | 1 | 0.414815 | 0.659259 | 0.654867 | 0.887168 | 0 | 0 | ... | 0 | 0 | 0.608889 | 0.414815 | 0.494815 | 0.659259 | 0.887168 | 0.796460 | 0.654867 | 0.803097 |
---|
161 | 00002f4ff380c64c | xclick | /m/01bqk0 | 1 | 0.060741 | 0.151111 | 0.736726 | 0.873894 | 1 | 0 | ... | 0 | 0 | 0.103704 | 0.060741 | 0.108148 | 0.151111 | 0.736726 | 0.809735 | 0.873894 | 0.805310 |
---|
162 | 00002f4ff380c64c | xclick | /m/01bqk0 | 1 | 0.165926 | 0.260741 | 0.743363 | 0.878319 | 1 | 0 | ... | 0 | 0 | 0.210370 | 0.165926 | 0.216296 | 0.260741 | 0.743363 | 0.816372 | 0.878319 | 0.816372 |
---|
5 rows × 21 columns
# Prepare empty class folders for the train crops and write the list of train
# images that have to be downloaded (openimage/train_images.txt).
for i in need_names:
    save_path = os.path.join('openimage/train_imgs', i.replace(" ", "_"))
    if os.path.exists(save_path):
        shutil.rmtree(save_path)
    os.makedirs(save_path)

# Read the ImageID column directly instead of one .iloc lookup per row. The
# downloader expects "<split>/<id>" with a forward slash on every OS, so the
# entries are not built with os.path.join.
all_imageids = ['train/' + image_id for image_id in need_anns['ImageID']]

# One line per distinct image; mode 'w' replaces an older list.
new_all_imageids = [i + '\n' for i in set(all_imageids)]
img_names = os.path.join('openimage', 'train_images.txt')
with open(img_names, 'w') as f:
    f.writelines(new_all_imageids)
100%|██████████| 129548/129548 [00:03<00:00, 34663.79it/s]
# Number of distinct train images written to the download list.
len(new_all_imageids)
26694
下载方式命令是:
python downloader.py train_images.txt --download_folder=train --num_processes=5
下载好的图片将保存在 download_folder 指定的文件夹下;num_processes 越大,下载用的时间越少。downloader.py 的下载需要科学上网,所以这里直接给出 downloader.py 的内容(20220920,对应v6版本):
"""Open Images image downloader.
This script downloads a subset of Open Images images, given a list of image ids.
Typical uses of this tool might be downloading images:
- That contain a certain category.
- That have been annotated with certain types of annotations (e.g. Localized
Narratives, Exhaustively annotated people, etc.)
The input file IMAGE_LIST should be a text file containing one image per line
with the format <SPLIT>/<IMAGE_ID>, where <SPLIT> is either "train", "test",
"validation", or "challenge2018"; and <IMAGE_ID> is the image ID that uniquely
identifies the image in Open Images. A sample file could be:
train/f9e0434389a1d4dd
train/1a007563ebc18664
test/ea8bfd4e765304db
"""
import argparse
from concurrent import futures
import os
import re
import sys
import boto3
import botocore
import tqdm
# Name of the bucket the images are downloaded from.
BUCKET_NAME = 'open-images-dataset'
# "<split>/<hex image id>", matched from the start of the string.
REGEX = r'(test|train|validation|challenge2018)/([a-fA-F0-9]*)'


def check_and_homogenize_one_image(image):
    """Yield the single ``(split, image_id)`` pair parsed from ``image``.

    Raises AttributeError when ``image`` does not start with a known split
    followed by a slash, because the failed match is ``None``.
    """
    parsed = re.match(REGEX, image)
    split, image_id = parsed.groups()
    yield split, image_id
def check_and_homogenize_image_list(image_list):
    """Yield a ``(split, image_id)`` pair for every entry of ``image_list``.

    Args:
        image_list: iterable of ``<SPLIT>/<IMAGE_ID>`` strings.

    Raises:
        ValueError: when an entry cannot be parsed. The message names the
            0-based position of the entry, and the original error is chained
            as the cause.
    """
    for line_number, image in enumerate(image_list):
        try:
            yield from check_and_homogenize_one_image(image)
        except (ValueError, AttributeError) as error:
            raise ValueError(
                f'ERROR in line {line_number} of the image list. The following image '
                f'string is not recognized: "{image}".') from error
def read_image_list_file(image_list_file):
    """Yield each line of the file, stripped and with every '.jpg' removed."""
    with open(image_list_file, 'r') as list_file:
        yield from (entry.strip().replace('.jpg', '') for entry in list_file)
def download_one_image(bucket, split, image_id, download_folder):
    """Download ``<split>/<image_id>.jpg`` into ``download_folder``.

    The file is stored as ``<image_id>.jpg``. A botocore ClientError ends the
    program through ``sys.exit`` with an error message.
    """
    remote_key = f'{split}/{image_id}.jpg'
    local_path = os.path.join(download_folder, f'{image_id}.jpg')
    try:
        bucket.download_file(remote_key, local_path)
    except botocore.exceptions.ClientError as exception:
        message = f'ERROR when downloading image `{split}/{image_id}`: {str(exception)}'
        sys.exit(message)
def download_all_images(args):
    """Downloads all images specified in the input file.

    Args:
        args: mapping with the keys 'image_list' (path of the list file),
            'download_folder' (target folder; the current working directory
            is used when it is empty or None) and 'num_processes' (number of
            worker threads).

    An unparsable image list ends the program through ``sys.exit``.
    """
    bucket = boto3.resource(
        's3', config=botocore.config.Config(
            signature_version=botocore.UNSIGNED)).Bucket(BUCKET_NAME)

    download_folder = args['download_folder'] or os.getcwd()
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(download_folder, exist_ok=True)

    try:
        image_list = list(
            check_and_homogenize_image_list(
                read_image_list_file(args['image_list'])))
    except ValueError as exception:
        sys.exit(exception)

    # The context manager closes the progress bar even when a download fails.
    with tqdm.tqdm(
            total=len(image_list), desc='Downloading images',
            leave=True) as progress_bar:
        with futures.ThreadPoolExecutor(
                max_workers=args['num_processes']) as executor:
            all_futures = [
                executor.submit(download_one_image, bucket, split, image_id,
                                download_folder)
                for (split, image_id) in image_list
            ]
            for future in futures.as_completed(all_futures):
                # result() re-raises whatever the worker raised.
                future.result()
                progress_bar.update(1)
if __name__ == '__main__':
    # Command-line entry point: parse the arguments and start the download.
    cli = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cli.add_argument(
        'image_list',
        type=str,
        default=None,
        help=('Filename that contains the split + image IDs of the images to '
              'download. Check the document'),
    )
    cli.add_argument(
        '--num_processes',
        type=int,
        default=5,
        help='Number of parallel processes to use (default is 5).',
    )
    cli.add_argument(
        '--download_folder',
        type=str,
        default=None,
        help='Folder where to download the images.',
    )
    options = vars(cli.parse_args())
    download_all_images(options)
下载好以后图片保存在train中,我们可以做同样的处理了
# Crop every selected train-split box into openimage/train_imgs/<Display_name>/.
for i in need_names:
    save_path = os.path.join('openimage/train_imgs', i.replace(" ", "_"))
    if os.path.exists(save_path):
        shutil.rmtree(save_path)
    os.makedirs(save_path)

all_imageids = []
# Remember each row's position in need_anns: it is part of the crop file name.
numbered_anns = need_anns.assign(_row=np.arange(need_anns.shape[0]))
# One group per image, so every image is opened once instead of once per box.
for image_id, group in tqdm.tqdm(numbered_anns.groupby('ImageID', sort=False)):
    image_dir = os.path.join('train', image_id)
    all_imageids.append(image_dir)
    boxes = group[['XMin', 'XMax', 'YMin', 'YMax']].to_numpy().astype(np.float32)
    try:
        img = Image.open(os.path.join('openimage', image_dir + '.jpg'))
    except OSError as e:
        print("wrong image id:", e)
        continue
    with img:
        w, h = img.size
        for row, labelname, bbox in zip(group['_row'], group['LabelName'], boxes):
            # Box values are fractions of the image size.
            xmin = int(w * bbox[0])
            xmax = int(w * bbox[1])
            ymin = int(h * bbox[2])
            ymax = int(h * bbox[3])
            if xmax <= xmin or ymax <= ymin:
                # A zero-area crop cannot be written as JPEG; skip it.
                continue
            displayname = labelname2displayname[labelname]
            save_name = image_id + "_" + str(row) + '.jpg'
            save_path = os.path.join('openimage/train_imgs', displayname, save_name)
            try:
                img.crop((xmin, ymin, xmax, ymax)).save(save_path, quality=100)
            except (OSError, ValueError) as e:
                print("wrong image id:", e)

for i in need_names:
    name = i.replace(" ", "_")
    save_path = os.path.join('openimage/train_imgs', name)
    print(f"{name} image num:{len(os.listdir(save_path))}")
11%|█ | 14409/129548 [02:09<26:42, 71.85it/s]
wrong image id: cannot write empty image as JPEG
24%|██▍ | 31367/129548 [04:43<17:53, 91.42it/s]
wrong image id: cannot write empty image as JPEG
100%|██████████| 129548/129548 [20:20<00:00, 106.15it/s]
Bicycle image num:40161
Bicycle_wheel image num:59520
Stationary_bicycle image num:338
Bicycle_helmet image num:15951
Motorcycle image num:13382
Unicycle image num:194
除了以上方法还有更快的方式,上面的方法对于每个框都要重新读取一次图片,但实际上,可以读一次图把相同的框都给取出来,如下:
# Group the needed boxes by image and print the first two groups.
# The key is the plain column name, not a one-element list, so each group key
# is the image id string and can be concatenated with ":" on every pandas version.
a = need_anns[['ImageID','LabelName','XMin','XMax','YMin','YMax']].groupby("ImageID")
for i, (image_id, image_boxes) in enumerate(a):
    if i >= 2:
        break
    print(image_id + ":")
    print(image_boxes)
00002f4ff380c64c:
ImageID LabelName XMin XMax YMin YMax
158 00002f4ff380c64c /m/0199g 0.000000 0.155556 0.654867 0.876106
159 00002f4ff380c64c /m/0199g 0.168889 0.402963 0.648230 0.873894
160 00002f4ff380c64c /m/0199g 0.414815 0.659259 0.654867 0.887168
161 00002f4ff380c64c /m/01bqk0 0.060741 0.151111 0.736726 0.873894
162 00002f4ff380c64c /m/01bqk0 0.165926 0.260741 0.743363 0.878319
163 00002f4ff380c64c /m/01bqk0 0.308148 0.400000 0.743363 0.878319
164 00002f4ff380c64c /m/01bqk0 0.413333 0.505185 0.743363 0.884956
165 00002f4ff380c64c /m/01bqk0 0.561481 0.656296 0.736726 0.880531
000091f4a275d0fb:
ImageID LabelName XMin XMax YMin YMax
868 000091f4a275d0fb /m/0199g 0.017143 0.998571 0.225806 0.997849
5、object365
等补充
|