IT数码 购物 网址 头条 软件 日历 阅读 图书馆
TxT小说阅读器
↓语音阅读,小说下载,古典文学↓
图片批量下载器
↓批量下载图片,美女图库↓
图片自动播放器
↓图片自动播放器↓
一键清除垃圾
↓轻轻一点,清除系统垃圾↓
开发: C++知识库 Java知识库 JavaScript Python PHP知识库 人工智能 区块链 大数据 移动开发 嵌入式 开发工具 数据结构与算法 开发测试 游戏开发 网络协议 系统运维
教程: HTML教程 CSS教程 JavaScript教程 Go语言教程 JQuery教程 VUE教程 VUE3教程 Bootstrap教程 SQL数据库教程 C语言教程 C++教程 Java教程 Python教程 Python3教程 C#教程
数码: 电脑 笔记本 显卡 显示器 固态硬盘 硬盘 耳机 手机 iphone vivo oppo 小米 华为 单反 装机 图拉丁
 
   -> 人工智能 -> 从目标检测数据集中扣出所需类别进行分类 -> 正文阅读

[人工智能]从目标检测数据集中扣出所需类别进行分类


做分类模型时,除了ImageNet这类分类数据集,还可以把常见的目标检测数据集利用起来:把所需类别的目标框抠出来,再用于分类。常用的检测数据集有VOC、COCO、OpenImages、Objects365等,标注格式也是常见的几种:VOC的xml存左上、右下角点的像素坐标;COCO的json存左上角点加宽高;YOLO格式存中心点xywh的相对值;OpenImages的csv存左上、右下角点的相对值;Objects365的格式(待补充)。
下面依次从这几个数据集中获取所需的类别(以两轮车等为例)。

1、获取VOC数据集中两轮车

import glob
import os
import shutil
import xml.etree.ElementTree as ET

import tqdm
from PIL import Image

# Crop the wanted object classes out of a VOC detection set and store every
# crop as a JPEG under <savepath>/<class name>/ for use as classification data.
#
# The 20 VOC classes are: 'aeroplane', 'diningtable', 'sofa', 'bus', 'car',
# 'train', 'bicycle', 'horse', 'cow', 'bird', 'sheep', 'chair', 'motorbike',
# 'pottedplant', 'tvmonitor', 'bottle', 'dog', 'boat', 'person', 'cat'
need_names = ["bicycle","motorbike","bottle","chair","tvmonitor","pottedplant"]  # classes to extract
imgpath =  "collect/VOC/VOCdevkit/VOC2007/JPEGImages"  # image directory
annpath =  "collect/VOC/VOCdevkit/VOC2007/Annotations/" # xml annotation directory
savepath = "collect/VOC" # output root; one sub-directory per extracted class
name2dir = {i:os.path.join(savepath,i) for i in need_names}

MIN_IMAGE_SIDE = 50  # images narrower or shorter than this are skipped entirely
MIN_BOX_SIDE = 30    # boxes narrower or shorter than this are not cropped

# Start from empty output directories so counts printed at the end are exact.
for savedir in name2dir.values():
    if os.path.exists(savedir):
        shutil.rmtree(savedir)
    os.makedirs(savedir)

# Pair images and annotations by file stem; only stems present on both sides
# are processed.
imgs2name = {os.path.splitext(fname)[0]: fname for fname in os.listdir(imgpath)}
annnames = {os.path.splitext(fname)[0] for fname in os.listdir(annpath)}
comname = list(set(imgs2name) & annnames)

for name in tqdm.tqdm(comname):
    imgdir = os.path.join(imgpath,imgs2name[name])
    anndir = os.path.join(annpath,name+'.xml')
    # Passing the path lets ElementTree open and close the file itself.
    root = ET.parse(anndir).getroot()
    size = root.find('size')
    img_w = int(size.find('width').text)
    img_h = int(size.find('height').text)
    if img_w < MIN_IMAGE_SIDE or img_h < MIN_IMAGE_SIDE:
        continue

    # First collect every box worth cropping, so the image is opened at most
    # once (and not at all when it holds none of the wanted classes).
    wanted = []
    for obj in root.iter('object'):
        cls_name = obj.find('name').text.lower()
        if cls_name not in name2dir:
            continue
        box = obj.find('bndbox')
        # int(float(...)) also accepts coordinates written as "45.0".
        xmin = int(float(box.find('xmin').text))
        ymin = int(float(box.find('ymin').text))
        xmax = int(float(box.find('xmax').text))
        ymax = int(float(box.find('ymax').text))
        if xmax - xmin < MIN_BOX_SIDE or ymax - ymin < MIN_BOX_SIDE:
            continue
        wanted.append((cls_name, (xmin, ymin, xmax, ymax)))
    if not wanted:
        continue

    with Image.open(imgdir) as im:
        # The running index keeps crop file names unique within one image.
        for i, (cls_name, box) in enumerate(wanted):
            savename = name+'_'+str(i)+'.jpg'
            im.crop(box).save(os.path.join(name2dir[cls_name],savename),quality=100)

for name,namedir in name2dir.items():
    print(f"{name} img num:{len(os.listdir(namedir))}")
100%|██████████| 9963/9963 [00:16<00:00, 591.14it/s]

bicycle img num:742
motorbike img num:715
bottle img num:705
chair img num:2477
tvmonitor img num:669
pottedplant img num:1030

2 、接着做COCO数据集的分类数据获取

import numpy as np
import tqdm
from PIL import Image
from pycocotools.coco import COCO
#以下是80个分类
'''
person
bicycle
car
motorcycle
airplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
couch
potted plant
bed
dining table
toilet
tv
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
'''
import os
import shutil

# Crop the wanted categories out of the COCO val2017 images and store every
# crop as a JPEG under <savepath>/<category name>/.
annpath = "collect/COCO/annotations/instances_val2017.json"
imgpath = "collect/COCO/val2017/"
savepath = "collect/COCO"
need_names=["bicycle",'motorcycle']
# Start from empty output directories so the counts printed at the end are exact.
for need_name in need_names:
    savedir = os.path.join(savepath,need_name)
    if os.path.exists(savedir):
        shutil.rmtree(savedir)
    os.makedirs(savedir)

data_source = COCO(annotation_file=annpath)
catIds = data_source.getCatIds()   # category ids
categories = data_source.loadCats(catIds)
categories.sort(key=lambda x: x['id'])
# Lookup tables between category name, COCO category id and the position of
# the category in the id-sorted list; only catId2classes is used below.
classes2Id = {}
classes2catId = {}
catId2classes = {}
catId2Id={}
Id2catId ={}
for i,cat in enumerate(categories):
    Id2catId[i]=cat['id']
    catId2Id[cat['id']]=i
    classes2Id[cat['name']]=i
    classes2catId[cat['name']]=cat['id']
    catId2classes[cat['id']]=cat['name']

img_ids = data_source.getImgIds()
print("num images:",len(img_ids))
# Iterating the list directly lets tqdm know the total and show a real bar.
for img_id in tqdm.tqdm(img_ids, desc='crop needed classes'):
    img_info = data_source.loadImgs(img_id)[0]
    img_name = img_info['file_name']
    height = img_info['height']
    width = img_info['width']
    if width<80 or height<80:
        continue
    annotation_id = data_source.getAnnIds(img_id)
    if len(annotation_id) == 0:
        continue

    # Collect the boxes first so the image is opened at most once, and not at
    # all when it contains none of the wanted categories.
    wanted = []
    for annotation in data_source.loadAnns(annotation_id):
        cls_name = catId2classes[annotation['category_id']]
        if cls_name not in need_names:
            continue
        box = annotation['bbox']
        # some annotations have basically no width / height, skip them
        if box[2] < 1 or box[3] < 1:
            continue
        # top_x,top_y,width,height ----> xmin,ymin,xmax,ymax, clipped to the image
        xmin = max(0,box[0])
        ymin = max(0,box[1])
        xmax = min(box[0]+box[2],width)
        ymax = min(box[1]+box[3],height)
        if xmax-xmin<30 or ymax-ymin<30:
            continue
        wanted.append((cls_name,(xmin,ymin,xmax,ymax)))
    if not wanted:
        continue

    file_name = os.path.splitext(img_name)[0]
    with Image.open(os.path.join(imgpath,img_name)) as im:
        # The running index keeps crop file names unique within one image.
        for i,(cls_name,box) in enumerate(wanted):
            save_path = os.path.join(savepath,cls_name,file_name+'_'+str(i)+'.jpg')
            im.crop(box).save(save_path,quality=100)

for need_name in need_names:
    print(f"{need_name} img num:{len(os.listdir(os.path.join(savepath,need_name)))}")

print('finish')


loading annotations into memory...
Done (t=0.38s)
creating index...
index created!
num images: 5000


crop needed classes: 5000it [00:02, 2112.55it/s]

bicycle img num:196
motorcycle img num:300
finish

3、YOLO 格式数据

以下代码是实际使用中的版本,没做太多修改:同时处理了两个YOLO格式的文件夹,并使用了多进程;图片与标签文件的对应关系通过文件名字典来匹配,而不是用glob逐个查找,因为这样速度更快。

import os
import tqdm
from glob import glob
from multiprocessing import Process
import numpy as np
import shutil
import cv2

# imgpaths = ['ebike/electric_bicycle/images_all',"ebike/luping_buchong/images"]
# annpaths = ['ebike/electric_bicycle/labels_all',"ebike/luping_buchong/labels"]
imgpaths = ['ebike/electric_bicycle/images',"ebike/luping_buchong/images"]
annpaths = ['ebike/electric_bicycle/labels',"ebike/luping_buchong/labels"]

# Build two parallel lists: every image that has a label file, and that label
# file. Images and labels are matched by file stem within each directory pair.
all_imgs=[]
all_labels=[]

for imgpath,annpath in zip(imgpaths,annpaths):
    # stem -> extension, built from a single directory listing so that stems
    # and extensions can never get out of step with each other.
    name_add_fix = dict(os.path.splitext(name) for name in os.listdir(imgpath))
    annfiles = {os.path.splitext(name)[0] for name in os.listdir(annpath)}
    comfiles = list(set(name_add_fix) & annfiles)
    for comfile in tqdm.tqdm(comfiles):
        all_imgs.append(os.path.join(imgpath,comfile+name_add_fix[comfile]))
        all_labels.append(os.path.join(annpath,comfile+'.txt'))
print("img num",len(all_imgs))
print("ann num",len(all_labels))

with open('classes.txt','r') as f:
    classes = f.read().strip().splitlines()
# NOTE(review): the list read from classes.txt is replaced right away, so only
# 'ebike' gets an output directory -- presumably intentional; confirm.
classes=['ebike']
cls_path = 'ebike/ebike_classification_need'
# Recreate the output tree from scratch; exist_ok=False makes a leftover
# directory an error instead of silently mixing old and new crops.
if os.path.exists(cls_path):
    shutil.rmtree(cls_path)
for class_name in classes:
    class_path = os.path.join(cls_path,class_name)
    os.makedirs(class_path,exist_ok=False)
# Label files store the class as a number; map its string form to the class
# name. The loop names avoid shadowing the builtin `id` at module scope.
id2class = {str(class_id): class_name for class_id, class_name in enumerate(classes)}
print(id2class)

def crop_img(img_paths,label_paths,id2class,cls_path,keep_ids=(0,)):
    """Crop labelled boxes out of images and save each crop in its class folder.

    Args:
        img_paths: image file paths.
        label_paths: label file paths, parallel to ``img_paths``. Each line is
            ``class_id x_center y_center width height``; the four box values
            are fractions of the image width / height.
        id2class: maps the class id as a string (e.g. ``"0"``) to the name of
            the sub-directory of ``cls_path`` that receives the crops.
        cls_path: output root directory.
        keep_ids: integer class ids to crop; labels with any other id are
            ignored. Defaults to ``(0,)``, i.e. only class 0.

    Boxes whose width/height ratio is outside (0.2, 5) are skipped. Crops are
    JPEG-encoded and written as ``<image stem>_<label line index><suffix>``.
    """
    for img_path,label_path in tqdm.tqdm(zip(img_paths,label_paths),total=len(img_paths)):
        img_name,img_suffix = os.path.splitext(os.path.split(img_path)[-1])
        with open(label_path,'r') as f:
            lines = f.read().strip().splitlines()
        img = None  # decoded lazily, once per image, on the first wanted label
        for i,line in enumerate(lines):
            fields = line.split()
            if len(fields) < 5:
                # blank or truncated label line: nothing to crop
                continue
            labelid,x,y,w,h = map(float,fields[:5])
            if int(labelid) not in keep_ids:
                continue
            label = id2class[str(int(labelid))]
            if img is None:
                # np.fromfile + imdecode instead of cv2.imread so that paths
                # with non-ASCII characters can be read.
                raw = np.fromfile(img_path,np.uint8)
                img = cv2.imdecode(raw,1) if raw.size else None
                if img is None:
                    # unreadable image: no label of this file can be cropped
                    break
            imgh,imgw,_ = img.shape
            box_w = w*imgw
            box_h = h*imgh
            if box_h <= 0:
                # zero-height box; also avoids dividing by zero below
                continue
            if not 0.2 < box_w/box_h < 5:
                continue
            cx = x*imgw
            cy = y*imgh
            # centre/size -> corner coordinates, clipped to the image
            xmin = max(int(cx-box_w/2),0)
            ymin = max(int(cy-box_h/2),0)
            xmax = min(int(cx+box_w/2),int(imgw))
            ymax = min(int(cy+box_h/2),int(imgh))
            imgpart = img[ymin:ymax,xmin:xmax,:]
            # NOTE(review): the data is always JPEG-encoded but the file keeps
            # the source image's suffix -- confirm this is intended.
            imgpath = os.path.join(cls_path,label,img_name+f"_{i}"+img_suffix)
            try:
                # imencode + tofile mirrors the non-ASCII-safe read above
                cv2.imencode('.jpg',imgpart)[1].tofile(imgpath)
            except Exception:
                # e.g. an empty crop; report it and carry on with the next box
                print("non img",imgpath)

# Split the image/label lists into `num` contiguous slices and crop each slice
# in its own process, then wait for all of them.
# NOTE(review): there is no `if __name__ == "__main__":` guard around this
# module-level launch; presumably it is run where processes are forked -- confirm.
num = 40
parts = np.linspace(0,len(all_imgs),num+1).astype(np.int32)
processes = [
    Process(target=crop_img,
            args=(all_imgs[lo:hi],all_labels[lo:hi],id2class,cls_path))
    for lo,hi in zip(parts[:-1],parts[1:])
]
for worker in processes:
    worker.start()
for worker in processes:
    worker.join()

4、openimage数据获取

import pandas as pd
from PIL import Image
import os
import tqdm
import shutil
import numpy as np

从数据集中抠取所需的类别,只是为了扩充我们自己的数据。OpenImages中有些图片没有检测框标注,所以从标注文件统计出来的image_id数量会少于真实的图片数量。

获取标签

# Load the boxable-class table; the two columns are named 'labelname' and
# 'displayname' here.
classes = pd.read_csv('openimage/labels/metadate/class-descriptions-boxable.csv',names=['labelname','displayname'])
classes.head()  # notebook preview of the first rows
labelnamedisplayname
0/m/011k07Tortoise
1/m/011q46kgContainer
2/m/012074Magpie
3/m/0120dhSea turtle
4/m/01226zFootball
# Dump every display name, one per line, to names.txt for manual browsing.
names = classes['displayname'].tolist()
with open('names.txt','w') as f:
    f.writelines(display_name+'\n' for display_name in names)

根据displayname 获取 labelname 并指定我们想要的类别

可以查看names.txt,有我们想要的分类,如bicycle或含这个词的类别


# Keep every display name that contains the keyword, ignoring case.
keyword = 'cycle'
need_names = [display_name for display_name in names if keyword in display_name.lower()]
print(need_names)
['Bicycle', 'Bicycle wheel', 'Stationary bicycle', 'Bicycle helmet', 'Motorcycle', 'Unicycle']
# For each wanted display name take the first matching row of the class table
# as a [labelname, displayname] pair.
need_labelnames = [
    classes.loc[classes['displayname']==wanted].iloc[0].tolist()
    for wanted in need_names
]
print(need_labelnames)
[['/m/0199g', 'Bicycle'], ['/m/01bqk0', 'Bicycle wheel'], ['/m/03kt2w', 'Stationary bicycle'], ['/m/03p3bw', 'Bicycle helmet'], ['/m/04_sv', 'Motorcycle'], ['/m/0f6nr', 'Unicycle']]
# Label id -> display name with spaces turned into underscores, so the name
# can be used as a directory name.
labelname2displayname = {
    label_id: display_name.replace(" ","_")
    for label_id, display_name in need_labelnames
}
print(labelname2displayname)
{'/m/0199g': 'Bicycle', '/m/01bqk0': 'Bicycle_wheel', '/m/03kt2w': 'Stationary_bicycle', '/m/03p3bw': 'Bicycle_helmet', '/m/04_sv': 'Motorcycle', '/m/0f6nr': 'Unicycle'}

根据标签名找到对应的图片名称

测试集

先获取小的数据集,从测试集到验证集最后再做训练集


# Bounding-box annotation table of the test split.
annfile = 'openimage/labels/detection/test-annotations-bbox.csv'

anns = pd.read_csv(annfile)
anns.head()  # notebook preview of the first rows
ImageIDSourceLabelNameConfidenceXMinXMaxYMinYMaxIsOccludedIsTruncatedIsGroupOfIsDepictionIsInside
0000026e7ee790996xclick/m/07j7r10.0718750.1453130.2062500.39166701100
1000026e7ee790996xclick/m/07j7r10.4390630.5718750.2645830.43541701100
2000026e7ee790996xclick/m/07j7r10.6687501.0000000.0000000.55208301100
3000062a39995e348xclick/m/015p610.2062080.8492240.1546391.00000000000
4000062a39995e348xclick/m/05s2s10.1374720.3769400.0000000.88365211000
# One row per box, so the row count is the number of annotations; the number
# of annotated images is the number of distinct ImageID values.
total_num = len(anns)
print("total ann num:",total_num)
total_numimg = len(set(anns['ImageID']))
print("total img num:",total_numimg)

total ann num: 937327
total img num: 112194
# Keep only the annotation rows whose label is one of the wanted classes.
new_need_labelnames = [pair[0] for pair in need_labelnames]
# One vectorised membership test instead of OR-ing one full-column comparison
# per label; the result is the same boolean numpy mask.
old_a = anns['LabelName'].isin(new_need_labelnames).to_numpy()
print("total num :",np.sum(old_a))
need_anns = anns[old_a]
total num : 4757
need_anns.head()
ImageIDSourceLabelNameConfidenceXMinXMaxYMinYMaxIsOccludedIsTruncatedIsGroupOfIsDepictionIsInside
3230013a0927e6bbefcxclick/m/04_sv10.1921880.6000000.3062500.62916700000
522001d1da154d00e83xclick/m/0199g10.1994130.4398830.6810630.95348810000
523001d1da154d00e83xclick/m/0199g10.4252200.4926690.8073090.88039910000
524001d1da154d00e83xclick/m/0199g10.5483870.7800590.6777410.94352210000
1177003c2b6816ba9d22xclick/m/03p3bw10.3367800.4977840.1434880.34878600000
# Crop every wanted box of the test split into openimage/test_imgs/<class>/
# and write the list of images that contain wanted boxes to test_images.txt.
for i in need_names:
    save_path = os.path.join('openimage/test_imgs',i.replace(" ","_"))
    if os.path.exists(save_path):
        shutil.rmtree(save_path)
    os.makedirs(save_path)

# Pull the needed columns out once instead of one .iloc lookup per row.
image_ids = need_anns['ImageID'].tolist()
label_names = need_anns['LabelName'].tolist()
boxes = need_anns[['XMin','XMax','YMin','YMax']].to_numpy().astype(np.float32)

all_imageids = []
rows = zip(image_ids,label_names,boxes)
for i,(image_id,labelname,bbox) in enumerate(tqdm.tqdm(rows,total=len(image_ids))):
    image_dir = os.path.join('test',image_id)
    all_imageids.append(image_dir)
    displayname = labelname2displayname[labelname]
    try:
        # The context manager closes the image file after each crop.
        with Image.open(os.path.join('openimage',image_dir+'.jpg')) as img:
            w,h = img.size
            # box values are fractions of the image size -> pixel coordinates
            xmin = int(w*bbox[0])
            xmax = int(w*bbox[1])
            ymin = int(h*bbox[2])
            ymax = int(h*bbox[3])
            if xmax <= xmin or ymax <= ymin:
                # an empty crop cannot be written as JPEG; skip it explicitly
                print("skip empty box:",image_id,i)
                continue
            # the row position keeps crop names unique within one image
            save_name = image_id+"_"+str(i)+'.jpg'
            save_path = os.path.join('openimage/test_imgs',displayname,save_name)
            img.crop((xmin,ymin,xmax,ymax)).save(save_path,quality=100)
    except Exception as e:
        # best effort: report missing/unreadable images and carry on
        print("wrong image id:",e)
        continue
for i in need_names:
    name = i.replace(" ","_")
    save_path = os.path.join('openimage/test_imgs',name)
    print(f"{name} image num:{len(os.listdir(save_path))}")
# Sorted so that the id file is identical from run to run.
new_all_imageids = [i+'\n' for i in sorted(set(all_imageids))]

img_names=os.path.join('openimage','test_images.txt')
# mode 'w' truncates an existing file, so no explicit removal is needed
with open(img_names,'w') as f:
    f.writelines(new_all_imageids)
100%|██████████| 4757/4757 [00:45<00:00, 103.85it/s]

Bicycle image num:1203
Bicycle_wheel image num:2314
Stationary_bicycle image num:47
Bicycle_helmet image num:492
Motorcycle image num:683
Unicycle image num:18

验证集

以上就完成了openimage test数据集的获取,同样的道理,再对val和train做相同处理即可,如下是val:

# Bounding-box annotation table of the validation split.
annfile = 'openimage/labels/detection/validation-annotations-bbox.csv'

anns = pd.read_csv(annfile)
anns.head()  # notebook preview of the first rows
ImageIDSourceLabelNameConfidenceXMinXMaxYMinYMaxIsOccludedIsTruncatedIsGroupOfIsDepictionIsInside
00001eeaf4aed83f9xclick/m/0cmf210.0226730.9642010.0710380.80054600000
1000595fe6fee6369xclick/m/02wbm10.0000001.0000000.0000001.00000000100
2000595fe6fee6369xclick/m/02xwb10.1413840.1796760.6762750.73170700000
3000595fe6fee6369xclick/m/02xwb10.2135490.2533140.2993350.35476710000
4000595fe6fee6369xclick/m/02xwb10.2326950.2886600.4900220.54545510000
# Number of annotation rows and number of distinct annotated images.
total_num = len(anns)
print("total ann num:",total_num)
total_numimg = len(set(anns['ImageID']))
print("total img num:",total_numimg)
total ann num: 303980
total img num: 37306
# Keep only the annotation rows whose label is one of the wanted classes.
new_need_labelnames = [pair[0] for pair in need_labelnames]
# One vectorised membership test instead of OR-ing one full-column comparison
# per label; the result is the same boolean numpy mask.
old_a = anns['LabelName'].isin(new_need_labelnames).to_numpy()
print("total num :",np.sum(old_a))
need_anns = anns[old_a]

total num : 1629
# Same counts, restricted to the rows of the wanted classes.
total_num = len(need_anns)
print("total ann num:",total_num)
total_numimg = len(set(need_anns['ImageID']))
print("total img num:",total_numimg)
total ann num: 1629
total img num: 445
need_anns.head()
ImageIDSourceLabelNameConfidenceXMinXMaxYMinYMaxIsOccludedIsTruncatedIsGroupOfIsDepictionIsInside
226001a995c1e25d892xclick/m/04_sv10.1015620.8765630.1145830.94583300000
43300575b9132bb3746xclick/m/03p3bw10.3702060.5132740.0442480.30752210000
43400575b9132bb3746xclick/m/0199g10.0000000.5088500.0000000.68141601000
45700575b9132bb3746xclick/m/01bqk010.0000000.1946900.1836280.67699111000
45800575b9132bb3746xclick/m/01bqk010.1297940.3008850.5398230.96017710000
# Crop every wanted box of the validation split into
# openimage/validation_imgs/<class>/ and write the list of images that contain
# wanted boxes to validation_images.txt.
for i in need_names:
    save_path = os.path.join('openimage/validation_imgs',i.replace(" ","_"))
    if os.path.exists(save_path):
        shutil.rmtree(save_path)
    os.makedirs(save_path)

# Pull the needed columns out once instead of one .iloc lookup per row.
image_ids = need_anns['ImageID'].tolist()
label_names = need_anns['LabelName'].tolist()
boxes = need_anns[['XMin','XMax','YMin','YMax']].to_numpy().astype(np.float32)

all_imageids = []
rows = zip(image_ids,label_names,boxes)
for i,(image_id,labelname,bbox) in enumerate(tqdm.tqdm(rows,total=len(image_ids))):
    image_dir = os.path.join('validation',image_id)
    all_imageids.append(image_dir)
    displayname = labelname2displayname[labelname]
    try:
        # The context manager closes the image file after each crop.
        with Image.open(os.path.join('openimage',image_dir+'.jpg')) as img:
            w,h = img.size
            # box values are fractions of the image size -> pixel coordinates
            xmin = int(w*bbox[0])
            xmax = int(w*bbox[1])
            ymin = int(h*bbox[2])
            ymax = int(h*bbox[3])
            if xmax <= xmin or ymax <= ymin:
                # an empty crop cannot be written as JPEG; skip it explicitly
                print("skip empty box:",image_id,i)
                continue
            # the row position keeps crop names unique within one image
            save_name = image_id+"_"+str(i)+'.jpg'
            save_path = os.path.join('openimage/validation_imgs',displayname,save_name)
            img.crop((xmin,ymin,xmax,ymax)).save(save_path,quality=100)
    except Exception as e:
        # best effort: report missing/unreadable images and carry on
        print("wrong image id:",e)
        continue
for i in need_names:
    name = i.replace(" ","_")
    save_path = os.path.join('openimage/validation_imgs',name)
    print(f"{name} image num:{len(os.listdir(save_path))}")
# Sorted so that the id file is identical from run to run.
new_all_imageids = [i+'\n' for i in sorted(set(all_imageids))]

img_names=os.path.join('openimage','validation_images.txt')
# mode 'w' truncates an existing file, so no explicit removal is needed
with open(img_names,'w') as f:
    f.writelines(new_all_imageids)
100%|██████████| 1629/1629 [00:27<00:00, 59.82it/s] 

Bicycle image num:418
Bicycle_wheel image num:780
Stationary_bicycle image num:10
Bicycle_helmet image num:187
Motorcycle image num:232
Unicycle image num:2

从上面两个数据集也可以看出,大量图片中只有少量是我们需要的,逐张处理速度很慢,而且前提是已经把所有图片都下载了下来(大概570G左右,下载了好几天),很费力气。事实上可以只下载标注文件,先生成所需的image id列表(如上面保存的test_images.txt和validation_images.txt),再利用官网提供的工具只下载需要的那部分图片。因为训练集特别大,所以采用这种方法;事实上所有数据集都推荐这种方法。这部分工作可以参考 https://www.jianshu.com/p/40b58833af22

训练集

# This takes a long time; be patient.
# Bounding-box annotation table of the train split.
annfile = 'openimage/labels/detection/oidv6-train-annotations-bbox.csv'

anns = pd.read_csv(annfile)
anns.head()  # notebook preview of the first rows
ImageIDSourceLabelNameConfidenceXMinXMaxYMinYMaxIsOccludedIsTruncated...IsDepictionIsInsideXClick1XXClick2XXClick3XXClick4XXClick1YXClick2YXClick3YXClick4Y
0000002b66c9c498exclick/m/01g31710.0125000.1953120.1484380.58750001...000.1484380.0125000.0593750.1953120.1484380.3578120.5875000.325000
1000002b66c9c498exclick/m/01g31710.0250000.2765630.7140630.94843801...000.0250000.2484380.2765630.2140620.9140620.7140630.7828130.948438
2000002b66c9c498exclick/m/01g31710.1515620.3109370.1984370.59062510...000.2437500.1515620.3109370.2625000.1984370.4343750.5078120.590625
3000002b66c9c498exclick/m/01g31710.2562500.4296880.6515630.92500010...000.3156250.4296880.2562500.4234380.6515630.9218750.8265620.925000
4000002b66c9c498exclick/m/01g31710.2578120.3468750.2359380.38593810...000.3171880.2578120.3468750.3078120.2359380.2890620.3484380.385938

5 rows × 21 columns

# Number of annotation rows and number of distinct annotated images.
total_num = len(anns)
print("total ann num:",total_num)
total_numimg = len(set(anns['ImageID']))
print("total img num:",total_numimg)
total ann num: 14610229
total img num: 1743042
# Keep only the annotation rows whose label is one of the wanted classes.
new_need_labelnames = [pair[0] for pair in need_labelnames]
# One vectorised membership test instead of OR-ing one full-column comparison
# per label; the result is the same boolean numpy mask.
old_a = anns['LabelName'].isin(new_need_labelnames).to_numpy()
print("total num :",np.sum(old_a))
need_anns = anns[old_a]
total num : 129548
# Same counts, restricted to the rows of the wanted classes.
total_num = len(need_anns)
print("total ann num:",total_num)
total_numimg = len(set(need_anns['ImageID']))
print("total img num:",total_numimg)
total ann num: 129548
total img num: 26694
need_anns.head()
ImageIDSourceLabelNameConfidenceXMinXMaxYMinYMaxIsOccludedIsTruncated...IsDepictionIsInsideXClick1XXClick2XXClick3XXClick4XXClick1YXClick2YXClick3YXClick4Y
15800002f4ff380c64cxclick/m/0199g10.0000000.1555560.6548670.87610601...000.1066670.0000000.0014810.1555560.8761060.8030970.6548670.796460
15900002f4ff380c64cxclick/m/0199g10.1688890.4029630.6482300.87389400...000.2681480.1688890.2088890.4029630.6482300.8030970.8738940.809735
16000002f4ff380c64cxclick/m/0199g10.4148150.6592590.6548670.88716800...000.6088890.4148150.4948150.6592590.8871680.7964600.6548670.803097
16100002f4ff380c64cxclick/m/01bqk010.0607410.1511110.7367260.87389410...000.1037040.0607410.1081480.1511110.7367260.8097350.8738940.805310
16200002f4ff380c64cxclick/m/01bqk010.1659260.2607410.7433630.87831910...000.2103700.1659260.2162960.2607410.7433630.8163720.8783190.816372

5 rows × 21 columns

# Prepare empty per-class output directories for the train split and write the
# list of train images that contain wanted boxes to train_images.txt. No image
# is opened or cropped in this step.
for i in need_names:
    save_path = os.path.join('openimage/train_imgs',i.replace(" ","_"))
    if os.path.exists(save_path):
        shutil.rmtree(save_path)
    os.makedirs(save_path)

# Iterate the ImageID column directly instead of one .iloc lookup per row.
all_imageids = [
    os.path.join('train',image_id)
    for image_id in tqdm.tqdm(need_anns['ImageID'])
]
# One "<split>/<image id>" entry per line, deduplicated; sorted so that the
# file is identical from run to run.
new_all_imageids = [i+'\n' for i in sorted(set(all_imageids))]

img_names=os.path.join('openimage','train_images.txt')
# mode 'w' truncates an existing file, so no explicit removal is needed
with open(img_names,'w') as f:
    f.writelines(new_all_imageids)
100%|██████████| 129548/129548 [00:03<00:00, 34663.79it/s]
len(new_all_imageids)
26694

下载方式命令是:

python downloader.py train_images.txt --download_folder=train --num_processes=5

下载好的图片将保存在 download_folder指定的文件下, num_processes越大,下载用的时间越少
downloader.py下载需要科学上网,所以这里写出downloader.py的内容(20220920,对应v6版本)

# python3
# coding=utf-8
# Copyright 2020 The Google Research Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Open Images image downloader.

This script downloads a subset of Open Images images, given a list of image ids.
Typical uses of this tool might be downloading images:
- That contain a certain category.
- That have been annotated with certain types of annotations (e.g. Localized
Narratives, Exhaustively annotated people, etc.)

The input file IMAGE_LIST should be a text file containing one image per line
with the format <SPLIT>/<IMAGE_ID>, where <SPLIT> is either "train", "test",
"validation", or "challenge2018"; and <IMAGE_ID> is the image ID that uniquely
identifies the image in Open Images. A sample file could be:
  train/f9e0434389a1d4dd
  train/1a007563ebc18664
  test/ea8bfd4e765304db

"""

import argparse
from concurrent import futures
import os
import re
import sys

import boto3
import botocore
import tqdm

BUCKET_NAME = 'open-images-dataset'
REGEX = r'(test|train|validation|challenge2018)/([a-fA-F0-9]*)'


def check_and_homogenize_one_image(image):
  """Yields the (split, image_id) pair parsed from the start of `image`.

  Raises AttributeError on iteration when `image` does not start with
  "<split>/" for one of the splits named in REGEX.
  """
  parsed = re.match(REGEX, image)
  # `parsed` is None for an unrecognized string, so .groups() raises here.
  yield parsed.groups()


def check_and_homogenize_image_list(image_list):
  """Yields a (split, image_id) pair for every entry of `image_list`.

  Raises ValueError naming the 0-based position and the offending string as
  soon as an entry cannot be parsed.
  """
  for line_number, image in enumerate(image_list):
    try:
      for parsed in check_and_homogenize_one_image(image):
        yield parsed
    except (ValueError, AttributeError):
      message = (
          f'ERROR in line {line_number} of the image list. The following image '
          f'string is not recognized: "{image}".')
      raise ValueError(message)


def read_image_list_file(image_list_file):
  """Yields each line of the file, stripped and with every '.jpg' removed."""
  with open(image_list_file, 'r') as handle:
    yield from (raw.strip().replace('.jpg', '') for raw in handle)


def download_one_image(bucket, split, image_id, download_folder):
  """Downloads `<split>/<image_id>.jpg` to `<download_folder>/<image_id>.jpg`.

  A botocore ClientError is turned into sys.exit with an error message.
  """
  remote_key = f'{split}/{image_id}.jpg'
  local_path = os.path.join(download_folder, f'{image_id}.jpg')
  try:
    bucket.download_file(remote_key, local_path)
  except botocore.exceptions.ClientError as exception:
    sys.exit(
        f'ERROR when downloading image `{split}/{image_id}`: {str(exception)}')


def download_all_images(args):
  """Downloads every image listed in args['image_list'].

  Reads 'image_list', 'download_folder' and 'num_processes' from `args`.
  Exits via sys.exit when the list contains an unrecognized entry.
  """
  unsigned = botocore.config.Config(signature_version=botocore.UNSIGNED)
  bucket = boto3.resource('s3', config=unsigned).Bucket(BUCKET_NAME)

  # Fall back to the current directory when no folder was given.
  download_folder = args['download_folder'] or os.getcwd()
  if not os.path.exists(download_folder):
    os.makedirs(download_folder)

  try:
    raw_lines = read_image_list_file(args['image_list'])
    image_list = list(check_and_homogenize_image_list(raw_lines))
  except ValueError as exception:
    sys.exit(exception)

  progress_bar = tqdm.tqdm(
      total=len(image_list), desc='Downloading images', leave=True)
  with futures.ThreadPoolExecutor(
      max_workers=args['num_processes']) as executor:
    all_futures = []
    for split, image_id in image_list:
      all_futures.append(
          executor.submit(download_one_image, bucket, split, image_id,
                          download_folder))
    for future in futures.as_completed(all_futures):
      # .result() re-raises whatever the worker raised
      future.result()
      progress_bar.update(1)
  progress_bar.close()


if __name__ == '__main__':
  # Command line: IMAGE_LIST [--num_processes N] [--download_folder DIR]
  cli = argparse.ArgumentParser(
      description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
  cli.add_argument(
      'image_list',
      type=str,
      default=None,
      help=('Filename that contains the split + image IDs of the images to '
            'download. Check the document'))
  cli.add_argument(
      '--num_processes',
      type=int,
      default=5,
      help='Number of parallel processes to use (default is 5).')
  cli.add_argument(
      '--download_folder',
      type=str,
      default=None,
      help='Folder where to download the images.')
  cli_args = vars(cli.parse_args())
  download_all_images(cli_args)

下载好以后图片保存在train中,我们可以做同样的处理了

# Crop every wanted box of the downloaded train images into
# openimage/train_imgs/<class>/.
for i in need_names:
    save_path = os.path.join('openimage/train_imgs',i.replace(" ","_"))
    if os.path.exists(save_path):
        shutil.rmtree(save_path)
    os.makedirs(save_path)

# Pull the needed columns out once instead of one .iloc lookup per row.
image_ids = need_anns['ImageID'].tolist()
label_names = need_anns['LabelName'].tolist()
boxes = need_anns[['XMin','XMax','YMin','YMax']].to_numpy().astype(np.float32)

all_imageids = []
rows = zip(image_ids,label_names,boxes)
for i,(image_id,labelname,bbox) in enumerate(tqdm.tqdm(rows,total=len(image_ids))):
    image_dir = os.path.join('train',image_id)
    all_imageids.append(image_dir)
    displayname = labelname2displayname[labelname]
    try:
        # The context manager closes the image file after each crop.
        with Image.open(os.path.join('openimage',image_dir+'.jpg')) as img:
            w,h = img.size
            # box values are fractions of the image size -> pixel coordinates
            xmin = int(w*bbox[0])
            xmax = int(w*bbox[1])
            ymin = int(h*bbox[2])
            ymax = int(h*bbox[3])
            if xmax <= xmin or ymax <= ymin:
                # an empty crop cannot be written as JPEG; skip it explicitly
                print("skip empty box:",image_id,i)
                continue
            # the row position keeps crop names unique within one image
            save_name = image_id+"_"+str(i)+'.jpg'
            save_path = os.path.join('openimage/train_imgs',displayname,save_name)
            img.crop((xmin,ymin,xmax,ymax)).save(save_path,quality=100)
    except Exception as e:
        # best effort: report missing/unreadable images and carry on
        print("wrong image id:",e)
        continue
for i in need_names:
    name = i.replace(" ","_")
    save_path = os.path.join('openimage/train_imgs',name)
    print(f"{name} image num:{len(os.listdir(save_path))}")



 11%|█         | 14409/129548 [02:09<26:42, 71.85it/s] 

wrong image id: cannot write empty image as JPEG


 24%|██▍       | 31367/129548 [04:43<17:53, 91.42it/s]  

wrong image id: cannot write empty image as JPEG


100%|██████████| 129548/129548 [20:20<00:00, 106.15it/s]


Bicycle image num:40161
Bicycle_wheel image num:59520
Stationary_bicycle image num:338
Bicycle_helmet image num:15951
Motorcycle image num:13382
Unicycle image num:194
除了以上方法还有更快的方式:上面的方法对每个框都要重新读取一次图片,实际上可以每张图只读一次,把这张图上的所有框一次性取出来,如下:
# Group the wanted boxes by image so each image needs to be opened only once.
# Grouping by the plain column name (not a one-element list) keeps each group
# key a string on every pandas version, so it can be concatenated with ":".
a = need_anns[['ImageID','LabelName','XMin','XMax','YMin','YMax']].groupby("ImageID")
# Show the first two groups as an example.
for i,(image_id,image_boxes) in enumerate(a):
    if i >= 2:
        break
    print(image_id+":")
    print(image_boxes)
00002f4ff380c64c:
              ImageID  LabelName      XMin      XMax      YMin      YMax
158  00002f4ff380c64c   /m/0199g  0.000000  0.155556  0.654867  0.876106
159  00002f4ff380c64c   /m/0199g  0.168889  0.402963  0.648230  0.873894
160  00002f4ff380c64c   /m/0199g  0.414815  0.659259  0.654867  0.887168
161  00002f4ff380c64c  /m/01bqk0  0.060741  0.151111  0.736726  0.873894
162  00002f4ff380c64c  /m/01bqk0  0.165926  0.260741  0.743363  0.878319
163  00002f4ff380c64c  /m/01bqk0  0.308148  0.400000  0.743363  0.878319
164  00002f4ff380c64c  /m/01bqk0  0.413333  0.505185  0.743363  0.884956
165  00002f4ff380c64c  /m/01bqk0  0.561481  0.656296  0.736726  0.880531
000091f4a275d0fb:
              ImageID LabelName      XMin      XMax      YMin      YMax
868  000091f4a275d0fb  /m/0199g  0.017143  0.998571  0.225806  0.997849

5、object365

等补充

  人工智能 最新文章
2022吴恩达机器学习课程——第二课(神经网
第十五章 规则学习
FixMatch: Simplifying Semi-Supervised Le
数据挖掘Java——Kmeans算法的实现
大脑皮层的分割方法
【翻译】GPT-3是如何工作的
论文笔记:TEACHTEXT: CrossModal Generaliz
python从零学(六)
详解Python 3.x 导入(import)
【答读者问27】backtrader不支持最新版本的
上一篇文章      下一篇文章      查看所有文章
加:2022-09-30 00:52:59  更:2022-09-30 00:54:14 
 
开发: C++知识库 Java知识库 JavaScript Python PHP知识库 人工智能 区块链 大数据 移动开发 嵌入式 开发工具 数据结构与算法 开发测试 游戏开发 网络协议 系统运维
教程: HTML教程 CSS教程 JavaScript教程 Go语言教程 JQuery教程 VUE教程 VUE3教程 Bootstrap教程 SQL数据库教程 C语言教程 C++教程 Java教程 Python教程 Python3教程 C#教程
数码: 电脑 笔记本 显卡 显示器 固态硬盘 硬盘 耳机 手机 iphone vivo oppo 小米 华为 单反 装机 图拉丁

360图书馆 购物 三丰科技 阅读网 日历 万年历 2024年12日历 -2024/12/28 18:10:59-

图片自动播放器
↓图片自动播放器↓
TxT小说阅读器
↓语音阅读,小说下载,古典文学↓
一键清除垃圾
↓轻轻一点,清除系统垃圾↓
图片批量下载器
↓批量下载图片,美女图库↓
  网站联系: qq:121756557 email:121756557@qq.com  IT数码
数据统计