This guide is for readers who already have some MMDetection basics and want to get fluent at training their own models on their own datasets. Every place where you need to swap in your own configuration is marked in the code, to make the changes easy.
Let's first look at MMDetection's basic workflow. To train a model, you only need to prepare two things: the dataset and the MMDetection config file.
Below I split the work into two parts, one for each. Once both are handled, you can happily train with the officially provided training tools.
1. Dataset preparation
First copy your dataset into MMDetection's data directory so it is easy to manage; each folder under data is one dataset. The dataset1/data/ directory holds your .xml and .jpg files. If your dataset is already a VOC dataset, you can skip step 1.1.
- xml2voc2007.py: converts the .xml files into a VOC2007 dataset.
- voc2coco.py: converts the VOC dataset into a COCO dataset.
- box_visiual.py: visualizes the dataset's ground truth from the COCO annotations, so you can check for dirty data and remove it as needed.
If you need other format conversions or dataset operations (splitting, conversion, visualization, error checking), see: 数据集拆分,互转,可视化,查错 - 一届书生 - 博客园 (cnblogs.com)
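For orientation, this is roughly how the dataset folder ends up once both conversion scripts below have run (a sketch assuming the default paths used in this guide; dataset1 is just the example name):

mmdetection/data/dataset1/
├── data/                 # your original .jpg + .xml files
├── xml2voc2007.py
├── voc2coco.py
├── box_visiual.py
├── VOCdevkit/            # created by xml2voc2007.py
│   └── VOC2007/
│       ├── Annotations/
│       ├── ImageSets/Main/   # train.txt, val.txt
│       └── JPEGImages/
└── coco2017/             # created by voc2coco.py
    ├── annotations/      # instances_train2017.json, instances_val2017.json
    ├── train2017/
    └── val2017/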
1.1 Dataset conversion: .xml --> VOC dataset
First, dataset processing. I prefer working with COCO datasets, even though MMDetection can also train on VOC. Since what I received was a dataset of .jpg and .xml files, we first convert the .xml annotations into a VOC dataset, and then convert that VOC dataset into a COCO dataset.
mmdetection/data/dataset1/xml2voc2007.py
import argparse
import glob
import os
import os.path as osp
import random
import shutil
import sys

# Fraction of samples that go into the training set; the rest go to val.
percent_train = 0.9


def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("--input_dir", default="data", help="input annotated directory")
    parser.add_argument("--output_dir", default="VOCdevkit", help="output dataset directory")
    args = parser.parse_args()

    if osp.exists(args.output_dir):
        print("Output directory already exists:", args.output_dir)
        sys.exit(1)
    os.makedirs(args.output_dir)
    print("| Creating dataset dir:", osp.join(args.output_dir, "VOC2007"))

    # Create the standard VOC2007 directory skeleton.
    os.makedirs(osp.join(args.output_dir, "VOC2007", "Annotations"))
    os.makedirs(osp.join(args.output_dir, "VOC2007", "ImageSets", "Main"))
    os.makedirs(osp.join(args.output_dir, "VOC2007", "JPEGImages"))

    total_img = glob.glob(osp.join(args.input_dir, "*.jpg"))
    print('| Image number: ', len(total_img))
    total_xml = glob.glob(osp.join(args.input_dir, "*.xml"))
    print('| Xml number: ', len(total_xml))

    # Randomly split the annotation files into train / val.
    num_total = len(total_xml)
    data_list = range(num_total)
    num_tr = int(num_total * percent_train)
    train_indices = set(random.sample(data_list, num_tr))
    print('| Train number: ', num_tr)
    print('| Val number: ', num_total - num_tr)

    file_train = open(
        osp.join(args.output_dir, "VOC2007", "ImageSets", "Main", "train.txt"), 'w')
    file_val = open(
        osp.join(args.output_dir, "VOC2007", "ImageSets", "Main", "val.txt"), 'w')
    for i in data_list:
        # Keep only the bare file stem (no directory prefix, no ".xml").
        name = osp.splitext(osp.basename(total_xml[i]))[0] + '\n'
        if i in train_indices:
            file_train.write(name)
        else:
            file_val.write(name)
    file_train.close()
    file_val.close()

    # Copy images to JPEGImages and everything else (the .xml files)
    # to Annotations.
    for root, dirs, files in os.walk(args.input_dir):
        for file in files:
            src_file = osp.join(root, file)
            if src_file.endswith(".jpg"):
                shutil.copy(src_file, osp.join(args.output_dir, "VOC2007", "JPEGImages"))
            else:
                shutil.copy(src_file, osp.join(args.output_dir, "VOC2007", "Annotations"))
    print('| Done!')


if __name__ == "__main__":
    print("—" * 50)
    main()
    print("—" * 50)
1.2 Dataset conversion: VOC dataset --> COCO dataset
The code came out a bit verbose and I haven't had time to clean it up, but it works well!
mmdetection/data/dataset1/voc2coco.py
import datetime
import json
import os
import shutil
import xml.etree.ElementTree as ET

from PIL import Image

root_path = os.getcwd()


def voc2coco():
    # Change this to your own classes (this is one of the marked spots).
    class_name_to_id = {'point': 1, }

    if not os.path.exists(os.path.join(root_path, "coco2017")):
        os.makedirs(os.path.join(root_path, "coco2017"))
        os.makedirs(os.path.join(root_path, "coco2017", "annotations"))
        os.makedirs(os.path.join(root_path, "coco2017", "train2017"))
        os.makedirs(os.path.join(root_path, "coco2017", "val2017"))

    # Skeleton of a COCO-style annotation file.
    now = datetime.datetime.now()
    data = dict(
        info=dict(
            description=None,
            url=None,
            version=None,
            year=now.year,
            contributor=None,
            date_created=now.strftime("%Y-%m-%d %H:%M:%S.%f"),
        ),
        licenses=[dict(url=None, id=0, name=None, )],
        images=[],
        type="instances",
        annotations=[],
        categories=[],
    )
    for name, cls_id in class_name_to_id.items():
        data["categories"].append(
            dict(supercategory=None, id=cls_id, name=name, )
        )

    # Assign every image a unique integer id.
    images_dir = os.path.join(root_path, 'VOCdevkit', 'VOC2007', 'JPEGImages')
    images = os.listdir(images_dir)
    images_id = {}
    for idx, image_name in enumerate(images):
        images_id.update({image_name[:-4]: idx})

    # ---------- training split ----------
    train_img = []
    with open(os.path.join(root_path, 'VOCdevkit', 'VOC2007', 'ImageSets', 'Main', 'train.txt')) as fp:
        for i in fp.readlines():
            train_img.append(i.strip() + ".jpg")
    for image in train_img:
        img = Image.open(os.path.join(images_dir, image))
        data["images"].append(
            dict(
                license=0,
                url=None,
                file_name=image,
                height=img.height,
                width=img.width,
                date_captured=None,
                id=images_id[image[:-4]],
            )
        )

    # Parse each VOC .xml file and convert its boxes to COCO format
    # ([x, y, w, h] instead of [xmin, ymin, xmax, ymax]).
    train_xml = [i[:-4] + '.xml' for i in train_img]
    bbox_id = 0
    for xml in train_xml:
        category = []
        xmin, ymin, xmax, ymax = [], [], [], []
        tree = ET.parse(os.path.join(root_path, 'VOCdevkit', 'VOC2007', 'Annotations', xml))
        root = tree.getroot()
        objects = root.findall('object')
        for i in objects:
            category.append(class_name_to_id[i.findall('name')[0].text])
            bndbox = i.findall('bndbox')
            for j in bndbox:
                xmin.append(float(j.findall('xmin')[0].text))
                ymin.append(float(j.findall('ymin')[0].text))
                xmax.append(float(j.findall('xmax')[0].text))
                ymax.append(float(j.findall('ymax')[0].text))
        for i in range(len(category)):
            data["annotations"].append(
                dict(
                    id=bbox_id,
                    image_id=images_id[xml[:-4]],
                    category_id=category[i],
                    area=(xmax[i] - xmin[i]) * (ymax[i] - ymin[i]),
                    bbox=[xmin[i], ymin[i], xmax[i] - xmin[i], ymax[i] - ymin[i]],
                    iscrowd=0,
                )
            )
            bbox_id += 1
    json.dump(data, open(os.path.join(root_path, 'coco2017', 'annotations', 'instances_train2017.json'), 'w'))

    # ---------- validation split (same procedure) ----------
    val_img = []
    with open(os.path.join(root_path, 'VOCdevkit', 'VOC2007', 'ImageSets', 'Main', 'val.txt')) as fp:
        for i in fp.readlines():
            val_img.append(i.strip() + ".jpg")
    # Reuse the same dict but reset the image and annotation lists.
    data['images'] = []
    data['annotations'] = []
    for image in val_img:
        img = Image.open(os.path.join(images_dir, image))
        data["images"].append(
            dict(
                license=0,
                url=None,
                file_name=image,
                height=img.height,
                width=img.width,
                date_captured=None,
                id=images_id[image[:-4]],
            )
        )
    val_xml = [i[:-4] + '.xml' for i in val_img]
    for xml in val_xml:
        category = []
        xmin, ymin, xmax, ymax = [], [], [], []
        tree = ET.parse(os.path.join(root_path, 'VOCdevkit', 'VOC2007', 'Annotations', xml))
        root = tree.getroot()
        objects = root.findall('object')
        for i in objects:
            category.append(class_name_to_id[i.findall('name')[0].text])
            bndbox = i.findall('bndbox')
            for j in bndbox:
                xmin.append(float(j.findall('xmin')[0].text))
                ymin.append(float(j.findall('ymin')[0].text))
                xmax.append(float(j.findall('xmax')[0].text))
                ymax.append(float(j.findall('ymax')[0].text))
        for i in range(len(category)):
            data["annotations"].append(
                dict(
                    id=bbox_id,
                    image_id=images_id[xml[:-4]],
                    category_id=category[i],
                    area=(xmax[i] - xmin[i]) * (ymax[i] - ymin[i]),
                    bbox=[xmin[i], ymin[i], xmax[i] - xmin[i], ymax[i] - ymin[i]],
                    iscrowd=0,
                )
            )
            bbox_id += 1
    json.dump(data, open(os.path.join(root_path, 'coco2017', 'annotations', 'instances_val2017.json'), 'w'))
    print('| VOC -> COCO annotations transform finish.')

    # Copy the actual image files into the COCO directory layout.
    print('Start copy images...')
    for img_name in train_img:
        shutil.copy(os.path.join(root_path, "VOCdevkit", "VOC2007", "JPEGImages", img_name),
                    os.path.join(root_path, "coco2017", 'train2017', img_name))
    print('| Train images copy finish.')
    for img_name in val_img:
        shutil.copy(os.path.join(root_path, "VOCdevkit", "VOC2007", "JPEGImages", img_name),
                    os.path.join(root_path, "coco2017", 'val2017', img_name))
    print('| Val images copy finish.')


if __name__ == '__main__':
    print("—" * 50)
    voc2coco()
    print("—" * 50)
1.3 Visualizing the dataset's ground truth
Use the COCO annotations to visualize the dataset's ground truth, check for dirty data, and remove it as the situation requires.
mmdetection/data/dataset1/box_visiual.py
import json
import os
import random

import cv2

root_path = os.getcwd()
SAMPLE_NUMBER = 30            # how many random images to visualize
id_category = {1: 'point'}    # change to your own categories


def visiual():
    json_file = os.path.join(root_path, 'coco2017', 'annotations', 'instances_train2017.json')
    data = json.load(open(json_file, 'r'))
    images = data['images']
    annotations = data['annotations']
    # Draw the ground-truth boxes on a random sample of images.
    for i in random.sample(images, min(SAMPLE_NUMBER, len(images))):
        img = cv2.imread(os.path.join(root_path, 'coco2017', 'train2017', i['file_name']))
        bboxes = []
        category_ids = []
        for j in annotations:
            if j['image_id'] == i['id']:
                bboxes.append(j["bbox"])
                category_ids.append(j['category_id'])
        for idx, bbox in enumerate(bboxes):
            # COCO bbox format is [x, y, w, h].
            left_top = (int(bbox[0]), int(bbox[1]))
            right_bottom = (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3]))
            cv2.rectangle(img, left_top, right_bottom, (0, 255, 0), 1)
            cv2.putText(img, id_category[category_ids[idx]], left_top,
                        cv2.FONT_HERSHEY_SCRIPT_SIMPLEX, 0.4, (255, 255, 255), 1)
        cv2.imwrite(os.path.join('visiual', i['file_name']), img)


if __name__ == '__main__':
    print('—' * 50)
    os.mkdir('visiual')
    visiual()
    print('| visiual completed.')
    print('| saved as ', os.path.join(os.getcwd(), 'visiual'))
    print('—' * 50)
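Run it from the dataset directory after the COCO conversion; the annotated samples land in a visiual/ folder. Note that os.mkdir raises an error if visiual/ already exists, so remove the folder between runs.

python box_visiual.py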
With that, our dataset is ready; the first big step is done, and we move on to the second.
2. Config file preparation
We handle the config under the work_dirs directory. If there is no work_dirs directory under your mmdetection/ directory, create one; then create your own project folder under work_dirs/, for example dataset1. Inside dataset1/ we then create a Python file that generates the config file.
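From the mmdetection/ root that is simply:

mkdir -p work_dirs/dataset1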
2.1 Generating the config file
First we generate our config file; afterwards we make the detailed changes directly in the generated file.
mmdetection/work_dirs/dataset1/create_config.py
import os
import random

import numpy as np
import torch
from mmcv import Config
from mmdet.apis import set_random_seed

# ---------------- Set the random seeds ----------------
seed = 7777
set_random_seed(seed, deterministic=False)
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
os.environ['PYTHONHASHSEED'] = str(seed)

# ---------------- Things to change for your own project ----------------
job_num = '1'
model_name = f'cascade_rcnn_r50_fpn_1x_job{job_num}'
work_dir = os.path.join(os.getcwd(), model_name)
# The baseline config we inherit from.
baseline_cfg_path = "../../configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py"
cfg_path = os.path.join(work_dir, model_name + '.py')
# Dataset paths (change 'xuliandi' to your own dataset folder).
train_data_images = os.getcwd() + '/../../data/xuliandi/coco2017/train2017'
val_data_images = os.getcwd() + '/../../data/xuliandi/coco2017/val2017'
test_data_images = os.getcwd() + '/../../data/xuliandi/coco2017/val2017'
num_classes = 1
classes = ("point",)
# Pretrained weights to initialize from.
load_from = 'https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco/cascade_rcnn_r50_fpn_1x_coco_20200316-3dc56deb.pth'
train_ann_file = os.getcwd() + '/../../data/xuliandi/coco2017/annotations/instances_train2017.json'
val_ann_file = os.getcwd() + '/../../data/xuliandi/coco2017/annotations/instances_val2017.json'
test_ann_file = os.getcwd() + '/../../data/xuliandi/coco2017/annotations/instances_val2017.json'
gpu_ids = [1]
total_epochs = 30
batch_size = 2 ** 1
num_worker = 2
log_interval = 40
checkpoint_interval = 15
evaluation_interval = 1
lr = 0.01 / 2


def create_mm_config():
    """Build the Cascade R-CNN config file for MMDetection."""
    cfg = Config.fromfile(baseline_cfg_path)
    cfg.work_dir = work_dir
    cfg.seed = seed
    cfg.load_from = load_from
    if not os.path.exists(work_dir):
        os.makedirs(work_dir)
    print("| work dir:", work_dir)

    # Cascade R-CNN has one bbox head per stage; set num_classes on each.
    for head in cfg.model.roi_head.bbox_head:
        head.num_classes = num_classes

    cfg.gpu_ids = gpu_ids
    cfg.runner.max_epochs = total_epochs
    cfg.total_epochs = total_epochs

    # Optimizer and LR schedule: cosine annealing with linear warmup.
    cfg.optimizer.lr = lr
    cfg.lr_config = dict(
        policy='CosineAnnealing',
        by_epoch=False,
        warmup='linear',
        warmup_iters=500,
        warmup_ratio=0.001,
        min_lr=1e-07)

    cfg.log_config.interval = log_interval
    cfg.checkpoint_config.interval = checkpoint_interval

    # Point train/val/test at our COCO-format dataset.
    cfg.dataset_type = 'CocoDataset'
    cfg.classes = classes
    cfg.data.train.img_prefix = train_data_images
    cfg.data.train.classes = cfg.classes
    cfg.data.train.ann_file = train_ann_file
    cfg.data.train.type = 'CocoDataset'
    cfg.data.val.img_prefix = val_data_images
    cfg.data.val.classes = cfg.classes
    cfg.data.val.ann_file = val_ann_file
    cfg.data.val.type = 'CocoDataset'
    cfg.data.test.img_prefix = test_data_images
    cfg.data.test.classes = cfg.classes
    cfg.data.test.ann_file = test_ann_file
    cfg.data.test.type = 'CocoDataset'
    cfg.data.samples_per_gpu = batch_size
    cfg.data.workers_per_gpu = num_worker

    # Evaluate every epoch and keep the checkpoint with the best bbox mAP.
    cfg.evaluation.metric = 'bbox'
    cfg.evaluation.interval = evaluation_interval
    cfg.evaluation.save_best = 'bbox_mAP'
    cfg.log_config.hooks = [dict(type='TextLoggerHook')]

    print("| config path:", cfg_path)
    cfg.dump(cfg_path)


if __name__ == '__main__':
    print("—" * 50)
    create_mm_config()
    print("—" * 50)
2.2 Modifying the config file
For anything not covered by the generator script, open the generated config directly and edit it, for example some of the anchor_generator parameters, as shown below.
mmdetection/work_dirs/dataset1/cascade_rcnn_r50_fpn_1x_job1/cascade_rcnn_r50_fpn_1x_job1.py
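In the generated file, the RPN's anchor settings look like the block below (these are the stock Cascade R-CNN values; the alternatives in the comments are only illustrations of the kind of change you might make, say, for small objects):

anchor_generator=dict(
    type='AnchorGenerator',
    scales=[8],                    # base anchor scale; e.g. try [4] for small objects
    ratios=[0.5, 1.0, 2.0],        # aspect ratios of the anchors
    strides=[4, 8, 16, 32, 64]),   # one stride per FPN level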
3. Starting training
Run the command line from the MMDetection root, i.e. the mmdetection/ directory. Once the job is up, check the GPU memory usage and tune batch_size accordingly.
Single-GPU training
Template
python tools/train.py ${CONFIG_FILE} --gpu-ids ${GPU_ID}
Example: to train on the second GPU, set --gpu-ids to 1
python tools/train.py work_dirs/dataset1/cascade_rcnn_r50_fpn_1x_job1/cascade_rcnn_r50_fpn_1x_job1.py --gpu-ids 1
Multi-GPU training
Template
bash tools/dist_train.sh ${CONFIG_FILE} ${GPU_NUM}
Example: to train with two GPUs together
bash tools/dist_train.sh work_dirs/dataset1/cascade_rcnn_r50_fpn_1x_job1/cascade_rcnn_r50_fpn_1x_job1.py 2
4. Visualizing the model's output
After training, take a look at your model's inference results to see how it performs. Create a visiual.py file in our work directory.
mmdetection/work_dirs/dataset1/cascade_rcnn_r50_fpn_1x_job2/visiual.py
import glob
import os
import shutil

import cv2
import numpy as np
from mmdet.apis import inference_detector, init_detector

root_path = os.getcwd()
job_num = '2'
model_name = f'cascade_rcnn_r50_fpn_1x_job{job_num}.py'
# Images to run inference on (change to your own path).
test_images_path = os.path.join(root_path, '../../../data/dataset1/coco2017/train2017/')
save_dir = 'results_visiual_job' + job_num
classes = ("point",)
SCORE_THRESH = 0.1   # only keep detections scoring at least this
DEVICE = 'cuda:0'


def inference_res(model, images_filename):
    """Run the detector on every image and collect boxes above the threshold."""
    results = []
    for img_name in images_filename:
        img = test_images_path + img_name
        result = inference_detector(model, img)
        # result is a list with one array of [x1, y1, x2, y2, score] per class.
        for i in range(len(result)):
            for j in result[i]:
                j = np.array(j).tolist()
                if j[-1] >= SCORE_THRESH:
                    pred = {'image_id': img_name,
                            'category_id': 1,
                            'bbox': [j[0], j[1], j[2], j[3]],
                            'score': j[-1]}
                    results.append(pred)
    return results


def visiual(results):
    """Draw the predicted boxes on each image and save them to save_dir."""
    img_names = os.listdir(test_images_path)
    for i in img_names:
        img = cv2.imread(os.path.join(test_images_path, i))
        for j in results:
            if j['image_id'] == i and j['score'] >= SCORE_THRESH:
                xmin = int(j['bbox'][0])
                ymin = int(j['bbox'][1])
                xmax = int(j['bbox'][2])
                ymax = int(j['bbox'][3])
                cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (0, 0, 255), 2)
        cv2.imwrite(save_dir + '/' + i, img)


if __name__ == '__main__':
    print("—" * 50)
    if os.path.exists(save_dir):
        shutil.rmtree(save_dir)
    os.makedirs(save_dir)
    # Pick up the best checkpoint saved by evaluation.save_best
    # (a file named like best_bbox_mAP_epoch_*.pth in this directory).
    best_epoch_filepath = glob.glob('best' + '*')[0]
    config = os.path.join(root_path, model_name)
    checkpoint = os.path.join(root_path, best_epoch_filepath)
    print('| config: ', config)
    print('| checkpoint: ', checkpoint)
    model = init_detector(config, checkpoint, device=DEVICE)
    images_filename = os.listdir(test_images_path)
    results = inference_res(model, images_filename)
    visiual(results)
    print('| image save dir:', save_dir)
    print('| Visiual complete.')
    print("—" * 50)
5. Putting it on a web page with gradio
I'm sure you're the kind of reader who likes to tinker, so let's put the model on a web page. The process is simple.
For more configuration options, see the gradio documentation: 【Gradio Getting Started】 【Gradio Docs】
5.1 Installing gradio
pip install gradio
5.2 Writing the demo
I created a gradio_demo.py file under the mmdetection/ directory (don't call it gradio.py, or the line import gradio will import the script itself instead of the library). After you run it, the console prints a URL; open it and you can upload an image and run inference.
mmdetection/gradio_demo.py
import os

import cv2
import gradio as gr
import numpy as np
from mmdet.apis import inference_detector, init_detector

root_path = os.getcwd()
classes = ("apple",)
SCORE_THRESH = 0.2   # only draw detections scoring at least this
DEVICE = 'cuda:0'
# Change these to your own config and best checkpoint.
config_path = "./work_dirs/xuliandi/cascade_rcnn_r50_fpn_1x/cascade_rcnn_r50_fpn_1x.py"
checkpoint_path = "./work_dirs/xuliandi/cascade_rcnn_r50_fpn_1x/best_bbox_mAP_epoch_9.pth"

# Build the detector once at startup, so each request only runs inference.
model = init_detector(config_path, checkpoint_path, device=DEVICE)


def inference_res(model, image_input):
    """Run the detector and keep boxes above the score threshold."""
    results = []
    result = inference_detector(model, image_input)
    for i in range(len(result)):
        for j in result[i]:
            j = np.array(j).tolist()
            if j[-1] >= SCORE_THRESH:
                pred = {'bbox': [j[0], j[1], j[2], j[3]],
                        'score': j[-1]}
                results.append(pred)
    return results


def detect_image(image_input):
    """Gradio callback: draw the predicted boxes on the uploaded image."""
    results = inference_res(model, image_input)
    for i in results:
        xmin = int(i['bbox'][0])
        ymin = int(i['bbox'][1])
        xmax = int(i['bbox'][2])
        ymax = int(i['bbox'][3])
        cv2.rectangle(image_input, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
    return image_input


if __name__ == '__main__':
    # capture_session is only needed (and accepted) by old gradio versions;
    # drop the argument on recent releases.
    gr.Interface(fn=detect_image, inputs="image", outputs="image",
                 capture_session=True).launch()
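One handy extra: on recent gradio versions you can pass share=True to launch() to get a temporary public link, for example:

gr.Interface(fn=detect_image, inputs="image", outputs="image").launch(share=True)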
That's a wrap! 🎉