
[AI] Object Detection and Segmentation with YOLOP

Code: https://github.com/hustvl/YOLOP

Paper: https://arxiv.org/abs/2108.11250

Contents

1. Data processing code

2. Where to modify the YOLOP code

1) Changing the default parameters

2) Training settings for the network parameters

3) Loss computation

4) Data loading code

5) Test code changes

6) Loading the pretrained model

3. Modifying the YOLOP network structure


Goal: run object detection and segmentation with YOLOP on a custom task, with 2-class detection (utility knife + lighter) and 3-class semantic segmentation (background + utility knife + lighter).

1. Data processing code

Convert YOLOv5 polygon training labels to the YOLOP training format.

'''
Convert YOLOv5 labels to YOLOP labels.
YOLOv5 label: .txt   label x1 y1 x2 y2 x3 y3 ...
YOLOP label:  .json
{
    "category": "car",
    "id": 18,
    "attributes": {
        "occluded": true,
        "truncated": false,
        "trafficLightColor": "none"
    },
    "box2d": {
        "x1": 594.220274,
        "y1": 548.81621,
        "x2": 730.432491,
        "y2": 653.289656
    }
},
'''
import os
import cv2
import json
from tqdm import tqdm
import numpy as np

dict_label = {'0': 'utility_knife', '1': 'lighter'}

def mkdir_dir(path):
    # create a single directory if it does not exist
    if not os.path.exists(path):
        os.mkdir(path)

def mkdir_dir1(path):
    # create the directory, or empty it if it already exists
    if not os.path.exists(path):
        os.mkdir(path)
    else:
        for file in os.listdir(path):
            os.remove(os.path.join(path, file))

def mkdir_dir2(path):
    # create the directory and any missing parent directories
    # (equivalent to os.makedirs(path, exist_ok=True))
    paths_list = [path]
    while not os.path.exists(path):
        path = os.path.dirname(path)
        paths_list.append(path)

    paths_list.reverse()
    for path in paths_list:
        mkdir_dir(path)



if __name__ == '__main__':
    # dir_names = ['2004', '2005', '503', '517', '525', '526', '527', '528', '529', '530', '531', '532',
    #              '541', '543', '544', '555', '556', '557', '558', '650', '651',
    #              '849', '1552', '1553', '1646', '1675', '1676', '1719', '1720', '1721']
    dir_names = ['2004']
    src_dir = '/media/fxp/7292a4b1-2584-4296-8caf-eb9788c2ffb9/data/xray/危险品检测/process_ok/20220428'
    save_dir = '/media/fxp/7292a4b1-2584-4296-8caf-eb9788c2ffb9/data/xray/危险品检测/process_ok/yolop_label'

    for name in dir_names:
        print(name)
        dir_img = os.path.join(src_dir, name, 'images')
        dir_txt = os.path.join(src_dir, name, 'labels')

        dir_save_imgs = os.path.join(save_dir, name, 'images', 'train')
        mkdir_dir2(dir_save_imgs)

        dir_save_det = os.path.join(save_dir, name, 'det_labels', 'train')
        mkdir_dir2(dir_save_det)
        dir_save_seg = os.path.join(save_dir, name, 'seg_labels', 'train')
        mkdir_dir2(dir_save_seg)



        txts = os.listdir(dir_txt)
        num  = 0
        for txt in tqdm(txts):
            if 1:  # change to (num < 1) to process only the first file when debugging
                result_list = []
                result_dict = {}
                txt_path = os.path.join(dir_txt, txt)
                save_json = os.path.join(dir_save_det,txt[:-4]+'.json')
                save_seg_img = os.path.join(dir_save_seg, txt[:-4]+'.png')

                img_path = os.path.join(dir_img, txt[:-4]+'.jpg')
                save_img = os.path.join(dir_save_imgs, txt[:-4] + '.jpg')

                img = cv2.imread(img_path)
                h, w = img.shape[:2]
                size = (h, w, 3)

                # segmentation label image (BGR): background marked with 200 in channel 0,
                # object classes drawn below with value 2 in channels 1 and 2
                labelImg = np.zeros(size, np.uint8)
                labelImg[:, :, 0] = 200

                lines = open(txt_path, 'r', encoding='utf-8').readlines()
                for line in lines:
                    label = line.strip().split(' ')[0]
                    label_name = dict_label[label]

                    # one dict per object: reusing a single dict across iterations would make
                    # every entry of result_list reference the same (last) object
                    dict_result = {}
                    dict_result["category"] = label_name

                    # normalized polygon vertices x1 y1 x2 y2 ... converted to pixel coordinates
                    points = list(map(float, line.strip().split(' ')[1:]))
                    widths = [x * w for x in points[::2]]
                    heights = [y * h for y in points[1::2]]

                    polygon = [[widths[i_], heights[i_]] for i_ in range(len(widths))]
                    points = np.array(polygon, dtype=np.int32)

                    # draw the polygon into the segmentation label:
                    # class 0 (utility_knife) -> value 2 in channel 1, class 1 (lighter) -> value 2 in channel 2
                    if label == '0':
                        cv2.fillPoly(labelImg, [points], color=(0, 2, 0))
                    elif label == '1':
                        cv2.fillPoly(labelImg, [points], color=(0, 0, 2))
                    else:
                        print('label error:', label)


                    # axis-aligned bounding box from the polygon vertices
                    dict_box = {}
                    dict_box["x1"] = min(widths)
                    dict_box["y1"] = min(heights)
                    dict_box["x2"] = max(widths)
                    dict_box["y2"] = max(heights)
                    dict_result["box2d"] = dict_box
                    result_list.append(dict_result)

                result_dict["frames"] = {"objects": result_list}
                with open(save_json, 'w') as file_obj:
                    json.dump(result_dict, file_obj, indent=4, separators=(',', ': '))

                cv2.imwrite(save_seg_img, labelImg)
                cv2.imwrite(save_img, img)
                num += 1
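
After running the conversion it is worth sanity-checking one of the generated masks, because the YOLOP label handling and num_seg_class depend on the exact pixel values written above. A minimal sketch (the file path below is only a placeholder):

import cv2
import numpy as np

mask = cv2.imread('yolop_label/2004/seg_labels/train/example.png')  # placeholder file name
print('channel 0 (background flag):', np.unique(mask[:, :, 0]))     # expect 200 outside objects, 0 inside
print('utility_knife pixels:', int((mask[:, :, 1] == 2).sum()))
print('lighter pixels:', int((mask[:, :, 2] == 2).sum()))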

2. Where to modify the YOLOP code

The lane-line-related code is removed; see the uploaded project for the details.

1) Changing the default parameters

YOLOP-main/lib/default.py

_C.GPUS: set according to the number of GPUs you actually have.

_C.WORKERS: set the number of worker processes based on the number of CPU cores; it directly affects data loading speed.

There is no single recommended value for the DataLoader's num_workers; a few guidelines:

num_workers=0 means only the main process loads batch data, which can become a bottleneck.
num_workers=1 means a single worker process loads batch data while the main process does not take part, which is also slow.
num_workers>0 means the specified number of worker processes load the data and the main process does not take part. Increasing num_workers also increases CPU memory consumption, so the right value depends on the batch size and the machine.
A common starting point is to set num_workers equal to the number of CPU cores on the machine.
The most reliable approach is to slowly increase num_workers and stop once training speed no longer improves.

(Summarized from the CSDN post by 龙南希, CC 4.0 BY-SA: https://blog.csdn.net/qq_28057379/article/details/115427052)

_C.num_seg_class: change to 3. The YOLOP authors use 2 classes for drivable-area segmentation; for my own data it is set to 3 (background + 2 object classes). This affects the label processing.

_C.MODEL.PRETRAINED: path to the pretrained model to load.

_C.DATASET.DATAROOT: the base path for data loading is kept; because the data is large and split over many sub-folders, the data-reading logic was changed (see section 4 below).

_C.DATASET.NAME_LIST = ['2004']: an option I added myself; images and labels are read per sub-folder.

_C.TEST.NMS_CONF_THRESHOLD: raise the threshold from 0.001 to 0.4; see the YOLOP GitHub issues for the reasoning behind this change. A sketch of the touched config entries follows.
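
For orientation, a minimal sketch of what these entries in default.py might look like after the changes (yacs-style; the values and the pretrained path are illustrative, and NAME_LIST is the option added here rather than part of upstream YOLOP):

from yacs.config import CfgNode as CN

_C = CN(new_allowed=True)
_C.GPUS = (0,)                 # adjust to the GPUs actually available
_C.WORKERS = 8                 # roughly the number of CPU cores; tune as described above
_C.num_seg_class = 3           # background + utility knife + lighter

_C.MODEL = CN(new_allowed=True)
_C.MODEL.PRETRAINED = '/path/to/pretrained_model.pth'   # placeholder path

_C.DATASET = CN(new_allowed=True)
_C.DATASET.DATAROOT = '/path/to/yolop_label'   # base directory produced by the conversion script
_C.DATASET.NAME_LIST = ['2004']                # custom option: sub-folders to read

_C.TEST = CN(new_allowed=True)
_C.TEST.NMS_CONF_THRESHOLD = 0.4               # raised from 0.001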

2) Training settings for the network parameters

(i.e. which parameters do not need to be updated)

In YOLOP-main/tools/train.py, add below line 240:

        if cfg.TRAIN.DET_SEG_ONLY:
            logger.info('freeze  Ll_Seg heads...')
            for k, v in model.named_parameters():
                v.requires_grad = True  # train all layers
                if k.split(".")[1] in Encoder_para_idx + Ll_Seg_Head_para_idx:
                    print('freezing %s' % k)
                    v.requires_grad = False
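
Here Encoder_para_idx and Ll_Seg_Head_para_idx are lists of block indices (as strings, matching the component that k.split(".")[1] extracts from the parameter name) marking which layers belong to the encoder and to the lane-line head. A sketch of how they can be defined, assuming the block numbering of the network structure in section 3; verify against the definitions already present in train.py:

Encoder_para_idx = [str(i) for i in range(0, 17)]        # blocks 0-16: shared encoder / neck
Det_Head_para_idx = [str(i) for i in range(17, 25)]      # blocks 17-24: detection head
Da_Seg_Head_para_idx = [str(i) for i in range(25, 34)]   # blocks 25-33: drivable-area seg head
Ll_Seg_Head_para_idx = [str(i) for i in range(34, 43)]   # blocks 34-42: lane-line seg head

With requires_grad set to False for those parameters, only the detection and drivable-area heads keep receiving gradients; depending on how the optimizer is built, it can also make sense to pass it only the trainable parameters, e.g. filter(lambda p: p.requires_grad, model.parameters()).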

3) Loss computation

In YOLOP-main/lib/core/loss.py, set the lane-line-related loss terms to 0.
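
One simple way to do this is to multiply the lane-line terms by zero where the total loss is assembled. The variable names below are illustrative rather than the exact ones in loss.py, so adapt them to the actual code; if the config exposes gain factors for the lane-line losses, setting those gains to 0 has the same effect:

# inside the multi-head loss computation (illustrative names)
lseg_ll = 0.0 * lseg_ll    # lane-line segmentation loss disabled
liou_ll = 0.0 * liou_ll    # lane-line IoU loss disabled
loss = lbox + lobj + lcls + lseg_da + lseg_ll + liou_ll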

4) Data loading code

(including data preparation)

Data reading: YOLOP-main/lib/dataset/AutoDriveDataset.py

Change the initialization of the AutoDriveDataset class to:

def get_filelist(path):
    # recursively collect the full paths of all files under `path`
    Filelist = []
    for home, dirs, files in os.walk(path):
        for filename in files:
            Filelist.append(os.path.join(home, filename))
    return Filelist

class AutoDriveDataset(Dataset):
    """
    A general Dataset for some common function
    """

    def __init__(self, cfg, is_train, inputsize=640, transform=None):
        """
        initial all the characteristic

        Inputs:
        -cfg: configurations
        -is_train(bool): whether train set or not
        -transform: ToTensor and Normalize
        
        Returns:
        None
        """
        self.is_train = is_train
        self.cfg = cfg
        self.transform = transform
        self.inputsize = inputsize
        self.Tensor = transforms.ToTensor()
        self.img_root = Path(cfg.DATASET.DATAROOT)
        # label_root = Path(cfg.DATASET.LABELROOT)
        # mask_root = Path(cfg.DATASET.MASKROOT)
        # lane_root = Path(cfg.DATASET.LANEROOT)
        if is_train:
            self.indicator = cfg.DATASET.TRAIN_SET
        else:
            self.indicator = cfg.DATASET.TEST_SET
        self.name_list = cfg.DATASET.NAME_LIST

        # the upstream label_root / mask_root / lane_root members are no longer used;
        # instead, segmentation label files are collected from every sub-folder in NAME_LIST
        mask_root_list = []
        self.mask_list = []
        for name in self.name_list:
            mask_root_list.append(self.img_root / name / 'seg_labels' / self.indicator)

        for mask_root in mask_root_list:
            # recursively collect every segmentation label file under this sub-folder
            self.mask_list += get_filelist(mask_root)

        self.db = []

        self.data_format = cfg.DATASET.DATA_FORMAT

        self.scale_factor = cfg.DATASET.SCALE_FACTOR
        self.rotation_factor = cfg.DATASET.ROT_FACTOR
        self.flip = cfg.DATASET.FLIP
        self.color_rgb = cfg.DATASET.COLOR_RGB

        # self.target_type = cfg.MODEL.TARGET_TYPE
        # self.shapes = np.array(cfg.DATASET.ORG_IMG_SIZE)

Changes in YOLOP-main/lib/dataset/bdd.py:

single_cls: False    # upstream detects only vehicles with this flag; set to False for the 2-class task

The loop over the data is changed to:

        for mask in tqdm(self.mask_list):
            mask_path = str(mask)
            # derive the detection label and image paths from the segmentation label path
            label_path = mask_path.replace(".png", ".json").replace('/seg_labels/', '/det_labels/')
            image_path = mask_path.replace(".png", ".jpg").replace('/seg_labels/', '/images/')
            # note: assert (cond, msg) would test a non-empty tuple and always pass
            assert os.path.exists(label_path), 'label_path does not exist!'
            assert os.path.exists(image_path), 'image_path does not exist!'

Changes in YOLOP-main/lib/dataset/convert.py:

id_dict = {'utility_knife': 0, 'lighter': 1}

Here 'utility_knife' and 'lighter' are the classes of my own training data.

5) Test code changes

YOLOP-main/lib/core/function.py

Replace:

da_seg_mask = torch.nn.functional.interpolate(da_seg_mask, scale_factor=int(1/ratio), mode='bilinear')

with:

ori_h, ori_w = img_test.shape[:2]
da_seg_mask = torch.nn.functional.interpolate(da_seg_mask, size=[ori_h, ori_w],
                                              mode='bilinear')  # resample to the original image size

(Interpolating to an explicit size avoids the rounding error of int(1/ratio) when the image was not scaled by an integer factor.)

Replace:

da_gt_mask = torch.nn.functional.interpolate(da_gt_mask, scale_factor=int(1/ratio), mode='bilinear')

with:

da_gt_mask = torch.nn.functional.interpolate(da_gt_mask, size=[ori_h, ori_w],
                                             mode='bilinear')

In YOLOP-main/lib/utils/plot.py, comment out the following line so the plotted result keeps the original image size:

img = cv2.resize(img, (1280,720), interpolation=cv2.INTER_LINEAR)

6) Loading the pretrained model

Original code:

if os.path.exists(cfg.MODEL.PRETRAINED):
    logger.info("=> loading model '{}'".format(cfg.MODEL.PRETRAINED))
    checkpoint = torch.load(cfg.MODEL.PRETRAINED)
    begin_epoch = checkpoint['epoch']
    # best_perf = checkpoint['perf']
    last_epoch = checkpoint['epoch']
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])

Change the model loading to:

checkpoint1 = checkpoint['state_dict']  # the segmentation class count changed, which affects parameter loading
# drop every checkpoint tensor whose shape no longer matches the model
model_state_dict = model.state_dict()
for k in list(checkpoint1.keys()):
    if k in model_state_dict:
        shape_model = tuple(model_state_dict[k].shape)
        shape_checkpoint = tuple(checkpoint1[k].shape)
        if shape_model != shape_checkpoint:
            checkpoint1.pop(k)
            print(k, shape_model, shape_checkpoint)
    else:
        print(k, ' layer is missing!')
# strict=False: the dropped layers (e.g. the enlarged segmentation head) keep their random initialization
model.load_state_dict(checkpoint1, strict=False)

For the remaining details, see the modified project.

3. Modifying the YOLOP network structure

In the Detect head the number of classes becomes 2 (utility knife and lighter) instead of the original 1; the drivable-area segmentation head also changes, from 2 output channels to 3 (background + the 2 object classes), matching num_seg_class = 3.

YOLOP = [
[24, 33, 42],   #Det_out_idx, Da_Segout_idx, LL_Segout_idx
[ -1, Focus, [3, 32, 3]],   #0
[ -1, Conv, [32, 64, 3, 2]],    #1
[ -1, BottleneckCSP, [64, 64, 1]],  #2
[ -1, Conv, [64, 128, 3, 2]],   #3
[ -1, BottleneckCSP, [128, 128, 3]],    #4
[ -1, Conv, [128, 256, 3, 2]],  #5
[ -1, BottleneckCSP, [256, 256, 3]],    #6
[ -1, Conv, [256, 512, 3, 2]],  #7
[ -1, SPP, [512, 512, [5, 9, 13]]],     #8
[ -1, BottleneckCSP, [512, 512, 1, False]],     #9
[ -1, Conv,[512, 256, 1, 1]],   #10
[ -1, Upsample, [None, 2, 'nearest']],  #11
[ [-1, 6], Concat, [1]],    #12
[ -1, BottleneckCSP, [512, 256, 1, False]], #13
[ -1, Conv, [256, 128, 1, 1]],  #14
[ -1, Upsample, [None, 2, 'nearest']],  #15
[ [-1,4], Concat, [1]],     #16         #Encoder

[ -1, BottleneckCSP, [256, 128, 1, False]],     #17
[ -1, Conv, [128, 128, 3, 2]],      #18
[ [-1, 14], Concat, [1]],       #19
[ -1, BottleneckCSP, [256, 256, 1, False]],     #20
[ -1, Conv, [256, 256, 3, 2]],      #21
[ [-1, 10], Concat, [1]],   #22
[ -1, BottleneckCSP, [512, 512, 1, False]],     #23
[ [17, 20, 23], Detect,  [2, [[3,9,5,11,4,20], [7,18,6,39,12,31], [19,50,38,81,68,157]], [128, 256, 512]]], #Detection head 24

[ 16, Conv, [256, 128, 3, 1]],   #25
[ -1, Upsample, [None, 2, 'nearest']],  #26
[ -1, BottleneckCSP, [128, 64, 1, False]],  #27
[ -1, Conv, [64, 32, 3, 1]],    #28
[ -1, Upsample, [None, 2, 'nearest']],  #29
[ -1, Conv, [32, 16, 3, 1]],    #30
[ -1, BottleneckCSP, [16, 8, 1, False]],    #31
[ -1, Upsample, [None, 2, 'nearest']],  #32
# [ -1, Conv, [8, 2, 3, 1]], #33 Driving area segmentation head
[ -1, Conv, [8, 3, 3, 1]], #33 Driving area segmentation head

[ 16, Conv, [256, 128, 3, 1]],   #34
[ -1, Upsample, [None, 2, 'nearest']],  #35
[ -1, BottleneckCSP, [128, 64, 1, False]],  #36
[ -1, Conv, [64, 32, 3, 1]],    #37
[ -1, Upsample, [None, 2, 'nearest']],  #38
[ -1, Conv, [32, 16, 3, 1]],    #39
[ -1, BottleneckCSP, [16, 8, 1, False]],    #40
[ -1, Upsample, [None, 2, 'nearest']],  #41
[ -1, Conv, [8, 2, 3, 1]] #42 Lane line segmentation head
]
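
After editing the structure, a quick dummy forward pass helps confirm the head shapes. A sketch, assuming the model is built the same way tools/train.py builds it (via get_net(cfg)); verify the exact import paths against the project:

import torch
from lib.config import cfg
from lib.models import get_net

model = get_net(cfg)
model.eval()
with torch.no_grad():
    det_out, da_seg_out, ll_seg_out = model(torch.zeros(1, 3, 640, 640))
print(da_seg_out.shape)  # expect 3 channels: background + utility knife + lighter
print(ll_seg_out.shape)  # the lane-line head is kept (2 channels) but its loss is zeroed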
