[人工智能] 目标检测部署（卡牌识别）

开发: C++知识库 Java知识库 JavaScript Python PHP知识库人工智能区块链大数据移动开发嵌入式开发工具数据结构与算法开发测试游戏开发网络协议系统运维
教程: HTML教程 CSS教程 JavaScript教程 Go语言教程 JQuery教程 VUE教程 VUE3教程 Bootstrap教程 SQL数据库教程 C语言教程 C++教程 Java教程 Python教程 Python3教程 C#教程
数码: 电脑笔记本显卡显示器固态硬盘硬盘耳机手机 iphone vivo oppo 小米华为单反装机图拉丁

-> 人工智能 -> 目标检测部署（卡牌识别） -> 正文阅读

[人工智能]目标检测部署（卡牌识别）

????????最近在折腾yolov5，训练了一个识别纸牌的模型，最后使用onnxruntime进行部署，感兴趣的可以上github上clone下来玩玩，模型的权重文件上传到了百度网盘，链接和提取码写在readme里。

? ? ? ? 模型的训练使用了yolov5l的权重模型，训练的时候使用的batchsize为8（理论可以设置的更大，gpu的占用还没吃满），训练了200个epoch，取了效果最好的权重模型。

? ? ? ? 从git上下载下来后的文件结构大致是这样的：

?????????function文件夹中存放了一些工具函数，image存放待检测的图片，model则存放模型的权重文件。

config.py

LABEL_DICT = {'cardlabel': ['10C', '10D', '10H', '10S', '2C', '2D', '2H', '2S', '3C', '3D', '3H', '3S',
                            '4C', '4D', '4H', '4S', '5C', '5D', '5H', '5S', '6C', '6D', '6H', '6S', '7C',
                            '7D', '7H', '7S', '8C', '8D', '8H', '8S', '9C', '9D', '9H', '9S', '1C', '1D',
                            '1H', '1S', '11C', '11D', '11H', '11S', '13C', '13D', '13H', '13S', '12C', '12D', '12H', '12S']}

????????这里主要是包括52张牌（不包括大小王）的label，J,Q,K,A我都用对应的数字进行替代，数字后面的H,C,D,S对应的是花色，分别是红桃，草花，方片和黑桃。

utils.py（只贴部分代码）

class LoadImages:
    def __init__(self, path, img_size=640, stride=32, auto=True):
        p = str(Path(path).resolve())
        if '*' in p:
            files = sorted(glob.glob(p, recursive=True))
        elif os.path.isdir(p):
            files = sorted(glob.glob(os.path.join(p, '*.*')))
        elif os.path.isfile(p):
            files = [p]
        else:
            raise Exception(f'ERROR: {p} does not exist')

        images = [x for x in files if x.split('.')[-1].lower() in IMG_FORMATS]
        videos = [x for x in files if x.split('.')[-1].lower() in VID_FORMATS]
        ni, nv = len(images), len(videos)

        self.img_size = img_size
        self.stride = stride
        self.files = images + videos
        self.nf = ni + nv
        self.video_flag = [False] * ni + [True] * nv
        self.mode = 'image'
        self.auto = auto
        if any(videos):
            self.new_video(videos[0])
        else:
            self.cap = None
        assert self.nf > 0, f'No images or videos found in {p}. ' \
                            f'Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}'

    def __iter__(self):
        self.count = 0
        return self

    def __next__(self):
        if self.count == self.nf:
            raise StopIteration
        path = self.files[self.count]

        if self.video_flag[self.count]:
            # Read video
            self.mode = 'video'
            ret_val, img0 = self.cap.read()
            if not ret_val:
                self.count += 1
                self.cap.release()
                if self.count == self.nf:
                    raise StopIteration
                else:
                    path = self.files[self.count]
                    self.new_video(path)
                    ret_val, img0 = self.cap.read()

            self.frame += 1
            print(f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: ', end='')

        else:
            # Read image
            self.count += 1
            img0 = cv2.imread(path)  # BGR
            assert img0 is not None, 'Image Not Found ' + path
            print(f'image {self.count}/{self.nf} {path}: ', end='')

        # Padded resize
        img = letterbox(img0, self.img_size, stride=self.stride, auto=self.auto)[0]

        # Convert
        img = img.transpose((2, 0, 1))[::-1]
        img = np.ascontiguousarray(img)

        return path, img, img0, self.cap

    def new_video(self, path):
        self.frame = 0
        self.cap = cv2.VideoCapture(path)
        self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))

    def __len__(self):
        return self.nf

????????这个类主要负责读取图像数据。

def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False,
                        labels=(), max_det=300):
    nc = prediction.shape[2] - 5
    xc = prediction[..., 4] > conf_thres

    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'

    min_wh, max_wh = 2, 4096
    max_nms = 30000
    time_limit = 10.0
    redundant = True
    multi_label &= nc > 1
    merge = False

    t = time.time()
    output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
    for xi, x in enumerate(prediction):
        # Apply constraints
        x = x[xc[xi]]

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            l = labels[xi]
            v = torch.zeros((len(l), nc + 5), device=x.device)
            v[:, :4] = l[:, 1:5]
            v[:, 4] = 1.0
            v[range(len(l)), l[:, 0].long() + 5] = 1.0
            x = torch.cat((x, v), 0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]
        box = xywh2xyxy(x[:, :4])

        if multi_label:
            i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes is not None:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # Check shape
        n = x.shape[0]
        if not n:
            continue
        elif n > max_nms:
            x = x[x[:, 4].argsort(descending=True)[:max_nms]]

        # Batched NMS
        c = x[:, 5:6] * (0 if agnostic else max_wh)
        boxes, scores = x[:, :4] + c, x[:, 4]
        i = torchvision.ops.nms(boxes, scores, iou_thres)
        if i.shape[0] > max_det:
            i = i[:max_det]
        if merge and (1 < n < 3E3):
            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
            iou = box_iou(boxes[i], boxes) > iou_thres
            weights = iou * scores[None]
            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)
            if redundant:
                i = i[iou.sum(1) > 1]

        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            print(f'WARNING: NMS time limit {time_limit}s exceeded')
            break
    return output

? ? ? ? 这里主要是非极大值抑制，输出最终的预测结果，包括坐标值，置信度以及标签索引值

# -*- coding: utf-8 -*-
"""
Time:     2021.10.26
Author:   Athrunsunny
Version:  V 0.1
File:     inference.py
Describe: Functions in this file is use to inference
"""

import cv2
import torch
import time
import onnxruntime
import numpy as np
from function.utils import LoadImages, Annotator, colors, check_img_size, non_max_suppression, scale_coords
from function import config as CFG


def load_model(weights, **options):
    imgsz = options.pop('imgsz', 640)
    stride = options.pop('stride', 64)

    w = str(weights[0] if isinstance(weights, list) else weights)
    session = onnxruntime.InferenceSession(w, None)
    imgsz = check_img_size(imgsz, s=stride)
    return session, imgsz, stride


def image_process(img):
    assert isinstance(img, np.ndarray)
    img = img.astype('float32')
    img /= 255.0
    if len(img.shape) == 3:
        img = img[None]
    return img


def inference(session, img, **options):
    conf_thres = options.pop('conf_thres', 0.25)
    iou_thres = options.pop('iou_thres', 0.45)
    classes = options.pop('classes', None)
    agnostic = options.pop('agnostic', False)
    max_det = options.pop('max_det', 1000)

    pred = torch.tensor(session.run([session.get_outputs()[0].name], {session.get_inputs()[0].name: img}))
    pred = non_max_suppression(pred, conf_thres=conf_thres, iou_thres=iou_thres, classes=classes, max_det=max_det,
                               agnostic=agnostic)
    return pred


def post_process(pred, img, im0s, dataset, **options):
    showImg = options.pop('showImg', False)
    hide_conf = options.pop('hide_conf', False)
    hide_labels = options.pop('hide_labels', False)
    line_thickness = options.pop('line_thickness', 1)
    labelDict = options.pop('labelDict', None)

    labels = labelDict['cardlabel']
    res_label = []
    for i, det in enumerate(pred):
        s, im0, frame = '', im0s.copy(), getattr(dataset, 'frame', 0)
        annotator = Annotator(im0, line_width=line_thickness, example=str(labels))
        if len(det):
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
            for *xyxy, conf, cls in reversed(det):
                c = int(cls)
                label = None if hide_labels else (labels[c] if hide_conf else f'{labels[c]} {conf:.2f}')
                label_no_conf = None if hide_labels else (labels[c] if hide_conf else f'{labels[c]}')
                res_label.append(label_no_conf)
                annotator.box_label(xyxy, label, color=colors(c, True))
        print(f'{s}')
        im0 = annotator.result()
        if showImg:
            cv2.imshow('result', im0)
            cv2.waitKey(0)
    return res_label


def run(weights, source, **options):
    conf_thres = options.pop('conf_thres', 0.25)  # confidence threshold
    iou_thres = options.pop('iou_thres', 0.45)  # NMS IOU threshold
    classes = options.pop('classes', None)  # filter by class: --class 0, or --class 0 2 3
    agnostic = options.pop('agnostic', False)  # class-agnostic NMS
    max_det = options.pop('max_det', 1000)  # maximum detections per image
    hide_conf = options.pop('hide_conf', False)  # hide confidences
    hide_labels = options.pop('hide_labels', False)  # hide labels
    line_thickness = options.pop('line_thickness', 1)  # bounding box thickness (pixels)
    imgsz = options.pop('imgsz', 640)  # inference size (pixels)
    showImg = options.pop('showImg', False)  # show results
    labelDict = options.pop('labelDict', CFG.LABEL_DICT)  # config labels

    session, imgsz, stride = load_model(weights=weights, imgsz=imgsz)
    dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=False)
    res = []
    for path, img, im0s, vid_cap in dataset:
        img = image_process(img)
        t1 = time.time()
        pred = inference(session, img, conf_thres=conf_thres, iou_thres=iou_thres, max_det=max_det, classes=classes,
                         agnostic=agnostic)
        t2 = time.time()
        print('Inference time:%.3fs' % (t2 - t1))
        res = post_process(pred, img, im0s, dataset, hide_conf=hide_conf, hide_labels=hide_labels,
                           line_thickness=line_thickness, showImg=showImg, labelDict=labelDict)
    return res


if __name__ == '__main__':
    imagepath = 'image/1.jpg'
    modelpath = 'model/weight.onnx'
    res = run(modelpath, imagepath, showImg=True)
    print(res)

????????该项目到这里也就结束了，代码量也比较少，比较容易理解，以下附一张实际检测的效果图