[人工智能] c++封装yolov4进行目标检测

开发: C++知识库 Java知识库 JavaScript Python PHP知识库人工智能区块链大数据移动开发嵌入式开发工具数据结构与算法开发测试游戏开发网络协议系统运维
教程: HTML教程 CSS教程 JavaScript教程 Go语言教程 JQuery教程 VUE教程 VUE3教程 Bootstrap教程 SQL数据库教程 C语言教程 C++教程 Java教程 Python教程 Python3教程 C#教程
数码: 电脑笔记本显卡显示器固态硬盘硬盘耳机手机 iphone vivo oppo 小米华为单反装机图拉丁

-> 人工智能 -> c++封装yolov4进行目标检测 -> 正文阅读

[人工智能]c++封装yolov4进行目标检测

yolov4的官方实现darknet是用C/C++写的，在工程中部署特别方便。之前在项目中使用yolov4，取得了不错的效果，在这里记录一下。
使用官方接口调用，我们首先得编译darknet动态库，下载yolov4源码

git clone https://github.com/AlexeyAB/darknet.git

编译yolov4为动态库

打开darknet-master\build\darknet目录下的yolo_cpp_dll.sln。
将配置设置为x64和Release，然后右击项目，选择“生成”。
注意：编译前应先配置好 CUDA 10.2 和 cuDNN 环境，并将yolo_cpp_dll.vcxproj中的CUDA版本改为本机安装的版本。
然后在darknet-master\build\darknet\x64就会生成yolo_cpp_dll.dll文件。这个文件就是编译好的dll文件，可以直接使用。
yolo_cpp_dll.dll的接口如下：

// One detection as reported by the detector.
struct bbox_t {
    // Bounding box: (x, y) is the top-left corner, (w, h) the width and height.
    unsigned int x;
    unsigned int y;
    unsigned int w;
    unsigned int h;
    // Confidence: probability that the object was found correctly.
    float prob;
    // Class of the object, in the range [0, classes-1].
    unsigned int obj_id;
    // Tracking id for video: 0 means untracked, 1 and above identify a tracked object.
    unsigned int track_id;
    // Number of frames on which the object was detected.
    unsigned int frames_counter;
};

// Interface of yolo_cpp_dll.dll as quoted by this article (declaration excerpt only).
class Detector {
public:
        // Creates a detector from a network config file and a weights file.
        // gpu_id defaults to 0 (presumably the index of the GPU to run on - confirm in the darknet docs).
        Detector(std::string cfg_filename, std::string weight_filename, int gpu_id = 0);
        ~Detector();

        // Detection from an image file name; returns the detections as bbox_t records.
        // thresh defaults to 0.2 and use_mean to false
        // (presumably thresh is the minimum confidence to report - confirm in the darknet docs).
        std::vector<bbox_t> detect(std::string image_filename, float thresh = 0.2, bool use_mean = false);
        // Same, for an image already held as an image_t.
        std::vector<bbox_t> detect(image_t img, float thresh = 0.2, bool use_mean = false);
        // Static helpers that load an image_t from a file name and free an image_t.
        static image_t load_image(std::string image_filename);
        static void free_image(image_t m);

// The cv::Mat based members below are only declared when OPENCV is defined.
#ifdef OPENCV
        // Detection directly from a cv::Mat.
        std::vector<bbox_t> detect(cv::Mat mat, float thresh = 0.2, bool use_mean = false);
        // Produces an image_t from a cv::Mat (by its name, resized for the network - TODO confirm).
        std::shared_ptr<image_t> mat_to_image_resize(cv::Mat mat) const;
#endif
};

关于这一部分有疑问的，可以参考官方文档：https://github.com/AlexeyAB/darknet#yolo-v4-in-other-frameworks

检测步骤
检测部分需要用到刚刚编译的动态库，以及yolov4源码下的yolo_v2_class.hpp，需要将yolo_v2_class.hpp路径添加到包含目录中。

（原文此处为一张截图，图片未能保留。）
使用步骤大致分为以下几步
加载网络模型

// Paths to the network definition, the trained weights and the class-name list.
string modelConfig = "yolov4/s_yolov4.cfg";
string modelWeights = "yolov4/yolov4.weights";
string classesFile = "yolov4/s_coco.names";
// Build the detector from the config and weights; the third argument is gpu_id.
Detector detector(modelConfig, modelWeights, 0);

调用detector的detect方法进行检测

// Run detection on `image` with the default thresh/use_mean arguments;
// the detections come back as a vector of bbox_t.
std::vector<bbox_t> res = detector.detect(image);

image是cv::Mat类型的（接收cv::Mat的detect重载只有在定义了OPENCV宏时才可用）。

输出结果结构体bbox_t可以在yolo_v2_class.hpp看到，包括检测框左上角坐标，高和宽，置信度，类别等，有了它们画图也很简单了。

// One detection as reported by the detector.
struct bbox_t {
    // Bounding box: (x, y) is the top-left corner, (w, h) the width and height.
    unsigned int x;
    unsigned int y;
    unsigned int w;
    unsigned int h;
    // Confidence: probability that the object was found correctly.
    float prob;
    // Class of the object, in the range [0, classes-1].
    unsigned int obj_id;
    // Tracking id for video: 0 means untracked, 1 and above identify a tracked object.
    unsigned int track_id;
    // Number of frames on which the object was detected.
    unsigned int frames_counter;
    // Center of the object in meters; only meaningful when a ZED 3D camera is used.
    float x_3d;
    float y_3d;
    float z_3d;
};

// Draw every detection in `res` onto `image`, labelling the boxes with names from `classes`.
Drawer(image, res, classes);

// Draws every detection in `outs` onto `frame`.
//
// frame   - image that is drawn on in place.
// outs    - detections; each box is stored as top-left (x, y) plus width/height.
// classes - class names indexed by bbox_t::obj_id; forwarded to DrawBoxes.
void Drawer(Mat& frame, vector<bbox_t> outs, vector<string> classes)
{
	// Range-for avoids comparing a signed int index against the unsigned size().
	for (const bbox_t& box : outs)
	{
		// DrawBoxes takes corner coordinates, so (x, y, w, h) is converted to
		// (left, top, right, bottom) here.
		DrawBoxes(frame, classes, box.obj_id, box.prob, box.x, box.y,
			box.x + box.w, box.y + box.h);
	}
}

// Draws one detection on `frame`: the bounding box plus a text label.
//
// frame    - image that is drawn on in place.
// classes  - class names; when empty the label shows only the confidence.
// classId  - index into `classes` (asserted to be in range when `classes` is non-empty).
// conf     - confidence, printed with two decimals.
// left/top/right/bottom - corners of the bounding box in pixels.
void DrawBoxes(Mat& frame, vector<string> classes, int classId, float conf, int left, int top, int right, int bottom)
{
	// One set of font parameters for both measuring and drawing the label, so
	// the measured size matches what is rendered. (Measuring at scale 0.5 while
	// drawing at 0.75 made the top-edge clamp below too small and clipped labels.)
	const int fontFace = FONT_HERSHEY_SIMPLEX;
	const double fontScale = 0.75;
	const int thickness = 1;

	// Bounding box.
	rectangle(frame, Point(left, top), Point(right, bottom), Scalar(255, 178, 50), 3);

	// Label text: "<class>:<confidence>", or just the confidence without class names.
	string label = format("%.2f", conf);
	if (!classes.empty())
	{
		// A negative id would index out of bounds just like a too-large one.
		CV_Assert(classId >= 0 && classId < (int)classes.size());
		label = classes[classId] + ":" + label;
	}

	// Place the label at the top of the box, but never above the image border.
	int baseLine = 0;
	const Size labelSize = getTextSize(label, fontFace, fontScale, thickness, &baseLine);
	top = max(top, labelSize.height);
	// Optional white background behind the label (disabled in this snippet):
	//rectangle(frame, Point(left, top - labelSize.height), Point(left + labelSize.width, top + baseLine), Scalar(255, 255, 255), FILLED);
	putText(frame, label, Point(left, top), fontFace, fontScale, Scalar(0, 0, 0), thickness);
}

完整的代码如下：

#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

#include <opencv2/opencv.hpp>
// Note: the Detector::detect(cv::Mat) overload is only declared when OPENCV is
// defined (for example in the project's preprocessor definitions).
#include <yolo_v2_class.hpp>

using namespace std;
using namespace cv;

// Draws one detection on `frame`: the bounding box plus a text label on a
// white background.
//
// frame    - image that is drawn on in place.
// classes  - class names; when empty the label shows only the confidence.
// classId  - index into `classes` (asserted to be in range when `classes` is non-empty).
// conf     - confidence, printed with two decimals.
// left/top/right/bottom - corners of the bounding box in pixels.
void DrawBoxes(Mat& frame, vector<string> classes, int classId, float conf, int left, int top, int right, int bottom)
{
	// One set of font parameters for both measuring and drawing the label, so
	// the measured size matches what is rendered. (Measuring at scale 0.5 while
	// drawing at 0.75 made the top-edge clamp below too small, so labels of
	// boxes touching the top border were partly drawn outside the image.)
	const int fontFace = FONT_HERSHEY_SIMPLEX;
	const double fontScale = 0.75;
	const int thickness = 1;

	// Bounding box.
	rectangle(frame, Point(left, top), Point(right, bottom), Scalar(255, 178, 50), 3);

	// Label text: "<class>:<confidence>", or just the confidence without class names.
	string label = format("%.2f", conf);
	if (!classes.empty())
	{
		// A negative id would index out of bounds just like a too-large one.
		CV_Assert(classId >= 0 && classId < (int)classes.size());
		label = classes[classId] + ":" + label;
	}

	// Place the label at the top of the box, but never above the image border.
	int baseLine = 0;
	const Size labelSize = getTextSize(label, fontFace, fontScale, thickness, &baseLine);
	top = max(top, labelSize.height);
	// White background sized to the measured text, then the text itself in black.
	rectangle(frame, Point(left, top - labelSize.height), Point(left + labelSize.width, top + baseLine), Scalar(255, 255, 255), FILLED);
	putText(frame, label, Point(left, top), fontFace, fontScale, Scalar(0, 0, 0), thickness);
}

// Draws every detection in `outs` onto `frame`.
//
// frame   - image that is drawn on in place.
// outs    - detections; each box is stored as top-left (x, y) plus width/height.
// classes - class names indexed by bbox_t::obj_id; forwarded to DrawBoxes.
void Drawer(Mat& frame, vector<bbox_t> outs, vector<string> classes)
{
	// Range-for avoids comparing a signed int index against the unsigned size().
	for (const bbox_t& box : outs)
	{
		// DrawBoxes takes corner coordinates, so (x, y, w, h) is converted to
		// (left, top, right, bottom) here.
		DrawBoxes(frame, classes, box.obj_id, box.prob, box.x, box.y,
			box.x + box.w, box.y + box.h);
	}
}

int main() {
	string classesFile = "yolov4/s_coco.names";
	vector<string> classes;
	ifstream ifs(classesFile.c_str());
	string line;
	while (getline(ifs, line)) classes.push_back(line);
	string modelConfig = "yolov4/yolov4.cfg";
	string modelWeights = "yolov4/yolov4.weights";
	Mat image = imread("dog.jpg");
	if (image.empty()) {
		cout << "the image is enpty" << endl;
	}
	else {
		Detector detector(modelConfig, modelWeights, 0);
		std::vector<bbox_t> res = detector.detect(image);
		Drawer(image, res, classes);
		imwrite("dog_result.jpg", image);
	}
	system("pause");
	return 0;
}

C#使用yolov4
由于在我的项目中，需要与C#的界面进行对接，并且需要实时获得检测结果，因此我想到将以上代码进一步封装成动态库。这样做的好处在于避免了C#直接调用yolo_cpp_dll.dll，方便在C++进行进一步的功能扩展。
在封装dll的过程中，不能直接对main方法中的内容进行封装。原因在于，如果对main中的内容都进行封装，每次检测图片都会经历一个模型加载的过程，会花费大量的时间，远远不能达到实时的效果。因此我对main中的内容进行了分解。
解决思路如下：
抽离加载模型的部分，返回模型的指针。

// Loads the YOLOv4 network and returns it to the caller as an opaque handle.
//
// Returns a pointer to a heap-allocated Detector (as void*), or nullptr when
// construction throws. The caller owns the handle and is responsible for
// having it deleted on the C++ side.
extern "C" __declspec(dllexport)
void* __stdcall model()
{
	const string modelConfig = "yolov4/yolov4.cfg";
	const string modelWeights = "yolov4/yolov4.weights";
	try
	{
		// Load the model; this is the expensive step and should run only once.
		Detector* detector = new Detector(modelConfig, modelWeights, 0);
		return static_cast<void*>(detector);
	}
	catch (...)
	{
		// A C++ exception must not propagate out of an extern "C" entry point
		// into the non-C++ caller, so failure is reported as a null handle.
		return nullptr;
	}
}

因为C#是可以接受C++的指针的，C#就可以将这个指针作为参数传回给C++的检测方法，对图片进行检测。
封装检测的部分代码如下：有两个参数，一个是图片文件路径（C字符串），另一个是模型的指针。可以返回检测结果的指针，根据需要进行修改。

// Runs detection on the image file `imagefile` using the model handle `p_model`.
//
// imagefile - path of the image to read (C string).
// p_model   - pointer to a Detector, passed as void*.
//
// Returns a pointer to the first bbox_t of the detections, or nullptr when an
// argument is null, the image cannot be read, or nothing was detected. The
// pointed-to array is owned by this function and stays valid only until the
// next call; because of the shared static buffer the function is not reentrant.
// NOTE(review): the number of boxes is not conveyed through this pointer - the
// original snippet left the return value unspecified, so adapt the result
// format to whatever the caller needs.
extern "C" __declspec(dllexport)
void* __stdcall Detect(const char* imagefile, void* p_model)
{
	// Keeps the latest detections alive after the function returns.
	static std::vector<bbox_t> lastResult;

	// Constructing a string from a null pointer or calling through a null
	// model would be undefined behaviour.
	if (imagefile == nullptr || p_model == nullptr)
	{
		return nullptr;
	}

	Detector* model = static_cast<Detector*>(p_model);
	const string fileimage = imagefile;
	Mat image = imread(fileimage);
	if (image.empty())
	{
		cout << "the image is empty" << endl;
		return nullptr;
	}

	lastResult = model->detect(image);
	// Every path now returns a value; previously control fell off the end of
	// a non-void function.
	if (lastResult.empty())
	{
		return nullptr;
	}
	return static_cast<void*>(lastResult.data());
}

为了避免造成内存泄漏，C#需要调用C++的函数，将模型进行删除，删除模型的代码如下。

// Destroys a model handle.
//
// p_model - pointer to a Detector, passed as void*. A null pointer is accepted,
//           since delete on a null pointer does nothing.
// NOTE(review): the original also cast a `res_arr` into an unused local; that
// identifier is not declared in the visible code and the statement had no
// effect, so it was dropped. If `res_arr` is a result buffer that must be
// released, free it explicitly here.
extern "C" __declspec(dllexport)
void __stdcall stop_model(void* p_model)
{
	delete static_cast<Detector*>(p_model);
}