故事前言:我是一个喜欢用C++编程的小白。出于强迫症,我在Google、百度、GitHub等地方搜了一圈,都没发现有C++版本的工具能把yolov5所用的xml标注文件解析成txt文件以便于模型的训练,找到的只有Python版本的,于是我就结合TinyXML解析树实现了xml到txt的转换。有任何不懂的可在评论区询问,有啥不足的地方欢迎大佬指出~ 首先说下我的开发环境:Ubuntu 20.04 + VSCode + TinyXML。需要先在👉资源库👈下载好TinyXML库,然后将tinystr.h、tinystr.cpp、tinyxml.h、tinyxml.cpp、tinyxmlerror.cpp、tinyxmlparser.cpp添加到工程中,接下来就可以写代码了。 以下面的XML文件为例,我们在解析xml树的时候只能一层层地剥开。
<?xml version="1.0" ?><annotation>
<folder>train_images</folder>
<filename>843.png</filename>
<size>
<width>400</width>
<height>400</height>
<depth>3</depth>
</size>
<object>
<name>with_mask</name>
<bndbox>
<xmin>1</xmin>
<ymin>132</ymin>
<xmax>59</xmax>
<ymax>215</ymax>
</bndbox>
</object>
<object>
<name>with_mask</name>
<bndbox>
<xmin>139</xmin>
<ymin>128</ymin>
<xmax>224</xmax>
<ymax>210</ymax>
</bndbox>
</object>
<object>
<name>with_mask</name>
<bndbox>
<xmin>331</xmin>
<ymin>205</ymin>
<xmax>400</xmax>
<ymax>277</ymax>
</bndbox>
</object>
</annotation>
下面是利用TinyXML解析上述xml文件的代码。话不多说,为了让大家能尽可能地明白,全文都尽量加了注释~
#include<bits/stdc++.h>
#include "tinyxml.h"
#include "tinystr.h"
using namespace std;
// Class names expected in an <object>'s <name> element. main() compares the
// element text against these and maps them to class indices 0, 1 and 2
// respectively.
const std::string flag_1 = "with_mask";
const std::string flag_2 = "without_mask";
const std::string flag_3 = "mask_weared_incorrect";
// One annotated object in pixel coordinates, as filled in by main() from the
// XML annotation.
struct label{
    int w, h;                    // image width and height (from <size>)
    float status;                // class index (main() stores 0, 1 or 2)
    int xmin, xmax, ymin, ymax;  // bounding-box corners (from <bndbox>)
};

// The same object expressed as a centre point and extent, each divided by
// the corresponding image dimension.
struct box{
    float status;    // class index, copied from label::status
    double x,y,w,h;  // centre x/y and box width/height, divided by image size
};

// Converts a corner-based pixel box into a centre/size box scaled by the
// image dimensions.
//
// lab: input annotation; lab.w and lab.h must be non-zero, otherwise the
//      results are non-finite. It is not modified.
// b:   output; every field is overwritten.
void rescaling(label &lab, box &b){
    const double dw = 1.0 / lab.w;
    const double dh = 1.0 / lab.h;

    // Divide by 2.0 rather than shifting right: the integer shift dropped the
    // half pixel whenever xmin + xmax (or ymin + ymax) was odd.
    const double center_x = (lab.xmin + lab.xmax) / 2.0;
    const double center_y = (lab.ymin + lab.ymax) / 2.0;
    const double width = lab.xmax - lab.xmin;
    const double height = lab.ymax - lab.ymin;

    b.status = lab.status;
    b.x = center_x * dw;
    b.y = center_y * dh;
    b.w = width * dw;
    b.h = height * dh;
}
int main(){
TiXmlDocument* myDoc = new TiXmlDocument;
TiXmlElement *root, *node_filename, *node_size, *node_obj;
TiXmlElement *size_w, *size_h, *size_d;
label labs[3000];
box boxs[3000];
fstream fout;
for(int i=0; i<2053; i++){
string id_num = to_string(i);
string xmlPath = "../MyPython/yolov5/data/labels/mask_xml/" + id_num + ".xml";
const char* path = xmlPath.c_str();
fout.open("../MyPython/yolov5/data/labels/mask_txt/" + id_num + ".txt", fstream::out);
if(!fout) {cerr << "未能打开txt文件" << endl; exit(-1);}
if(!myDoc->LoadFile(path)){cerr << "未能正常加载xml文件" << endl; exit(-1);}
root = myDoc->RootElement();
if(!root){cerr << "Fail to load file: No root element." << endl; exit(1);}
node_filename = root->FirstChildElement("filename");
node_size = node_filename->NextSiblingElement("size");
size_w = node_size->FirstChildElement("width");
size_h = size_w->NextSiblingElement("height");
size_d = size_h->NextSiblingElement("depth");
const char* width = size_w->GetText();
int w = atoi(width);
const char* height = size_h->GetText();
int h = atoi(height);
const char* depth = size_d->GetText();
node_obj = node_size->NextSiblingElement("object");
TiXmlElement *obj_name, *obj_bndbox;
for(; node_obj!=nullptr; node_obj=node_obj->NextSiblingElement("object")){
labs[i].w=w, labs[i].h=h;
obj_name = node_obj->FirstChildElement("name");
if(obj_name->GetText() == flag_1) labs[i].status = 0;
else if(obj_name->GetText() == flag_2) labs[i].status = 1;
else if(obj_name->GetText() == flag_3) labs[i].status = 2;
obj_bndbox = obj_name->NextSiblingElement("bndbox");
TiXmlElement *pos = obj_bndbox->FirstChildElement("xmin");
const char* xmin = pos->GetText();
labs[i].xmin = atoi(xmin);
pos = pos->NextSiblingElement("ymin");
const char* ymin = pos->GetText();
labs[i].ymin = atoi(ymin);
pos = pos->NextSiblingElement("xmax");
const char* xmax = pos->GetText();
labs[i].xmax = atoi(xmax);
pos = pos->NextSiblingElement("ymax");
const char* ymax = pos->GetText();
labs[i].ymax = atoi(ymax);
rescaling(labs[i], boxs[i]);
fout << boxs[i].status << " " << boxs[i].x << " " << boxs[i].y
<< " " << boxs[i].w << " " << boxs[i].h;
if(node_obj->NextSiblingElement("object")) fout << endl;
}
fout.close();
}
return 0;
}
路漫漫其修远兮,吾将上下而求索
|