该网络由三个部分组成:backbone(提取高级语义特征)、上采样(恢复分辨率)、head(三个卷积分支,最终输出三个张量):heatmap [B,C,H,W]、wh [B,2,H,W]、reg [B,2,H,W]。
heatmap 存放的是目标中心点位置:整张图中哪个位置的值最接近 1,就代表哪个位置是目标的中心点。
wh 总共两个通道,存放的是以该点为中心的目标框的宽和高,所以左上角和右下角坐标分别为 (x - w/2, y - h/2) 和 (x + w/2, y + h/2)。
reg 总共两个通道,存放的是目标中心点在 x、y 方向上的偏移量,把它加到中心点坐标上即可。
wh、reg 为什么只有两个通道?
因为目标中心点的取值范围不会超过整张图的 size,即便有多个目标也不会超过;可以想象成把多个通道的热力图合在一起,每个关键点都落在这个 size 之内。
判断某个点属于哪个类别的依据是:每个通道的热力图各取前十个最大值,比如 2 个通道(2 个类别)总共取 20 个值;再从这 20 个值中取前十个,每个值在这 20 个值中的下标整除 10,就是它的类别。
所以后处理流程如下:
float hm_chn0[Height][Width] = {0};
float hm_chn1[Height][Width] = {0};
float reg_chn0[Height][Width] = {0};
float reg_chn1[Height][Width] = {0};
float wh_chn0[Height][Width] = {0};
float wh_chn1[Height][Width] = {0};
// if (desc->astMI_OutputTensorDescs[0].eElmFormat == MI_IPU_FORMAT_FP32)
// {
// //MI_FLOAT* data = (MI_FLOAT*)(OutputTensorVector->astArrayTensors[0].ptTensorData[0]);
// //memcpy(hmData, OutputTensorVector->astArrayTensors[0].ptTensorData[0], sizeof(MI_FLOAT)*s32ClassCount );
// //memcpy(whData, OutputTensorVector->astArrayTensors[1].ptTensorData[0], sizeof(MI_FLOAT)*s32ClassCount );
// //memcpy(regData, OutputTensorVector->astArrayTensors[2].ptTensorData[0], sizeof(MI_FLOAT)*s32ClassCount);
// }
MI_FLOAT* phmdata = (MI_FLOAT*)(OutputTensorVector->astArrayTensors[0].ptTensorData[0]);
MI_FLOAT* pwhdata = (MI_FLOAT*)(OutputTensorVector->astArrayTensors[1].ptTensorData[0]);
MI_FLOAT* pregdata = (MI_FLOAT*)(OutputTensorVector->astArrayTensors[2].ptTensorData[0]);
for(unsigned int h = 0; h < Height; h++)
{
for(unsigned int w = 0; w < Width; w++)
{
//number+=1;
if(s32ClassCount%8==0)
{
hm_chn0[h][w] =*(phmdata+((h*Height+w)*8));
hm_chn1[h][w] =*(phmdata+((h*Height+w)*8+1));
//heatmap_chn0[h][w] = (unsigned char)hm_chn0[h][w]*255;
//heatmap_chn1[h][w] = (unsigned char)hm_chn1[h][w]*255;
wh_chn0[h][w] =*(pwhdata+((h*Height+w)*8));
wh_chn1[h][w] =*(pwhdata+((h*Height+w)*8+1));
reg_chn0[h][w] =*(pregdata+((h*Height+w)*8));
reg_chn1[h][w] =*(pregdata+((h*Height+w)*8+1));
}
}
}
cout<<"hm[0] " << hm_chn0[0][0]<<" " << hm_chn0[0][1]<<endl;
//cout<<"hm[0] " << heatmap_chn0[0][0]<<" " << heatmap_chn0[0][1]<<endl;
// ofs << std::endl << "}" << std::endl << std::endl;
// ofs.close();
// Mat heatmap_image(Height, Width, CV_8UC1);
// heatmap_image.data = heatmap[0];
// imwrite("heat.jpg",heatmap_image);
// heatmap_image.data = heatmap[1];
// imwrite("heat_1.jpg",heatmap_image);
Mat img_hm_0(Height, Width, CV_8UC1);
Mat img_hm_1(Height, Width, CV_8UC1);
Mat heatmap(Height, Width, CV_8UC1);
Mat heatmap_1(Height, Width, CV_8UC1);
Mat src(Height, Width, CV_32FC1);
float a =0;
for (int row = 0;row < Height;row++)
{
for (int col = 0;col < Width;col++)
{
//a = data[(200*row+col)*8] *255 ;
a = hm_chn0[row][col] *255;
src.at<float>(row, col)= a ;
}
}
src.convertTo(heatmap,CV_8U,10,0);
imwrite("heatmap_0.jpg",heatmap);
//
src.convertTo(img_hm_0,CV_8U);
src.convertTo(img_hm_1,CV_8U);
unsigned char c = img_hm_0.at<uchar>(0, 0);
unsigned char b = img_hm_0.at<uchar>(0, 1);
cout<<" hm 0 unchar is" << (int)c <<' '<< (int)b << endl;
// 对heatmap做maxpool,我用膨胀来代替了
cv::Mat mat;
//Mat HmImg1(Height, Width, CV_8UC1);
Mat Hmax1(Height, Width, CV_8UC1);
// Mat HmImg2(Height, Width, CV_8UC1);
Mat Hmax2(Height, Width, CV_8UC1);
//
//HmImg1.data = heatmap[0];
Mat hmimg1 = img_hm_0.clone();
//HmImg2.data = heatmap[1];
Mat hmimg2 = img_hm_0.clone();
cv::Mat element = getStructuringElement(MORPH_RECT, Size(3, 3));
dilate(hmimg1, Hmax1, element);
dilate(hmimg2, Hmax2, element);
///
// for (unsigned int i = 0; i < onebuf; i++)
// {
// if (hmimg1.data[i] != Hmax1.data[i])
// {
// hm[0][i] = 0;
// }
// if (hmimg2.data[i] != Hmax2.data[i])
// {
// hm[1][i] = 0;
// }
// }
for (int row = 0;row < Height;row++)
{
for (int col = 0;col < Width;col++)
{
if(hmimg1.at<uchar>(row,col) != Hmax1.at<uchar>(row,col))
{
hm_chn0[row][col] = 0;
}
if(hmimg2.at<uchar>(row,col) != Hmax2.at<uchar>(row,col))
{
hm_chn1[row][col] = 0;
}
}
}
// save
for (int row = 0;row < Height;row++)
{
for (int col = 0;col < Width;col++)
{
//a = data[(200*row+col)*8] *255 ;
a = hm_chn0[row][col] *255;
src.at<float>(row, col)= a ;
}
}
src.convertTo(heatmap_1,CV_8U,10,0);
imwrite("heatmap_1.jpg",heatmap);
cout<<"ssssssssss"<<endl;  // debug progress marker

// Per-channel candidates: score, flat index into the map, and decoded row/col.
float topk_scores[Chn][topN];
int topk_inds[Chn][topN];
int topk_ys[Chn][topN];
int topk_xs[Chn][topN];

// Take the topN largest values of each suppressed heatmap.
// hm_chnX[0] is the first row, which decays to a pointer to the first element,
// so the contiguous 2-D array is handed over as a flat buffer of onebuf floats.
// NOTE(review): topk() is defined elsewhere; from its use here it presumably
// writes the topN largest values and their flat indices - confirm.
topk(hm_chn0[0], onebuf, topN, topk_scores[0], topk_inds[0]);
topk(hm_chn1[0], onebuf, topN, topk_scores[1], topk_inds[1]);

// Flatten all per-channel candidates into one list: entry cl * topN + n.
float scores[Chn * topN];
int num = 0;
for (unsigned int cl = 0; cl < Chn; cl++)
{
    for (int n = 0; n < topN; n++)
    {
        // Position within a single map, in [0, onebuf).
        topk_inds[cl][n] = topk_inds[cl][n] % (onebuf);
        // Flat index = y * Width + x, so it is decoded with the row length
        // (Width). Using Height here is only correct for square maps.
        topk_ys[cl][n] = (int)(topk_inds[cl][n] / Width);
        topk_xs[cl][n] = (int)(topk_inds[cl][n] % Width);
        scores[num] = topk_scores[cl][n];
        num++;
    }
}

// Second pass: the topN best among the Chn * topN candidates.
float topk_score[topN];  // scores of the final topN points
int topk_ind[topN];      // their positions in scores[], in [0, Chn * topN)
topk(scores, Chn * topN, topN, topk_score, topk_ind);
cout <<"Top scores is " << topk_score[0]<<endl;

int topk_clses[topN];  // class (heatmap channel) of each final point
int topk_y[topN];      // row of each final point on the feature map
int topk_x[topN];      // column of each final point on the feature map
int ind[topN];         // flat feature-map index of each final point
for (unsigned int cl = 0; cl < topN; cl++)
{
    // scores[] is laid out channel after channel, topN entries each, so the
    // quotient is the channel and the remainder the slot within that channel.
    topk_clses[cl] = (int)(topk_ind[cl] / topN);
    topk_y[cl] = topk_ys[topk_clses[cl]][topk_ind[cl] % topN];
    topk_x[cl] = topk_xs[topk_clses[cl]][topk_ind[cl] % topN];
    ind[cl] = topk_inds[topk_clses[cl]][topk_ind[cl] % topN];
}
// At this point the topN highest-scoring centre points, with their coordinates
// and scores, have been selected across the heatmap channels.
// Next: bring in the wh and reg outputs (the regression terms).
float feat_reg[onebuf][Chn] = {0};  // reg values, one row per flat pixel index
float feat_wh[onebuf][Chn] = {0};   // wh values, one row per flat pixel index
float reg_view[topN][Chn];          // reg values at the selected points
float wh_view[topN][Chn];           // wh values at the selected points
float x[topN];                      // refined centre x per selected point
float y[topN];                      // refined centre y per selected point
float dets[topN][6];                // per detection: 4 box coords, score, class

// Author's note: merging this loop into the max-pool loop made processing
// about 2 ms slower, so it is kept separate.
// Re-pack the planar reg/wh maps into per-pixel pairs, visiting pixels row by
// row so that entry `inc` corresponds to flat index row * Width + col.
int inc = 0;
for (int row = 0; row < Height; row++)
{
    for (int col = 0; col < Width; col++)
    {
        feat_reg[inc][0] = reg_chn0[row][col];
        feat_reg[inc][1] = reg_chn1[row][col];
        feat_wh[inc][0] = wh_chn0[row][col];
        feat_wh[inc][1] = wh_chn1[row][col];
        inc += 1;
    }
}
std::vector<int> ids;
std::vector<cv::Rect> boxes;
std::vector<float> confidences;
cout<<"vvvvvvv"<<endl;
for (int num = 0; num < topN; num++)
{
//reg 存放的是中心点的的偏移量
reg_view[num][0] = feat_reg[ind[num]][0];
reg_view[num][1] = feat_reg[ind[num]][1];
//坐标增加回归量
x[num] = topk_x[num] + reg_view[num][0];
y[num] = topk_y[num] + reg_view[num][1];
//读取前10个索引对应的wh
wh_view[num][0] = feat_wh[ind[num]][0];
wh_view[num][1] = feat_wh[ind[num]][1];
//输出[10,6]的检测结果,其中10是置信top10,6是4(bboxes)+1(scores)+1(clses)
dets[num][0] = (x[num] - (wh_view[num][0] / 2)) * 4;
if(dets[num][0] < 0)
{
dets[num][0] = 0;
}
dets[num][1] = (y[num] - (wh_view[num][1] / 2)) * 4;
if(dets[num][1] < 0)
{
dets[num][1] = 0;
}
dets[num][2] = (x[num] + (wh_view[num][0] / 2)) * 4;
if(dets[num][2] < 0)
{
dets[num][2] = 0;
}
dets[num][3] = (y[num] + (wh_view[num][1] / 2)) * 4;
if(dets[num][3] < 0)
{
dets[num][3] = 0;
}
//
dets[num][4] = topk_score[num];
dets[num][5] = topk_clses[num];
ids.push_back(dets[num][5]);
confidences.push_back(dets[num][4]);
boxes.emplace_back((int)dets[num][0], (int)dets[num][1], (int)(wh_view[num][0] * 4), (int)(wh_view[num][1] * 4));
}
// Non-maximum suppression over the decoded candidates.
cout<<"gggggggggggggggg"<<endl;  // debug progress marker
std::vector<int> indices;  // positions in boxes/confidences/ids that survive NMS
float score_threshold = 0.1;
float nms_threshold = 0.1;
NMSBoxes(boxes, confidences, score_threshold, nms_threshold, indices);

// Load the original image to draw on. Flag -1 loads it unchanged; OpenCV's
// default channel order is BGR and its layout is HWC.
cv::Mat img = cv::imread(image_path, -1);
if (img.empty()) {
    std::cout << " error! image don't exist!" << std::endl;
    exit(1);
}

// Network input size: 4x the feature-map size.
int net_w, net_h;
net_w = Width * 4;
net_h = Height * 4;

// Flat result list, 6 floats per kept detection:
// xmin, ymin, xmax, ymax, confidence, class id (network-input coordinates).
std::vector<float> Result_str;
cout<<"indec size is "<<(int)(indices.size()) <<endl;
for (size_t i = 0; i < indices.size(); ++i)
{
    // These four coordinates are relative to the network input image.
    int idx = indices[i];
    cv::Rect box = boxes[idx];
    float xmin = static_cast<float>(box.x);
    float ymin = static_cast<float>(box.y);
    float xmax = xmin + static_cast<float>(box.width);
    float ymax = ymin + static_cast<float>(box.height);
    cout << xmin << " " << ymin << " " << xmax << " " << ymax << " " << confidences[idx] << " " << ids[idx] << " " << endl;

    Result_str.push_back(xmin);
    Result_str.push_back(ymin);
    Result_str.push_back(xmax);
    Result_str.push_back(ymax);
    Result_str.push_back(confidences[idx]);
    Result_str.push_back(ids[idx]);

    // Map the box back onto the original image: scale by the longer side,
    // then subtract half the side difference along the shorter axis.
    // NOTE(review): this presumably undoes a centred square padding applied
    // during pre-processing - confirm against the pre-processing code.
    if (img.cols > img.rows)  // wider than tall
    {
        xmin = xmin * img.cols / net_w;
        xmax = xmax * img.cols / net_w;
        ymin = (ymin * img.cols / net_w) - ((img.cols - img.rows) / 2);
        ymax = (ymax * img.cols / net_w) - ((img.cols - img.rows) / 2);
        if (ymin < 0)
        {
            ymin = 0;
        }
        if (ymax > img.rows)
        {
            ymax = img.rows;
        }
    }
    else  // taller than wide, or square
    {
        ymin = ymin * img.rows / net_h;
        ymax = ymax * img.rows / net_h;
        xmin = (xmin * img.rows / net_h) - ((img.rows - img.cols) / 2);
        xmax = (xmax * img.rows / net_h) - ((img.rows - img.cols) / 2);
        if (xmin < 0)
        {
            xmin = 0;
        }
        if (xmax > img.cols)
        {
            xmax = img.cols;
        }
    }
    cout << xmin << " " << ymin << " " << xmax << " " << ymax << " " << confidences[idx] << " " << ids[idx] << " " << endl;
    cv::rectangle(img, Point((int)(xmin), (int)(ymin)), Point((int)(xmax), (int)(ymax)), Scalar(0, 0, 255), 2);
}

// Save the annotated image once, after all boxes are drawn, instead of
// re-encoding the file for every detection. As before, nothing is written
// when no detection survives NMS.
if (!indices.empty())
{
    std::string save_out = "EEEEEE.jpg";
    imwrite(save_out, img);
}
|