目的
将雷达信号处理流程中的脉冲压缩、MTI、加窗、MTD、CFAR 等过程进行并行化设计。
代码
#include <stdio.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <windows.h>
#include <ctime>
#include <cstdlib>
#include <cstring>
#include "Parameter.h"
#include "ReadSaveData.h"
#include "malloc.h"
#include <cufft.h>
#include "ComputeEchoNoise.h"
#include "ComputeHFilterFFT.h"
#include "ComputeCoherentAccumulation.h"
#include <vector>
#include "TargetRecognition.h"
#include "AddWin.h"
#include <iostream>
#include "Parameter.h"
using namespace std;
using std::vector;
/**
 * MTI canceller: differences samples that sit NWITH0 elements apart.
 *
 * The buffers are treated as consecutive rows of NWITH0 complex samples
 * (presumably one row per pulse -- confirm against the caller). Each thread
 * produces one output sample:
 *   N_jie == 1 : odata[i] = idata[i + NWITH0] - idata[i]
 *   N_jie == 2 : odata[i] = idata[i + 2*NWITH0] - 2*idata[i + NWITH0] + idata[i]
 * For any other value of N_jie the kernel writes nothing.
 *
 * Launch layout: a 1D or 2D grid of 1D or 2D blocks. The x/y block and thread
 * coordinates are flattened into a single linear index; the z dimension is
 * not used. Threads whose linear index is >= size exit without touching memory.
 *
 * @param idata  Input samples. Must hold at least size + N_jie * NWITH0
 *               elements, because the kernel reads up to N_jie rows ahead.
 * @param odata  Output samples; at least size elements are written.
 * @param size   Number of output samples to produce. Values <= 0 are a no-op.
 */
__global__ void MTI(cufftComplex *idata, cufftComplex *odata, const int size)
{
    // Reject non-positive sizes while the value is still signed: comparing a
    // negative int against the unsigned index below would convert it to a huge
    // bound and let every thread read/write out of range.
    if (size <= 0)
    {
        return;
    }

    // Flatten the (2D grid, 2D block) coordinates into one linear index.
    const unsigned int blockId = blockIdx.x + blockIdx.y * gridDim.x;
    const unsigned int threadsPerBlock = blockDim.x * blockDim.y;
    const unsigned int index = blockId * threadsPerBlock
        + (threadIdx.y * blockDim.x) + threadIdx.x;

    // Surplus threads of the rounded-up grid have nothing to do.
    if (index >= static_cast<unsigned int>(size))
    {
        return;
    }

    if (N_jie == 1)
    {
        // First-order canceller: next row minus current row.
        odata[index].x = idata[index + NWITH0].x - idata[index].x;
        odata[index].y = idata[index + NWITH0].y - idata[index].y;
    }
    else if (N_jie == 2)
    {
        // Second-order canceller: weights (1, -2, 1) over three successive rows.
        odata[index].x = idata[index + 2 * NWITH0].x - 2.0f * idata[index + NWITH0].x + idata[index].x;
        odata[index].y = idata[index + 2 * NWITH0].y - 2.0f * idata[index + NWITH0].y + idata[index].y;
    }
}
// Prints a diagnostic and terminates the process if a CUDA runtime call failed.
static void checkCudaStatus(cudaError_t status, const char *what)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Prints a diagnostic and terminates the process if a cuFFT call failed.
static void checkCufftStatus(cufftResult status, const char *what)
{
    if (status != CUFFT_SUCCESS)
    {
        fprintf(stderr, "cuFFT error in %s: code %d\n", what, static_cast<int>(status));
        exit(EXIT_FAILURE);
    }
}

/**
 * Host driver for the processing chain.
 *
 * Reads NWITH0 * Nfft complex samples, uploads them to the GPU and runs, in
 * order: ComputeHFilterFFT, ComputeEchoNoise, AddWin, the MTI kernel,
 * ComputeCoherentAccumulation and TargetRecognition. The detection map is
 * copied back into target_host and all resources are released.
 *
 * Any CUDA/cuFFT/allocation failure aborts the program with a message on
 * stderr instead of silently continuing with invalid buffers.
 *
 * @return 0 on success, EXIT_FAILURE if a host allocation fails.
 */
int main()
{
    // Buffer sizes, computed in size_t so the byte counts cannot overflow int.
    const size_t sampleCount = static_cast<size_t>(NWITH0) * Nfft;
    const size_t complexBytes = sampleCount * sizeof(cufftComplex);
    const size_t realBytes = sampleCount * sizeof(double);

    // ---- Host buffers -------------------------------------------------------
    cufftComplex *data_Host = (cufftComplex*)malloc(complexBytes);
    double *target_host = (double*)malloc(realBytes);
    if (data_Host == NULL || target_host == NULL)
    {
        fprintf(stderr, "Host allocation failed\n");
        free(data_Host);   // free(NULL) is a no-op
        free(target_host);
        return EXIT_FAILURE;
    }
    memset(target_host, 0, realBytes);
    ReadData(data_Host, NWITH0 * Nfft);

    // ---- Device buffers -----------------------------------------------------
    cufftComplex *H_dev;
    checkCudaStatus(cudaMalloc((void**)&H_dev, NWITH0 * sizeof(cufftComplex)), "cudaMalloc(H_dev)");
    checkCudaStatus(cudaMemset(H_dev, 0, NWITH0 * sizeof(cufftComplex)), "cudaMemset(H_dev)");

    // No memset needed: the whole buffer is overwritten by the upload below.
    cufftComplex *data_NWITH0_dev;
    checkCudaStatus(cudaMalloc((void**)&data_NWITH0_dev, complexBytes), "cudaMalloc(data_NWITH0_dev)");

    // Zeroed because the MTI kernel fills only the first (Nfft - N_jie) rows;
    // the remaining rows must be defined before the next stage reads them.
    cufftComplex *data_MTI_dev;
    checkCudaStatus(cudaMalloc((void**)&data_MTI_dev, complexBytes), "cudaMalloc(data_MTI_dev)");
    checkCudaStatus(cudaMemset(data_MTI_dev, 0, complexBytes), "cudaMemset(data_MTI_dev)");

    double *Win;
    checkCudaStatus(cudaMalloc((void**)&Win, Nfft * sizeof(double)), "cudaMalloc(Win)");
    checkCudaStatus(cudaMemset(Win, 0, Nfft * sizeof(double)), "cudaMemset(Win)");

    cufftComplex *data_AddWin;
    checkCudaStatus(cudaMalloc((void**)&data_AddWin, complexBytes), "cudaMalloc(data_AddWin)");
    checkCudaStatus(cudaMemset(data_AddWin, 0, complexBytes), "cudaMemset(data_AddWin)");

    double *dataTAbs_N_dev;
    checkCudaStatus(cudaMalloc((void**)&dataTAbs_N_dev, realBytes), "cudaMalloc(dataTAbs_N_dev)");
    checkCudaStatus(cudaMemset(dataTAbs_N_dev, 0, realBytes), "cudaMemset(dataTAbs_N_dev)");

    // Zeroed defensively: it is copied back to the host in full, and it is not
    // visible here whether TargetRecognition writes every element.
    double *targetYoN_dev;
    checkCudaStatus(cudaMalloc((void**)&targetYoN_dev, realBytes), "cudaMalloc(targetYoN_dev)");
    checkCudaStatus(cudaMemset(targetYoN_dev, 0, realBytes), "cudaMemset(targetYoN_dev)");

    // ---- cuFFT plans --------------------------------------------------------
    cufftHandle plan_NWITH0_One, plan_NWITH0_Many, plan_Nfft_Many;
    checkCufftStatus(cufftPlan1d(&plan_NWITH0_One, NWITH0, CUFFT_C2C, BATCH), "cufftPlan1d(plan_NWITH0_One)");

    int inembed[2] = { NWITH0, Nfft };
    int onembed[2] = { NWITH0, Nfft };

    // Nfft transforms of length NWITH0 over contiguous rows (stride 1, dist NWITH0).
    int number_NWITH0[1] = { NWITH0 };
    checkCufftStatus(
        cufftPlanMany(&plan_NWITH0_Many, 1, number_NWITH0, inembed, 1, NWITH0, onembed, 1, NWITH0, CUFFT_C2C, Nfft),
        "cufftPlanMany(plan_NWITH0_Many)");

    // NWITH0 transforms of length Nfft down the columns (stride NWITH0, dist 1).
    int number_Nfft[1] = { Nfft };
    checkCufftStatus(
        cufftPlanMany(&plan_Nfft_Many, 1, number_Nfft, inembed, NWITH0, 1, onembed, NWITH0, 1, CUFFT_C2C, NWITH0),
        "cufftPlanMany(plan_Nfft_Many)");

    // ---- Processing chain ---------------------------------------------------
    ComputeHFilterFFT(H_dev, plan_NWITH0_One);

    checkCudaStatus(cudaMemcpy(data_NWITH0_dev, data_Host, complexBytes, cudaMemcpyHostToDevice),
                    "cudaMemcpy(host -> data_NWITH0_dev)");
    ComputeEchoNoise(H_dev, data_NWITH0_dev, plan_NWITH0_One, plan_NWITH0_Many, 0);

    AddWin(Win, data_NWITH0_dev, data_AddWin);

    // The execution configuration is <<<grid, block>>>. The grid is rounded up
    // so that grid * block covers at least NWITH0 x Nfft threads.
    dim3 dimBlock2D(BLOCK_SIZE, BLOCK_SIZE);
    dim3 dimGrid2D_NWITH0_Nfft((NWITH0 + BLOCK_SIZE - 1) / BLOCK_SIZE, (Nfft + BLOCK_SIZE - 1) / BLOCK_SIZE);
    // An order-N_jie canceller reads N_jie rows ahead, so only (Nfft - N_jie)
    // output rows can be formed without reading past the end of data_AddWin.
    MTI<<<dimGrid2D_NWITH0_Nfft, dimBlock2D>>>(data_AddWin, data_MTI_dev, (Nfft - N_jie) * NWITH0);
    checkCudaStatus(cudaGetLastError(), "MTI kernel launch");

    // Input and output deliberately share the same buffer, as in the original call.
    ComputeCoherentAccumulation(data_MTI_dev, data_MTI_dev, dataTAbs_N_dev, plan_Nfft_Many);
    TargetRecognition(dataTAbs_N_dev, targetYoN_dev);

    // Blocking copy: also surfaces asynchronous errors from the work above.
    checkCudaStatus(cudaMemcpy(target_host, targetYoN_dev, realBytes, cudaMemcpyDeviceToHost),
                    "cudaMemcpy(targetYoN_dev -> host)");
    // NOTE(review): target_host is not written out or otherwise consumed here;
    // add the save/report step if the results are supposed to be persisted.

    // ---- Cleanup ------------------------------------------------------------
    checkCufftStatus(cufftDestroy(plan_NWITH0_One), "cufftDestroy(plan_NWITH0_One)");
    checkCufftStatus(cufftDestroy(plan_NWITH0_Many), "cufftDestroy(plan_NWITH0_Many)");
    checkCufftStatus(cufftDestroy(plan_Nfft_Many), "cufftDestroy(plan_Nfft_Many)");

    checkCudaStatus(cudaFree(H_dev), "cudaFree(H_dev)");
    checkCudaStatus(cudaFree(data_NWITH0_dev), "cudaFree(data_NWITH0_dev)");
    checkCudaStatus(cudaFree(data_MTI_dev), "cudaFree(data_MTI_dev)");
    checkCudaStatus(cudaFree(Win), "cudaFree(Win)");
    checkCudaStatus(cudaFree(data_AddWin), "cudaFree(data_AddWin)");
    checkCudaStatus(cudaFree(dataTAbs_N_dev), "cudaFree(dataTAbs_N_dev)");
    checkCudaStatus(cudaFree(targetYoN_dev), "cudaFree(targetYoN_dev)");

    // Host buffers came from malloc, so they are released with free, not cudaFree.
    free(target_host);
    free(data_Host);
    return 0;
}
|