VS2017 CUDA编程学习1:CUDA编程两变量加法运算
VS2017 CUDA编程学习2:在GPU上执行线程
前言
今天学习了如何使用CUDA获取GPU设备上属性信息,这里做个学习笔记并分享给大家。
1. 学习内容
CUDA获取设备数量与设备名称,CUDA获取设备通用属性信息,CUDA获取设备内存相关属性信息,CUDA获取设备线程相关属性信息等。
2. VS2017 CUDA代码实现
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <memory>
#include <cuda.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
int main()
{
int device_count = 0;
cudaGetDeviceCount(&device_count);
if (device_count == 0)
{
std::cout << "There are no available device(s) that support CUDA" << std::endl;
}
else
{
std::cout << "Detected < " << device_count <<" > CUDA Capable device(s)\n" << std::endl;
}
cudaDeviceProp device_Property;
int device = 0;
cudaGetDevice(&device);
cudaGetDeviceProperties(&device_Property, device);
printf("Device %d: \"%s\"\n", device, device_Property.name);
int driverVersion = 0;
int runtimeVersion = 0;
cudaDriverGetVersion(&driverVersion);
cudaRuntimeGetVersion(&runtimeVersion);
printf("CUDA Driver Version / Runtime Version %d.%d / %d.%d\n", driverVersion / 1000,
(driverVersion % 100) / 10, runtimeVersion / 1000, (runtimeVersion % 100) / 10);
printf("(%2d) Multiprocessors\n", device_Property.multiProcessorCount);
printf("GPU Max Clock rate: %.0f MHz(%0.2f GHz)\n", device_Property.clockRate * 1e-3f, device_Property.clockRate * 1e-6f);
printf("Total amount of global memory: %.0f MBytes(%llu bytes)\n",
(float)device_Property.totalGlobalMem / 1048576.0f, (unsigned long long)device_Property.totalGlobalMem);
printf("Memory Clock rate: %.0f MHz\n", device_Property.memoryClockRate * 1e-3f);
printf("Memory Bus Width: %d-bit\n", device_Property.memoryBusWidth);
if (device_Property.l2CacheSize)
{
printf("L2 Cache Size : %d bytes\n", device_Property.l2CacheSize);
}
printf("Total amount of constant memory: %lu bytes\n", device_Property.totalConstMem);
printf("Total amount of shared memory per block: %lu bytes\n", device_Property.sharedMemPerBlock);
printf("Total number of registers available per block: %d\n", device_Property.regsPerBlock);
printf("Maximum number of threads per multiprocessor: %d\n", device_Property.maxThreadsPerMultiProcessor);
printf("Maximum number of threads per block: %d\n", device_Property.maxThreadsPerBlock);
printf("Max dimension size of a thread block (x, y, z):(%d, %d, %d)\n",
device_Property.maxThreadsDim[0],
device_Property.maxThreadsDim[1],
device_Property.maxThreadsDim[2]);
printf("Max dimension size of a grid size (x, y, z): (%d, %d, %d)\n",
device_Property.maxGridSize[0],
device_Property.maxGridSize[1],
device_Property.maxGridSize[2]);
int device1;
cudaGetDevice(&device1);
cudaDeviceProp deviceProp;
memset(&deviceProp, 0, sizeof(cudaDeviceProp));
deviceProp.major = 1;
deviceProp.minor = 3;
cudaChooseDevice(&device1, &deviceProp);
printf("ID of device which support double precision is :%d\n",
device1);
cudaSetDevice(device1);
system("pause");
return 0;
}
总结
这次主要是学习使用CUDA获取GPU设备属性信息,这在多GPU设备选择以及内核启动参数配置方面很重要:如果内核启动参数(例如每个线程块的线程数或网格尺寸)超出设备限制,内核将无法正常启动,程序可能得到错误结果甚至崩溃!
学习资料
《基于GPU加速的计算机视觉编程》