一、新建項目
打開VS2017 → 新建項目 → Win32控制台應用程序 → “空項目”打鈎
二、調整配置管理器平台類型
右鍵項目 → 屬性 → 配置管理器 → 全改為“x64”
三、配置生成屬性
右鍵項目 → 生成依賴項 → 生成自定義 → 勾選“CUDA 9.0XXX”
三、配置基本庫目錄
注意:後續步驟中出現的目錄地址取決於你當前的CUDA版本及安裝路徑
右鍵項目 → 屬性 → 配置屬性 → VC++目錄 → 包含目錄,添加以下目錄:
- C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0\include
- C:\ProgramData\NVIDIA Corporation\CUDA Samples\v9.0\common\inc
…… → 庫目錄,添加以下目錄:
- C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0\lib\x64
- C:\ProgramData\NVIDIA Corporation\CUDA Samples\v9.0\common\lib\x64
四、配置CUDA靜態鏈接庫路徑
右鍵項目 → 屬性 → 配置屬性 → 鏈接器 → 常規 → 附加庫目錄,添加以下目錄:
- $(CUDA_PATH_V9_0)\lib\$(Platform)
五、選用CUDA靜態鏈接庫
右鍵項目 → 屬性 → 配置屬性 → 鏈接器 → 輸入 → 附加依賴項,添加以下庫:
- cublas.lib;cublas_device.lib;cuda.lib;cudadevrt.lib;cudart.lib;cudart_static.lib;cufft.lib;cufftw.lib;curand.lib;cusolver.lib;cusparse.lib;nppc.lib;nppial.lib;nppicc.lib;nppicom.lib;nppidei.lib;nppif.lib;nppig.lib;nppim.lib;nppist.lib;nppisu.lib;nppitc.lib;npps.lib;nvblas.lib;nvcuvid.lib;nvgraph.lib;nvml.lib;nvrtc.lib;OpenCL.lib;
以上為 “第三步” 中添加的庫目錄 “C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0\lib\x64” 中的庫!
- 注意:kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 這些庫為項目原有的依賴項!
六、配置源碼文件風格
右鍵源文件 → 添加 → 新建項 → 選擇 “CUDA C/C++ File”
右鍵 “xxx.cu” 源文件 → 屬性 → 配置屬性 → 常規 → 項類型 → 設置為“CUDA C/C++”
七、測試程序
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>

// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaSucceeded(cudaError_t status, const char* what)
{
    if (status == cudaSuccess)
        return true;
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Test program: enumerates the CUDA devices visible to the runtime and prints
// the driver/runtime versions plus a selection of cudaDeviceProp fields for
// each device.
// Returns 0 on success, 1 when the device count cannot be queried.
int main()
{
    // Initialised so the loop below is well-defined even if the query fails.
    int deviceCount = 0;
    if (!cudaSucceeded(cudaGetDeviceCount(&deviceCount), "cudaGetDeviceCount"))
        return 1;

    // The versions do not depend on the device, so query them once up front.
    // On failure they stay 0 and are printed as "0.0".
    int driver_version = 0;
    int runtime_version = 0;
    cudaSucceeded(cudaDriverGetVersion(&driver_version), "cudaDriverGetVersion");
    cudaSucceeded(cudaRuntimeGetVersion(&runtime_version), "cudaRuntimeGetVersion");

    for (int dev = 0; dev < deviceCount; dev++)
    {
        cudaDeviceProp deviceProp;
        if (!cudaSucceeded(cudaGetDeviceProperties(&deviceProp, dev), "cudaGetDeviceProperties"))
            continue;

        if (dev == 0)
        {
            // Compare with '=='; the previous '=' overwrote deviceProp.minor,
            // so the compute capability of device 0 was printed as "major.0".
            if (deviceProp.minor == 9999 && deviceProp.major == 9999)
                printf("\n");
        }

        printf("\nDevice%d:\"%s\"\n", dev, deviceProp.name);
        printf("CUDA驅動版本: %d.%d\n", driver_version / 1000, (driver_version % 1000) / 10);
        printf("CUDA運行時版本: %d.%d\n", runtime_version / 1000, (runtime_version % 1000) / 10);
        printf("設備計算能力: %d.%d\n", deviceProp.major, deviceProp.minor);

        // The size_t fields are printed with %zu: in an x64 build size_t is
        // 64-bit, and %u would misreport values of 4 GiB and above.
        printf("Total amount of Global Memory: %zu bytes\n", deviceProp.totalGlobalMem);
        printf("Number of SMs: %d\n", deviceProp.multiProcessorCount);
        printf("Total amount of Constant Memory: %zu bytes\n", deviceProp.totalConstMem);
        printf("Total amount of Shared Memory per block: %zu bytes\n", deviceProp.sharedMemPerBlock);
        printf("Total number of registers available per block: %d\n", deviceProp.regsPerBlock);
        printf("Warp size: %d\n", deviceProp.warpSize);
        printf("Maximum number of threads per SM: %d\n", deviceProp.maxThreadsPerMultiProcessor);
        printf("Maximum number of threads per block: %d\n", deviceProp.maxThreadsPerBlock);
        printf("Maximum size of each dimension of a block: %d x %d x %d\n",
               deviceProp.maxThreadsDim[0],
               deviceProp.maxThreadsDim[1],
               deviceProp.maxThreadsDim[2]);
        printf("Maximum size of each dimension of a grid: %d x %d x %d\n",
               deviceProp.maxGridSize[0],
               deviceProp.maxGridSize[1],
               deviceProp.maxGridSize[2]);
        printf("Maximum memory pitch: %zu bytes\n", deviceProp.memPitch);
        printf("Texture alignmemt: %zu bytes\n", deviceProp.texturePitchAlignment);

        // clockRate is scaled by 1e-6 to print as GHz, memoryClockRate by 1e-3 to print as MHz.
        printf("Clock rate: %.2f GHz\n", deviceProp.clockRate * 1e-6f);
        printf("Memory Clock rate: %.0f MHz\n", deviceProp.memoryClockRate * 1e-3f);
        printf("Memory Bus Width: %d-bit\n", deviceProp.memoryBusWidth);
    }

    return 0;
}
輸出結果: