1. 程式人生 > >win+cuda +vs 新CUDA專案配置

win+cuda +vs 新CUDA專案配置

step1:新建一個專案(可空)
step2:右鍵專案 → 屬性 → 配置管理器 → 全改為“x64”
step3:右鍵專案 → 生成依賴項 → 生成自定義 → 勾選“CUDA 9.0XXX”
這裡寫圖片描述
step4:右鍵專案 → 屬性 → C/C++ → 附加包含目錄增加:
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0\include
I:\cudnn\include

這裡寫圖片描述
step5:右鍵專案 → 屬性 → 連結器 → 輸入 → 附加依賴項增加:
cublas.lib
curand.lib
cudart.lib
測試程式碼:

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>

// Enumerates every CUDA device reported by the runtime and prints the driver
// and runtime versions followed by a selection of cudaDeviceProp fields.
//
// Returns 0 when the device count could be queried (even if it is zero),
// and 1 when cudaGetDeviceCount itself fails.
int main()
{
    // Initialised so that a failed query can never leave the loop bound undefined.
    int deviceCount = 0;
    cudaError_t status = cudaGetDeviceCount(&deviceCount);
    if (status != cudaSuccess) {
        fprintf(stderr, "cudaGetDeviceCount failed: %s\n", cudaGetErrorString(status));
        return 1;
    }

    for (int dev = 0; dev < deviceCount; dev++) {
        int driver_version = 0;
        int runtime_version = 0;
        cudaDeviceProp deviceProp;

        status = cudaGetDeviceProperties(&deviceProp, dev);
        if (status != cudaSuccess) {
            // Skip this device rather than printing an uninitialised struct.
            fprintf(stderr, "cudaGetDeviceProperties(%d) failed: %s\n", dev,
                    cudaGetErrorString(status));
            continue;
        }

        // Compare (==) instead of assigning: the original `minor = 9999 && ...`
        // overwrote deviceProp.minor for device 0 before it was printed below.
        if (dev == 0 && deviceProp.major == 9999 && deviceProp.minor == 9999)
            printf("\n");

        printf("\nDevice%d:\"%s\"\n", dev, deviceProp.name);

        status = cudaDriverGetVersion(&driver_version);
        if (status != cudaSuccess)
            fprintf(stderr, "cudaDriverGetVersion failed: %s\n", cudaGetErrorString(status));
        printf("CUDA驅動版本: %d.%d\n", driver_version / 1000, (driver_version % 1000) / 10);

        status = cudaRuntimeGetVersion(&runtime_version);
        if (status != cudaSuccess)
            fprintf(stderr, "cudaRuntimeGetVersion failed: %s\n", cudaGetErrorString(status));
        printf("CUDA執行時版本: %d.%d\n", runtime_version / 1000, (runtime_version % 1000) / 10);

        printf("裝置計算能力: %d.%d\n", deviceProp.major, deviceProp.minor);

        // The size_t members are cast to unsigned long long and printed with
        // %llu: %u truncates on x64, and %llu also works on older MSVC runtimes.
        printf("Total amount of Global Memory: %llu bytes\n",
               (unsigned long long)deviceProp.totalGlobalMem);
        printf("Number of SMs: %d\n", deviceProp.multiProcessorCount);
        printf("Total amount of Constant Memory: %llu bytes\n",
               (unsigned long long)deviceProp.totalConstMem);
        printf("Total amount of Shared Memory per block: %llu bytes\n",
               (unsigned long long)deviceProp.sharedMemPerBlock);
        printf("Total number of registers available per block: %d\n", deviceProp.regsPerBlock);
        printf("Warp size: %d\n", deviceProp.warpSize);
        printf("Maximum number of threads per SM: %d\n", deviceProp.maxThreadsPerMultiProcessor);
        printf("Maximum number of threads per block: %d\n", deviceProp.maxThreadsPerBlock);
        printf("Maximum size of each dimension of a block: %d x %d x %d\n",
               deviceProp.maxThreadsDim[0], deviceProp.maxThreadsDim[1],
               deviceProp.maxThreadsDim[2]);
        printf("Maximum size of each dimension of a grid: %d x %d x %d\n",
               deviceProp.maxGridSize[0], deviceProp.maxGridSize[1],
               deviceProp.maxGridSize[2]);
        printf("Maximum memory pitch: %llu bytes\n", (unsigned long long)deviceProp.memPitch);
        printf("Texture alignmemt: %llu bytes\n",
               (unsigned long long)deviceProp.texturePitchAlignment);
        // clockRate and memoryClockRate are scaled exactly as in the original
        // (x 1e-6 labelled GHz, x 1e-3 labelled MHz).
        printf("Clock rate: %.2f GHz\n", deviceProp.clockRate * 1e-6f);
        printf("Memory Clock rate: %.0f MHz\n", deviceProp.memoryClockRate * 1e-3f);
        printf("Memory Bus Width: %d-bit\n", deviceProp.memoryBusWidth);
    }

    return 0;
}

這裡寫圖片描述

test2

#include "cuda_runtime.h"
#include <stdio.h>
#include "device_launch_parameters.h"

bool InitCUDA()

{

    int count;

    cudaGetDeviceCount(&count);

    if (count == 0)

    {

        fprintf(stderr, "There is no device.\n");

        return false;

    }

    int i;

    for (i = 0; i < count; i++)

    {

        cudaDeviceProp prop;

        if (cudaGetDeviceProperties(&prop, i) == cudaSuccess)

        {

            if (prop.major >= 1)

            {

                break;

            }

        }

    }

    if (i == count)

    {

        fprintf(stderr, "There is no device supporting CUDA 1.x.\n");

        return false;

    }

    cudaSetDevice(i);

    return true;

}



// Entry point of the second test program: initialises CUDA through InitCUDA()
// and prints a confirmation line on success.
//
// Returns 0 on success and 1 when InitCUDA() reports failure, so that a
// failed initialisation is visible in the process exit status.
int main()
{
    if (!InitCUDA())
    {
        // InitCUDA() has already written the reason to stderr.
        return 1;
    }

    printf("HelloWorld, CUDA has been initialized.\n");
    return 0;
}