cudaMemcpy用於在主機(Host)和設備(Device)之間往返傳遞數據,用法如下:
主機到設備:cudaMemcpy(d_A,h_A,nBytes,cudaMemcpyHostToDevice)
設備到主機:cudaMemcpy(h_A,d_A,nBytes,cudaMemcpyDeviceToHost)
注意:該函數是同步(阻塞)函數,在數據傳輸完成之前不會返回,調用它的主機線程會一直阻塞等待,所以調用之後不需要再添加cudaDeviceSynchronize()函數
示例:
// Copies a vector's contents between a host array and the buffer referenced
// by a realVecHandle, using a blocking cudaMemcpy.
//
// Parameters:
//   addr     - handle whose `data` member is used as the device-side pointer
//              and whose `size` member is passed as the copy length.
//   hostArr  - host-side float array (source or destination of the copy).
//   buffer   - not used by this function.
//   tohost   - true: copy addr->data into hostArr (device to host);
//              false: copy hostArr into addr->data (host to device).
//   copyMode - only mode 0 performs a copy; any other value is a no-op.
//
// NOTE(review): addr->size is handed to cudaMemcpy unchanged, so it is
// presumably a byte count rather than an element count - confirm against
// the realVecHandle definition.
// NOTE(review): the cudaError_t returned by cudaMemcpy is discarded, so a
// failed transfer is not reported to the caller.
void VectorReal_copyBuffer(realVecHandle* addr, float hostArr[], long buffer, bool tohost, int copyMode) {
    // Modes other than 0 do nothing.
    if (copyMode != 0) {
        return;
    }

    if (tohost) {
        // Device -> host.
        cudaMemcpy(hostArr, addr->data, addr->size, cudaMemcpyDeviceToHost);
        return;
    }

    // Host -> device.
    cudaMemcpy(addr->data, hostArr, addr->size, cudaMemcpyHostToDevice);
}