Hi all, I’m testing the following code on my Nvidia GPU (GTX 750 Ti with the latest CUDA 8.0 / OpenCL driver and library) and don’t quite understand why it’s not working as expected.
The same code seems to work fine with OpenCL on a Mac CPU. Any comments? Thanks in advance.
#include <iostream>
#include <CL/cl.h>
#include <assert.h>
#include <string.h>
// File-scope OpenCL state; every variable below is populated inside main().

// Platform handle written by clGetPlatformIDs (only one entry is requested).
cl_platform_id platform_id;
// Output count written by clGetDeviceIDs.
cl_uint ret_num_devices;
// Output count written by clGetPlatformIDs.
cl_uint ret_num_platforms;
// Device handle written by clGetDeviceIDs, queried with CL_DEVICE_TYPE_GPU.
cl_device_id device_id;
// Context created for device_id.
cl_context context;
// Command queue created on context/device_id with properties == 0.
cl_command_queue command_queue;
// Buffer created with CL_MEM_ALLOC_HOST_PTR; main() maps it and fills it from the host.
cl_mem gpu_cache_ptr;
// Host pointer returned by clEnqueueMapBuffer for gpu_cache_ptr.
void *cpu_ptr;
// Holds the cl_mem handle of the second buffer, cast to void* (it is not a
// dereferenceable host address).
void *gpu_ptr;
int main() {
cl_int ret;
// setup
ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
assert(ret == CL_SUCCESS);
ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, &ret_num_devices);
assert(ret == CL_SUCCESS);
context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
assert(ret == CL_SUCCESS);
command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
assert(ret == CL_SUCCESS);
// create mapped buffer
size_t size = 10;
gpu_cache_ptr = clCreateBuffer(context, CL_MEM_READ_WRITE|CL_MEM_ALLOC_HOST_PTR, size, NULL, &ret);
assert(ret == CL_SUCCESS);
cpu_ptr = clEnqueueMapBuffer(command_queue, gpu_cache_ptr, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, size, 0, NULL, NULL, &ret);
assert(ret == CL_SUCCESS);
memset(cpu_ptr, 0, size);
// fill data
memset(cpu_ptr, 'h', 1);
memset(cpu_ptr+1, 'e', 1);
memset(cpu_ptr+2, 'l', 1);
memset(cpu_ptr+3, 'l', 1);
memset(cpu_ptr+4, 'o', 1);
memset(cpu_ptr+5, '\0', 1);
std::cout << "cpu_ptr=" << (char*)cpu_ptr << std::endl;
// create another gpu mem
cl_mem gpu_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, size, NULL, &ret);
assert(ret == CL_SUCCESS);
gpu_ptr = (void *) gpu_mem;
// copy data to this gpu mem
ret = clEnqueueCopyBuffer(command_queue, (cl_mem) gpu_cache_ptr, (cl_mem) gpu_ptr, 0, 0, size, 0, NULL, NULL);
assert(ret == CL_SUCCESS);
clFinish(command_queue);
char* recovered_value = new char[size];
// copy back to cpu buffer
ret = clEnqueueReadBuffer(command_queue, (cl_mem) gpu_ptr, CL_TRUE, 0, size, recovered_value, 0, NULL, NULL);
assert(ret == CL_SUCCESS);
std::cout << "recovered_value=" << recovered_value << std::endl;
delete[] recovered_value;
}