Could you please help me with this? There must be something I'm overlooking.
I'm passing two buffers to the kernel, each holding 4 values. One contains the input data and the other should return data from the kernel. So when I run my kernel with a global_work_size of 4, it should work. Instead, I get only 2 values back in my return buffer. Everything works if I double global_work_size to 8. Why?
Here is the code (very simplified):
kernel:
/* Request double-precision support explicitly. On OpenCL 1.0/1.1 the
 * `double` type is only available through the cl_khr_fp64 extension.
 * NOTE(review): some drivers reportedly demote `double` to `float` when the
 * extension is not enabled, which would make each work-item move 4 bytes
 * instead of 8 -- confirm the device lists cl_khr_fp64 in
 * CL_DEVICE_EXTENSIONS and inspect the program build log. */
#pragma OPENCL EXTENSION cl_khr_fp64 : enable

/**
 * Copies one element from Bij to result.
 *
 * Each work-item copies the element at its global id in dimension 0.
 * There is no bounds check, so the global work size must not exceed the
 * number of elements in either buffer.
 *
 * @param Bij     input buffer (read-only)
 * @param result  output buffer, written at the same index that is read
 */
__kernel void mainGard(__global const double *Bij,
                       __global double *result)
{
    /* get_global_id returns size_t; keep the full width for the index. */
    size_t gid = get_global_id(0);
    result[gid] = Bij[gid];
}
host:
// Number of cl_double elements in the input buffer and in the output buffer.
// Declared const so it can size the `result` array as a constant expression
// and serve as the single source of truth for loop bound and work size.
const cl_uint nTotalBij = 4;

// Host-side input data: Bij[i] = i.
// NOTE(review): allocated with new[]; the matching delete[] is not visible
// in this fragment.
cl_double *Bij = new cl_double[nTotalBij];
for (cl_uint i = 0; i < nTotalBij; i++)
    Bij[i] = i;

// Host-side destination for the kernel output (read-back is not shown here).
cl_double result[nTotalBij];

// Input buffer: initialised from Bij at creation (CL_MEM_COPY_HOST_PTR).
// NOTE(review): errcode_ret is NULL for both buffers, so creation failures
// go unnoticed -- consider passing &errCode and checking it.
memObjects[0] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                               sizeof(cl_double) * nTotalBij, Bij, NULL);
// Output buffer: same byte size, no host pointer.
memObjects[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
                               sizeof(cl_double) * nTotalBij, NULL, NULL);

// Bind the buffers to kernel arguments 0 (Bij) and 1 (result).
// NOTE(review): errCode is overwritten by each call without being checked.
errCode = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObjects[0]);
errCode = clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObjects[1]);

// One work-item per element; the global size is tied to nTotalBij.
size_t globalWorkSize[1] = { nTotalBij };
size_t localWorkSize[1] = { 1 };

// Queue the kernel up for execution across the array
errCode = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL,
                                 globalWorkSize, localWorkSize,
                                 0, NULL, &event);