Data not copied from host to device

I have a byte array with values in RGBA format and tried to copy the array as an image to the device memory. But the content of the image on the device doesn’t contain anything.

I tried to copy the content of one image to another one as a first example.

I also tried to set the pixel of the second image with a color directly and copied the content back from device to the host. That worked. Just the other way around is not working. I of course checked that the byte array was not empty.

Can someone help me please?

Here is my code:


const char *KernelSource =                                            "
" \
"__kernel void luminance(__read_only  image2d_t input,  
" \
"                        __write_only image2d_t output,              
" \
"                        sampler_t sampler)                                 
" \
"{                                                                                      
" \
"                                                                                      
" \
"    int2 pos = (int2)(get_global_id(0), get_global_id(1));   
" \
"    float4 color = read_imagef(input, sampler, pos);        
" \
"    //float4 color = (float4)(0,0,0,0.5);                             
" \
"    write_imagef(output, pos, color);                                
" \
"}                                                                                     
" \
"
";

inline void ImageBuffer::platformConvertToLuminanceMask()
{
    IntRect luminanceRect(IntPoint(), size());
    RefPtr<ByteArray> srcPixelArray = getUnmultipliedImageData(luminanceRect);
    unsigned char* data = srcPixelArray->data();

    cl_device_id clDeviceId;
    cl_int error = clGetDeviceIDs(0, CL_DEVICE_TYPE_CPU, 1, &clDeviceId, 0);
    errorCode(error);
    if (error != CL_SUCCESS)
        return;

    cl_context clContext = clCreateContext(0, 1, &clDeviceId, 0, 0, &error);
    errorCode(error);
    if (!clContext)
        return;

    cl_command_queue clQueue = clCreateCommandQueue(clContext, clDeviceId, 0, &error);
    if (!clQueue)
        return;
    errorCode(error);

    cl_program clProgram = clCreateProgramWithSource(clContext, 1, (const char **) & KernelSource, 0, &error);
    if (!clProgram)
        return;

    error = clBuildProgram(clProgram, 0, 0, 0, 0, 0);
    errorCode(error);
    if (error != CL_SUCCESS)
        return;

    cl_kernel clKernel = clCreateKernel(clProgram, "luminance", &error);
    errorCode(error);

    if (!clKernel || error != CL_SUCCESS)
        return;
    
    cl_image_format imageFormat;
    imageFormat.image_channel_data_type = CL_UNORM_INT8;
    imageFormat.image_channel_order = CL_RGBA;

    cl_mem clInputImage = clCreateImage2D(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, &imageFormat, luminanceRect.width(), luminanceRect.height(), 0, data, &error);
    errorCode(error);

    RefPtr<ByteArray> result = ByteArray::create(srcPixelArray->length());
    unsigned char* dest = result->data();
    cl_mem clOutputImage = clCreateImage2D(clContext, CL_MEM_WRITE_ONLY, &imageFormat, luminanceRect.width(), luminanceRect.height(), 0, 0, &error);
    errorCode(error);

    if (!clInputImage || !clOutputImage)
        return;

    size_t region[3] = {luminanceRect.width(), luminanceRect.height(), 1};
    size_t origin[3] = {0, 0, 0};
    // Didn't work either:
    //cl_mem clInputImage = clCreateImage2D(clContext, CL_MEM_READ_ONLY, &imageFormat, luminanceRect.width(), luminanceRect.height(), 0, 0, &error);
    //error = clEnqueueWriteImage(clQueue, clInputImage, CL_TRUE, origin, region, 0, 0, data, 0, 0, 0);
    //errorCode(error);

    if (error != CL_SUCCESS)
        return;

    cl_sampler sampler = clCreateSampler(clContext, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &error);
    errorCode(error);

    error = 0;
    error  = clSetKernelArg(clKernel, 0, sizeof(cl_mem), &clInputImage);
    error |= clSetKernelArg(clKernel, 1, sizeof(cl_mem), &clOutputImage);
    error |= clSetKernelArg(clKernel, 2, sizeof(cl_sampler), &sampler);
    errorCode(error);

    if (error != CL_SUCCESS)
        return;

    // Get the maximum work group size for executing the kernel on the device
    size_t clLocalWorkSize[1] = {1};
    //error = clGetKernelWorkGroupInfo(clKernel, clDeviceId, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &clLocalWorkSize, 0);
    //errorCode(error);

    if (error != CL_SUCCESS)
        return;

    size_t clGlobalWorkSize[1] = {luminanceRect.width() * luminanceRect.height()};
    error = clEnqueueNDRangeKernel(clQueue, clKernel, 1, 0, clGlobalWorkSize, clLocalWorkSize, 0, 0, 0);
    errorCode(error);

    if (error)
        return;

    clFinish(clQueue);

    error = clEnqueueReadImage(clQueue, clOutputImage, CL_TRUE, origin, region, 0, 0, dest, 0, 0, 0);
    errorCode(error);
    
    
    RefPtr<ByteArray> temp = ByteArray::create(srcPixelArray->length());
    unsigned char* tmp = temp->data();
    
    error = clEnqueueReadImage(clQueue, clInputImage, CL_TRUE, origin, region, 0, 0, tmp, 0, NULL, NULL);
    errorCode(error);

    if (error != CL_SUCCESS)
        return;

    putUnmultipliedImageData(result.get(), luminanceRect.size(), luminanceRect, IntPoint());

    // Shutdown and cleanup
    clReleaseMemObject(clInputImage);
    clReleaseMemObject(clOutputImage);
    clReleaseProgram(clProgram);
    clReleaseKernel(clKernel);
    clReleaseCommandQueue(clQueue);
    clReleaseContext(clContext);
}

I did not get any error and the function completed.

clEnqueueNDRangeKernel(clQueue, clKernel, 1, 0, clGlobalWorkSize, clLocalWorkSize, 0, 0, 0);

You are enqueuing a 1-dimensional NDRange. However, your kernel source code is expecting a 2-dimensional NDRange.

Thanks

Thanks, I fixed the NDRange as well as the working group size to:


    size_t clLocalWorkSize[2] = {1, 1};
    size_t clGlobalWorkSize[2] = {luminanceRect.width(), luminanceRect.height()};
    error = clEnqueueNDRangeKernel(clQueue, clKernel, 2, 0, clGlobalWorkSize, clLocalWorkSize, 0, 0, 0);

But it did not fixed the missing data problem.

I tried to use the buffer (clCreateBuffer) and that worked without any problems.

I think the sampler is correct. The same for the image format. But I also tried uint for read/write colors in combination with CL_UNSIGNED_INT8 as image format. It seems that I still get (0,0,0,0) as color.

I copied the pixel data to an buffer instead of an image2d men and that seem to work:


__kernel void luminance(__read_only  image2d_t input,
                        __write_only image2d_t output,
                        __global char* buffer,
                        sampler_t sampler)
{
    int2 pos = (int2)(get_global_id(0), get_global_id(1));
    uint4 color = read_imageui(input, sampler, pos);

    // Values not copied to output. New created values like (uint4)(128, 128, 128, 255) works.
    write_imageui(output, pos, color);

    // This works:
    buffer[pos.x * 4] = color.x;
    buffer[pos.x * 4 + 1] = color.y;
    buffer[pos.x * 4 + 2] = color.z;
    buffer[pos.x * 4 + 3] = color.w;
}

I checked the first values of both, the image2d and the buffer. The buffer contained the information that I expected (the value of the input every 4th char entry).

Why is it not possible to copy pixel data between two image2d objects?

It is possible, of course. Why else would write_imageX() exist?

If you’re still using CL_UNORM_INT8 as the data format as from your host-code above, then you need to use read_imagef and write_imagef.