I am having a silly problem I just can't figure out. In my kernel I want to use a char array to hold values in the range 0-255, but I am unable to do so: when I read back the char array on the host side, it has no values in it. I have a simple test kernel that tries to fill the output array with the char values 1 through 10 as an example of my problem. I have tested it on Apple's OpenCL implementation as well as NVIDIA's, so I assume I'm doing something wrong. If I make the output array of type int, it works correctly. Thanks in advance.
I use the Khronos-provided C++ bindings:
main.cpp
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>
#include "cl.hpp"
int main(void)
{
cl_int err = CL_SUCCESS;
cl::Context context(CL_DEVICE_TYPE_GPU, 0, NULL, NULL, &err);
std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
char * outH = new char[10];
cl::Buffer outCL(context,CL_MEM_WRITE_ONLY| CL_MEM_USE_HOST_PTR,sizeof(cl_char)*10,outH,
&err);
std::ifstream file("kernel.cl");
std::string prog(std::istreambuf_iterator<char>(file),(std::istreambuf_iterator<char>()));
cl::Program::Sources source(1,std::make_pair(prog.c_str(), prog.length()+1));
cl::Program program(context, source);
program.build(devices,"");
cl::Kernel kernel(program, "testKernel", &err);
kernel.setArg(0, outCL);
cl::CommandQueue queue(context, devices[0], 0, &err);
cl::Event event;
queue.enqueueNDRangeKernel(kernel,
cl::NullRange,
cl::NDRange(10),
cl::NDRange(1, 1),
NULL,
&event);
event.wait();
queue.enqueueReadBuffer(outCL,CL_TRUE,0,sizeof(cl_char)*10,outH);
for (int i=0; i<10; i++) {
std::cout<<outH[i]<<std::endl;
}
return 0;
}
and kernel.cl
/* Enables byte-sized stores through pointers; the kernel below writes single
 * chars to global memory. */
#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable

/* Values copied to the output, one per work-item. */
__constant char charArray[] = {1,2,3,4,5,6,7,8,9,10};

/*
 * Copies charArray[i] into out[i] for work-item i of dimension 0.
 *
 * out: global buffer with at least one char per work-item that falls inside
 *      the table. It is a char pointer so each work-item stores exactly one
 *      byte; an int pointer would store 4 bytes per element.
 */
__kernel void testKernel(__global char * out)
{
    const size_t tid = get_global_id(0);
    /* Work-items beyond the end of the table do nothing. */
    if (tid < sizeof(charArray) / sizeof(charArray[0])) {
        out[tid] = charArray[tid];
    }
}