OpenCL C++ bindings: enqueueNDRangeKernel ignores offset and global argument

Hello, I have a very simple example program and I cannot find the mistake.

The global and offset arguments that I pass to enqueueNDRangeKernel are completely ignored. get_global_id(0) always returns values from 0 to the local argument minus 1 (in this case 49). As I understand the API, I should get 30 to 79. Where is the problem?



#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// cl::Error is only declared when exceptions are enabled before cl.hpp is included.
#define __CL_ENABLE_EXCEPTIONS
#include <CL/cl.hpp>

// OpenCL C source of the test kernel. Each work-item stores its own global ID
// into A at that index (for IDs below 100); B and N are accepted but unused.
// Every line ends in an explicit "\n" escape so the adjacent string literals
// concatenate into valid, readable kernel source.
const std::string kernelSource =
   "__kernel void square ( __global float* A, const __global float* B, int N)\n"
   "{\n"
   "   int i = get_global_id(0);\n"
   "   if (i<100) A[i] = i;\n"
   "}\n";

int main(int argc, char *argv[])
{
   try {
      std::vector<cl::Platform> platforms;
      cl_int N = 100;
      std::vector<cl::Device> devices;
      cl::Platform::get(&platforms);   
      platforms.begin()->getDevices(CL_DEVICE_TYPE_ALL, & devices);
      cl::Context context(devices);
      cl::Device& device = devices.front();
      cl::CommandQueue cmdqueue(context, device);
      cl::Buffer inputBuffer(context, CL_MEM_READ_WRITE, 200 *  sizeof(cl_float));
      cl::Buffer outputBuffer(context, CL_MEM_READ_WRITE, 200 *  sizeof(cl_float));
      cl_float inputs[200],outputs[200];
      for (int i=0;i<100;i++){
         inputs[i] = i;
         outputs[i] = 0;
      }
      cmdqueue.enqueueWriteBuffer(inputBuffer, true, 0, 200*sizeof(cl_float),inputs);
      cmdqueue.finish();
      cl::Program::Sources sourceCode;
      sourceCode.push_back(std::make_pair(kernelSource.c_str(),kernelSource.size()));
      cl::Program program = cl::Program(context, sourceCode);
      program.build(devices);
      cl::Kernel kernel(program, "square");
      kernel.setArg(0, outputBuffer);
      kernel.setArg(1, inputBuffer);
      kernel.setArg(2,N);
      cl::Event event;
      cmdqueue.enqueueNDRangeKernel(kernel,cl::NDRange(30),cl::NDRange(50), cl::NDRange(25),NULL,&event);
      event.wait();
      cmdqueue.enqueueReadBuffer(outputBuffer, true, 0, 200*sizeof(cl_float),outputs);
      cmdqueue.finish();
      for (int i=0;i<100;i++){
         std::cout << outputs[i] << std::endl;
      }
   }
   catch (cl::Error& err) {
      std::cerr << "OpenCL error: " << err.what() << "(" << err.err() << ")" << std::endl;
      return 1;
   }
}

I’ve just run your code and it produces the desired results (30-79). Which OpenCL platform/device are you running this on?