Results 1 to 2 of 2

Thread: CL_INVALID_COMMAND_QUEUE issue on NVIDIA

  1. #1
    Newbie
    Join Date
    Jul 2017
    Posts
    1

    Question CL_INVALID_COMMAND_QUEUE issue on NVIDIA

    I would appreciate it if somebody could help. I have a small kernel that always finishes with a CL_INVALID_COMMAND_QUEUE error. I've tried it on different hardware (GTX 765M and GTX 980), and the result is the same.
    Here is the code (host + kernel):

    //get all platforms (drivers)
    std::vector<cl::Platform> all_platforms;
    cl::Platform::get(&all_platforms);
    if(all_platforms.size()==0){
    std::cout<<" No platforms found. Check OpenCL installation!\n";
    exit(1);
    }
    cl::Platform default_platform=all_platforms[1];
    std::cout << "Using platform: "<<default_platform.getInfo<CL_PLATFORM_NAME>()<<" \n";

    //get default device of the default platform
    std::vector<cl:evice> all_devices;
    default_platform.getDevices(CL_DEVICE_TYPE_ALL, &all_devices);
    if(all_devices.size()==0){
    std::cout<<" No devices found. Check OpenCL installation!\n";
    exit(1);
    }
    cl:evice default_device=all_devices[0];
    std::cout<< "Using device: "<<default_device.getInfo<CL_DEVICE_NAME>()<<" \n";

    cl::Context context({default_device});

    cl::Program::Sources sources;

    std::string kernel_code=
    "__kernel void test(__global float* A,__global float* R) {"
    "int i = get_global_id(0);"
    "if(i>=1075021) return;"
    "if(i<60000) {"
    "R[i]=0;"
    "return;"
    "};"
    "float vm=0.f;"
    "for(int j=i-60000;j<=i;++j)"
    "vm+=A[j];"
    "R[i]=vm;"
    "};";

    sources.push_back({kernel_code.c_str(),kernel_code .length()});

    cl::Program program(context,sources);
    if(program.build({default_device})!=CL_SUCCESS){
    std::cout<<" Error building: "<<program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(defa ult_device)<<"\n";
    exit(1);
    }

    size_t n=1075021;
    // create buffers on the device
    cl::Buffer buffer_A(context,CL_MEM_READ_WRITE,sizeof(float)*n );
    cl::Buffer buffer_R(context,CL_MEM_READ_WRITE,sizeof(float)*n );

    float *A = new float[n];
    float *R = new float[n];

    srand (time(NULL));

    for(size_t i=0;i<n;++i)
    A[i]=rand()%10;

    cl::CommandQueue queue(context,default_device);

    cl_int ret;
    ret=queue.enqueueWriteBuffer(buffer_A,CL_TRUE,0,si zeof(float)*n,A);
    ret=queue.finish();

    cl::Kernel kernel_test=cl::Kernel(program,"test");
    kernel_test.setArg(0,buffer_A);
    kernel_test.setArg(1,buffer_R);

    size_t max_work_size=1024;
    size_t num_work_groups = (n-1) / max_work_size + 1;
    size_t global_size_padded = num_work_groups * max_work_size;

    queue.enqueueNDRangeKernel(kernel_test,cl::NullRan ge,cl::NDRange(global_size_padded),cl::NDRange(max _work_size));
    ret=queue.finish();

    ret=queue.enqueueReadBuffer(buffer_R,CL_TRUE,0,siz eof(float)*n,R);

  2. #2
    Newbie
    Join Date
    Oct 2015
    Posts
    2
    Maybe you want index 0?
    // Suggested change: take the first entry of all_platforms (index 0) instead of index 1.
    cl::Platform default_platform=all_platforms[0];

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •  
Proudly hosted by Digital Ocean