Hello All,
I am pretty new to GPU computing with OpenCL. Recently my program started returning error code -36 (CL_INVALID_COMMAND_QUEUE) when it calls clFinish.
The C++ code is really long, so I have pasted only the major steps below.
Any comments will be appreciated!
// Step 01: Request a single platform ID
OpenCLobj.err = clGetPlatformIDs(
    1,
    &OpenCLobj.platform_id,
    &OpenCLobj.ret_num_platforms );
err_check( OpenCLobj.err, "clGetPlatformIDs" );

// Step 02: Request one device of the platform's default type
OpenCLobj.err = clGetDeviceIDs(
    OpenCLobj.platform_id,
    CL_DEVICE_TYPE_DEFAULT,
    1,
    &OpenCLobj.device_id,
    &OpenCLobj.ret_num_devices );
err_check( OpenCLobj.err, "clGetDeviceIDs" );

// Step 03: Create a context for that one device
// (no context properties, no notification callback)
OpenCLobj.context = clCreateContext(
    NULL,
    1,
    &OpenCLobj.device_id,
    NULL,
    NULL,
    &OpenCLobj.err );
err_check( OpenCLobj.err, "clCreateContext" );

// Step 04: Create the command queue on that device with no queue properties
OpenCLobj.command_queue = clCreateCommandQueue(
    OpenCLobj.context,
    OpenCLobj.device_id,
    0,
    &OpenCLobj.err );
err_check( OpenCLobj.err, "clCreateCommandQueue" );
// Step 06: Read kernel file
ifstream file("kernel.cl");
string prog( istreambuf_iterator<char>( file ), ( istreambuf_iterator<char>() ) );
const char *source_str = prog.c_str();
// Step 07: Create Kernel program from the read in source
OpenCLobj.program = clCreateProgramWithSource( OpenCLobj.context, 1, (const char **) &source_str, 0, &OpenCLobj.err ); err_check( OpenCLobj.err, "clCreateProgramWithSource" );
// Step 08: Build the program for the selected device.
// On a compile failure the device's build log is printed before the
// generic error check runs.
OpenCLobj.err = clBuildProgram( OpenCLobj.program, 1, &OpenCLobj.device_id, NULL, NULL, NULL );
if ( OpenCLobj.err == CL_BUILD_PROGRAM_FAILURE )
{
    cout << "CL_BUILD_PROGRAM_FAILURE" ;

    // Ask for the log length first so the buffer is always large enough;
    // a fixed-size buffer makes the query fail for longer logs.
    size_t build_log_size = 0;
    cl_int log_err = clGetProgramBuildInfo(
        OpenCLobj.program, OpenCLobj.device_id,
        CL_PROGRAM_BUILD_LOG, 0, NULL, &build_log_size );

    if ( log_err == CL_SUCCESS && build_log_size > 0 )
    {
        // string owns the storage, so nothing is leaked on any path.
        string build_log( build_log_size, '\0' );
        log_err = clGetProgramBuildInfo(
            OpenCLobj.program, OpenCLobj.device_id,
            CL_PROGRAM_BUILD_LOG, build_log_size, &build_log[0], NULL );

        // Only print when the log was actually retrieved.
        if ( log_err == CL_SUCCESS )
        {
            cout << build_log.c_str();
        }
    }
}
err_check( OpenCLobj.err, "clBuildProgram" );
// Step 09: Obtain the "padding_center" kernel from the built program
cl_kernel kernel = clCreateKernel(
    OpenCLobj.program,
    "padding_center",
    &OpenCLobj.err );
err_check3( OpenCLobj.err, "clCreateKernel" );
// Step 05: Create memory objects and tranfer the data to memory buffer
cl_mem mobj_diffmatrix = NULL;
mobj_diffmatrix = clCreateBuffer(OpenCLobj.context, CL_MEM_READ_WRITE, sizeof(float)*AD.Xdim*AD.Ydim, NULL, &OpenCLobj.err); err_check3( OpenCLobj.err, "clCreateBuffer" );
OpenCLobj.err = clEnqueueWriteBuffer( OpenCLobj.command_queue, mobj_diffmatrix, CL_TRUE, 0, sizeof(float)*AD.Xdim*AD.Ydim, diffmatrix, 0, NULL, NULL ); err_check3( OpenCLobj.err, "clEnqueueWriteBuffer" );
// Step 10: Bind the two memory objects to kernel arguments 0 and 1
OpenCLobj.err = clSetKernelArg(
    kernel,
    0,
    sizeof( cl_mem ),
    &mobj_diffmatrix );
err_check3( OpenCLobj.err, "clSetKernelArg" );

OpenCLobj.err = clSetKernelArg(
    kernel,
    1,
    sizeof( cl_mem ),
    &mobj_tmpdiff );
err_check3( OpenCLobj.err, "clSetKernelArg" );
// Step 11: Enqueue the kernel AD.nIter times, then wait for the queue to drain.
//
// Every enqueue result is stored in OpenCLobj.err before it is checked.
// Without that, a failing clEnqueueNDRangeKernel goes unnoticed (the check
// sees the stale value from the previous call) and the first visible symptom
// is a later call such as clFinish failing.
//
// NOTE(review): the work-group size is hard-coded to 1000. If that exceeds
// what the device/kernel supports, the enqueue is expected to fail with
// CL_INVALID_WORK_GROUP_SIZE - verify against CL_KERNEL_WORK_GROUP_SIZE
// for this kernel.
size_t work = 1000;       // global work size (1-D)
size_t localwork = 1000;  // work-group size (1-D)
cl_event clEvent = NULL;
for (int i = 0; i < AD.nIter; i++)
{
    // Release the event from the previous iteration before it is
    // overwritten; only the most recent event is kept alive.
    if ( clEvent != NULL )
    {
        clReleaseEvent( clEvent );
        clEvent = NULL;
    }

    OpenCLobj.err = clEnqueueNDRangeKernel(
        OpenCLobj.command_queue, kernel,
        1, NULL, &work, &localwork,
        0, NULL, &clEvent );
    err_check3( OpenCLobj.err, "clEnqueueNDRangeKernel" );
}
OpenCLobj.err = clFlush( OpenCLobj.command_queue ); err_check3( OpenCLobj.err, "clFlush" );
OpenCLobj.err = clFinish( OpenCLobj.command_queue ); err_check3( OpenCLobj.err, "clFinish" );