I am writing an OpenCL program that runs on CPUs, so the host and the device are essentially the same hardware. When I run my program using a single host thread, everything works fine, but when I use more than one host thread to make OpenCL calls (one thread creates the context, another builds the program), it dies at clBuildProgram.
I use AMD SDK v2.5, which provides OpenCL 1.1. Khronos claims that OpenCL 1.1 offers host-thread safety, enabling OpenCL commands to be enqueued from multiple host threads. What can I do to make this work? Please help.
I use CUDA OpenCL on another machine with a GPU device, and it works fine with multiple host threads.
The following is my code.
Single-thread version (works):
// OpenCL context used by the program; assigned in main() from clCreateContext().
// Despite the "GPU" in the name, main() requests a CL_DEVICE_TYPE_CPU device.
cl_context cxGPUContext;
// OpenCL C source handed to clCreateProgramWithSource().
//
// It defines one kernel, kernel_main: each work-item whose 2-D global id lies
// inside [dim_d0_begin, dim_d0_end) x [dim_d1_begin, dim_d1_end) copies
// _region_x[idx_x] into _region__pb_rv[idx__pb_rv].
//
// Every source line must end in an explicit "\n": the text starts with a
// "//" comment and contains preprocessor directives (#pragma, #define), so
// without the line breaks the comment would swallow the whole kernel.
const char *ProgramSource =
    "// :::: main\n"
    "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
    "__kernel void kernel_main( __global float* _region__pb_rv, __global float* _region_x, int h, int w, int dim_d0_begin, int dim_d0_end, int dim_d1_begin, int dim_d1_end, int dim__pb_rv_d0, int dim_x_d0 ) {\n"
    "unsigned int _r3_x = get_global_id( 0 );\n"
    "unsigned int _r3_y = get_global_id( 1 );\n"
    "int i = _r3_x;\n"
    "int j = _r3_y;\n"
    "if( _r3_x >= dim_d0_begin && _r3_x < dim_d0_end && _r3_y >= dim_d1_begin && _r3_y < dim_d1_end ) {\n"
    "unsigned int idx__pb_rv = (_r3_x+(dim__pb_rv_d0*(_r3_y+0)));\n"
    "unsigned int idx_x = (_r3_x+(dim_x_d0*(_r3_y+0)));\n"
    "#define RETURN(x) _region__pb_rv[idx__pb_rv] = x; return\n"
    "RETURN ( _region_x [ idx_x ] );\n"
    "}\n"
    "}\n"
    "";
int main(int argc, const char** argv)
{
cl_platform_id cpPlatform = NULL;
cl_uint ciDeviceCount = 0;
cl_device_id device_id;
cl_int ciErrNum = CL_SUCCESS;
cl_uint numPlatforms;
ciErrNum = clGetPlatformIDs(0, NULL, &numPlatforms);
if(ciErrNum != CL_SUCCESS)
{
printf("Error: Failed to num platforms!
");
return ciErrNum;
}
if (0 < numPlatforms)
{
cl_platform_id* platforms = new cl_platform_id[numPlatforms];
ciErrNum = clGetPlatformIDs(numPlatforms, platforms, NULL);
if(ciErrNum != CL_SUCCESS)
{
printf("Error: Failed to get platform id!
");
return ciErrNum;
}
for (unsigned i = 0; i < numPlatforms; ++i)
{
char pbuf[100];
ciErrNum = clGetPlatformInfo(platforms[i],
CL_PLATFORM_VENDOR,
sizeof(pbuf),
pbuf,
NULL);
if(ciErrNum != CL_SUCCESS)
{
printf("Error: Failed to get platform info!
");
return ciErrNum;
}
cpPlatform = platforms[i];
if (!strcmp(pbuf, "Advanced Micro Devices, Inc."))
{
printf("platform %d
", i);
break;
}
}
delete[] platforms;
}
if(NULL == cpPlatform)
{
printf("NULL platform found so Exiting Application.
");
return 0;
}
ciErrNum = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_CPU, 0, NULL, &ciDeviceCount);
if (ciErrNum != CL_SUCCESS)
{
printf("Error: Failed to get num devices!
");
return ciErrNum;
}
ciErrNum = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_CPU, 1, &device_id, NULL);
if (ciErrNum != CL_SUCCESS)
{
printf("Error: Failed to get device id!
");
return ciErrNum;
}
cxGPUContext = clCreateContext(0, 1, &device_id, NULL, NULL, &ciErrNum);
if (ciErrNum != CL_SUCCESS)
{
printf("Error: Failed to create OpenCL context!
");
return ciErrNum;
}
cl_program cpProgram = clCreateProgramWithSource(cxGPUContext,1, (const char **) &ProgramSource, NULL, &ciErrNum);
if (ciErrNum != CL_SUCCESS)
{
printf("Error: Failed to create program
");
}
printf("clCreateProgramWithSource_main = %d success!
", cpProgram);
ciErrNum = clBuildProgram(cpProgram, 0, NULL, "-cl-fast-relaxed-math", NULL, NULL);
if (ciErrNum != CL_SUCCESS)
{
printf("Error: Failed to build program
");
}
printf("clBuildProgram_main = %d success!
", cpProgram);
return 0;
}
Two-thread version (fails):
// OpenCL context shared between threads: assigned in main() from
// clCreateContext() and read by mainLoop() on the worker thread.
cl_context cxGPUContext;
// Body of the worker thread (called from startGpuManager); defined after main().
void mainLoop();
// pthread entry point: adapts mainLoop() to the void *(*)(void *) signature
// expected by pthread_create(). The argument is ignored and the thread's
// result is always NULL.
extern "C" void *startGpuManager(void * /*unused*/)
{
    mainLoop();
    return NULL;
}
// OpenCL C source handed to clCreateProgramWithSource().
//
// It defines one kernel, kernel_main: each work-item whose 2-D global id lies
// inside [dim_d0_begin, dim_d0_end) x [dim_d1_begin, dim_d1_end) copies
// _region_x[idx_x] into _region__pb_rv[idx__pb_rv].
//
// Every source line must end in an explicit "\n": the text starts with a
// "//" comment and contains preprocessor directives (#pragma, #define), so
// without the line breaks the comment would swallow the whole kernel.
const char *ProgramSource =
    "// :::: main\n"
    "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
    "__kernel void kernel_main( __global float* _region__pb_rv, __global float* _region_x, int h, int w, int dim_d0_begin, int dim_d0_end, int dim_d1_begin, int dim_d1_end, int dim__pb_rv_d0, int dim_x_d0 ) {\n"
    "unsigned int _r3_x = get_global_id( 0 );\n"
    "unsigned int _r3_y = get_global_id( 1 );\n"
    "int i = _r3_x;\n"
    "int j = _r3_y;\n"
    "if( _r3_x >= dim_d0_begin && _r3_x < dim_d0_end && _r3_y >= dim_d1_begin && _r3_y < dim_d1_end ) {\n"
    "unsigned int idx__pb_rv = (_r3_x+(dim__pb_rv_d0*(_r3_y+0)));\n"
    "unsigned int idx_x = (_r3_x+(dim_x_d0*(_r3_y+0)));\n"
    "#define RETURN(x) _region__pb_rv[idx__pb_rv] = x; return\n"
    "RETURN ( _region_x [ idx_x ] );\n"
    "}\n"
    "}\n"
    "";
int main(int argc, const char** argv)
{
cl_platform_id cpPlatform = NULL;
cl_uint ciDeviceCount = 0;
cl_device_id device_id;
cl_int ciErrNum = CL_SUCCESS;
cl_uint numPlatforms;
ciErrNum = clGetPlatformIDs(0, NULL, &numPlatforms);
if(ciErrNum != CL_SUCCESS)
{
printf("Error: Failed to num platforms!
");
return ciErrNum;
}
printf("num platforms = %d
", numPlatforms);
if (0 < numPlatforms)
{
cl_platform_id* platforms = new cl_platform_id[numPlatforms];
ciErrNum = clGetPlatformIDs(numPlatforms, platforms, NULL);
if(ciErrNum != CL_SUCCESS)
{
printf("Error: Failed to get platform id!
");
return ciErrNum;
}
for (unsigned i = 0; i < numPlatforms; ++i)
{
char pbuf[100];
ciErrNum = clGetPlatformInfo(platforms[i],
CL_PLATFORM_VENDOR,
sizeof(pbuf),
pbuf,
NULL);
if(ciErrNum != CL_SUCCESS)
{
printf("Error: Failed to get platform info!
");
return ciErrNum;
}
cpPlatform = platforms[i];
if (!strcmp(pbuf, "Advanced Micro Devices, Inc."))
{
printf("platform %d
", i);
break;
}
}
delete[] platforms;
}
if(NULL == cpPlatform)
{
printf("NULL platform found so Exiting Application.
");
return 0;
}
ciErrNum = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_CPU, 0, NULL, &ciDeviceCount);
if (ciErrNum != CL_SUCCESS)
{
printf("Error: Failed to get num devices!
");
return ciErrNum;
}
printf("num devices = %d
", ciDeviceCount);
ciErrNum = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_CPU, 1, &device_id, NULL);
if (ciErrNum != CL_SUCCESS)
{
printf("Error: Failed to get device id!
");
return ciErrNum;
}
cxGPUContext = clCreateContext(0, 1, &device_id, NULL, NULL, &ciErrNum);
if (ciErrNum != CL_SUCCESS)
{
printf("Error: Failed to create OpenCL context!
");
return ciErrNum;
}
pthread_t _thread;
pthread_attr_t attr;
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, 0);
pthread_create(&_thread, &attr, startGpuManager, NULL);
pthread_attr_destroy(&attr);
return 0;
}
void mainLoop() {
cl_int ciErrNum = CL_SUCCESS;
// Program Setup
cl_program cpProgram = clCreateProgramWithSource(cxGPUContext,1, (const char **) &ProgramSource, NULL, &ciErrNum);
if (ciErrNum != CL_SUCCESS)
{
printf("Error: Failed to create program
");
}
printf("clCreateProgramWithSource = %d success!
", cpProgram);
// build the program
ciErrNum = clBuildProgram(cpProgram, 0, NULL, "-cl-fast-relaxed-math", NULL, NULL);
if (ciErrNum != CL_SUCCESS)
{
printf("Error: Failed to build program
");
}
printf("clBuildProgram = %d success!
", cpProgram);
}