Problems with deinitialization of Intel OpenCL

Hi there,

Can anyone tell me why Intel’s OpenCL implementation can’t be reinitialized? First, a sample for clarification:


// Repro loop: ten times in a row, enumerate every OpenCL platform, create a
// context spanning all of its devices, create and release one profiling-enabled
// command queue per device, then release the context again.
// Every OpenCL status code is checked and reported so a failing
// re-initialization is visible, and all host-side arrays are freed per pass.
for (int run = 0; run < 10; run++)
{
	// Small local helper so each failure is reported the same way.
	const auto reportFailure = [](const char* what, cl_int err)
	{
		std::wcout << "  " << what << " failed with error " << err << std::endl;
	};

	cl_int iError = CL_SUCCESS;
	cl_uint uiPlatformCount = 0;
	iError = clGetPlatformIDs(0, nullptr, &uiPlatformCount);
	if (iError != CL_SUCCESS || uiPlatformCount == 0)
	{
		reportFailure("clGetPlatformIDs (count)", iError);
		continue;
	}

	cl_platform_id* pPlatformIDs = new cl_platform_id[uiPlatformCount];
	iError = clGetPlatformIDs(uiPlatformCount, pPlatformIDs, nullptr);
	if (iError != CL_SUCCESS)
	{
		reportFailure("clGetPlatformIDs (ids)", iError);
		delete[] pPlatformIDs;
		continue;
	}

	for (cl_uint i = 0; i < uiPlatformCount; i++)
	{
		// For every platform, see what information we can get.
		cl_platform_id pID = pPlatformIDs[i];
		char cPlatformInfo[1024] = {0}; // zeroed so a failed query still prints an empty name
		iError = clGetPlatformInfo(pID, CL_PLATFORM_NAME, sizeof(cPlatformInfo), cPlatformInfo, nullptr);
		if (iError != CL_SUCCESS)
		{
			reportFailure("clGetPlatformInfo", iError);
		}
		std::wcout << "  Plattform Name:	" << cPlatformInfo <<  std::endl;

		cl_uint uiDeviceCount = 0;
		iError = clGetDeviceIDs(pID, CL_DEVICE_TYPE_ALL, 0, nullptr, &uiDeviceCount);
		if (iError != CL_SUCCESS || uiDeviceCount == 0)
		{
			reportFailure("clGetDeviceIDs (count)", iError);
			continue;
		}

		cl_device_id* pDeviceIDs = new cl_device_id[uiDeviceCount];
		iError = clGetDeviceIDs(pID, CL_DEVICE_TYPE_ALL, uiDeviceCount, pDeviceIDs, &uiDeviceCount);
		if (iError != CL_SUCCESS)
		{
			reportFailure("clGetDeviceIDs (ids)", iError);
			delete[] pDeviceIDs;
			continue;
		}

		cl_context_properties pContextProperties[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)pID, 0};
		cl_context ctx = clCreateContext(pContextProperties, uiDeviceCount, pDeviceIDs,
			nullptr, nullptr, &iError);
		if (ctx == nullptr || iError != CL_SUCCESS)
		{
			reportFailure("clCreateContext", iError);
			delete[] pDeviceIDs;
			continue;
		}

		for (cl_uint j = 0; j < uiDeviceCount; j++)
		{
			cl_device_id dID = pDeviceIDs[j];
			cl_command_queue queue = clCreateCommandQueue(ctx, dID, CL_QUEUE_PROFILING_ENABLE, &iError);
			if (queue == nullptr || iError != CL_SUCCESS)
			{
				reportFailure("clCreateCommandQueue", iError);
				continue;
			}
			iError = clReleaseCommandQueue(queue);
			if (iError != CL_SUCCESS)
			{
				reportFailure("clReleaseCommandQueue", iError);
			}
		}

		iError = clReleaseContext(ctx);
		if (iError != CL_SUCCESS)
		{
			reportFailure("clReleaseContext", iError);
		}
		delete[] pDeviceIDs; // allocated per platform, so freed per platform
	}

	delete[] pPlatformIDs; // allocated per run, so freed per run
}

In the loop, all OpenCL devices are initialized multiple times and are destroyed at the end of each pass. The loop takes a while to run, because initialization on Intel is slow, but in my opinion it should work. Or am I missing something here?

After the first loop, the intel platform can’t be initialized.

Can anyone confirm that?

Regards,
clint3112

Funny that it works with NVIDIA without problems. And it works with Intel when you use it like the following:


// Repro loop with an actual kernel launch: ten times in a row, for every
// OpenCL platform, create a context over all devices, build the "crunch"
// program, enqueue it once on each device, and release everything again.
// Each OpenCL object is released exactly once, host arrays are freed per
// pass, and every status code is checked and reported.
for (int run = 0; run < 10; run++)
{
	// Small local helper so each failure is reported the same way.
	const auto reportFailure = [](const char* what, cl_int err)
	{
		std::wcout << "  " << what << " failed with error " << err << std::endl;
	};

	cl_int iError = CL_SUCCESS;
	cl_uint uiPlatformCount = 0;
	iError = clGetPlatformIDs(0, nullptr, &uiPlatformCount);
	if (iError != CL_SUCCESS || uiPlatformCount == 0)
	{
		reportFailure("clGetPlatformIDs (count)", iError);
		continue;
	}

	cl_platform_id* pPlatformIDs = new cl_platform_id[uiPlatformCount];
	iError = clGetPlatformIDs(uiPlatformCount, pPlatformIDs, nullptr);
	if (iError != CL_SUCCESS)
	{
		reportFailure("clGetPlatformIDs (ids)", iError);
		delete[] pPlatformIDs;
		continue;
	}

	for (cl_uint i = 0; i < uiPlatformCount; i++)
	{
		// For every platform, see what information we can get.
		cl_platform_id pID = pPlatformIDs[i];
		char cPlatformInfo[1024] = {0}; // zeroed so a failed query still yields an empty name
		iError = clGetPlatformInfo(pID, CL_PLATFORM_NAME, sizeof(cPlatformInfo), cPlatformInfo, nullptr);
		if (iError != CL_SUCCESS)
		{
			reportFailure("clGetPlatformInfo", iError);
		}
		std::wcout << "  Plattform Name:	" << cPlatformInfo <<  std::endl;

		// Debugging toggle: uncomment the `continue` to skip Intel platforms.
		const char* sIntel = "Intel";
		if (strstr(cPlatformInfo, sIntel))
		{
			//continue;
		}

		cl_uint uiDeviceCount = 0;
		iError = clGetDeviceIDs(pID, CL_DEVICE_TYPE_ALL, 0, nullptr, &uiDeviceCount);
		if (iError != CL_SUCCESS || uiDeviceCount == 0)
		{
			reportFailure("clGetDeviceIDs (count)", iError);
			continue;
		}

		cl_device_id* pDeviceIDs = new cl_device_id[uiDeviceCount];
		iError = clGetDeviceIDs(pID, CL_DEVICE_TYPE_ALL, uiDeviceCount, pDeviceIDs, &uiDeviceCount);
		if (iError != CL_SUCCESS)
		{
			reportFailure("clGetDeviceIDs (ids)", iError);
			delete[] pDeviceIDs;
			continue;
		}

		cl_context_properties pContextProperties[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)pID, 0};
		cl_context ctx = clCreateContext(pContextProperties, uiDeviceCount, pDeviceIDs,
			nullptr, nullptr, &iError);
		if (ctx == nullptr || iError != CL_SUCCESS)
		{
			reportFailure("clCreateContext", iError);
			delete[] pDeviceIDs;
			continue;
		}

		// Kernel source as a plain null-terminated C string. Passing nullptr
		// for the lengths array tells OpenCL to rely on the null terminator.
		const char* cpKernel =
			"__kernel void crunch(float f) {\n"
			" float x,y,cs,sn,xx,yy;\n"
			" x = 1.0f;\n"
			" y = 0.0f;\n"
			" cs = cos(2.0f);\n"
			" sn = sin(2.0f);\n"
			" for (int i=0;i<40000;i++){\n"
			" xx = cs * x - sn * y;\n"
			" yy = cs * y + sn * x;\n"
			" x = xx;\n"
			" y = yy;\n"
			" }\n"
			" \n"
			" }\n"
			" ";
		cl_program pclProgram = clCreateProgramWithSource(ctx, 1, &cpKernel, nullptr, &iError);
		if (pclProgram == nullptr || iError != CL_SUCCESS)
		{
			reportFailure("clCreateProgramWithSource", iError);
		}
		else
		{
			const char* pOptions = "-Werror";
			const char* kernelName = "crunch";
			cl_kernel pclKernel = nullptr;

			iError = clBuildProgram(pclProgram, uiDeviceCount, pDeviceIDs, /*nullptr*/pOptions, nullptr, nullptr);
			if (iError != CL_SUCCESS)
			{
				reportFailure("clBuildProgram", iError);
			}
			else
			{
				// One kernel object is shared by all devices of this context.
				pclKernel = clCreateKernel(pclProgram, kernelName, &iError);
				if (pclKernel == nullptr || iError != CL_SUCCESS)
				{
					reportFailure("clCreateKernel", iError);
					pclKernel = nullptr;
				}
			}

			if (pclKernel != nullptr)
			{
				for (cl_uint j = 0; j < uiDeviceCount; j++)
				{
					cl_device_id dID = pDeviceIDs[j];
					cl_command_queue queue = clCreateCommandQueue(ctx, dID, CL_QUEUE_PROFILING_ENABLE, &iError);
					if (queue == nullptr || iError != CL_SUCCESS)
					{
						reportFailure("clCreateCommandQueue", iError);
						continue;
					}

					// The kernel parameter is a float, so the argument size
					// must be sizeof(cl_float), not sizeof(cl_mem).
					cl_float f = 1.f;
					iError = clSetKernelArg(pclKernel, 0, sizeof(cl_float), &f);
					if (iError != CL_SUCCESS)
					{
						reportFailure("clSetKernelArg", iError);
					}
					else
					{
						size_t lws[1] = {256};
						size_t gws[1] = {256};
						iError = clEnqueueNDRangeKernel(queue, pclKernel, 1, nullptr,
							gws, lws, 0,nullptr, nullptr);
						if (iError != CL_SUCCESS)
						{
							reportFailure("clEnqueueNDRangeKernel", iError);
						}
					}

					iError = clReleaseCommandQueue(queue);
					if (iError != CL_SUCCESS)
					{
						reportFailure("clReleaseCommandQueue", iError);
					}
				}
				clReleaseKernel(pclKernel);
			}

			// Released once per program, outside the device loop, so the
			// reference count is never decremented more often than it was taken.
			clReleaseProgram(pclProgram);
		}

		iError = clReleaseContext(ctx);
		if (iError != CL_SUCCESS)
		{
			reportFailure("clReleaseContext", iError);
		}
		delete[] pDeviceIDs; // allocated per platform, so freed per platform
	}

	delete[] pPlatformIDs; // allocated per run, so freed per run
}

The code looks fine to me, and I had no problems running it on my Intel SDK for OpenCL Applications installation. I’m using 64-bit Windows with the 2012 version of the SDK. What are you using?

Also, you might consider posting Intel-specific issues here:

http://software.intel.com/en-us/forums/intel-opencl-sdk/

I reinstalled the SDK and the problem was gone. I don’t know whether it was a problem with having NVIDIA and Intel installed side by side, or something else.