Hi there,

I have a rather simple case of one program with one kernel that is called by many threads. At each call site, the thread acquires a command queue and a kernel object that are not used by any other thread. It then allocates a read-only buffer and a write-only buffer. This is shown in the code below. After a while, the application locks up and all threads are stopped waiting on a single object (Windows 7). It looks like most, if not all, threads are inside clEnqueueWriteBuffer or clReleaseMemObject. The latter surprises me, as I would expect clReleaseMemObject to not do much at all. Is there anything wrong with the pattern that I am using? I am using AMD OpenCL.


Code :
		program = getProgram( programId ); // return already create program
		kernel = aquireKernel( program, kernelID ); // only one thread can use this kernel object at any one time
		entryCount = width * height;
		inputSize = entryCount * channelCount * sizeof( inType );
		outputSize = entryCount * channelCount * sizeof( outType );
		commandQueue = aquireCommandQueue(); // only one thread can use this command queue at any one time
		err = CL_SUCCESS;
		inMem = clCreateBuffer( openclContext, CL_MEM_READ_ONLY, inputSize, NULL, &err );
		err |= clEnqueueWriteBuffer( commandQueue, inMem, CL_TRUE, 0, inputSize, in, 0, NULL, NULL ); // often locks up
		assert( err == CL_SUCCESS );
		outMem = clCreateBuffer( openclContext, CL_MEM_WRITE_ONLY, outputSize, NULL, &err );
		err |= clSetKernelArg( kernel,  0, sizeof(cl_mem), &inMem );
		err |= clSetKernelArg( kernel,  1, sizeof(cl_mem), &outMem );
		err |= clEnqueueNDRangeKernel( commandQueue, kernel, 1, NULL, &entryCount, NULL, 0, NULL, NULL );
		// actual enqueue call == render
		err |= clFinish( commandQueue );
		// read buffer back
		err |= clEnqueueReadBuffer( commandQueue, outMem, CL_TRUE, 0, outputSize, out, 0, NULL, NULL );
		assert( err == CL_SUCCESS );
		err |= clReleaseMemObject( outMem ); // often locks up
		err |= clReleaseMemObject( inMem );
		releaseCommandQueue( commandQueue );
		releaseKernel( kernelID, kernel );