Hey, this has me stumped.
I create a set of cl_mem objects via calls such as…
void CClass::ConstructOpenCLBuffers()
{
cl_int ciErrNum;
// One per z-plane
if( 0 != m_CLBuffer_NumPointsArr )
clReleaseMemObject( m_CLBuffer_NumPointsArr );
m_CLBuffer_NumPointsArr = clCreateBuffer( m_CLGPUContext, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, m_MaxDepthCount*sizeof(int), NULL, &ciErrNum );
if( 0 != ciErrNum )
{
HandleCreateBufferErrors( ciErrNum );
CleanupOpenCL();
ASSERT( 0 );
}
...
}
I attempt to refresh the contents of the input memory buffers via something like
void CClass::RefreshData()
{
// Copy the numPoints data into the pinned memory buffer for it
int* NumPointsPinnedBufferData = (int*)clEnqueueMapBuffer( m_CLCommandQueue,
m_CLBuffer_NumPointsArr,
CL_TRUE,
CL_MAP_WRITE,
0,
m_MaxDepthCountsizeof(int),
0, NULL,
NULL,
&ciErrNum );
HandleEnqueueMapBufferErrors( ciErrNum );
…
memcpy( NumPointsPinnedBufferData, m_NumPointsArr, m_MaxDepthCountsizeof(int) );
ciErrNum = clEnqueueWriteBuffer( m_CLCommandQueue, m_CLBuffer_NumPointsArr, CL_TRUE, 0, m_MaxDepthCountsizeof(int), NumPointsPinnedBufferData, 0, NULL, NULL );
HandleEnqueueWriteBufferErrors( ciErrNum );
…
ciErrNum = clEnqueueUnmapMemObject( m_CLCommandQueue,
m_CLBuffer_NumPointsArr,
(void)NumPointsPinnedBufferData,
0,
NULL,
NULL );
HandleEnqueueUnmapBufferErrors( ciErrNum );
…
}
Then I run a series of kernels that write their output into one of these cl_mem objects (created with CL_MEM_READ_WRITE), which I read back via something very much like…
void CClass::GetResult()
{
DWORD* GPUBuffer = (DWORD*)clEnqueueMapBuffer( m_CLCommandQueue,
m_CLBuffer_resultBuffer,
CL_TRUE,
CL_MAP_READ,
0,
numElements*sizeof(DWORD),
0, NULL,
NULL,
&ciErrNum );
HandleEnqueueMapBufferErrors( ciErrNum );
memcpy( pCPUSideDataBuffer, GPUBuffer, numElements*sizeof(DWORD) );
ciErrNum = clEnqueueUnmapMemObject( m_CLCommandQueue,
m_CLBuffer_resultBuffer,
(void*)GPUBuffer,
0,
NULL,
NULL );
HandleEnqueueUnmapBufferErrors( ciErrNum );
}
Unfortunately, this only works on the first iteration. Any attempt to get a reasonable result after the first invocation of the kernels fails UNLESS I call ConstructOpenCLBuffers again. This ultimately releases every cl_mem object and creates new cl_mem objects.
Since this is costly, I would very much like to avoid doing this. Does anyone have any suggestions as to what might be going wrong? (And yes, I am handling every single error I have access to, and no, none of the calls ever returns an error code.)