clEnqueueCopyBuffer and mapped memory objects: bug or feature ?

This OpenCL example program “utl11” creates and maps two memory objects, initializes one and copies it to the other via the map pointers. It will only work if clEnqueueCopyBuffer() is called for the copy before clEnqueueMapBuffer() is called for the destination object. Why might this be the case ?

I am using the NVidia OpenCL libraries and driver NVIDIA-Linux-x86_64-340.32.


/*****************************************************************************
 * utl11 <MapCpyFlg>
 *    Copy initialized and mapped source memory object to mapped
 *    destination memory object.  Call clEnqueueCopyBuffer for destination
 *    object before or after clEnqueueMapBuffer for destination object
 *    depending on MapCpyFlg setting
 *
 * MapCpyFlg
 *    0 - call clEnqueueCopyBuffer before clEnqueueMapBuffer for
 *        destination object
 *    1 - dest clEnqueueMapBuffer before dest clEnqueueCopyBuffer
 *****************************************************************************/

#include <CL/cl.h>
#include <stdio.h>

#define ERMSG fprintf(stderr,"error: %s:%d
", __FILE__, __LINE__)
typedef int arel;
#define UTL11ARSIZ 10

/*****************************************************************************
 * create memory object, optionally call clEnqueueMapBuffer for it based
 *     on mapflg setting
 *
 * mapflg
 *    0 - do not call clEnqueueMapBuffer for object after clCreateBuffer
 *    1 - call clEnqueueMapBuffer for object after clCreateBuffer
 *****************************************************************************/
int CreatMemObjOptMap(cl_context context, int HostPtrSiz, cl_command_queue
    command_queue, cl_mem *BufObjSrc, void **SrcMapPtr, int mapflg)
{   int errflag=1;
    cl_int errcode_ret;

    if(((*BufObjSrc)=clCreateBuffer(context, CL_MEM_READ_WRITE,
        HostPtrSiz, NULL, &errcode_ret)))
    {   if(mapflg)
        {   if(((*SrcMapPtr)=clEnqueueMapBuffer(command_queue, *BufObjSrc,
                CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, HostPtrSiz, 0, NULL,
                NULL, &errcode_ret)))
            {   errflag=0;
                if(errflag)
                    clEnqueueUnmapMemObject(command_queue, *BufObjSrc,
                        *SrcMapPtr, 0, NULL, NULL);
            } else ERMSG;
        } else errflag=0;
        if(errflag) clReleaseMemObject(*BufObjSrc);
    } else ERMSG;
    return errflag;
}

/*****************************************************************************
 * clean up memory object, optionally call clEnqueueUnMapBuffer for it
 *    based on mapflg setting
 *
 * mapflg
 *    0 - do not call clEnqueueUnmapMemObject for object before
 *        clReleaseMemObject
 *    1 - call clEnqueueUnmapMemObject for object before clReleaseMemObject
 *****************************************************************************/
void FreeMemObJOptUnMap(cl_command_queue command_queue, cl_mem BufObjSrc,
    void *SrcMapPtr, int mapflg)
{   if(mapflg)
        clEnqueueUnmapMemObject(command_queue, BufObjSrc, SrcMapPtr, 0,
            NULL, NULL);
    clReleaseMemObject(BufObjSrc);
}

/*****************************************************************************
 * show contents of source and destination mapped memory objects for
 *    comparison
 *****************************************************************************/
void PrintMapSrcDstObj(void *SrcMapPtr, void *DstMapPtr)
{   int i;
    arel *mptr;

    for(i=0, mptr=(arel *) SrcMapPtr; i<UTL11ARSIZ; i++, mptr++)
        printf("%2d : %p %3d
", i, mptr, *mptr);
    printf("
");
    for(i=0, mptr=(arel *) DstMapPtr; i<UTL11ARSIZ; i++, mptr++)
        printf("%2d : %p %3d
", i, mptr, *mptr);
}

/*****************************************************************************
 * create source and destination objects.
 * initialize source object
 * copy source object to destination object
 * optionally map destination object before/after copy based on MapCpyFlg
 *    setting
 *
 * MapCpyFlg
 *    0 - call clEnqueueCopyBuffer before clEnqueueMapBuffer for
 *        destination object
 *    1 - dest clEnqueueMapBuffer before dest clEnqueueCopyBuffer
 *****************************************************************************/
int CreatObjCpyOptMapDst(cl_context context, cl_command_queue
    command_queue, int MapCpyFlg)
{   int errflag=1,i;
    void *SrcMapPtr, *DstMapPtr;
    cl_mem BufObjSrc, BufObjDst;
    arel utl11src[UTL11ARSIZ];
    cl_int errcode_ret;
    arel *mptr;

    /* create and map source object */
    if(!CreatMemObjOptMap(context, sizeof(utl11src), command_queue,
        &BufObjSrc, &SrcMapPtr, 1))
    {
        /* create and optionally map destination object before copy */
        if(!CreatMemObjOptMap(context, sizeof(utl11src), command_queue,
            &BufObjDst, &DstMapPtr, MapCpyFlg?1:0))
        {
            /* initialize mapped source object */
            for(i=0,mptr=(arel *)SrcMapPtr; i<UTL11ARSIZ; i++,mptr++)
                (*mptr)=(i*10);

            /* copy mapped source object to destination object */
            if(!clEnqueueCopyBuffer(command_queue, BufObjSrc, BufObjDst,
                0, 0, sizeof(utl11src), 0, NULL, NULL))
            {
                if(MapCpyFlg)
                {   PrintMapSrcDstObj(SrcMapPtr, DstMapPtr);
                    errflag=0;
                }
                else
                {
                    /* optionally map destination object after copy */
                    if((DstMapPtr = clEnqueueMapBuffer( command_queue,
                        BufObjDst, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0,
                        sizeof(utl11src), 0, NULL, NULL, &errcode_ret)))
                    {
                        /* print source and destination object contents
                           for comparison */
                        PrintMapSrcDstObj(SrcMapPtr, DstMapPtr);

                        errflag=0;

                        /* unmap destination object if mapped after copy */
                        clEnqueueUnmapMemObject(command_queue, BufObjDst,
                            DstMapPtr, 0, NULL, NULL);

                    } else ERMSG;
                }
            } else ERMSG;
            FreeMemObJOptUnMap(command_queue, BufObjDst, DstMapPtr, 0);
        }
        FreeMemObJOptUnMap(command_queue, BufObjSrc, SrcMapPtr, 1);
    }
    return errflag;
}

/*****************************************************************************
 * main
 *****************************************************************************/
int main(int argc, char **argv)
{   int errflag=1, MapCpyFlg;
    cl_context_properties properties[3];
    cl_uint rtnval, num_entries, num_devices;
    cl_platform_id *platforms;
    cl_device_id *devices;
    cl_int errcode_ret;
    cl_context context;
    cl_command_queue command_queue;

    /* initialization for CreatObjCpyOptMapDst */
    MapCpyFlg=atoi(argv[1]);
    if(!clGetPlatformIDs(0, NULL, &num_entries))
    {   if((platforms=malloc(sizeof(cl_platform_id)*num_entries)))
        {   if(!clGetPlatformIDs(num_entries, platforms, &rtnval))
            {   properties[0] = CL_CONTEXT_PLATFORM;
                properties[1] = (cl_context_properties)platforms[0];
                properties[2] = 0;
                if(!clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, 0,
                    NULL, &num_devices))
                {   if((devices=malloc(sizeof(cl_device_id)*num_devices)))
                    {   if(!clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU,
                            num_devices, devices, &rtnval))
                        {   if((context=clCreateContext(properties,
                                num_devices, devices, NULL, NULL,
                                &errcode_ret)))
                            {   if((command_queue = clCreateCommandQueue(
                                    context, devices[0], 0, &errcode_ret)))
                                {
                                    /* create source and destination
                                       objects, copy, report contents
                                       for comparison */

                                    errflag=CreatObjCpyOptMapDst(context,
                                        command_queue, MapCpyFlg);

                                    clReleaseCommandQueue(command_queue);
                                } else ERMSG;
                                clReleaseContext(context);
                            } else ERMSG;
                        } else ERMSG;
                        free(devices);
                    } else ERMSG;
                } else ERMSG;
            } else ERMSG;
            free(platforms);
        } else ERMSG;
    } else ERMSG;
    return errflag;
};