OpenCL Memory Problem with ant simulation

Hi,

I’m programming an ant simulation in OpenCL and OpenGL (+SFML).

On my Nvidia PC, OpenCL returns error -6 (CL_OUT_OF_HOST_MEMORY), and on my Intel laptop it returns error -4 (CL_MEM_OBJECT_ALLOCATION_FAILURE).

After I have simulated my ants for approximately 3 seconds, this error occurs again and again.

Thanks for your help.


  float *world, float *antX, float *antY,  int *targetX, int *targetY, int *speed, int *direction, float * resultX, float * resultY, float *newAntX, float *newAntY)
{
        cl_int errorcode;
        memObjects[0] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                sizeof(float) * ARRAY_SIZE, world, &errorcode);
        memObjects[1] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                sizeof(float) * ARRAY_SIZE, antX, NULL);
        memObjects[2] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                sizeof(float) * ARRAY_SIZE, antY, NULL);
        memObjects[3] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                sizeof(float) * ARRAY_SIZE, targetX, NULL);
        memObjects[4] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                sizeof(float) * ARRAY_SIZE, targetY, NULL);
        memObjects[5] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                sizeof(int) * ARRAY_SIZE, speed, NULL);
        memObjects[6] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                sizeof(float) * ARRAY_SIZE, direction, NULL);
        memObjects[7] = clCreateBuffer(context, CL_MEM_READ_WRITE,
                sizeof(float) * ARRAY_SIZE, NULL, NULL);
        memObjects[8] = clCreateBuffer(context, CL_MEM_READ_WRITE,
                sizeof(float) * ARRAY_SIZE, NULL, NULL);
        memObjects[9] = clCreateBuffer(context, CL_MEM_READ_WRITE,
                sizeof(float) * ARRAY_SIZE, NULL, NULL);
        memObjects[10] = clCreateBuffer(context, CL_MEM_READ_WRITE,
                sizeof(float) * ARRAY_SIZE, NULL, NULL);
 
        for (int i = 0 ; i < 11; i++){
                if(memObjects[i] == NULL){
               
                        std::cerr << i << "Error creating memory objects!!!!!" << errorcode << std::endl;
                        return false;
                }
        }
               
       
 
        return true;
}


int  openCLinit::startOpenCLmain(){
        if (!CreateMemObjectsAnts(context, memObjects, world, antX, antY, targetX, targetY, speed, direction, resultX, resultY, newAntX, newAntY))
        {
                //Cleanup(context, commandQueue, program, kernel, memObjects);
                return 1;
        }
         int errorNum = 0;
        // Set the kernel arguments (result, a, b)
       
        for (int i = 0; i < 11; i++){
                clSetKernelArg(kernel, i, sizeof(cl_mem), &memObjects[i]);
        }
 
 
 
       
        globalWorkSize[0] = ARRAY_SIZE ;
        localWorkSize[0] = 1;
 
        sf::Clock clocki;
        // Queue the kernel up for execution across the array
        clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL,
                globalWorkSize, localWorkSize,
                0, NULL, NULL);
       
 
        // Read the output buffer back to the Host
        clEnqueueReadBuffer(commandQueue, memObjects[9], CL_TRUE,
                0, ARRAY_SIZE * sizeof(float), newAntX,
                0, NULL, NULL);
        clEnqueueReadBuffer(commandQueue, memObjects[10], CL_TRUE,
                0, ARRAY_SIZE * sizeof(float), newAntY,
                0, NULL, NULL);
        //std::cout << clocki.getElapsedTime().asMilliseconds();
        //if (errNum != CL_SUCCESS)
        //{
                //print_cl_err(errNum);
                //Cleanup(context, commandQueue, program, kernel, memObjects);
                //return 1;
        //}
 
       
        clFinish(this->commandQueue);
        //std::cout << std::endl;
        //std::cout << "Executed program succesfully." << std::endl;
 

        return errorNum;
}
 

Your actual runtime is 3 seconds? Most platforms will terminate an OpenCL kernel if it runs for a while. Can you design your system to instead run as a series of faster kernels?