OK - this is how I upload the data. I’ve tried to keep things to a minimum, but you can see I’m checking the result of every call where I can…
poly1 and poly2 are polygons, each stored as an array of double2 (i.e. an array of pairs of doubles).
They are properly aligned etc.
size_t vertexPt_sz = sizeof(double) * 2;
size_t int_sz = sizeof(int);
cl::Buffer buf_poly1(*context, CL_MEM_READ_ONLY, vertexPt_sz * poly1.numPoints());
cl::Buffer buf_poly2(*context, CL_MEM_READ_ONLY, vertexPt_sz * poly2.numPoints());
cl::Buffer buf_result(*context, CL_MEM_READ_WRITE, int_sz * numOffsets);
cl::Buffer buf_offsets(*context, CL_MEM_READ_ONLY, vertexPt_sz * numOffsets);
cl::Buffer buf_intersects(*context, CL_MEM_READ_WRITE, int_sz * numOffsets);
cl::Buffer buf_insde(*context, CL_MEM_READ_WRITE, int_sz * numOffsets);
cl::Buffer buf_outside(*context, CL_MEM_READ_WRITE, int_sz * numOffsets);
cl::Buffer buf_dummy(*context, CL_MEM_READ_WRITE, sizeof(REAL));
// Upload the host-side data to the device buffers. Every write is blocking
// (CL_TRUE), so each call returns only after its host memory has been copied.
cl_int status;

// buf_poly1 and buf_poly2 are cl::Buffer objects, not pointers, so they are
// passed directly (as the other writes below already do) rather than dereferenced.
status = queue->enqueueWriteBuffer(buf_poly1, CL_TRUE, 0, vertexPt_sz * poly1.numPoints(), (void*) poly1.pts.Buffer);
// NOTE(review): this call site uses MU_TEXT and .str() while every other one
// uses TEXT and passes GetErrorText()'s result directly - confirm which form
// is the intended one and make them all agree.
if (status != CL_SUCCESS)
    Message(MU_TEXT("%s"), OpenCL().GetErrorText(status).str());

// Upload poly2's vertex storage, mirroring poly1 above; the polygon object
// itself cannot be cast to void*.
status = queue->enqueueWriteBuffer(buf_poly2, CL_TRUE, 0, vertexPt_sz * poly2.numPoints(), (void*) poly2.pts.Buffer);
if (status != CL_SUCCESS)
    Message(TEXT("%s"), OpenCL().GetErrorText(status));

// Offsets: numOffsets vertex-sized elements.
status = queue->enqueueWriteBuffer(buf_offsets, CL_TRUE, 0, vertexPt_sz * numOffsets, (void*) offsets);
if (status != CL_SUCCESS)
    Message(TEXT("%s"), OpenCL().GetErrorText(status));

// Per-offset int arrays: upload the host-side initial contents.
status = queue->enqueueWriteBuffer(buf_intersects, CL_TRUE, 0, int_sz * numOffsets, (void*) intersects);
if (status != CL_SUCCESS)
    Message(TEXT("%s"), OpenCL().GetErrorText(status));
status = queue->enqueueWriteBuffer(buf_insde, CL_TRUE, 0, int_sz * numOffsets, (void*) hasInside);
if (status != CL_SUCCESS)
    Message(TEXT("%s"), OpenCL().GetErrorText(status));
status = queue->enqueueWriteBuffer(buf_outside, CL_TRUE, 0, int_sz * numOffsets, (void*) hasOutside);
if (status != CL_SUCCESS)
    Message(TEXT("%s"), OpenCL().GetErrorText(status));

// Single REAL-sized value.
status = queue->enqueueWriteBuffer(buf_dummy, CL_TRUE, 0, sizeof(REAL), (void*) dummy);
if (status != CL_SUCCESS)
    Message(TEXT("%s"), OpenCL().GetErrorText(status));
/// Build the "intersection" kernel object from the already-created program.
cl::Kernel intersectKernel(*OpenCL().GetProgram(),"intersection");

// Bind the kernel arguments in index order. Each setArg result is checked and
// any failure is reported through Message(); binding continues regardless.

// Args 0-1: first polygon buffer and its point count.
status = intersectKernel.setArg(0, buf_poly1);
if (status != CL_SUCCESS) {
    Message(TEXT("%s"), OpenCL().GetErrorText(status));
}
status = intersectKernel.setArg(1, poly1.numPoints());
if (status != CL_SUCCESS) {
    Message(TEXT("%s"), OpenCL().GetErrorText(status));
}

// Args 2-3: second polygon buffer and its point count.
status = intersectKernel.setArg(2, buf_poly2);
if (status != CL_SUCCESS) {
    Message(TEXT("%s"), OpenCL().GetErrorText(status));
}
status = intersectKernel.setArg(3, poly2.numPoints());
if (status != CL_SUCCESS) {
    Message(TEXT("%s"), OpenCL().GetErrorText(status));
}

// Args 4-5: offsets buffer and the number of offsets.
status = intersectKernel.setArg(4, buf_offsets);
if (status != CL_SUCCESS) {
    Message(TEXT("%s"), OpenCL().GetErrorText(status));
}
status = intersectKernel.setArg(5, numOffsets);
if (status != CL_SUCCESS) {
    Message(TEXT("%s"), OpenCL().GetErrorText(status));
}

// Args 6-7: two double-precision literals.
// NOTE(review): their meaning is defined by the kernel source, which is not shown here.
status = intersectKernel.setArg(6, 0.05);
if (status != CL_SUCCESS) {
    Message(TEXT("%s"), OpenCL().GetErrorText(status));
}
status = intersectKernel.setArg(7, 0.001);
if (status != CL_SUCCESS) {
    Message(TEXT("%s"), OpenCL().GetErrorText(status));
}

// Arg 8: the doingInner value.
status = intersectKernel.setArg(8, doingInner);
if (status != CL_SUCCESS) {
    Message(TEXT("%s"), OpenCL().GetErrorText(status));
}

// Args 9-10: the areas of the two polygons.
status = intersectKernel.setArg(9, poly1.GetArea());
if (status != CL_SUCCESS) {
    Message(TEXT("%s"), OpenCL().GetErrorText(status));
}
status = intersectKernel.setArg(10, poly2.GetArea());
if (status != CL_SUCCESS) {
    Message(TEXT("%s"), OpenCL().GetErrorText(status));
}

// Args 11-13: the three per-offset int buffers.
status = intersectKernel.setArg(11, buf_intersects);
if (status != CL_SUCCESS) {
    Message(TEXT("%s"), OpenCL().GetErrorText(status));
}
status = intersectKernel.setArg(12, buf_insde);
if (status != CL_SUCCESS) {
    Message(TEXT("%s"), OpenCL().GetErrorText(status));
}
status = intersectKernel.setArg(13, buf_outside);
if (status != CL_SUCCESS) {
    Message(TEXT("%s"), OpenCL().GetErrorText(status));
}

// Arg 14: the single-REAL buffer.
status = intersectKernel.setArg(14, buf_dummy);
if (status != CL_SUCCESS) {
    Message(TEXT("%s"), OpenCL().GetErrorText(status));
}
// Global range: one work-item per (poly1 point, poly2 point, offset) triple,
// i.e. poly1.numPoints() * poly2.numPoints() * numOffsets work-items in total.
cl::NDRange globalws(poly1.numPoints(), poly2.numPoints(), numOffsets);

// Let the OpenCL implementation choose the work-group size. An explicit
// 1x1x1 local size puts every work-item in its own work-group, which leaves
// most of the device idle and makes large launches far slower than necessary.
// NOTE(review): this assumes the kernel indexes by get_global_id() only and
// does not declare a reqd_work_group_size attribute - confirm against the
// kernel source.
cl::NDRange localws = cl::NullRange;

// Enqueue the kernel; cl::NullRange as the offset means the global ids start at 0.
status = queue->enqueueNDRangeKernel(intersectKernel, cl::NullRange, globalws, localws);
if (status != CL_SUCCESS)
    Message(TEXT("%s"), OpenCL().GetErrorText(status));

// Block until the device has finished all queued work before progressing.
// Errors raised while the kernel executes are reported by this call rather
// than by the enqueue above.
// NOTE(review): with ~1000 x 1000 x 100s of work-items, a failure here may be
// a driver watchdog timeout or resource exhaustion on a long-running kernel -
// check the returned status code; if it persists, split the offset dimension
// into several smaller launches.
status = queue->finish();
if (status != CL_SUCCESS)
    Message(TEXT("%s"), OpenCL().GetErrorText(status));
When I run this code and then read the results back in, I don’t have any problems, unless the number of points is increased dramatically.
Until now I’ve generally been working with polygons of fewer than 50 points (though some are larger), with a small number of offsets.
But now that I’ve started to increase this, with polygons of more than 1000 points and offsets in the hundreds, it fails on the last call to finish(), before I read anything back.
Please forgive any typos in the above code; I’ve copied and pasted it, then edited it to make it more readable.
I have previously created the device, queue and program.