I get the following error when trying to run with a matrix size of 2048²: ERROR: clEnqueueReadBuffer(-5). It works fine with 1024². Are there any buffer limits I should be aware of? I am running on a crappy NVIDIA Quadro FX 570.
Relevant host code:
unsigned int matrixSize = 2048;
const unsigned int matrixSizeSq = matrixSize * matrixSize;
try {
// (...)
// Create kernel data
float* a = new float[matrixSizeSq];
float* b = new float[matrixSizeSq];
float* c = new float[matrixSizeSq];
for (unsigned int i = 0; i < matrixSizeSq; ++i) {
float val = static_cast<float>(i);
a[i] = val;
b[i] = val + 1.0f;
c[i] = 0.0f;
}
// Set kernel arguments
cl::Buffer aBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
matrixSizeSq * sizeof(float), a, &err);
cl::Buffer bBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
matrixSizeSq * sizeof(float), b, &err);
cl::Buffer cBuffer(context, CL_MEM_WRITE_ONLY,
matrixSizeSq * sizeof(float), 0, &err);
// Create kernel
cl::Kernel kernel(program, "matrixMult", &err);
kernel.setArg(0, aBuffer);
kernel.setArg(1, bBuffer);
kernel.setArg(2, cBuffer);
cl::CommandQueue queue(context, devices[0], 0, &err);
std::vector<cl::Event> events;
cl::Event matMultEvent;
queue.enqueueNDRangeKernel(kernel,
cl::NullRange,
cl::NDRange(matrixSize, matrixSize),
cl::NDRange(16, 16),
NULL,
&matMultEvent);
events.push_back(matMultEvent);
queue.enqueueReadBuffer(cBuffer, CL_FALSE, 0, matrixSizeSq*sizeof(float), c,
&events);
queue.finish();
// (...)
}
catch (cl::Error err) {
std::cerr
<< "ERROR: "
<< err.what()
<< "("
<< err.err()
<< ")"
<< std::endl;
}
Kernel code:
/*
 * matrixMult: each work-item computes one element of c from a and b.
 *
 * The work-item at global id (x, y) accumulates
 *     a[x + k * n] * b[k + y * n]   for k = 0 .. n-1
 * and stores the sum at c[x + y * n], where n is the global size of
 * dimension 0. All three buffers are therefore indexed with stride n.
 *
 * NOTE(review): this assumes a square n x n global range and buffers
 * holding at least n * n floats - confirm against the host launch.
 */
__kernel void matrixMult(__global const float* a,
                         __global const float* b,
                         __global float* c)
{
    // Position of this work-item in the 2-D global range.
    const int gidX = get_global_id(0);
    const int gidY = get_global_id(1);

    // Extent of dimension 0, used both as loop bound and as index stride.
    const int extent = get_global_size(0);

    // Offset shared by the b reads and the final c write.
    const int yOffset = gidY * extent;

    float acc = 0;
    unsigned int k = 0;
    while (k < extent) {
        acc = acc + a[gidX + (k * extent)] * b[k + yOffset];
        ++k;
    }

    c[gidX + yOffset] = acc;
}