Hello,
I am relatively new in OpenCL programming and frustrated by this problem I am having. :?
I know the code is not OpenCL efficient, but I thought it should at least work (i.e. right results).
I thought any element of a global array can be read and written by any work-item.
But the results are terribly wrong. Not in the sense that they are off by little, but total garbage is found (like 0, -1074790400, etc).
Below,
model = double[225*6];
modelBeginIndex = int[225];
xWindowSizeArray = int[225];
yWindowSizeArray = int[225];
I give global work size of 225 and local work size of null in EnqueueNDRange.
#pragma OPENCL EXTENSION cl_khr_fp64: require
__kernel void getModel(__global double* model,
__global int* modelBeginIndex,
__global int* xWindowSizeArray,
__global int* yWindowSizeArray,
int xWindowSize,
int yWindowSize)
{
int gid = get_global_id(0);
int xWindowSizeMax = xWindowSize;
int yWindowSizeMax = yWindowSize;
int xWindowSizeMin = 3;
int yWindowSizeMin = 3;
int numberOfXWindows = (xWindowSizeMax - xWindowSizeMin) / 2 + 1;
int numberOfYWindows = (yWindowSizeMax - yWindowSizeMin) / 2 + 1;
int numberOfElements = 6;
if (gid == 0)
{
int previousXYWindowSize = 0;
for (int i = xWindowSizeMax; i >= xWindowSizeMin; i = i - 2)
{
for (int j = yWindowSizeMax; j >= yWindowSizeMin; j = j - 2)
{
int index = (xWindowSizeMax - i) / 2 * numberOfYWindows + (yWindowSizeMax - j) / 2;
modelBeginIndex[index] = index == 0 ? 0 : modelBeginIndex[index - 1] + previousXYWindowSize * numberOfElements;
xWindowSizeArray[index] = i;
yWindowSizeArray[index] = j;
previousXYWindowSize = i * j;
}
}
}
int nX = xWindowSizeArray[gid];
int nY = yWindowSizeArray[gid];
int beginIndex[6];
for (int i = 0; i < numberOfElements; i++)
{
beginIndex[i] = i == 0 ? modelBeginIndex[gid] : beginIndex[i - 1] + nX * nY;
}
int indices[6];
int ii,jj;
for (int i = 0; i < nX; i++)
{
ii = i - nX / 2;
for (int j = 0; j < nY; j++)
{
for (int k = 0; k < numberOfElements; k++)
{
indices[k] = beginIndex[k] + (j + i * nY);
}
jj = j - nY / 2;
model[indices[0]] = 1.0;
model[indices[1]] = (double)ii;
model[indices[2]] = (double)jj;
model[indices[3]] = (double)(ii * ii);
model[indices[4]] = (double)(jj * jj);
model[indices[5]] = (double)(ii * jj);
}
}
}
I would really appreciate it if anybody can point me in the right direction. I hope I am not totally misunderstanding OpenCL memory concept.