Hi dear OpenCL community,
to improve my understanding of OpenCL, I want to port some simple code from Java to OpenCL.
The OpenCL code works for small sizes, but when I increase the vector size, my
kernel crashes.
Here is the Java part:
// Reference computation: for every element of a, add (a[i] + b[j]) over all
// elements of b and print the resulting total, one line per element of a.
FloatBuffer a = FloatBuffer.wrap(new float[]{1, 1, 1, 1});
FloatBuffer b = FloatBuffer.wrap(new float[]{2.2f, 2, 3, 4, 5});
final int aCount = a.capacity();
final int bCount = b.capacity();
int row = 0;
while (row < aCount) {
    // The element of a is fixed for the whole inner pass over b.
    final float left = a.get(row);
    float total = 0;
    int col = 0;
    while (col < bCount) {
        total += left + b.get(col);
        col++;
    }
    System.out.println(total);
    row++;
}
And here is the OpenCL equivalent. I call the kernel with a global size of [a.capacity(), b.capacity()],
and the output buffer has the same size as a:
/*
 * For each element a[i], accumulates (a[i] + b[j]) over every element of b
 * and stores the total in output[i].
 *
 * The kernel is written for a 2D launch. Dimension 0 selects the element of a;
 * dimension 1 is used to stage b through local memory one tile at a time,
 * where a tile is get_local_size(1) elements wide. get_global_size(1) is
 * treated as the number of elements in b.
 *
 * a      - input values, one read per work-item (indexed along dimension 0)
 * b      - input values, read tile by tile
 * output - receives one sum per index along dimension 0
 * block  - local scratch buffer; it is indexed by get_local_id(1), so it must
 *          hold at least get_local_size(1) floats
 *
 * NOTE(review): `kernel` is also spelled like the OpenCL C function qualifier;
 * some compilers may reject it as a function name. Renaming it requires a
 * matching change in the host code, so the name is left untouched here.
 */
__kernel void kernel(__global const float* a, __global const float* b, __global float* output, __local float* block)
{
    const int gri = get_group_id(0);
    const int ti = get_local_id(0);
    const int tj = get_local_id(1);
    const int gj = get_global_size(1);
    const int ni = get_local_size(0);
    const int nj = get_local_size(1);

    // Index along dimension 0, reconstructed from the group and local ids.
    const int gti = gri * ni + ti;

    // Round up so that a final, partially filled tile of b is still processed
    // when the tile width does not divide the number of elements.
    const int nb = (gj + nj - 1) / nj;

    const float va = a[gti];
    float sum = 0;
    for (int jb = 0; jb < nb; jb++) {
        const int base = jb * nj;
        const int remaining = gj - base;
        // Number of valid elements in this tile (smaller only for the last one).
        const int count = remaining < nj ? remaining : nj;

        // Work-items sharing the same tj store the same value, so the
        // duplicate writes across dimension 0 are harmless.
        if (tj < count) {
            block[tj] = b[base + tj];
        }
        // Make the freshly loaded tile visible to the whole work-group.
        barrier(CLK_LOCAL_MEM_FENCE);

        for (int k = 0; k < count; k++) {
            sum += va + block[k];
        }
        // Do not overwrite the tile while other work-items are still reading it.
        barrier(CLK_LOCAL_MEM_FENCE);
    }
    output[gti] = sum;
}
Thanks in advance