In the kernel function if i am doing min reduction which involves a while loop; i am getting the correct answer for smaller data sets but not getting the correct results for large datasets.
I think i have a synchronization problem between the threads, can anyone help me or tell me where do i need to put the barrier function in the kernel function and the reason for it.
__kernel void add_update_min(__global int *selected, __global float *d, __global float *A, __global float *dcopy, __global int *stVertex,__global int *stVertexcopy,__global int *destVertex, int n)
{
// Getting the global id of the thread
int tid = get_global_id(0);
int p;
p = destVertex[0];
// Updating distances of all the vertices after a new vertex is added
if(selected[tid] == 0)
{
if(d[tid] > A[tid*n + p])
{
d[tid] = A[tid*n + p];
stVertex[tid] = p;
}
}
else
{
d[tid] = 9999;
}
barrier(CLK_GLOBAL_MEM_FENCE);
stVertexcopy[tid] = tid;
dcopy[tid] = d[tid];
barrier(CLK_GLOBAL_MEM_FENCE);
//Calculating the minimum among all the d[i]'s
while(tid < n/2 )
{
if(n%2 != 0) // check if number of elements in consideration are suitable for normal min reduction (check out the else part)
{
if(tid != (n-1)/2) // if thread id is not the same as the middle element : think 9 and 4 [0,1,2,3,4]
{
if(dcopy[tid] > dcopy[tid + (n+1)/2]) // find the smalles
{
dcopy[tid] = dcopy[tid + (n+1)/2];
stVertexcopy[tid] = stVertexcopy[tid + (n+1)/2];
}
}
if(tid == 0) // shall also include the middle element in each iteration of min reduction --- think of a 23 element array [0....22]
{
// every thread is going to execute this
if(dcopy[tid] > dcopy[(n-1)/2])
{
dcopy[tid] = dcopy[(n-1)/2];
stVertexcopy[tid] = stVertexcopy[(n-1)/2];
}
}
n = (n-1)/2;
}
else
{
if(dcopy[tid] > dcopy[tid + n/2])
{
dcopy[tid] = dcopy[tid + n/2];
stVertexcopy[tid] = stVertexcopy[tid + n/2];
}
n = n/2;
}
barrier(CLK_GLOBAL_MEM_FENCE);
}
barrier(CLK_GLOBAL_MEM_FENCE);
if(tid == 0)
{
destVertex[0] = stVertexcopy[0];
p = destVertex[0];
selected[p] = 1;
}
}
Here i have the global item size as n
local item size i have specified it as NULL.