Well the problem I have is the following:
I have this kernel that tries to calculate the betweenness centrality of a graph in parallel. What happens is very strange: a loop is executed twice by one of the work-groups. On average, once every 7-8 runs, the second while loop ( while(count[k]<nr_roots) ) gets executed twice by a work-group, even though count[k] is incremented during the first iteration. In my case I have a 12-vertex graph, nr_roots is 1 and count[k] is 0: the while body gets executed, count[k] gets incremented by 1, and yet count[k] is still seen as 0 one more time.
Remember that this happens only once every 6-7 runs, not always. Does anyone have any idea why? I also tried making count a __local variable (shared by the work-group) and a __private variable (visible to a single work-item only), with no success. Any tips or suggestions are more than welcome!
" while ( found_local != 0){
"
"
"
"
"
"
"
" if(i==0) { level_local = atomic_add(nr_level,0); atomic_xchg(found,0);
"
" pozition_local = atomic_add(pozition,0);
"
" nr_roots = atomic_add(&level[level_local],0)/j; atomic_xchg(&count[k],0); nr=0; rest = atomic_add(&level[level_local],0)%j;
"
" if(k<rest) nr_roots = nr_roots + 1;}
"
"
"
"
"
" barrier(CLK_GLOBAL_MEM_FENCE);
"
"
"
" while(count[k] < nr_roots ){
"
"
"
" if(i==0){
"
" root = stack[pozition_local + count[k]j + k];
"
" succ_index[root] = 0;
"
" nr_neigh = firstnbr[root+1] - firstnbr[root]; }
"
" barrier(CLK_LOCAL_MEM_FENCE);
"
"
"
" neigh_per_thread = nr_neigh/size;
"
" if(i<nr_neigh%size)
"
" neigh_per_thread ++;
"
" h = 0;
"
" while(h<neigh_per_thread)
"
" {
"
" node = nbr[firstnbr[root] + sizeh + i];
"
"
"
" dw = atomic_cmpxchg(&d[node], -1, level_local + 1);
"
"
"
" if(dw == -1)
"
" {
"
" atomic_inc(&level[level_local + 1]);
"
" atomic_cmpxchg(found,0,1);
"
" dw = level_local + 1;
"
" gh = atomic_inc(nr_stack);
"
" stack[gh] = node;
"
"
"
" }
"
"if(dw == level_local + 1)
"
" {
"
"
"
" temp = atomic_inc(&succ_index[root]);
"
" succ[firstnbr[root] + temp] = node;
"
" GetSemaphor2(&sem[0]); temporal = atomic_xchg(&sigma[node],0); temporal2=atomic_xchg(&sigma[root],sigma[root]);
"
" atomic_xchg(&sigma[node],temporal+temporal2);ReleaseSemaphor2(&sem[0]);
"
" }
"
"h++;
"
"}
"
"
"
"if(glob%6==1) {atomic_add(&count[k],1);if(root==4&&nr1==1) BC[8] = 1;}
"
" barrier(CLK_GLOBAL_MEM_FENCE); }
"
"
"
" barrier(CLK_LOCAL_MEM_FENCE);
"
"if(glob==0) {f= atomic_add(&level[level_local],0); atomic_add(pozition,f); atomic_add(nr_level,1);
"
" }
"
"
"
" if(i==0)
"
" { atomic_add(global_sync,1);
"
" if ( k==0) { while(atomic_add(global_sync,0)< j); atomic_xchg(global_sync, 0); }
"
" else { while(atomic_add(global_sync,0) > 0); }}
"
"barrier(CLK_LOCAL_MEM_FENCE);if(i==0) found_local = atomic_add(found,0);barrier(CLK_LOCAL_MEM_FENCE);
"
"} if(glob==11) BC[glob] = atomic_xchg(&sigma[11],sigma[11]); }
";