Barrier

Hi, I am new to OpenCL.
There is a problem that confuses me.
Without the barrier, C[0] and C[1] have different values.
With the barrier function, C[0] and C[1] should get the same value.

However, the code gives the wrong answer.
Where is the bug in the code?

opencl code

// Barrier test kernel: the work-item with local id 1 assigns 100 to a1, every
// work-item then waits at a work-group barrier, and local ids 0 and 1 store a1
// into C[0] and C[1]. Every work-item also stores 88 into C[2].
//
// NOTE(review): a1 is declared as a POINTER to __local int, not as a __local
// int. The statements below assign integer values to the pointer itself and
// store the pointer into C; no __local memory is ever written or read, so
// there is nothing shared between work-items for the barrier to order.
__kernel void barrier_example ( __global int *C )
{
    
    // Index of this work-item within its work-group
    int t = get_local_id(0);   
    // Pointer into __local memory (the pointer variable itself is per work-item)
    __local int *a1 ;   
    a1 = 0 ;

    //barrier test
    if ( t == 1) {    
    	// Empty-bodied loop intended as a delay before the assignment
    	for( int j = 0 ; j < 1000 ; j ++  ) ;
	a1 = 100 ;    	
    }
    
    // All work-items of the work-group must reach this point before any continues
    barrier ( CLK_LOCAL_MEM_FENCE ) ;
    
    if ( t == 0 ) C [ t ] = a1 ;   
    if ( t == 1 ) C [ t ] = a1 ;
    // Written by every work-item, always with the same value
    C [ 2 ] = 88 ;

}

main.c


#include <stdio.h>
#include <stdlib.h>

#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif

#define MAX_SOURCE_SIZE (0x100000)

int main(void) {
    // Create the two input vectors   
    const int LIST_SIZE = 1024;

    // Load the kernel source code into the array source_str
    FILE *fp;
    char *source_str;
    size_t source_size;

    fp = fopen("barrier_example.cl", "r");
    if (!fp) {
        fprintf(stderr, "Failed to load kernel.
");
        exit(1);
    }
    source_str = (char*)malloc(MAX_SOURCE_SIZE);
    source_size = fread( source_str, 1, MAX_SOURCE_SIZE, fp);
    fclose( fp );

    // Get platform and device information
    cl_platform_id platform_id = NULL;
    cl_device_id device_id = NULL;   
    cl_uint ret_num_devices;
    cl_uint ret_num_platforms;
    cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
    ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_ALL, 1, 
            &device_id, &ret_num_devices);

    // Create an OpenCL context
    cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);

    // Create a command queue
    cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret);

    // Create memory buffers on the device for each vector 
    cl_mem c_mem_obj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 
            LIST_SIZE * sizeof(int), NULL, &ret);

    // Create a program from the kernel source
    cl_program program = clCreateProgramWithSource(context, 1, 
            (const char **)&source_str, (const size_t *)&source_size, &ret);

    // Build the program
    ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);

    // Create the OpenCL kernel
    cl_kernel kernel = clCreateKernel(program, "barrier_example", &ret);

    // Set the arguments of the kernel
    ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&c_mem_obj);
    
    // Execute the OpenCL kernel on the list
    size_t global_item_size = 6 ; // Process the entire lists
    size_t local_item_size = 3 ; // Process in groups of 64
    ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, 
            &global_item_size, &local_item_size, 0, NULL, NULL);

    // Read the memory buffer C on the device to the local variable C
    int *C = (int*)malloc(sizeof(int)*3*2);
    ret = clEnqueueReadBuffer(command_queue, c_mem_obj, CL_TRUE, 0, 
            3*2 * sizeof(int), C, 0, NULL, NULL);

    // Display the result to the screen
    for(int i = 0; i < 3; i++)
        printf("i= %d , %d
",  i, C[i]);

    // Clean up
    ret = clFlush(command_queue);
    ret = clFinish(command_queue);
    ret = clReleaseKernel(kernel);
    ret = clReleaseProgram(program);

    ret = clReleaseMemObject(c_mem_obj);
    ret = clReleaseCommandQueue(command_queue);
    ret = clReleaseContext(context);

    free(C);
    
    return 0;
}

Did you really mean `__local int *a1 ;`? That declares a pointer.

Based on the way you’re using it, it should be a plain variable:

local int a1;

“local int a1;” does not work without the following extensions:

#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable
#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable

Good news: the code is working now.
But I am even more confused, because some of the if statements work and some don’t.
I marked the changed parts in red.

// 32-bit atomics extensions.
// NOTE(review): no atomic functions are called in this kernel; the pragmas are
// kept only because the poster reported that the kernel did not work without
// them. Confirm on the target platform whether they are needed.
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable
#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable

// Barrier test kernel.
//
// One work-item of each work-group stores 64 into the work-group-shared
// variable a1; after a barrier every work-item copies a1 into C[1] and C[2],
// stores 104 into C[0], and records its own local id in C[local id + 3].
//
// C must have room for at least (work-group size + 3) ints.
__kernel void barrier_example ( __global int *C )
{
    // Index of this work-item within its work-group
    int t = get_local_id(0);

    // Single int shared by all work-items of the work-group
    __local int a1 ;

    // Initialize a1 from exactly one work-item and wait until that store is
    // visible to the whole group. Previously every work-item executed
    // `a1 = 0` with no barrier before the conditional store below, so a
    // work-item that ran its `a1 = 0` late could overwrite the 64.
    if ( t == 0 ) {
        a1 = 0 ;
    }
    barrier ( CLK_LOCAL_MEM_FENCE ) ;

    // barrier test
    // Conditions tried by the poster; the results noted are those reported
    // BEFORE the initialization barrier above was added:
    //if ( t < 1 ) {  // reported not working
    //if ( t == 1 ) { // reported not working
    //if ( t == 0 ) { // reported not working
    //if ( t > 1 ) {  // reported working
    if ( t == 2 ) {   // reported working
        // Empty-bodied loop intended as a delay; a compiler may remove it
        for( int j = 0 ; j < 1000 ; j ++  ) ;
        a1 = 64 ;
    }

    // Make the store to a1 visible to every work-item before it is read
    barrier ( CLK_LOCAL_MEM_FENCE ) ;

    C [ 0 ] = 104 ;
    C [ 1 ] = a1 ;
    C [ 2 ] = a1 ;

    // Each work-item records its local id in its own slot after the first three
    C [ t + 3 ] = t ;

}