Variadic Macros in OpenCL: Where are they supported?

(This post is the same question I posed on StackOverflow, at Variadic Macros in OpenCL: Where are they supported? - Stack Overflow)

I have been trying to find information on the (non-standard) support for variadic macros in OpenCL implementations.

I have access to the following platforms, all of which support variadic macros:

[ul]
[li] Mac OS X, Intel CPU, OpenCL 1.2, driver: 1.1[/li][li] Mac OS X, Intel GPU, OpenCL 1.2, driver: 1.2 (Dec 23 2014 00:18:31)[/li][li] Mac OS X, ATI GPU, OpenCL 1.2, driver: 1.2 (Aug 17 2014 20:27:52)[/li][li] Mac OS X, Nvidia GPU, OpenCL 1.2, driver: 10.2.7 310.41.25f01[/li][/ul]

Could others please check their available implementations so that we can build a map of implementations that support variadic macros?

[HR][/HR]
Here is a self-contained test program that makes use of a variadic macro.


#include <stdlib.h>
#include <stdio.h>

#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif

/*
 * OpenCL C source of the test kernel, handed to clCreateProgramWithSource.
 *
 * KERNEL() is a variadic macro: everything after the kernel name is forwarded
 * through __VA_ARGS__ as the kernel's parameter list. Variadic macros are not
 * required by the OpenCL C specification, so whether this builds depends on
 * the implementation -- which is exactly what this program probes.
 *
 * Each line ends in an explicit "\n": the #define is a preprocessor directive
 * and must be terminated by a newline, otherwise the rest of the program
 * would be swallowed into the macro body.
 */
const char* SOURCE =
"#define KERNEL(name, ...) kernel void name(__VA_ARGS__) \n"
"                                                        \n"
"KERNEL(test, global float* input, global float* output) \n"
"{                                                       \n"
"    int i = get_global_id(0);                           \n"
"    output[i] = input[i];                               \n"
"}                                                       \n"
"                                                        \n"
;

/* Device selector: non-zero requests a GPU device, zero requests a CPU device. */
enum { GPU = 1 };

int main(int argc, char** argv)
{
    int err;
      
    cl_float input[16];
    cl_float output[16];
    
    size_t global = 16;
    size_t local = 16;
    
    cl_device_id device_id;
    cl_context context;
    cl_command_queue command_queue;
    cl_program program;
    cl_kernel kernel;
    cl_mem input_buf;
    cl_mem output_buf;
    
    err = clGetDeviceIDs(NULL, GPU ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, &device_id, NULL);
    if(err != CL_SUCCESS)
        return EXIT_FAILURE;
    
    context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
    if(err != CL_SUCCESS)
        return EXIT_FAILURE;
    
    command_queue = clCreateCommandQueue(context, device_id, 0, &err);
    if(err != CL_SUCCESS)
        return EXIT_FAILURE;
    
    program = clCreateProgramWithSource(context, 1, &SOURCE, NULL, &err);
    if(err != CL_SUCCESS)
        return EXIT_FAILURE;
    
    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        size_t len;
        char buffer[2048];
        
        printf("error: build program
");
        clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
        printf("%s
", buffer);
        
        return EXIT_FAILURE;
    }
    
    kernel = clCreateKernel(program, "test", &err);
    if(err != CL_SUCCESS)
        return EXIT_FAILURE;
    
    input_buf = clCreateBuffer(context, CL_MEM_READ_ONLY, 16*sizeof(cl_float), NULL, NULL);
    output_buf = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 16*sizeof(cl_float), NULL, NULL);
    if(!input_buf || !output_buf)
        return EXIT_FAILURE;
    
    err = clEnqueueWriteBuffer(command_queue, input_buf, CL_TRUE, 0, 16*sizeof(cl_float), input, 0, NULL, NULL);
    if(err != CL_SUCCESS)
        return EXIT_FAILURE;
    
    err = 0;
    err |= clSetKernelArg(kernel, 0, sizeof(cl_mem), &input_buf);
    err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output_buf);
    if(err != CL_SUCCESS)
        return EXIT_FAILURE;
    
    err = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
    if(err != CL_SUCCESS)
        return EXIT_FAILURE;
    
    clFinish(command_queue);
    
    err = clEnqueueReadBuffer(command_queue, output_buf, CL_TRUE, 0, 16*sizeof(cl_float), output, 0, NULL, NULL );
    if(err != CL_SUCCESS)
        return EXIT_FAILURE;
    
    clReleaseMemObject(input_buf);
    clReleaseMemObject(output_buf);
    clReleaseProgram(program);
    clReleaseKernel(kernel);
    clReleaseCommandQueue(command_queue);
    clReleaseContext(context);
    
    return EXIT_SUCCESS;
}

I have dropped this in AMD CodeXL 1.7 and it compiled successfully on the kernel analyzer.

Thanks MaxDZ8 for testing, could you quickly add some details about the system you have tried this on?

I was able to test on RedHat Enterprise Linux 5 with Nvidia’s CUDA SDK 4.0, and it also supports variadic macros.