Hi!
I’m trying to add 2 vectors but the kernel is not executed. I can’t see where the problem is.
I’m running the code on OS X Snow Leopard with the following Makefile:
# Builds the OpenCL vector-addition example and links it against Apple's
# OpenCL framework (OS X only).
CC = gcc
SRC = addition.c
DEST = addition
LDFLAGS = -framework OpenCL
all: $(SRC)
	$(CC) -o $(DEST) $(SRC) $(LDFLAGS)
Here is the code:
/* OpenCL core library */
#include <OpenCL/opencl.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * GPU kernel call: builds the OpenCL "addition" program and runs it so that
 * c receives the element-wise sum of a and b. n is the number of elements
 * in each of the three arrays. Returns -1 when one of the checked OpenCL
 * calls fails; the caller tests the result against -1.
 */
int opencl_call(long* a, long* b, long* c, size_t n);
/*
 * OpenCL C source handed to clCreateProgramWithSource(). It defines a single
 * kernel, "addition", in which each work-item adds one pair of elements:
 * c[i] = a[i] + b[i], with i taken from get_global_id(0).
 *
 * The text is assembled from adjacent string literals; the resulting string
 * is one line of source with single spaces between the statements.
 */
static const char *opencl_code[1] = {
    "__kernel void addition(__constant long* a, __constant long* b, __global long* c) "
    "{ "
    "unsigned int i = get_global_id(0); "
    "c[i] = a[i] + b[i]; "
    "}"
};
/*
 * Program entry point: adds two fixed 5-element vectors through
 * opencl_call() and prints every element of the result array.
 *
 * argc and argv are unused. The process always exits with status 0; when
 * opencl_call() returns -1, "Error !" is printed before the (then possibly
 * unmodified) result array is dumped.
 */
int main(int argc, char** argv)
{
    int i = 0;
    long a[5] = {1, 2, 3, 4, 5};
    long b[5] = {7, 5, 1, 9, 42};
    long c[5] = {0, 0, 0, 0, 0};

    (void)argc;
    (void)argv;

    if (opencl_call(a, b, c, 5) == -1) {
        printf("Error !");
    }

    for (i = 0; i < 5; i++) {
        /* i is an int, so it needs %d; passing it to %ld is undefined behavior. */
        printf("c[%d] = %ld\n", i, c[i]);
    }

    return 0;
}
int opencl_call(long* a, long* b, long* c, size_t n)
{
/* Return Status */
cl_int status = CL_SUCCESS;
size_t device_list_size;
cl_device_id* devices = NULL;
cl_command_queue queue;
cl_command_queue_properties prop = 0;
cl_mem a_buffer, b_buffer, c_buffer;
cl_program program;
cl_kernel kernel;
size_t global_work_size[1], local_work_size[1];
/* Create OpenCL context */
cl_context context = clCreateContextFromType(0, CL_DEVICE_TYPE_GPU, NULL, NULL, &status);
/* Check whether we really have a GPU */
if(status != CL_SUCCESS)
{
printf("Sorry, your GPU is not supported, using CPU instead.
");
context = clCreateContextFromType(0, CL_DEVICE_TYPE_CPU, NULL, NULL, &status);
}
if(status != CL_SUCCESS)
return -1;
/* Check how many GPU we have */
status = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &device_list_size);
if(status != CL_SUCCESS)
return -1;
printf("We have %d devices.
", device_list_size);
/* Get the device list */
devices = (cl_device_id*)malloc(device_list_size);
status = clGetContextInfo(context, CL_CONTEXT_DEVICES, device_list_size, devices, NULL);
if(status != CL_SUCCESS)
return -1;
/* Create command queue */
queue = clCreateCommandQueue(context, devices[0], prop, &status);
if(status != CL_SUCCESS)
return -1;
/* Allocate memory buffers */
a_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, n*sizeof(long), a, &status);
b_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, n*sizeof(long), b, &status);
if(status != CL_SUCCESS)
return -1;
/* Output buffer */
c_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, n*sizeof(long), c, &status);
if(status != CL_SUCCESS)
return -1;
/* Create a CL program using the kernel source */
program = clCreateProgramWithSource(context, 1, opencl_code, NULL, &status);
if(status != CL_SUCCESS)
return -1;
/* Build OpenCL program. */
status = clBuildProgram(program, 1, devices, NULL, NULL, NULL);
if(status != CL_SUCCESS)
return -1;
/* Create OpenCL kernel */
kernel = clCreateKernel(program, "addition", &status);
/* Set kernel arguments */
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &a_buffer);
status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &b_buffer);
status = clSetKernelArg(kernel, 2, sizeof(cl_mem), &c_buffer);
if(status != CL_SUCCESS)
return -1;
/* Kernel execution */
global_work_size[0] = n;
local_work_size[0] = 1;
status = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
status = clEnqueueReadBuffer(queue, c_buffer, CL_TRUE, 0, n*sizeof(long), c, 0, NULL, NULL);
/* Free memory */
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseCommandQueue(queue);
clReleaseContext(context);
}
Thanks.