Cannot create a context

Hello all,

This is the issue that I’m having. I’m running Arch Linux on a 6 year old laptop. That’s ok, I can use my CPU for the tasks at hand. The problem is that when I try to create the context, my application segfaults. The code:

// matvec.c:
#define PROGRAM_FILE "matvec.cl"
#define KERNEL_FUNC "matvec_mult"

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>

#ifdef MAC
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif

int main(int argc, char * argv[])
{
  printf("Start of program...

");

  cl_platform_id platform;
  cl_device_id device;
  cl_context context;
  cl_command_queue queue;
  cl_int i;
  cl_int err;
  cl_program program;
  cl_kernel kernel;
  cl_mem mat_buff;
  cl_mem vec_buff;
  cl_mem res_buff;

  FILE * program_handle;

  char * program_buffer;
  char * program_log;
  size_t program_size;
  size_t log_size;
  size_t work_units_per_kernel = 4;

  float mat[16];
  float vec[4];
  float result[4];
  float correct[4] = {0.0f, 0.0f, 0.0f, 0.0f};

  printf("Variables declared...

");

  // data initialization.
  for(i = 0; i < 16; i++)
  {
    mat[i] = i * 2.0f;
  }

  for(i = 0; i < 4; i++)
  {
    vec[i] = i * 3.0f;
    correct[0] += mat[i]      * vec[i];
    correct[1] += mat[i + 4]  * vec[i];
    correct[2] += mat[i + 8]  * vec[i];
    correct[3] += mat[i + 12] * vec[i];
  }

  printf("Data initialized...

");

  // set the platform, device and context.
  clGetPlatformIDs(1, & platform, NULL);
  // uncomment either one of the lines below in order to run the *.cl code on
  //  either the CPU or GPU.
  //clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, & device, NULL);
  clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, & device, NULL);
  context = clCreateContext(NULL, 1, & device, NULL, NULL, & err);

  printf("Set the platforms...

");

  // read the *.cl program file.
  program_handle = fopen(PROGRAM_FILE, "r");
  fseek(program_handle, 0, SEEK_END);
  program_size = ftell(program_handle);
  rewind(program_handle);
  program_buffer = (char * )malloc(program_size + 1);
  program_buffer[program_size] = '\0';
  fread(program_buffer, sizeof(char), program_size, program_handle);
  fclose(program_handle);

  printf("Read the *.cl file...

");

  // compile the program.
  program = clCreateProgramWithSource(context, 1,
    (const char ** ) & program_buffer, & program_size, & err);
  free(program_buffer);
  clBuildProgram(program, 0, NULL, NULL, NULL, NULL);

  printf("Compiled the program...

");

  // create a kernel and a queue.
  kernel = clCreateKernel(program, KERNEL_FUNC, & err);
  queue = clCreateCommandQueue(context, device, 0, & err);

  mat_buff = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
    sizeof(float) * 16, mat, & err);
  vec_buff = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
    sizeof(float) * 4, vec, & err);
  res_buff = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * 4,
    NULL, & err);

  printf("The kernel and the queue have been created...

");

  // set kernel arguments.
  clSetKernelArg(kernel, 0, sizeof(cl_mem), & mat_buff);
  clSetKernelArg(kernel, 1, sizeof(cl_mem), & vec_buff);
  clSetKernelArg(kernel, 2, sizeof(cl_mem), & res_buff);

  printf("Kernel arguments have been set...

");

  // execute the kernel.
  clEnqueueNDRangeKernel(queue, kernel, 1, NULL, & work_units_per_kernel,
    NULL, 0, NULL, NULL);

  clEnqueueReadBuffer(queue, res_buff, CL_TRUE, 0, sizeof(float) * 4, result,
    0, NULL, NULL);

  if((result[0] == correct[0]) && (result[1] == correct[1])
    && (result[2] == correct[2]) && (result[3] == correct[3]))
  {
    printf("Matrix-vector multiplication succesful!
");
  }
  else
  {
    printf("Matrix-vecotr multiplication unsuccesful!
");
  }

  clReleaseMemObject(mat_buff);
  clReleaseMemObject(vec_buff);
  clReleaseMemObject(res_buff);
  clReleaseKernel(kernel);
  clReleaseCommandQueue(queue);
  clReleaseProgram(program);
  clReleaseContext(context);

  return 0;
}

This is the kernel code:

/*
 * Each work-item takes the float4 of `matrix` selected by its global id in
 * dimension 0, forms the dot product with the first float4 of `vector`, and
 * stores the scalar in `result` at the same index.
 */
__kernel void matvec_mult(__global float4 * matrix,
                          __global float4 * vector,
                          __global float  * result)
{
  const int row = get_global_id(0);
  const float4 row_values = matrix[row];
  const float4 operand = vector[0];

  result[row] = dot(row_values, operand);
}

This is what happens when I run it:

% ./matvec
Start of program...

Variables declared...

Data initialized...

zsh: segmentation fault (core dumped)  ./matvec

Using printf, I traced the problem down to where the clCreateContext call is made. But, I don’t understand why. And this is how I do the compilation:

% gcc -lOpenCL matvec.c -o matvec

When I tried to insert the -g flag, it wouldn’t compile:

% gcc -lOpenCL matvec.c -o -g matvec
matvec: In function `__x86.get_pc_thunk.bx':
(.text+0x30): multiple definition of `__x86.get_pc_thunk.bx'
/usr/lib/gcc/i686-pc-linux-gnu/4.9.2/../../../crti.o:(.gnu.linkonce.t.__x86.get_pc_thunk.bx+0x0): first defined here
matvec: In function `_fini':
(.fini+0x0): multiple definition of `_fini'
/usr/lib/gcc/i686-pc-linux-gnu/4.9.2/../../../crti.o:(.fini+0x0): first defined here
matvec: In function `data_start':
(.data+0x0): multiple definition of `__data_start'
/usr/lib/gcc/i686-pc-linux-gnu/4.9.2/../../../crt1.o:(.data+0x0): first defined here
matvec: In function `data_start':
(.data+0x4): multiple definition of `__dso_handle'
/usr/lib/gcc/i686-pc-linux-gnu/4.9.2/crtbegin.o:(.data+0x0): first defined here
matvec:(.rodata+0x4): multiple definition of `_IO_stdin_used'
/usr/lib/gcc/i686-pc-linux-gnu/4.9.2/../../../crt1.o:(.rodata.cst4+0x0): first defined here
matvec: In function `_start':
(.text+0x0): multiple definition of `_start'
/usr/lib/gcc/i686-pc-linux-gnu/4.9.2/../../../crt1.o:(.text+0x0): first defined here
matvec:(.rodata+0x0): multiple definition of `_fp_hw'
/usr/lib/gcc/i686-pc-linux-gnu/4.9.2/../../../crt1.o:(.rodata+0x0): first defined here
matvec: In function `main':
(.text+0xfb): multiple definition of `main'
/tmp/ccPEYT4R.o:matvec.c:(.text+0x0): first defined here
matvec: In function `_init':
(.init+0x0): multiple definition of `_init'
/usr/lib/gcc/i686-pc-linux-gnu/4.9.2/../../../crti.o:(.init+0x0): first defined here
/usr/lib/gcc/i686-pc-linux-gnu/4.9.2/crtend.o:(.tm_clone_table+0x0): multiple definition of `__TMC_END__'
matvec:(.data+0x8): first defined here
/usr/bin/ld: error in matvec(.eh_frame); no .eh_frame_hdr table will be created.
collect2: error: ld returned 1 exit status

I’m going through the book OpenCL in Action… fyi…

Some more info. After outputting some of the debug information, this is the problem that I have:

% ./matvec
Start of program...

Variables declared...

Data initialized...

The platform returned: 0
The device returned:   -1217555276

This is the code that I’m working with:

  printf("Data initialized...

");

  // set the platform, device and context.
  clGetPlatformIDs(1, & platform, NULL);

  printf("The platform returned: %d
", platform);

  // uncomment either one of the lines below in order to run the *.cl code on
  //  either the CPU or GPU.
  //clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, & device, NULL);
  clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, & device, NULL);

  printf("The device returned:   %d
", device);

  context = clCreateContext(NULL, 1, & device, NULL, NULL, & err);

  printf("The context returned:  %d
", context);

  printf("Set the platforms...

");

Perhaps my OpenCL is not properly configured/installed?

I think you should look at the error codes coming out of your OpenCL library calls and make sure none of them are failing. If they are failing, perhaps the particular error code will give you some clue as to why.

Why do you create a CPU context? Maybe you could try GPU or ALL. I only have experience with CL_DEVICE_TYPE_ALL.

I do it like this:
http://paste.ofcode.org/VT43fdW6PSjy9rKUsexNW9