Hello all,
Here is the issue I’m having. I’m running Arch Linux on a 6-year-old laptop. That’s fine, since I can use my CPU as the OpenCL device for the tasks at hand. The problem is that my application segfaults when I try to create the OpenCL context. The host code:
// matvec.c:
#define PROGRAM_FILE "matvec.cl"
#define KERNEL_FUNC "matvec_mult"
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#ifdef MAC
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
int main(int argc, char * argv[])
{
printf("Start of program...
");
cl_platform_id platform;
cl_device_id device;
cl_context context;
cl_command_queue queue;
cl_int i;
cl_int err;
cl_program program;
cl_kernel kernel;
cl_mem mat_buff;
cl_mem vec_buff;
cl_mem res_buff;
FILE * program_handle;
char * program_buffer;
char * program_log;
size_t program_size;
size_t log_size;
size_t work_units_per_kernel = 4;
float mat[16];
float vec[4];
float result[4];
float correct[4] = {0.0f, 0.0f, 0.0f, 0.0f};
printf("Variables declared...
");
// data initialization.
for(i = 0; i < 16; i++)
{
mat[i] = i * 2.0f;
}
for(i = 0; i < 4; i++)
{
vec[i] = i * 3.0f;
correct[0] += mat[i] * vec[i];
correct[1] += mat[i + 4] * vec[i];
correct[2] += mat[i + 8] * vec[i];
correct[3] += mat[i + 12] * vec[i];
}
printf("Data initialized...
");
// set the platform, device and context.
clGetPlatformIDs(1, & platform, NULL);
// uncomment either one of the lines below in order to run the *.cl code on
// either the CPU or GPU.
//clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, & device, NULL);
clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, & device, NULL);
context = clCreateContext(NULL, 1, & device, NULL, NULL, & err);
printf("Set the platforms...
");
// read the *.cl program file.
program_handle = fopen(PROGRAM_FILE, "r");
fseek(program_handle, 0, SEEK_END);
program_size = ftell(program_handle);
rewind(program_handle);
program_buffer = (char * )malloc(program_size + 1);
program_buffer[program_size] = '\0';
fread(program_buffer, sizeof(char), program_size, program_handle);
fclose(program_handle);
printf("Read the *.cl file...
");
// compile the program.
program = clCreateProgramWithSource(context, 1,
(const char ** ) & program_buffer, & program_size, & err);
free(program_buffer);
clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
printf("Compiled the program...
");
// create a kernel and a queue.
kernel = clCreateKernel(program, KERNEL_FUNC, & err);
queue = clCreateCommandQueue(context, device, 0, & err);
mat_buff = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
sizeof(float) * 16, mat, & err);
vec_buff = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
sizeof(float) * 4, vec, & err);
res_buff = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * 4,
NULL, & err);
printf("The kernel and the queue have been created...
");
// set kernel arguments.
clSetKernelArg(kernel, 0, sizeof(cl_mem), & mat_buff);
clSetKernelArg(kernel, 1, sizeof(cl_mem), & vec_buff);
clSetKernelArg(kernel, 2, sizeof(cl_mem), & res_buff);
printf("Kernel arguments have been set...
");
// execute the kernel.
clEnqueueNDRangeKernel(queue, kernel, 1, NULL, & work_units_per_kernel,
NULL, 0, NULL, NULL);
clEnqueueReadBuffer(queue, res_buff, CL_TRUE, 0, sizeof(float) * 4, result,
0, NULL, NULL);
if((result[0] == correct[0]) && (result[1] == correct[1])
&& (result[2] == correct[2]) && (result[3] == correct[3]))
{
printf("Matrix-vector multiplication succesful!
");
}
else
{
printf("Matrix-vecotr multiplication unsuccesful!
");
}
clReleaseMemObject(mat_buff);
clReleaseMemObject(vec_buff);
clReleaseMemObject(res_buff);
clReleaseKernel(kernel);
clReleaseCommandQueue(queue);
clReleaseProgram(program);
clReleaseContext(context);
return 0;
}
This is the kernel code:
/*
 * Matrix-vector product kernel: each work-item computes one element of
 * `result` as the dot product of one float4 of `matrix` with the first
 * float4 of `vector`. The work-item's global id in dimension 0 selects
 * both the float4 read from `matrix` and the element written to `result`.
 */
__kernel void matvec_mult(__global float4 * matrix,
__global float4 * vector,
__global float * result)
{
    const int row = get_global_id(0);
    const float4 row_values = matrix[row];
    const float4 vec_values = vector[0];

    result[row] = dot(row_values, vec_values);
}
This is what happens when I run it:
% ./matvec
Start of program...
Variables declared...
Data initialized...
zsh: segmentation fault (core dumped) ./matvec
Using printf, I traced the problem to the point where clCreateContext is called, but I don’t understand why it crashes there. This is how I compile the program:
% gcc -lOpenCL matvec.c -o matvec
When I tried to insert the -g flag, it wouldn’t compile:
% gcc -lOpenCL matvec.c -o -g matvec
matvec: In function `__x86.get_pc_thunk.bx':
(.text+0x30): multiple definition of `__x86.get_pc_thunk.bx'
/usr/lib/gcc/i686-pc-linux-gnu/4.9.2/../../../crti.o:(.gnu.linkonce.t.__x86.get_pc_thunk.bx+0x0): first defined here
matvec: In function `_fini':
(.fini+0x0): multiple definition of `_fini'
/usr/lib/gcc/i686-pc-linux-gnu/4.9.2/../../../crti.o:(.fini+0x0): first defined here
matvec: In function `data_start':
(.data+0x0): multiple definition of `__data_start'
/usr/lib/gcc/i686-pc-linux-gnu/4.9.2/../../../crt1.o:(.data+0x0): first defined here
matvec: In function `data_start':
(.data+0x4): multiple definition of `__dso_handle'
/usr/lib/gcc/i686-pc-linux-gnu/4.9.2/crtbegin.o:(.data+0x0): first defined here
matvec:(.rodata+0x4): multiple definition of `_IO_stdin_used'
/usr/lib/gcc/i686-pc-linux-gnu/4.9.2/../../../crt1.o:(.rodata.cst4+0x0): first defined here
matvec: In function `_start':
(.text+0x0): multiple definition of `_start'
/usr/lib/gcc/i686-pc-linux-gnu/4.9.2/../../../crt1.o:(.text+0x0): first defined here
matvec:(.rodata+0x0): multiple definition of `_fp_hw'
/usr/lib/gcc/i686-pc-linux-gnu/4.9.2/../../../crt1.o:(.rodata+0x0): first defined here
matvec: In function `main':
(.text+0xfb): multiple definition of `main'
/tmp/ccPEYT4R.o:matvec.c:(.text+0x0): first defined here
matvec: In function `_init':
(.init+0x0): multiple definition of `_init'
/usr/lib/gcc/i686-pc-linux-gnu/4.9.2/../../../crti.o:(.init+0x0): first defined here
/usr/lib/gcc/i686-pc-linux-gnu/4.9.2/crtend.o:(.tm_clone_table+0x0): multiple definition of `__TMC_END__'
matvec:(.data+0x8): first defined here
/usr/bin/ld: error in matvec(.eh_frame); no .eh_frame_hdr table will be created.
collect2: error: ld returned 1 exit status
FYI, I’m working through the book “OpenCL in Action”.