Hi Friends,
Well I beginner in OpenCL…
I am trying to sum a list of num… But I am getting Error…
Error: clBuildProgram(-11)
Please, See my code and help me to Solve it…
<code cpp>
#include <iostream>
#include <cstdlib>
#include <fstream>
#include <string>
//#include <CL/cl.h>
#include <stdio.h>
#include <stdlib.h>
#ifdef APPLE
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
#define MAT_SIZE 4096
using namespace std;
void err_check( int err, string err_code ) {
if ( err != CL_SUCCESS ) {
cout << "Error: " << err_code << “(” << err << “)” << endl;
exit(-1);
}
}
int main()
{
cl_platform_id platform_id = NULL;
cl_device_id device_id = NULL;
cl_context context = NULL;
cl_command_queue command_queue = NULL;
cl_program program = NULL;
cl_kernel kernel = NULL;
cl_uint ret_num_devices;
cl_uint ret_num_platforms;
cl_int err;
float mat_a[ MAT_SIZE ];
for ( cl_int i = 0; i < MAT_SIZE; i++ ) {
mat_a[i] = i;
}
// Step 01: Get platform/device information
err = clGetPlatformIDs( 1, &platform_id, &ret_num_platforms );
err_check( err, "clGetPlatformIDs" );
// Step 02: Get information about the device
err = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices );
err_check( err, "clGetDeviceIDs" );
// Step 03: Create OpenCL Context
context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &err );
err_check( err, "clCreateContext" );
// Step 04: Create Command Queue
command_queue = clCreateCommandQueue( context, device_id, 0, &err );
err_check( err, "clCreateCommandQueue" );
// Step 05: Create memory objects and tranfer the data to memory buffer
cl_mem idata, odata;
idata = clCreateBuffer( context, CL_MEM_READ_ONLY, MAT_SIZE * sizeof(float), NULL, &err );
err = clEnqueueWriteBuffer( command_queue, idata, CL_TRUE, 0, MAT_SIZE * sizeof(float), mat_a, 0, NULL, NULL );
err_check( err, "clEnqueueWriteBuffer" );
odata = clCreateBuffer(context, CL_MEM_READ_WRITE, ( (MAT_SIZE/256)*sizeof(float) ), NULL, &err);
// Step 06: Read kernel file
ifstream file("par_sum_kernel.cl");
string prog( istreambuf_iterator<char>( file ), ( istreambuf_iterator<char>() ) );
const char *source_str = prog.c_str();
// Step 07: Create Kernel program from the read in source
program = clCreateProgramWithSource( context, 1, (const char **) &source_str, 0, &err );
err_check( err, "clCreateProgramWithSource" );
// Step 08: Build Kernel Program
err = clBuildProgram( program, 1, &device_id, NULL, NULL, NULL );
err_check( err, "clBuildProgram" );
// Step 09: Create OpenCL Kernel
kernel = clCreateKernel( program, "sum", &err );
err_check( err, "clCreateKernel" );
// Step 10: Set OpenCL kernel argument
err = clSetKernelArg( kernel, 0, sizeof( cl_mem ), (void *) &idata );
err_check( err, "clSetKernelArg" );
err = clSetKernelArg( kernel, 1, sizeof( cl_mem ), (void *) &odata );
err_check( err, "clSetKernelArg" );
// Step 11: Execute OpenCL kernel in data parallel
size_t GWsize[] = { MAT_SIZE, 1, 1 };
size_t LWsize[] = {256 , 1, 1};
clEnqueueNDRangeKernel( command_queue, kernel, 1, NULL, GWsize, LWsize, 0, 0, 0 );
err_check( err, "clEnqueueNDRangeKernel" );
//-----------------
err = clSetKernelArg( kernel, 0, sizeof( cl_mem ), (void *) &idata );
err_check( err, “clSetKernelArg” );
err = clSetKernelArg( kernel, 1, sizeof( cl_mem ), (void *) &odata );
err_check( err, "clSetKernelArg" );
LWsize[0] = (MAT_SIZE/256);
GWsize[0] = 1;
clEnqueueNDRangeKernel( command_queue, kernel, 1, NULL, GWsize, LWsize, 0, 0, 0 );
err_check( err, "clEnqueueNDRangeKernel" );
//--------------------
// Step 12: Read (Transfer result) from the memory buffer
float mat_b[LWsize[0]];
err = clEnqueueReadBuffer( command_queue, odata, CL_TRUE, 0, MAT_SIZE * sizeof(float), mat_b, 0, NULL, NULL );
// Step 13: Free objects
err = clFlush( command_queue );
err = clFinish( command_queue );
err = clReleaseKernel( kernel );
err = clReleaseProgram( program );
err = clReleaseMemObject( idata );
err = clReleaseMemObject( odata );
err = clReleaseCommandQueue( command_queue );
err = clReleaseContext( context );
// Display result
cout<<mat_b[0];
return 0;
}
</code >
Kernel Code:
<code cpp>
__kernel void sum( __global float *idata, __global float *odata )
{
int gid = get_global_id(0);
int lid = get_local_id(0);
int bid = get_group_id(0);
__local float sdata[get_num_groups(0)];
sdata[lid] = idata[gid];
barrier(CLK_LOCAL_MEM_FENCE);
for( int dist = get_local_size(0); dist>0; dist/=2 )
{
if(lid < dist){
sdata[lid] += sdata[lid + dist];
barrier(CLK_LOCAL_MEM_FENCE);
}
}
if(lid == 0)
odata[bid] += sdata[0];
}
</code>