Hi,
i wrote my first OpenCL Program with VS 2008 CLR Forms.
I always get a: “clEnqueueNDRangeKernel Failed: -54” Error and i dont know why.
Can someone take a look at my code?
Its a Visual Studio 2008 Project:
http://free.doublebackslash.net/Studium … OpenCL.rar
Click first on the button “Devices suchen + Infos lesen” and then on the button “Context + CommandQueues erstellen”
For all others without Visual Studio, here is the Code: (OpenCL.lib is linked!)
#include <utility>
#define __NO_STD_VECTOR
#define __NO_STD_STRING
#include <CL/cl.h>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <iterator>
#include <math.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <string.h>
static char vendor_cpu[65536];
static char vendor_gpu[65536];
static std::size_t size;
static cl_uint num_devices_returned;
static cl_device_id devices[2];
static cl_ulong platform;
static cl_ulong frequ, max_par_units, addr_bits;
static cl_int err = CL_SUCCESS;
static cl_uint num_platforms;
static cl_platform_id * platforms;
static bool cpu = false;
static bool gpu = false;
static cl_kernel kernel;
static cl_context context;
static size_t local;
const unsigned int cnBlockSize= 512;
const unsigned int cnBlocks =3;
static size_t cnDimension = cnBlocks * cnBlockSize;
const char * kernelcode = "__kernel void vectorAdd(
"
"__global const float * a,
"
"__global const float * b,
"
"__global float * c)
"
"{
"
" // Vector element index
"
" int nIndex = get_global_id(0);
"
" c[nIndex] = a[nIndex] + b[nIndex];
"
"}
";
private: System::Void bt_getdeviceinfos_Click(System::Object^ sender, System::EventArgs^ e)
{
this->rtb_log->AppendText("##Get Platform ID##
");
err = clGetPlatformIDs(0, NULL, &num_platforms);
platforms = (cl_platform_id*)malloc(sizeof(cl_platform_id) * num_platforms);
clGetPlatformIDs(num_platforms, platforms, NULL); // Get device IDs
cl_platform_id platform_id = platforms[0];
platform = (cl_ulong)platform_id;
this->l_num_platform->Text = "Platform ID: " + platform;
this->rtb_log->AppendText("##Get CPU Devices##
");
err = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_CPU, 1, &devices[0], &num_devices_returned);
this->l_num_cpu->Text = "Anzahl der CPU-Devices: " + num_devices_returned.ToString();
this->rtb_cpu->AppendText(num_devices_returned.ToString() + " CPU Device gefunden
");
//cout << "Fehlercode: " << err << endl;
if(err == 0) cpu = true;
this->rtb_log->AppendText("##Get GPU Devices##
");
err = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, 1, &devices[1], &num_devices_returned);
this->l_num_gpu->Text = "Anzahl der GPU-Devices: " + num_devices_returned.ToString();
this->rtb_gpu->AppendText(num_devices_returned.ToString() + " GPU Device gefunden
");
if(err == 0) gpu = true;
if(cpu == true)
{
this->rtb_log->AppendText("##Get CPU Device Infos##
");
clGetDeviceInfo(devices[0], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(frequ), &frequ, &size);
this->rtb_cpu->AppendText("Maximum clock frequency of the device in MHz: " + frequ.ToString() + "
");
clGetDeviceInfo(devices[0], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(max_par_units), &max_par_units, &size);
this->rtb_cpu->AppendText("The number of parallel compute cores on the OpenCL device: " + max_par_units.ToString() + "
");
clGetDeviceInfo(devices[0], CL_DEVICE_NAME, sizeof(vendor_cpu), vendor_cpu, &size);
this->rtb_cpu->AppendText("Vendor name: " + gcnew String(vendor_cpu) + "
");
clGetDeviceInfo(devices[0], CL_DEVICE_ADDRESS_BITS, sizeof(addr_bits), &addr_bits, &size);
this->rtb_cpu->AppendText("CPU ADDRESS BITS: " + addr_bits.ToString() + "
");
}
if(gpu == true)
{
this->rtb_gpu->AppendText("##Get GPU Device Infos##
");
clGetDeviceInfo(devices[1], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(frequ), &frequ, &size);
this->rtb_gpu->AppendText("Maximum clock frequency of the device in MHz: " + frequ.ToString() + "
");
clGetDeviceInfo(devices[1], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(max_par_units), &max_par_units, &size);
this->rtb_gpu->AppendText("The number of parallel compute cores on the OpenCL device: " + max_par_units.ToString() + "
");
clGetDeviceInfo(devices[1], CL_DEVICE_NAME, sizeof(vendor_gpu), vendor_gpu, &size);
this->rtb_gpu->AppendText("Vendor name: " + gcnew String(vendor_gpu) + "
");
clGetDeviceInfo(devices[1], CL_DEVICE_ADDRESS_BITS, sizeof(addr_bits), &addr_bits, &size);
this->rtb_gpu->AppendText("GPU ADDRESS BITS: " + addr_bits.ToString() + "
");
}
}
private: System::Void bt_contextcq_Click(System::Object^ sender, System::EventArgs^ e)
{
this->rtb_log->AppendText("##Create Context##
");
if(cpu == true && gpu == true)
context = clCreateContext(0, 2, devices, NULL, NULL, &err);
if(cpu == true && gpu == false)
context = clCreateContext(0, 1, devices, NULL, NULL, &err);
if(gpu == true && cpu == false)
context = clCreateContext(0, 1, devices, NULL, NULL, &err);
this->rtb_log->AppendText("##Create CommandQueue's##
");
cl_command_queue queue_cpu, queue_gpu;
if(cpu == true)
queue_cpu = clCreateCommandQueue(context, devices[0], 0, &err);
if(gpu == true)
queue_gpu = clCreateCommandQueue(context, devices[1], 0, &err);
this->rtb_log->AppendText("##Create Program Codes for OpenCL##
");
cl_program program;
size_t kernelsize = strlen(kernelcode);
program = clCreateProgramWithSource(context, 1, (const char**)&kernelcode, &kernelsize, &err);
err = clBuildProgram(program, num_devices_returned, NULL, NULL, NULL, NULL);
if(err != 0)
{
char log[1024] = "";
err = clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, sizeof(log), log, NULL);
MessageBox::Show(gcnew String(log));
}
this->rtb_log->AppendText("##Create Kernel Codes for OpenCL##
");
kernel = clCreateKernel(program, "vectorAdd", &err);
if(err != 0)
MessageBox::Show("clCreateKernel: " + err.ToString());
float * pA = new float[cnDimension];
float * pB = new float[cnDimension];
float * pC = new float[cnDimension];
float * pC1 = new float[cnDimension];
memset(pC, 0, cnDimension * sizeof(float));
memset(pC1, 0, cnDimension * sizeof(float));
// initialize host memory
int i;
for(i=0; i < cnDimension; i++)
{
pA[i] = pC[i] = pC1[i] = 0;
pB[i] = i;
}
// allocate device memory
cl_mem hDeviceMemA, hDeviceMemB, hDeviceMemC;
hDeviceMemA = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, cnDimension * sizeof(cl_float), pA, 0);
hDeviceMemB = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, cnDimension * sizeof(cl_float), pB, 0);
hDeviceMemC = clCreateBuffer(context, CL_MEM_WRITE_ONLY, cnDimension * sizeof(cl_float), 0, 0);
// setup parameter values
err = 0;
err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&hDeviceMemA);
err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&hDeviceMemB);
err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&hDeviceMemC);
if (err != CL_SUCCESS)
{
MessageBox::Show("Error: Failed to set kernel args: " + err.ToString() + "
");
}
// Get the maximum work-group size for executing the kernel on the device
err = clGetKernelWorkGroupInfo(kernel, devices[0], CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL);
if (err != CL_SUCCESS)
{
MessageBox::Show("Error: clGetKernelWorkGroupInfo Failed: " + err.ToString() + "
");
}
// execute kernel
err = clEnqueueNDRangeKernel(queue_cpu, kernel, 1, NULL, (size_t*)(&cnDimension), &local, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
MessageBox::Show("Error: clEnqueueNDRangeKernel Failed: " + err.ToString() + "
");
}
// copy results from device back to host
clEnqueueReadBuffer(queue_cpu, hDeviceMemC, CL_TRUE, 0, cnDimension * sizeof(cl_float),
pC, 0, NULL, NULL);
// wait for command queue
clFinish(queue_cpu);
for(i=0; i < cnDimension; i++)
{
this->rtb_log->AppendText(pC[i].ToString() + "
");
}
}
private: System::Void Form1_Load(System::Object^ sender, System::EventArgs^ e)
{
}
};
}