Perhaps more information would help in figuring out the problem, so here is the host program. Note that this is an adaptation of the “Hello World” program, which I’m sure you have all seen online before.
Host Program:
#include <utility> // for pairs
#define __NO_STD_VECTOR // Use cl::vector instead of STL version
#include <CL/cl.hpp>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <iterator>
using namespace std;
// Reports a failed OpenCL call: when the given status is not CL_SUCCESS, the
// definition below prints "::ERROR:: " followed by the supplied name to cerr
// and terminates the process with EXIT_FAILURE. Does nothing on success.
void chkError(cl_int, string);
int main (int argc, char * argv[]) {
// Get a list of available OpenCL Platforms
cl_int err;
cl::vector <cl::Platform> platforms;
cl::Platform::get(&platforms);
// See if there was an error getting the platforms
chkError (platforms.size()!=0 ? CL_SUCCESS : -1,
"cl::Platform::get()");
cout << "Platforms returned from CL: " << platforms.size()
<< endl;
// Get the vendor of the first returned platform
string platformVendor;
platforms[0].getInfo((cl_platform_info)CL_PLATFORM_VENDOR,
&platformVendor);
cout << "Platform Vendor: " << platformVendor << endl;
// Setup the context of our OpenCL program
cl_context_properties cprops[3] =
{CL_CONTEXT_PLATFORM,
(cl_context_properties)(platforms[0])(), 0};
cl::Context context(
CL_DEVICE_TYPE_GPU,
cprops,
NULL,
NULL,
&err);
// Make sure there were no errors
chkError(err, "Context::Context()");
// This creates some memory for our OpenCL program to use
// directly, giving it a pointer to the memory and giving it
// write access
unsigned int * outH = new unsigned int[2];
cl::Buffer outCL(
context,
CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
2,
outH,
&err);
// Make sure there were no errors
chkError(err, "Buffer::Buffer()");
unsigned int * input = new unsigned int[2];
input[0]=555;
input[1]=666;
cl::Buffer inCL(
context,
CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
2,
input,
&err);
// Gives us a list of devices to be used in the context
cl::vector<cl::Device> devices;
devices = context.getInfo<CL_CONTEXT_DEVICES>();
// Make sure there were no errors
chkError(devices.size() > 0 ? CL_SUCCESS : -1,
"cl::Context::getInfo()");
// Open our OpenCL program for reading
ifstream file;
file.open("helloWorld_kernels.cl");
// Make sure there were no errors
chkError(file.is_open() ? CL_SUCCESS : -1,
"ifstream(helloWorld_kernels.cl)");
// Create a string of our source
string prog(istreambuf_iterator<char>(file),
(istreambuf_iterator<char>()));
// Create a source from our source code string
cl::Program::Sources source(
1,
make_pair(prog.c_str(), prog.length()+1));
// Compile and build the program for source for our device
cl::Program program(context, source);
err = program.build(devices,"");
chkError(err, "Program::build()");
// Build a kernel and tell it to start in the hello() method
cl::Kernel kernel(program, "hello", &err);
chkError(err, "Kernel::Kernel()");
// Set the argument at index 0 for our kernel to be the buffer
// we made for the program
err = kernel.setArg(0, outCL);
chkError(err, "Kernel::setArg(0)");
err = kernel.setArg(1, inCL);
chkError(err, "Kernel::setArg(1)");
// Create a queue of work to be done on device[0]
cl::CommandQueue queue(context, devices[0], 0, &err);
chkError(err, "CommandQueue::CommandQueue()");
cl::Event event;
// Enqueue the workitems on the device
err = queue.enqueueNDRangeKernel(
kernel,
cl::NullRange,
cl::NDRange(2),
cl::NDRange(1, 1),
NULL,
&event);
chkError(err, "CommandQueue::enqueueNDRangeKernel()");
// Blocks until all work items have been completed
event.wait();
// Reads the buffer outCL which was updated by the kernel into
// the local variable outH
err = queue.enqueueReadBuffer(
outCL,
CL_TRUE,
0,
2,
outH);
chkError(err, "CommandQueue::enqueueReadBuffer()");
cout << "0: " << outH[0] << endl;
cout << "1: " << outH[1] << endl;
return 0;
}
// Aborts the program when an OpenCL call failed.
//
// status: result code to inspect; anything other than CL_SUCCESS is a failure.
// name:   label printed after "::ERROR:: " on cerr to identify the failing call.
//
// Returns normally on success; on failure it exits with EXIT_FAILURE.
void chkError (cl_int status, string name) {
    // Success path first: nothing to report.
    if (status == CL_SUCCESS) {
        return;
    }

    cerr << "::ERROR:: " << name << endl;
    exit(EXIT_FAILURE);
}
And the full kernel code, once again:
#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
//__constant char hw[] = "Hello World\n";
/*
 * Element-wise copy kernel: each work-item copies the element at its global
 * id (dimension 0) from hw into out.
 *
 * out: destination array in global memory.
 * hw:  source array in global memory.
 *
 * There is no bounds check, so the global work size must not exceed the
 * number of elements in either buffer.
 */
__kernel void hello(__global uint * out, __global uint * hw)
{
    const size_t idx = get_global_id(0);
    out[idx] = hw[idx];
}