Help: Adding two arrays

I’m trying to write my first program, which adds two arrays, but it’s not working. It fails at func().wait() with the error “Access violation reading location 0x00000000”, and I don’t know why. What am I doing wrong?


#define __NO_STD_VECTOR
#define __NO_STD_STRING

#include <malloc.h>

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>

#include <cl.hpp>

using namespace std;
using namespace cl;

// OpenCL C source of the vectorAdd kernel: each work-item adds one pair of
// elements, resultado[i] = a[i] + b[i], where i is its global id.
//
// Adjacent string literals are concatenated with nothing in between, so every
// line carries an explicit "\n". Without it "void" and "vectorAdd" fuse into a
// single identifier and the "//" comment swallows the rest of the kernel,
// which makes the program fail to build.
const char *kernelSrc =	"__kernel void\n"
						"vectorAdd(__global const int *a,\n"
						"          __global const int *b,\n"
						"          __global int *resultado)\n"
						"{\n"
						"    // Vector element index\n"
						"    int nIndex = get_global_id(0);\n"
						"    resultado[nIndex] = a[nIndex] + b[nIndex];\n"
						"}\n";


int main(void)
{	
	const int size = 4;

	// get platform
	vector<Platform> platformList;
	Platform::get(&platformList);
	cout << "Platform number is: " << platformList.size() << endl;
	
	// show platform info
	cl::string platformInfo;
	platformList[0].getInfo(CL_PLATFORM_VERSION, &platformInfo);
	cout << "Platform Info: " << platformInfo.c_str() << endl;
	
	// get available devices
	vector<Device> devices;
	platformList[0].getDevices(CL_DEVICE_TYPE_GPU, &devices);
	cout << devices.size() << " device(s) found!" << endl;

	// show device info
	cl::string name;
	devices[0].getInfo(CL_DEVICE_NAME, &name);
	cout << "Device name: " << name.c_str() << endl;
	
	// create OpenCL context
	Context hContext(devices, 0, NULL, NULL, NULL);

	// queue
	CommandQueue queue(hContext, devices[0], 0, 0);

	// Load source
	cl::Program::Sources source(1, std::make_pair(kernelSrc,strlen(kernelSrc)));   

	// create and build program
	Program program(hContext, source, NULL);
	program.build(devices, NULL, NULL, NULL);

	// Get the kernel so we can use it
	Kernel kernel(program, "vectorAdd", NULL);

	// arrays on host
	int *a = new int[size];
	int *b = new int[size];
	int *c = new int[size];
	
	// initialize
	a[0]=1; a[1]=2; a[2]=20; a[3]=8;
	b[0]=29; a[1]=28; a[2]=10; a[3]=22;
	
	// result should be {30, 30, 30, 30}
	
	// Allocate memory on device and copy the arrays there
	Buffer bufferA(hContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * size, a, NULL);
	Buffer bufferB(hContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * size, b, NULL);

	// Create buffer to get output on kernel	
	Buffer bufferResultado(hContext, CL_MEM_WRITE_ONLY, sizeof(int)*size, NULL, NULL);

	// set arguments
	kernel.setArg(0, bufferA);
	kernel.setArg(1, bufferB);
	kernel.setArg(2, bufferResultado);

	// execute kernel
	NDRange global(size);
	NDRange local(2);
	KernelFunctor func = kernel.bind(queue, global, local);	

	func().wait(); // error here.................................................................................................


	cout << "Done!" << endl;
	
	// copy from device to host
	queue.enqueueReadBuffer(bufferResultado, CL_TRUE, 0, sizeof(int)*size,  c, NULL, NULL);

	for(int i=0; i<size; i++)
		cout << c[i] << endl;	

	getchar();
	return EXIT_SUCCESS;
}

I am not sure whether I am answering you correctly, since I am a newbie myself, but the one thing I didn’t understand is why you set local(2) just before you execute the kernel. You have 4 elements and your global work size is 4, so why is the local work size 2? Changing that to 0 might solve your problem. This is just a thought…