passing array of typedef'd structs to kernel

So here’s the deal: I’ve been having trouble with this for about two days.
I have a program I want to use a typedef struct in. These structs need to go to a kernel, where some calculation is done.
I have narrowed the problem down to the following very simple code:
main:


#include <stdio.h>
#include <stdlib.h>
#include <conio.h>
#include <math.h>
 
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
 
#define MAX_kernelSize (0x100000)

/* Pair of integer coordinates; main() fills an array of these and copies it
 * into the device buffer that the kernel reads. */
typedef struct {
    int x, y;
} Coord;
 


int main(void) {

    int i;
    const int LIST_SIZE = 100;
	FILE *fp;
    const char *kernelSource;
	size_t kernelSize;
    cl_platform_id platformID = NULL;
    cl_device_id deviceID = NULL;   
    cl_uint numDevices;
    cl_uint numPlatforms;
	cl_int ret;
	cl_context context;
	cl_command_queue commandQueue;
	cl_mem memA;
	cl_mem memB;
	cl_program program;
	cl_kernel kernel;
	size_t global_item_size = LIST_SIZE;
    size_t local_item_size = 1;
    int *B = (int*)calloc(LIST_SIZE, sizeof(int));
	Coord *A = (Coord*)calloc(LIST_SIZE, sizeof(Coord));

	for(i = 0; i < LIST_SIZE; i++)
	{
		A[i].x = i;
		A[i].y = LIST_SIZE - i;

		B[i] = 0;
	}

	fp = fopen("kernel.cl", "r");
    if (!fp) {
        fprintf(stderr, "Failed to load kernel.
");
        exit(1);
    }
    kernelSource = (char*)malloc(MAX_kernelSize);
    kernelSize = fread( kernelSource, 1, MAX_kernelSize, fp);
    fclose( fp );
	
    
	ret = clGetPlatformIDs(1, &platformID, &numPlatforms);
    ret = clGetDeviceIDs( platformID, CL_DEVICE_TYPE_DEFAULT, 1, &deviceID, &numDevices);
 
    context = clCreateContext( NULL, 1, &deviceID, NULL, NULL, &ret);
 
    commandQueue = clCreateCommandQueue(context, deviceID, 0, &ret);
 
    memB = clCreateBuffer(context, CL_MEM_READ_ONLY, LIST_SIZE * sizeof(int), NULL, &ret);
    memA = clCreateBuffer(context, CL_MEM_WRITE_ONLY, LIST_SIZE * sizeof(Coord), NULL, &ret);
 
    ret = clEnqueueWriteBuffer(commandQueue, memA, CL_TRUE, 0, LIST_SIZE * sizeof(Coord), A, 0, NULL, NULL);
     
    program = clCreateProgramWithSource(context, 1, (const char **)&kernelSource, (const size_t *)&kernelSize, &ret);
 
    ret = clBuildProgram(program, 1, &deviceID, NULL, NULL, NULL);
 
    kernel = clCreateKernel(program, "test", &ret);
 
    ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&memA);
    ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&memB);
 
    ret = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, &global_item_size, &local_item_size, 0, NULL, NULL);
 
    ret = clEnqueueReadBuffer(commandQueue, memB, CL_TRUE, 0, LIST_SIZE * sizeof(int), B, 0, NULL, NULL);
 
    for(i = 0; i < LIST_SIZE; i++)
		printf("%d + %d = %d
", A[i].x, A[i].y, B[i]);
 
    ret = clFlush(commandQueue);
    ret = clFinish(commandQueue);
    ret = clReleaseKernel(kernel);
    ret = clReleaseProgram(program);
    ret = clReleaseMemObject(memA);
    ret = clReleaseMemObject(memB);
    ret = clReleaseCommandQueue(commandQueue);
    ret = clReleaseContext(context);
    free(A);
    free(B);

	getch();
    return 0;
}

kernel:


/* The kernel source is compiled on its own, so it needs its own definition
 * of Coord; it must match the host-side struct (two ints, x then y). */
typedef struct {
    int x;
    int y;
} Coord;

/*
 * test: each work-item handles one element, selected by its global id:
 *       B[i] = A[i].x + A[i].y
 *
 * A  input array of Coord (only read)
 * B  output array of int  (only written)
 */
__kernel void test(__global const Coord *A, __global int *B)
{
    int i = get_global_id(0);
    B[i] = A[i].x + A[i].y;
}

The problem is that when I print B, I get an array of zeros :stuck_out_tongue:
I’m still a beginner in OpenCL, just starting out, so I think I have misunderstood something or made some other mistake. But I really can’t think of anything else…
So please help? :slight_smile:

Did you check whether any of the function calls returns an error code?

But if a call returned an error, wouldn’t that also break the program? (I’m developing in Visual Studio 2008.)
And how exactly do I check for errors? I know those function calls return an error code that is stored in ‘ret’, but how do I specifically check for errors?

You might send the error code to a switch which converts them into a string and print it, something like this for example:


    // Reports the outcome of an OpenCL call.
    //
    // errMsg  status code returned by the OpenCL call
    // at      text printed directly after the error name (e.g. a location)
    //
    // Returns true without printing when errMsg is CL_SUCCESS; otherwise
    // writes the symbolic error name followed by `at` to std::cout and
    // returns false. Codes not listed below are reported as "unknown error".
    bool checkError(cl_int errMsg, const char *at)
    {
        const char *label;

        // Expands to a case that selects the constant's own spelling as label.
        #define CL_ERR_CASE(code) case code: label = #code; break;

        switch(errMsg)
        {
            case CL_SUCCESS: return true;
            CL_ERR_CASE(CL_DEVICE_NOT_FOUND)
            CL_ERR_CASE(CL_DEVICE_NOT_AVAILABLE)
            CL_ERR_CASE(CL_COMPILER_NOT_AVAILABLE)
            CL_ERR_CASE(CL_MEM_OBJECT_ALLOCATION_FAILURE)
            CL_ERR_CASE(CL_OUT_OF_RESOURCES)
            CL_ERR_CASE(CL_OUT_OF_HOST_MEMORY)
            CL_ERR_CASE(CL_PROFILING_INFO_NOT_AVAILABLE)
            CL_ERR_CASE(CL_MEM_COPY_OVERLAP)
            CL_ERR_CASE(CL_IMAGE_FORMAT_MISMATCH)
            CL_ERR_CASE(CL_IMAGE_FORMAT_NOT_SUPPORTED)
            CL_ERR_CASE(CL_BUILD_PROGRAM_FAILURE)
            CL_ERR_CASE(CL_MAP_FAILURE)
            CL_ERR_CASE(CL_MISALIGNED_SUB_BUFFER_OFFSET)
            CL_ERR_CASE(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST)
            CL_ERR_CASE(CL_INVALID_VALUE)
            CL_ERR_CASE(CL_INVALID_DEVICE_TYPE)
            CL_ERR_CASE(CL_INVALID_PLATFORM)
            CL_ERR_CASE(CL_INVALID_DEVICE)
            CL_ERR_CASE(CL_INVALID_CONTEXT)
            CL_ERR_CASE(CL_INVALID_QUEUE_PROPERTIES)
            CL_ERR_CASE(CL_INVALID_COMMAND_QUEUE)
            CL_ERR_CASE(CL_INVALID_HOST_PTR)
            CL_ERR_CASE(CL_INVALID_MEM_OBJECT)
            CL_ERR_CASE(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR)
            CL_ERR_CASE(CL_INVALID_IMAGE_SIZE)
            CL_ERR_CASE(CL_INVALID_SAMPLER)
            CL_ERR_CASE(CL_INVALID_BINARY)
            CL_ERR_CASE(CL_INVALID_BUILD_OPTIONS)
            CL_ERR_CASE(CL_INVALID_PROGRAM)
            CL_ERR_CASE(CL_INVALID_PROGRAM_EXECUTABLE)
            CL_ERR_CASE(CL_INVALID_KERNEL_NAME)
            CL_ERR_CASE(CL_INVALID_KERNEL_DEFINITION)
            CL_ERR_CASE(CL_INVALID_KERNEL)
            CL_ERR_CASE(CL_INVALID_ARG_INDEX)
            CL_ERR_CASE(CL_INVALID_ARG_VALUE)
            CL_ERR_CASE(CL_INVALID_ARG_SIZE)
            CL_ERR_CASE(CL_INVALID_KERNEL_ARGS)
            CL_ERR_CASE(CL_INVALID_WORK_DIMENSION)
            CL_ERR_CASE(CL_INVALID_WORK_GROUP_SIZE)
            CL_ERR_CASE(CL_INVALID_WORK_ITEM_SIZE)
            CL_ERR_CASE(CL_INVALID_GLOBAL_OFFSET)
            CL_ERR_CASE(CL_INVALID_EVENT_WAIT_LIST)
            CL_ERR_CASE(CL_INVALID_EVENT)
            CL_ERR_CASE(CL_INVALID_OPERATION)
            CL_ERR_CASE(CL_INVALID_GL_OBJECT)
            CL_ERR_CASE(CL_INVALID_BUFFER_SIZE)
            CL_ERR_CASE(CL_INVALID_MIP_LEVEL)
            CL_ERR_CASE(CL_INVALID_GLOBAL_WORK_SIZE)
            CL_ERR_CASE(CL_INVALID_PROPERTY)
            default: label = "unknown error"; break;
        }

        #undef CL_ERR_CASE

        std::cout << label << at << std::endl;

        return false;
    }

right… How can I get it to recognize the bool correctly? I’ve already searched different sites and some programming guides, but I can’t find it. As far as I understood including stdbool.h has to be the solution, but when compiling Visual Studio says it can’t open it. I also can’t find the file by myself. (maybe good to mention that I’m working with a Nvidia GPU, so I have to make use of the CUDA toolkit etc.)

Oh, and while we’re at it, can somebody explain this to me:

std::cout << errorMessage << at << std::endl;

I have seen this kind of statement before, but I’m still using printf, and I’d like to have it explained.
C is a bit new for me, since we were only taught C# at school (“this is the new standard everybody is going to use, C and C++ are almost not used anymore”, yeah, until you come into the real business world :P)

So I’ve been able to solve my problem in a piece of test code whose only purpose was to test the typedef struct. That worked, so I was all happy, and modified it to do what it was supposed to do.
But now there’s a problem with clEnqueueNDRangeKernel: it stubbornly keeps saying that there is a fault with the local work-group size, whatever I try…
main code: http://pastebin.com/S6R6t3iF
kernel code: http://pastebin.com/Mrhr8B4v

The problem it throws is CL_INVALID_WORK_GROUP_SIZE

can somebody lend me a hand in solving this?

why can’t I edit my posts…? :expressionless:

I also found some solutions which state that you have to query the maximum local work size of the kernel (more complex kernels have a smaller maximum work size). I tried that as well, but it gives 1024, the same as clGetDeviceInfo gives.

Look harder! You’ve got the local worksize and global worksize swapped around.

localworksize = 128, global worksize = 16

but why should it be swapped around? this is, for future expansions, what I want, why isn’t it possible then?

Re-read the manual about work-groups and it should make perfect sense.

I think nozted is recommending that you have a look at the man page for clEnqueueNDRangeKernel, and perhaps section 3.2 of the OpenCL 1.1 specification as well.