So here's the problem I have been struggling with for about two days.
I have a program in which I want to use a typedef'd struct. An array of these structs needs to be passed to an OpenCL kernel, where some calculation is done.
I have narrowed the problem down to the following very simple code:
main:
#include <stdio.h>
#include <stdlib.h>
#include <conio.h>
#include <math.h>
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
/* Size in bytes of the buffer main() allocates for the kernel source text;
 * at most this many bytes of kernel.cl are read (0x100000 = 1 MiB). */
#define MAX_kernelSize (0x100000)
/*
 * Pair of integers handed to the OpenCL kernel as one array element.
 * main() fills an array of these and copies it byte-for-byte into a device
 * buffer, so the kernel source must declare a struct with the same member
 * order and types.
 */
typedef struct Coord {
    int x; /* first operand */
    int y; /* second operand */
} Coord;
int main(void) {
int i;
const int LIST_SIZE = 100;
FILE *fp;
const char *kernelSource;
size_t kernelSize;
cl_platform_id platformID = NULL;
cl_device_id deviceID = NULL;
cl_uint numDevices;
cl_uint numPlatforms;
cl_int ret;
cl_context context;
cl_command_queue commandQueue;
cl_mem memA;
cl_mem memB;
cl_program program;
cl_kernel kernel;
size_t global_item_size = LIST_SIZE;
size_t local_item_size = 1;
int *B = (int*)calloc(LIST_SIZE, sizeof(int));
Coord *A = (Coord*)calloc(LIST_SIZE, sizeof(Coord));
for(i = 0; i < LIST_SIZE; i++)
{
A[i].x = i;
A[i].y = LIST_SIZE - i;
B[i] = 0;
}
fp = fopen("kernel.cl", "r");
if (!fp) {
fprintf(stderr, "Failed to load kernel.
");
exit(1);
}
kernelSource = (char*)malloc(MAX_kernelSize);
kernelSize = fread( kernelSource, 1, MAX_kernelSize, fp);
fclose( fp );
ret = clGetPlatformIDs(1, &platformID, &numPlatforms);
ret = clGetDeviceIDs( platformID, CL_DEVICE_TYPE_DEFAULT, 1, &deviceID, &numDevices);
context = clCreateContext( NULL, 1, &deviceID, NULL, NULL, &ret);
commandQueue = clCreateCommandQueue(context, deviceID, 0, &ret);
memB = clCreateBuffer(context, CL_MEM_READ_ONLY, LIST_SIZE * sizeof(int), NULL, &ret);
memA = clCreateBuffer(context, CL_MEM_WRITE_ONLY, LIST_SIZE * sizeof(Coord), NULL, &ret);
ret = clEnqueueWriteBuffer(commandQueue, memA, CL_TRUE, 0, LIST_SIZE * sizeof(Coord), A, 0, NULL, NULL);
program = clCreateProgramWithSource(context, 1, (const char **)&kernelSource, (const size_t *)&kernelSize, &ret);
ret = clBuildProgram(program, 1, &deviceID, NULL, NULL, NULL);
kernel = clCreateKernel(program, "test", &ret);
ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&memA);
ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&memB);
ret = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, &global_item_size, &local_item_size, 0, NULL, NULL);
ret = clEnqueueReadBuffer(commandQueue, memB, CL_TRUE, 0, LIST_SIZE * sizeof(int), B, 0, NULL, NULL);
for(i = 0; i < LIST_SIZE; i++)
printf("%d + %d = %d
", A[i].x, A[i].y, B[i]);
ret = clFlush(commandQueue);
ret = clFinish(commandQueue);
ret = clReleaseKernel(kernel);
ret = clReleaseProgram(program);
ret = clReleaseMemObject(memA);
ret = clReleaseMemObject(memB);
ret = clReleaseCommandQueue(commandQueue);
ret = clReleaseContext(context);
free(A);
free(B);
getch();
return 0;
}
kernel:
/*
 * Device-side declaration of the element type of A. Host typedefs are not
 * visible to the OpenCL compiler, so the struct has to be declared again in
 * the kernel source, with the same member order and types as the host's Coord.
 * NOTE(review): assumes the host's int is 32 bits, like OpenCL C's int.
 */
typedef struct {
    int x;
    int y;
} Coord;

/*
 * Each work-item i stores A[i].x + A[i].y into B[i].
 *
 * A: input array of Coord (a single pointer into global memory; the
 *    previous "struct Coord* *A" was a pointer to a pointer to an undeclared
 *    struct, which does not compile).
 * B: output array of int, one element per work-item.
 * The global work size must not exceed the element count of A and B.
 */
__kernel void test(__global const Coord *A, __global int *B)
{
    int i = get_global_id(0);
    B[i] = A[i].x + A[i].y;
}
The problem is that when I print B, I get an array of zeros.
I'm still a beginner in OpenCL and only just starting out, so I think I have misunderstood something or made some other mistake. But I really can't think of anything else to try…
So please help?