strange output

hello folks,

[I am a newbie]

I'm working on code that is more complex than what is shown below, and I have run into a problem there; the code below is the ‘heart’ of the problem.

Host file: creates two arrays, startpt and endpt. Array startpt is filled with random values, and endpt := f(startpt) is computed by the kernel.
Problem: I print the buffer endpt after the kernel has finished and it prints only 0s, but I expect values like those in startpt.

Maybe I am not doing something right; if you see anything strange in my code, please point it out.

host file


#include <stdio.h>
#include <stdlib.h>

#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif

#define MAX_SOURCE_SIZE (0x100000)

#define MAX_VECTORS 96
#define NVARS       8


/*
 * Forward declaration only: no definition of, or call to, timeval_subtract
 * appears in this listing.
 * NOTE(review): no header that defines struct timeval is included above, so
 * the struct type named here is only declared within the prototype's own
 * scope -- confirm whether this declaration is still needed.
 */
int timeval_subtract (struct timeval *result,struct timeval *x,struct timeval *y);

int main(void) {
    int i, j, jj,nvars=NVARS,max_vectors=MAX_VECTORS;
    double startpt[NVARS*MAX_VECTORS],endpt[NVARS*MAX_VECTORS];

    // Load the kernel source code into the array source_str
    FILE *fp;
    char *source_str;
    size_t source_size;

    fp = fopen("kernels.cl", "r");
    if (!fp) {
        fprintf(stderr, "Failed to load kernel.
");
        exit(1);
    }
    source_str = (char*)malloc(MAX_SOURCE_SIZE);
    source_size = fread( source_str, 1, MAX_SOURCE_SIZE, fp);
    fclose( fp );

    // Get platform and device information
    cl_platform_id platform_id = NULL;
    cl_device_id device_id = NULL;
    cl_uint ret_num_devices;
    cl_uint ret_num_platforms;

    cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
    if(ret != CL_SUCCESS) {
      printf("clGetPlatformIDs failed
");
      exit(-1);
    }

    ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_GPU, 1,&device_id, &ret_num_devices);
    if(ret != CL_SUCCESS) {
      printf("clGetPlatformIDs failed
");
      exit(-1);
    }


    // Create an OpenCL context
    cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);
    if(ret != CL_SUCCESS || context == NULL) {
      printf("clCreateContext failed
");
      exit(-1);
    }


    // Create a command queue
    cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
    if(ret != CL_SUCCESS || command_queue == NULL) {
      printf("clCreateCommandQueue failed
");
      exit(-1);
    }

    // Create memory buffers on the device for each vector 
    cl_mem startpt_obj = clCreateBuffer(context, CL_MEM_READ_ONLY,NVARS*MAX_VECTORS * sizeof(double), startpt, &ret);
    if(ret != CL_SUCCESS || startpt_obj == NULL) {
      printf("clCreateBuffer failed
");
      exit(-1);
    }

    cl_mem endpt_obj = clCreateBuffer(context, CL_MEM_WRITE_ONLY,NVARS*MAX_VECTORS * sizeof(double), NULL, &ret);
    if(ret != CL_SUCCESS || endpt_obj == NULL) {
      printf("clCreateBuffer failed
");
      exit(-1);
    }

    // Copy the lists A and B to their respective memory buffers
    ret = clEnqueueWriteBuffer(command_queue, startpt_obj, CL_TRUE, 0,NVARS*MAX_VECTORS * sizeof(double), startpt, 0, NULL, NULL);

    // Create a program from the kernel source
    cl_program program = clCreateProgramWithSource(context, 1,(const char **)&source_str, (const size_t *)&source_size, &ret);
    if(ret != CL_SUCCESS) {
      printf("clCreateProgramWithSource failed
");
      exit(-1);
    }

    // Build the program
    ret = clBuildProgram(program, 1, &device_id,"-w", NULL, NULL);

    // If there are build errors, print them to the screen
    if(ret != CL_SUCCESS) {
      printf("Program failed to build.
");
      cl_build_status buildStatus;
      clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_STATUS,
                       sizeof(cl_build_status), &buildStatus, NULL);

      printf("%d

",buildStatus);

      char *buildLog;
      size_t buildLogSize;
      clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG,
                       0, NULL, &buildLogSize);
      buildLog = (char*)malloc(buildLogSize);
      if(buildLog == NULL) {
         perror("malloc");
         exit(-1);
      }
      clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG,
                       buildLogSize, buildLog, NULL);
      buildLog[buildLogSize-1] = '\0';
      printf(" Build Log:
%s
", buildLog);
      free(buildLog);
      exit(0);
    }

    // Create the OpenCL kernel
    cl_kernel kernel = clCreateKernel(program, "multistart", &ret);
    if(ret != CL_SUCCESS) {
       printf("clCreateKernel failed
");
       exit(-1);
    }
    // Set the arguments of the kernel
    ret  = clSetKernelArg(kernel, 0, sizeof(int), &nvars);
    ret |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &startpt_obj);
    ret |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &endpt_obj);
    ret |= clSetKernelArg(kernel, 3, sizeof(int), &max_vectors);
    if(ret != CL_SUCCESS) {
       printf("clSetKernelArg failed
");
       exit(-1);
    }


        for(j = 0; j < NVARS*MAX_VECTORS; j++)
                startpt[j]=rand() / (double)RAND_MAX * 4.0 - 2.0 ;

        size_t global_item_size = MAX_VECTORS;
        size_t local=32;
        ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL,&global_item_size,&local, 0, NULL, NULL);

        if(ret != CL_SUCCESS) {
                printf("clEnqueueNDRangeKernel failed
");
                exit(-1);
        }

        clFinish(command_queue);

        ret = clEnqueueReadBuffer(command_queue, endpt_obj, CL_TRUE, 0,NVARS*MAX_VECTORS * sizeof(double), endpt, 0, NULL, NULL);
printf("%d
",ret);
        if(ret != CL_SUCCESS) {
                printf("ReadBuffer endpt failed
");
                exit(-1);
        }

    // Display the result to the screen
    for(i = 0; i < NVARS*MAX_VECTORS; i++){
         if(i%NVARS==0 && i>0)
                printf(" 
");

         printf("%lf ", startpt[i]);
    }
    printf("

");

    // Display the result to the screen
    for(i = 0; i < NVARS*MAX_VECTORS; i++){
         if(i%NVARS==0 && i>0)
        printf("
");

         printf("%lf ", endpt[i]);
    }
}

kernel file



#pragma OPENCL EXTENSION cl_khr_fp64: enable
/*
 * Absolute-value helper macro. Every use of the argument and the whole
 * expansion are parenthesised so that expressions such as fabs(a - b) or
 * 2.0 * fabs(x) expand correctly. The argument is evaluated more than once,
 * so do not pass expressions with side effects.
 * NOTE(review): OpenCL C already provides a built-in fabs(); this macro
 * shadows it -- confirm that is intended.
 */
#define fabs(x) ((x) >= 0.0 ? (x) : -(x))

/*
 * Copies one vector of nvars doubles from pstartpt to pendpt.
 *
 * Work-item `id` (global id in dimension 0) handles the elements
 * [id*nvars, (id+1)*nvars). Work-items with id >= max_vectors copy nothing.
 *
 * nvars       number of doubles per vector
 * pstartpt    input array, read at the indices above
 * pendpt      output array, written at the same indices
 * max_vectors number of vectors; bounds which work-items do any work
 */
__kernel void multistart(int nvars,__global double *pstartpt,__global double *pendpt,int max_vectors) {

    int id = get_global_id(0);

    /* Work-items beyond the last vector have nothing to copy. */
    if (id >= max_vectors)
        return;

    /* Index this work-item's slice directly instead of scanning from 0. */
    int first = id*nvars;
    int last  = first + nvars;

    for (int j = first; j < last; j++)
        pendpt[j] = pstartpt[j];

}


The problem appears to be this:



ret = clEnqueueWriteBuffer(command_queue, startpt_obj, CL_TRUE, 0,NVARS*MAX_VECTORS * sizeof(double), startpt, 0, NULL, NULL);

[...]

for(j = 0; j < NVARS*MAX_VECTORS; j++)
                startpt[j]=rand() / (double)RAND_MAX * 4.0 - 2.0 ;


You are copying startpt into startpt_obj first, and only later do you store random numbers into startpt. You need to do it the other way around: first initialize startpt with random numbers, and only then call clEnqueueWriteBuffer() to copy that data into startpt_obj.

pffff…thank you sir!