Results 1 to 2 of 2

Thread: Final Sum Strategy

  1. #1
    Newbie
    Join Date
    Jan 2018
    Posts
    1

    Final Sum Strategy

    Hello, for my class project I need to implement a sum reduction over an array of 1k or more floats. I would like to compute the final sum by calling the kernel "X" times, but I just can't understand how to do it right. If I have 512 work groups, how many times do I have to call it, and why? Which functions should I use to call it again? How can I add the returned value and send it again?

    Part of main code:

    error = clGetPlatformIDs(2, cp_Platform, NULL); //array with two devices

    error = clGetDeviceIDs(cp_Platform[1], CL_DEVICE_TYPE_GPU, 1, &Device_ID, NULL); // cp_platform[1] = Nvidia GPU

    context = clCreateContext(NULL, 1, &Device_ID, NULL, NULL, &error); // creating openCL context ----> error 2

    queue = clCreateCommandQueue(context, Device_ID, 0, &error); // creating command queue, executing openCL context on device cp_Platform[1] ****

    program = clCreateProgramWithSource(context, 1, (const char **)& kernelSource, (const size_t *)&source_size, &error); //this function creates a program object for this specific openCL

    error = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); //compiles and links a program executable from the program source

    kernel = clCreateKernel(program, "GPUfunction", &error); //creating kernel object

    d_a = clCreateBuffer(context, CL_MEM_READ_ONLY, n * sizeof(float), NULL, NULL);

    d_b = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float), NULL, NULL);

    error = clEnqueueWriteBuffer(queue, d_a, CL_TRUE, 0, n * sizeof(float), h_a, 0, NULL, NULL); //Enqueue commands to write to a buffer object from host memory.

    error |= clEnqueueWriteBuffer(queue, d_b, CL_TRUE, 0, sizeof(float), h_s, 0, NULL, NULL); //Enqueue commands to write to a buffer object from host memory.

    error |= clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_a); //Used to set the argument value for a specific argument of a kernel.

    error |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_b);

    error |= clSetKernelArg(kernel, 2, sizeof(int), &n);

    error |= clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalSize, &localSize, 0, NULL, NULL); // Enqueues a command to execute a kernel on a device.

    clFinish(queue);

    clEnqueueReadBuffer(queue, d_b, CL_TRUE, 0, sizeof(float), h_s, 0, NULL, NULL); ////writing data from the device (d_b) to host(h_b)



    Kernel code:

    /*
     * Per-work-group sum reduction.
     *
     * Each work-item loads one element of vec into local memory (0 when its
     * global id is >= n), the work-group folds those values into a single
     * sum, and work-item 0 stores that sum in outPutSum[get_group_id(0)].
     *
     * vec        input values, at least n floats
     * outPutSum  one float per work-group; receives the group's partial sum
     * n          number of valid elements in vec
     *
     * The work-group size must not exceed 512 (the size of the local scratch
     * array). It does not have to be a power of two.
     */
    __kernel void GPUfunction(__global float *vec, __global float *outPutSum, int n)
    {
        __local float tempSum[512];

        int globalID = get_global_id(0);
        int tid = get_local_id(0);
        int active = get_local_size(0); /* number of slots that still hold partial sums */
        int half;

        /* Work-items past the end of the input contribute 0. */
        if (globalID < n)
        {
            tempSum[tid] = vec[globalID];
        }
        else
        {
            tempSum[tid] = 0;
        }

        /*
         * Fold the upper part of the active range onto the lower part until a
         * single slot remains. Rounding the split point up keeps the middle
         * element of an odd-sized range in place instead of dropping it; for
         * power-of-two sizes this is the usual halving sequence.
         */
        while (active > 1)
        {
            half = (active + 1) / 2;

            /* Writes from the load above / the previous step must be visible first. */
            barrier(CLK_LOCAL_MEM_FENCE);

            if (tid < active / 2)
            {
                tempSum[tid] += tempSum[tid + half];
            }

            active = half;
        }

        /* tempSum[0] was last written by work-item 0 itself, so no extra barrier is needed. */
        if (tid == 0)
        {
            outPutSum[get_group_id(0)] = tempSum[0];
        }
    }

  2. #2
    Senior Member
    Join Date
    Apr 2015
    Posts
    310
    If I have 512 work groups, how many times do I have to call it?
    This means the problem size is reduced by a factor of 512* after each iteration. At some point it will be <= 512, which means that is the last iteration.

    * Hint: you need to think about the case where it is not divisible by 512.

    Which functions should I use to call it again?
    You simply swap the buffers you used in the previous iteration and do this part again:

    error |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_b); // rebind argument 1, the kernel's output buffer (outPutSum)

    error |= clSetKernelArg(kernel, 2, sizeof(int), &n); // pass the element count again as argument 2

    error |= clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalSize, &localSize, 0, NULL, NULL); // Enqueues a command to execute a kernel on a device.
    Also, the size of the buffer d_b should be proportional to the number of work-groups.
    Last edited by Salabar; 01-12-2018 at 02:28 AM.

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •  
Proudly hosted by Digital Ocean