Results 1 to 3 of 3

Thread: Matrix Addition - what is wrong?

  1. #1

    Matrix Addition - what is wrong?

    Hello,

    I am fairly new to OpenCL and I tried to write this small test program for matrix addition (don't be confused by the kernel function name).

    The code runs successfully, however the output matrix is full of zeros instead of the proper result 3.

    Could You please help me find the mistake in my code?

    Thanks


    #include <stdlib.h>
    #include <stdio.h>

    #include <CL/cl.h>


    // OpenCL C source of the element-wise addition kernel, one string per line.
    // The strings are handed to clCreateProgramWithSource, which concatenates
    // them; each ends in '\n' so compiler diagnostics carry usable line numbers.
    // The function must be qualified with __kernel, otherwise it is an ordinary
    // device function and clCreateKernel cannot look it up by name.
    // (The kernel is named MatMul for historical reasons; it performs addition.)
    const char *OpenCLSource[] = {
    "__kernel void MatMul(__global int* matAA, __global int* matBB, __global int* matCC)\n",
    "{\n",
    " unsigned int i = get_global_id(0);\n",
    " matCC[i] = matAA[i] + matBB[i];\n",
    "}\n"
    };


    // Main function
    // ************************************************** *******************
    int main(int argc, char *argv[])
    {
    int clerror = CL_SUCCESS;

    cl_mem matAA;
    cl_mem matBB;
    cl_mem matCC;

    const int dsize = 16 * 16;

    int matA[dsize];
    int matB[dsize];
    int matC[dsize];

    for(int i = 0; i < dsize; i++)
    {
    //matA[i] = matB[i] = rand();
    matA[i] = 1;
    matB[i] = 2;
    }

    // Query platform ID
    cl_platform_id platform;
    clGetPlatformIDs (1, &platform, NULL);

    // Setup context properties
    cl_context_properties props[3];
    props[0] = (cl_context_properties)CL_CONTEXT_PLATFORM;
    props[1] = (cl_context_properties)platform;
    props[2] = (cl_context_properties)0;

    // Create a context to run OpenCL on our CUDA-enabled NVIDIA GPU
    cl_context GPUContext = clCreateContextFromType(props, CL_DEVICE_TYPE_GPU,NULL, NULL, NULL);

    // Get the list of GPU devices associated with this context
    size_t ParmDataBytes;
    clGetContextInfo(GPUContext, CL_CONTEXT_DEVICES, 0, NULL, &ParmDataBytes);
    cl_device_id* GPUDevices = (cl_device_id*)malloc(ParmDataBytes);
    clGetContextInfo(GPUContext, CL_CONTEXT_DEVICES, ParmDataBytes, GPUDevices, NULL);

    // Create a command-queue on the first GPU device
    cl_command_queue GPUCommandQueue = clCreateCommandQueue(GPUContext, GPUDevices[0], 0, NULL);

    // Allocate GPU memory for source vectors AND initialize from CPU memory
    matAA = clCreateBuffer(GPUContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * dsize, matA, NULL);
    matBB = clCreateBuffer(GPUContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * dsize, matB, NULL);
    matCC = clCreateBuffer(GPUContext, CL_MEM_WRITE_ONLY, sizeof(int) * dsize, NULL, NULL);

    // Create OpenCL program with source code
    cl_program OpenCLProgram = clCreateProgramWithSource(GPUContext, 18, OpenCLSource, NULL, NULL);

    // Build the program (OpenCL JIT compilation)
    clBuildProgram(OpenCLProgram, 0, NULL, NULL, NULL, NULL);

    // Create a handle to the compiled OpenCL function (Kernel)
    cl_kernel matMulKernel = clCreateKernel(OpenCLProgram, "MatMul", NULL);

    size_t global_work_size[1];
    size_t local_work_size[1];

    global_work_size[0] = dsize;
    local_work_size[0] = dsize;

    // In the next step we associate the GPU memory with the Kernel arguments
    clSetKernelArg(matMulKernel, 0, sizeof(cl_mem), (void*)&matA);
    clSetKernelArg(matMulKernel, 1, sizeof(cl_mem), (void*)&matB);
    clSetKernelArg(matMulKernel, 2, sizeof(cl_mem), (void*)&matC);

    // Launch the Kernel on the GPU
    clEnqueueNDRangeKernel(GPUCommandQueue, matMulKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);

    // Copy the output in GPU memory back to CPU memory
    clEnqueueReadBuffer(GPUCommandQueue, matCC, CL_TRUE, 0, global_work_size[0], matC, 0, NULL, NULL);

    // Print out the results
    for (int i = 0; i < 10; i++)
    {
    printf("%d\n", matC[i]);
    }

    // Cleanup
    free(GPUDevices);
    clReleaseKernel(matMulKernel);
    clReleaseProgram(OpenCLProgram);
    clReleaseCommandQueue(GPUCommandQueue);
    clReleaseContext(GPUContext);
    clReleaseMemObject(matAA);
    clReleaseMemObject(matBB);
    clReleaseMemObject(matCC);

    getchar();

    return 0;
    }

  2. #2
    Senior Member
    Join Date
    May 2010
    Location
    Toronto, Canada
    Posts
    845

    Re: Matrix Addition - what is wrong?

    I'm almost surprised that the code didn't crash.

    This is always the first step you should follow: check the error code returned by all functions. Currently the code is not checking for any errors.

    Second, the issue appears to be here:

    Code :
    clSetKernelArg(matMulKernel, 0, sizeof(cl_mem), (void*)&matA);
    clSetKernelArg(matMulKernel, 1, sizeof(cl_mem), (void*)&matB);
    clSetKernelArg(matMulKernel, 2, sizeof(cl_mem), (void*)&matC);

    Do you see it now? matA, matB and matC are your local arrays, not the CL buffers. What you were trying to do is this:

    Code :
    clSetKernelArg(matMulKernel, 0, sizeof(cl_mem), (void*)&matAA);
    clSetKernelArg(matMulKernel, 1, sizeof(cl_mem), (void*)&matBB);
    clSetKernelArg(matMulKernel, 2, sizeof(cl_mem), (void*)&matCC);

    I recommend using clearly different names for local variables and for CL objects to avoid this error.
    Disclaimer: Employee of Qualcomm Canada. Any opinions expressed here are personal and do not necessarily reflect the views of my employer. LinkedIn profile.

  3. #3

    Re: Matrix Addition - what is wrong?

    Thank you very much for the reply. In the end it crashed, just as you said.

    However there was a mistake a bit earlier. In the definition of the kernel the name of the function was not preceded by the keyword __kernel so the clCreateKernel function did not create the right kernel and everything afterwards just passed without computing anything.

    Problem solved!

Similar Threads

  1. Replies: 0
    Last Post: 06-30-2012, 11:24 PM
  2. matrix addition problem
    By bubu in forum OpenCL
    Replies: 6
    Last Post: 04-23-2011, 02:29 PM

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •  
Proudly hosted by Digital Ocean