Results 1 to 2 of 2

Thread: Want to write from global to local and local to global memory

  1. #1
    Newbie
    Join Date
    Mar 2018
    Location
    Greensboro, NC
    Posts
    2

    Want to write from global to local and local to global memory

    Hi,
    I am very new in OpenCL. I am trying to write from global to local and then local to global memory on Intel FPGA. But I am not getting the right answer. I got all output values equal zero. Could anyone please tell me what happened?
    Thanks in advance.

    Kernel code:
    __kernel void g_l_g(__global int* restrict in, __global int* restrict out) {
    local int lmem [32];
    int gi = get_global_id(0);
    int li = get_local_id(0);
    int res = in[gi];
    #pragma unroll
    for ( int i = 0; i<32; i++){
    lmem[li] = res;
    res >>=1;}
    barrier(CLK_GLOBAL_MEM_FENCE);
    res = 0;
    #pragma unroll
    for ( int i = 0; i<32; i++){
    res ^= lmem[li];}
    out[gi] = res;}

    Host:

    unsigned N = 64; // problem size
    unsigned lmem = 32; // problem size
    const unsigned num_block_rows = N / lmem;
    for(unsigned i = 0; i < num_devices; ++i) {
    queue[i] = clCreateCommandQueue(context, device[i], CL_QUEUE_PROFILING_ENABLE, &status);
    checkError(status, "Failed to create command queue");
    const char *kernel_name = "g_l_g";
    kernel[i] = clCreateKernel(program, kernel_name, &status);
    checkError(status, "Failed to create kernel");
    n_per_device[i] = num_block_rows / num_devices; // number of elements handled by this device
    if(i < (num_block_rows % num_devices)) {
    n_per_device[i]++;
    }

    n_per_device[i] *= lmem;
    input_a_buf[i] = clCreateBuffer(context, CL_MEM_READ_ONLY,
    n_per_device[i] * sizeof(float), NULL, &status);
    checkError(status, "Failed to create buffer for input A");
    output_buf[i] = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
    n_per_device[i] * sizeof(float), NULL, &status);
    checkError(status, "Failed to create buffer for output");
    }
    return true;
    }


    if(num_devices == 0) {
    checkError(-1, "No devices");
    }
    input_a.reset(num_devices);
    output.reset(num_devices);
    ref_output.reset(num_devices);
    for(unsigned i = 0; i < num_devices; ++i) {
    input_a[i].reset(n_per_device[i]);
    output[i].reset(n_per_device[i]);
    ref_output[i].reset(n_per_device[i]);
    for(unsigned j = 0; j < n_per_device[i]; ++j) {
    input_a[i][j] = rand_float();
    ref_output[i][j] = input_a[i][j];
    }

    cl_int status;
    const double start_time = getCurrentTimestamp();
    scoped_array<cl_event> kernel_event(num_devices);
    scoped_array<cl_event> finish_event(num_devices);

    for(unsigned i = 0; i < num_devices; ++i) {
    cl_event write_event[1];
    status = clEnqueueWriteBuffer(queue[i], input_a_buf[i], CL_FALSE,
    0, n_per_device[i] * sizeof(float), input_a[i], 0, NULL, &write_event[0]);
    checkError(status, "Failed to transfer input A");
    unsigned argi = 0;
    status = clSetKernelArg(kernel[i], argi++, sizeof(cl_mem), &input_a_buf[i]);
    checkError(status, "Failed to set argument %d", argi - 1);
    status = clSetKernelArg(kernel[i], argi++, sizeof(cl_mem), &output_buf[i]);
    checkError(status, "Failed to set argument %d", argi - 1);
    const size_t global_work_size = n_per_device[i];
    const size_t local_work_size[1] = {32};
    status = clEnqueueNDRangeKernel(queue[i], kernel[i], 1, NULL,
    &global_work_size, local_work_size, 1, write_event, &kernel_event[i]);
    checkError(status, "Failed to launch kernel")
    status = clEnqueueReadBuffer(queue[i], output_buf[i], CL_FALSE,
    0, n_per_device[i] * sizeof(float), output[i], 1, &kernel_event[i], &finish_event[i]);
    checkError(status, "Failed to read output matrix");
    clReleaseEvent(write_event[0]);}

  2. #2
    Newbie
    Join Date
    Mar 2018
    Location
    Greensboro, NC
    Posts
    2
    I figured it out. Thanks, everybody.

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •  
Proudly hosted by Digital Ocean