Hi all
I’m relatively new to the OpenCL, therefore producing tons of errors and have lots of questions.
would really appreciate if you’d help me
I am using OpenCL for Nvidia NVS 5200m quadro graphic card
basically I want to go through the grey scaled image of siye 352x288 and check if the value of it is equal to 15.0.
here’s my host code:
#include <stdio.h>
#include <stdlib.h>
#include <CL/cl.h>
#include <cv.h>
#include <create2dmap.h>
#include <Node.h>
#include <RandomTree.h>
#define MAX_SOURCE_SIZE (0x100000)
void runopencl(std::vector<Node*> Forest, cv::Mat DepthImage, int width, int height)
{
cl_platform_id platform_id = NULL;
cl_device_id device_id = NULL;
cl_context context = NULL;
cl_command_queue command_queue = NULL;
cl_mem resultBuf = NULL;
cl_mem depthBuf = NULL;
cl_mem matrixBuf = NULL;
cl_program program = NULL;
cl_kernel kernel = NULL;
cl_uint ret_num_devices;
cl_uint ret_num_platforms;
cl_int ret;
int i, j;
float *result = new float[101376];
float *depthArray = new float[101376];
float Matrix[1024][16];
float *MatrixInLine = new float[1024*16];
cl_float16 Matrix16[1024];
for (int y = 0; y < height; y++)
for (int x = 0; x < width; x++)
{depthArray[x+y*width]=DepthImage.at<float>(y,x);
result[x+y*width]=0;}
converttree(Forest, Matrix);
for (int y = 0; y < 1024; y++)
for (int x = 0; x < 16; x++)
MatrixInLine[y*16+x] = Matrix[y][x];
FILE *fp;
const char fileName[] = "D:\\USERDATA\\aevaus2\
ewADTF\\ADTFDevelopment\\src\\oclPersonSegmentationFilter\\classifypixel.cl";
size_t source_size;
char *source_str;
// Load kernel source file
fp = fopen(fileName, "r");
if (!fp) {
fprintf(stderr, "Failed to load kernel.
");
exit(1);
}
source_str = (char *)malloc(MAX_SOURCE_SIZE);
source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);
fclose(fp);
// Get Platform/Device Information
ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, &ret_num_devices);
// Create OpenCL Context
context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
// Create command queue
command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
// Create Buffer Object
depthBuf = clCreateBuffer(context, CL_MEM_READ_WRITE, 101376*sizeof(float), NULL, &ret);
matrixBuf = clCreateBuffer(context, CL_MEM_READ_WRITE, 1024*16*sizeof(float), NULL, &ret);
resultBuf = clCreateBuffer(context, CL_MEM_READ_WRITE, 101376*sizeof(float), NULL, &ret);
/// Copy input data to the memory buffer
ret = clEnqueueWriteBuffer(command_queue, depthBuf, CL_TRUE, 0, 101376*sizeof(float), depthArray, 0, NULL, NULL);
ret = clEnqueueWriteBuffer(command_queue, matrixBuf, CL_TRUE, 0, 1024*16*sizeof(float), &MatrixInLine, 0, NULL, NULL);
ret = clEnqueueWriteBuffer(command_queue, resultBuf, CL_TRUE, 0, 101376*sizeof(float), result, 0, NULL, NULL);
// Create kernel program from source file
program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret);
ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
// Create data parallel OpenCL kernel
kernel = clCreateKernel(program, "dataParallel", &ret);
// Set OpenCL kernel arguments
ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&depthBuf);
ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&matrixBuf);
ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&resultBuf);
size_t global_item_size[2] = {352,288};
size_t local_item_size[2] = {1,1};
// Execute OpenCL kernel as data parallel
ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, global_item_size, local_item_size, 0, NULL, NULL);
// Transfer result to host
ret = clEnqueueReadBuffer(command_queue, resultBuf, CL_TRUE, 0, 101376*sizeof(float), result, 0, NULL, NULL);
//Finalization
ret = clFlush(command_queue);
ret = clFinish(command_queue);
ret = clReleaseKernel(kernel);
ret = clReleaseProgram(program);
ret = clReleaseMemObject(resultBuf);
ret = clReleaseMemObject(depthBuf);
ret = clReleaseMemObject(matrixBuf);
ret = clReleaseCommandQueue(command_queue);
ret = clReleaseContext(context);
free(source_str);
}
and my simple kernel code
__kernel void dataParallel(__global float* depthImage, __global float* MatrixInLinePtr, __global float* result)
{
int x = get_gloabal_id(0);
int y = get_gloabal_id(1);
if ((x>=352 || y>=288) return ;
if (depthImage[x+y*width]==15.0) result[x+y*width] = 14;
}
as a result I receive error 11, error during building the program, but I don’t get what could be wrong with the kernel
thank you for your attention!