Results 1 to 3 of 3

Thread: Error : CL_BUILD_PROGRAM_FAILURE, the source code seems ok

  1. #1
    Join Date
    Aug 2014

    Error : CL_BUILD_PROGRAM_FAILURE, the source code seems ok

    Hi all

    I'm relatively new to OpenCL, so I'm producing tons of errors and have lots of questions.
    I would really appreciate your help.

    I am using OpenCL on an Nvidia Quadro NVS 5200M graphics card.

    Basically, I want to go through a greyscale image of size 352x288 and check whether each pixel value is equal to 15.0.
    here's my host code:

    Code :
    #include <stdio.h>	
    #include <stdlib.h>
    #include <CL/cl.h>	
    #include <cv.h>
    #include <create2dmap.h>
    #include <Node.h>
    #include <RandomTree.h>
    #define MAX_SOURCE_SIZE (0x100000)	
    // Host-side driver as posted: flattens the converted Forest into a float
    // array, loads the kernel source from disk, builds the "dataParallel"
    // kernel, runs it over the depth/matrix/result buffers and reads the
    // result buffer back into `result`.
    // NOTE(review): the braces of this function (and of the blocks inside it)
    // are absent from this listing, so the structure below is read from
    // statement order only.
    // NOTE(review): DepthImage, i, j, Matrix and Matrix16 aside, every `ret`
    // assigned below is overwritten without being inspected.
    void runopencl(std::vector<Node*> Forest, cv::Mat DepthImage, int width, int height)
    	cl_platform_id platform_id = NULL;
    	cl_device_id device_id = NULL;
    	cl_context context = NULL;
    	cl_command_queue command_queue = NULL;
    	cl_mem resultBuf = NULL;	
    	cl_mem depthBuf = NULL;
    	cl_mem matrixBuf = NULL;
    	cl_program program = NULL;
    	cl_kernel kernel = NULL;	
    	cl_uint ret_num_devices;
    	cl_uint ret_num_platforms;
    	cl_int ret;
    	// NOTE(review): i and j are not used anywhere in the code shown.
    	int i, j;
    	// 101376 == 352 * 288, matching the global work size set further down.
    	float *result = new float[101376];
    	float *depthArray = new float[101376];
    	float Matrix[1024][16];
    	float *MatrixInLine = new float[1024*16];
    	// NOTE(review): Matrix16 is declared but not used in the code shown.
    	cl_float16 Matrix16[1024];
    	// NOTE(review): these two loops have no body of their own in the listing,
    	// so as written the converttree call below becomes the loop body.
    	// depthArray is never assigned before it is uploaded; presumably the
    	// statement copying DepthImage into depthArray was lost here - confirm.
    	for (int y = 0; y < height; y++)
    		for (int x = 0; x < width; x++)
    	converttree(Forest, Matrix);
    	// Flatten the 1024x16 matrix row by row into MatrixInLine.
    	for (int y = 0; y < 1024; y++)
    		for (int x = 0; x < 16; x++)
    			MatrixInLine[y*16+x] = Matrix[y][x];
    	FILE *fp;
    	// NOTE(review): this path ends in a directory separator with no file
    	// name; presumably the kernel file name was dropped - confirm.
    	const char fileName[] = "D:\\USERDATA\\aevaus2\\newADTF\\ADTFDevelopment\\src\\oclPersonSegmentationFilter\\";
    	size_t source_size;
    	char *source_str;
    	// Load kernel source file 
    	fp = fopen(fileName, "r");
    	// NOTE(review): no return/exit is visible after the error message, so as
    	// listed the fread below would still run when fp is NULL.
    	if (!fp) {
    		fprintf(stderr, "Failed to load kernel.\n");	
    	source_str = (char *)malloc(MAX_SOURCE_SIZE);
    	source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);
    	// Get Platform/Device Information
    	ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);	
    	ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, &ret_num_devices);
     	// Create OpenCL Context 
    	context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
     	// Create command queue 
    	command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
    	// Create Buffer Object 
    	depthBuf = clCreateBuffer(context, CL_MEM_READ_WRITE, 101376*sizeof(float), NULL, &ret);
    	matrixBuf = clCreateBuffer(context, CL_MEM_READ_WRITE, 1024*16*sizeof(float), NULL, &ret); 
    	resultBuf = clCreateBuffer(context, CL_MEM_READ_WRITE, 101376*sizeof(float), NULL, &ret);
    	/// Copy input data to the memory buffer 
    	ret = clEnqueueWriteBuffer(command_queue,  depthBuf, CL_TRUE, 0, 101376*sizeof(float), depthArray, 0, NULL, NULL);
    	// NOTE(review): &MatrixInLine is the address of the pointer variable, not
    	// of the float data; the other two writes pass the pointer itself.
    	ret = clEnqueueWriteBuffer(command_queue, matrixBuf, CL_TRUE, 0, 1024*16*sizeof(float), &MatrixInLine, 0, NULL, NULL);
    	// NOTE(review): result is uploaded here without having been initialised.
    	ret = clEnqueueWriteBuffer(command_queue, resultBuf, CL_TRUE, 0, 101376*sizeof(float), result, 0, NULL, NULL);
    	// Create kernel program from source file
    	program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret);	
    	// The build status lands in ret but is not checked here. When the build
    	// fails, the compiler log can be fetched with clGetProgramBuildInfo and
    	// CL_PROGRAM_BUILD_LOG.
    	ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
    	// Create data parallel OpenCL kernel 	
    	kernel = clCreateKernel(program, "dataParallel", &ret);
    	// Set OpenCL kernel arguments 
    	ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&depthBuf);
    	ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&matrixBuf);
    	ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&resultBuf);
    	size_t global_item_size[2] = {352,288};
    	size_t local_item_size[2] = {1,1};
    	// Execute OpenCL kernel as data parallel 
    	// NOTE(review): work_dim is passed as 1 although both size arrays have
    	// two entries and the kernel reads get_global_id(1); presumably 2 was
    	// intended - confirm.
    	ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, global_item_size, local_item_size, 0, NULL, NULL);
    	// Transfer result to host 
    	// The read is requested as blocking (CL_TRUE).
    	ret = clEnqueueReadBuffer(command_queue, resultBuf, CL_TRUE, 0, 101376*sizeof(float), result, 0, NULL, NULL);
    	ret = clFlush(command_queue);	
    	ret = clFinish(command_queue);
    	// Release the OpenCL objects created above.
    	// NOTE(review): no fclose(fp), free(source_str) or delete[] of result,
    	// depthArray and MatrixInLine appears in the code shown.
    	ret = clReleaseKernel(kernel);
    	ret = clReleaseProgram(program);
    	ret = clReleaseMemObject(resultBuf);
    	ret = clReleaseMemObject(depthBuf);
    	ret = clReleaseMemObject(matrixBuf);
    	ret = clReleaseCommandQueue(command_queue);
    	ret = clReleaseContext(context);	

    and my simple kernel code

    Code :
    // First version of the kernel, as posted with the build failure.
    // For each (x, y) inside 352x288 it writes 14 to result where the depth
    // value equals 15.0. The kernel braces are absent from this listing.
    __kernel void dataParallel(__global float* depthImage, __global float* MatrixInLinePtr, __global float* result)
    	// NOTE(review): "get_gloabal_id" is misspelled; the corrected kernel
    	// later in the thread calls get_global_id.
    	int x = get_gloabal_id(0);
    	int y = get_gloabal_id(1);
    	// NOTE(review): the parentheses on the next line are unbalanced (two
    	// opening, one closing).
    	if ((x>=352 || y>=288) return ;
    	// NOTE(review): `width` is neither a parameter nor declared in this
    	// kernel; the corrected version uses the literal 352 instead.
    	if (depthImage[x+y*width]==15.0) result[x+y*width] = 14;	

    As a result I receive error -11 (CL_BUILD_PROGRAM_FAILURE), an error while building the program, but I don't see what could be wrong with the kernel.

    thank you for your attention!

  2. #2
    Join Date
    Aug 2014
    Okay, I've fixed the kernel; now it looks like this:

    Code :
    // Second version of the kernel: the identifier and parenthesis errors are
    // gone and the row stride is the literal 352. The kernel braces are absent
    // from this listing.
    // NOTE(review): depthImage and MatrixInLinePtr are now __constant. The host
    // code creates the depth buffer with 101376*sizeof(float) bytes and the
    // matrix buffer with 1024*16*sizeof(float) bytes; the reply below points at
    // CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE (generally 64KB on a GPU) as the
    // likely cause of the reported error.
    __kernel void dataParallel(__constant float* depthImage, __constant float* MatrixInLinePtr, __global float* result)
    	int x = get_global_id(0);
    	int y = get_global_id(1);
    	// Skip work-items outside the 352x288 image.
    	if (x>=352 || y>=288) return ;
    	// Write 14 to the result wherever the depth value equals 15.0.
    	if (depthImage[x+y*352]==15.0) result[x+y*352] = 14;

    and now I receive error -52 (CL_INVALID_KERNEL_ARGS).

  3. #3
    Senior Member
    Join Date
    Oct 2012
    Check the max size of a constant buffer with CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE.
    It is generally 64KB on a GPU, so your buffer is probably too big to fit into a constant buffer.

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
Proudly hosted by Digital Ocean