Results 1 to 5 of 5

Thread: program error

  1. #1

    program error

    //program of vector addition between two array on gpu----------------

    #include "stdafx.h"
    #include <stdio.h>
    #include <stdlib.h>
    #include "CL/cl.h"

    // OpenCL source code
    const char* OpenCLSource[] = {
    "__kernel void VectorAdd(__global int* c, __global int* a,__global int* b)",
    " // Index of the elements to add \n",
    " unsigned int n = get_global_id(0);",
    " // Sum the níth element of vectors a and b and store in c \n",
    " c[n] = a[n] + b[n];",
    // Some interesting data for the vectors
    int InitialData1[20] = {37,50,54,50,56,0,43,43,74,71,32,36,16,43,56,100,5 0,25,15,17};
    int InitialData2[20] = {35,51,54,58,55,32,36,69,27,39,35,40,16,44,55,14,5 8,75,18,15};
    // Number of elements in the vectors to be added
    #define SIZE 2048
    // Main function
    // ************************************************** *******************
    int main(void)
    // Two integer source vectors in Host memory
    int HostVector1[SIZE], HostVector2[SIZE];
    // Initialize with some interesting repeating data
    for(int c = 0; c < SIZE; c++)
    HostVector1[c] = InitialData1[c%20];
    HostVector2[c] = InitialData2[c%20];
    // Create a context to run OpenCL on our CUDA-enabled NVIDIA GPU
    cl_context GPUContext = clCreateContextFromType(0, CL_DEVICE_TYPE_GPU,
    // Get the list of GPU devices associated with this context
    size_t ParmDataBytes;
    clGetContextInfo(GPUContext, CL_CONTEXT_DEVICES, 0, NULL, &ParmDataBytes);
    cl_device_id* GPUDevices = (cl_device_id*)malloc(ParmDataBytes);
    clGetContextInfo(GPUContext, CL_CONTEXT_DEVICES, ParmDataBytes, GPUDevices, NULL);

    // Create a command-queue on the first GPU device
    cl_command_queue GPUCommandQueue = clCreateCommandQueue(GPUContext,
    GPUDevices[0], 0, NULL);
    // Allocate GPU memory for source vectors AND initialize from CPU memory
    cl_mem GPUVector1 = clCreateBuffer(GPUContext, CL_MEM_READ_ONLY |
    CL_MEM_COPY_HOST_PTR, sizeof(int) * SIZE, HostVector1, NULL);
    cl_mem GPUVector2 = clCreateBuffer(GPUContext, CL_MEM_READ_ONLY |
    CL_MEM_COPY_HOST_PTR, sizeof(int) * SIZE, HostVector2, NULL);
    // Allocate output memory on GPU
    cl_mem GPUOutputVector = clCreateBuffer(GPUContext, CL_MEM_WRITE_ONLY,
    sizeof(int) * SIZE, NULL, NULL);
    // Create OpenCL program with source code
    cl_program OpenCLProgram = clCreateProgramWithSource(GPUContext, 7,
    OpenCLSource, NULL, NULL);
    // Build the program (OpenCL JIT compilation)
    clBuildProgram(OpenCLProgram, 0, NULL, NULL, NULL, NULL);
    // Create a handle to the compiled OpenCL function (Kernel)
    cl_kernel OpenCLVectorAdd = clCreateKernel(OpenCLProgram, "VectorAdd", NULL);
    // In the next step we associate the GPU memory with the Kernel arguments
    clSetKernelArg(OpenCLVectorAdd, 0, sizeof(cl_mem),(void*)&GPUOutputVector);
    clSetKernelArg(OpenCLVectorAdd, 1, sizeof(cl_mem), (void*)&GPUVector1);
    clSetKernelArg(OpenCLVectorAdd, 2, sizeof(cl_mem), (void*)&GPUVector2);
    // Launch the Kernel on the GPU
    size_t WorkSize[1] = {SIZE}; // one dimensional Range
    clEnqueueNDRangeKernel(GPUCommandQueue, OpenCLVectorAdd, 1, NULL,
    WorkSize, NULL, 0, NULL, NULL);
    // Copy the output in GPU memory back to CPU memory
    int HostOutputVector[SIZE];
    clEnqueueReadBuffer(GPUCommandQueue, GPUOutputVector, CL_TRUE, 0,
    SIZE * sizeof(int), HostOutputVector, 0, NULL, NULL);
    // Cleanup
    // Print out the results
    for (int Rows = 0; Rows < (SIZE/20); Rows++, printf("\n")){
    for(int c = 0; c <20; c++){
    printf("%c",(char)HostOutputVector[Rows * 20 + c]);
    return 0;}

  2. #2

    Re: program error

    this program giving the error CXX0076
    on line---------->

    cl_command_queue GPUCommandQueue = clCreateCommandQueue(GPUContext,
    GPUDevices[0], 0, NULL);

    how it can be solved?

  3. #3
    Junior Member
    Join Date
    Sep 2012

    Re: program error

    Hm the error is not very helpful.

    Code :
    // Includes
    #include <stdio.h>
    #include <stdlib.h>
    #include <stdbool.h>
    #include <CL/cl.h>
    const char* programSource =
    	"__kernel					\n"
    	"void vecadd(	__global int *A,	     \n"
    	"				__global int *B, \n"
    	"				__global int *C)	   \n"
    	"{						\n"
    	"	int idx = get_global_id(0);	   \n"
    	"	C[idx] = A[idx] + B[idx];	   \n"
    	"}					\n";
    int main()
    	// This code executes on the OpenCL host
    	// Host Data
    	int *A = NULL;		// Input Array
    	int *B = NULL;		// Input Array
    	int *C = NULL;		// Output Array
    	// Elements in each array
    	const int elements = 20;
    	// compute size of the data
    	size_t datasize = sizeof(int)*elements;
    	// Allocate space for input/output data
    	A = (int*)malloc(datasize);
    	B = (int*)malloc(datasize);
    	C = (int*)malloc(datasize);
    	//initialize the input data
    	int i;
    	for (i = 0; i < elements; i++)
    		A[i] = i;
    		B[i] = i;
    	// Use this to check the output of each API call
    	cl_int status;
    	cl_uint numPlatforms = 0;
    	cl_platform_id *platforms = NULL;
    	cl_uint numDevices = 0;
    	cl_device_id *devices = 0;
    	cl_context context = NULL;
    	cl_command_queue cmdQueue;
    	cl_kernel kernel = NULL;
    	cl_program program = NULL;
    	// Uses to recieve the number of platforms
    	status = clGetPlatformIDs(0,NULL, &numPlatforms);
    	// Allocate enough space for each platform
    	platforms = (cl_platform_id*)malloc(numPlatforms*sizeof(cl_platform_id));
    	// Fill in platforms
    	status = clGetPlatformIDs(numPlatforms, platforms, NULL);
    	// Retrieve the number of devices present
    	status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, 0, NULL, &numDevices);
    	// Allocate enough space for each device
    	devices = (cl_device_id*)malloc(numDevices*sizeof(cl_device_id));
    	// Fill in devices
    	status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, numDevices, devices, NULL);
    	// Create a context and associate it with the devices
    	context = clCreateContext(NULL,numDevices, devices, NULL, NULL, &status);
    	// Create a command queue and associate it with the device you want to execute on
    	cmdQueue = clCreateCommandQueue(context,devices[0], 0, &status);
    	cl_mem bufferA; // Input array on the device
    	cl_mem bufferB; // Input array on the device
    	cl_mem bufferC; // Output array on the device
    	// Create buffer objects that will contain the data from the arrays
    	bufferA = clCreateBuffer(context, CL_MEM_READ_ONLY, datasize, NULL, &status);
    	bufferB = clCreateBuffer(context, CL_MEM_READ_ONLY, datasize, NULL, &status);
    	bufferC = clCreateBuffer(context, CL_MEM_WRITE_ONLY, datasize, NULL, &status);
    	// Write input arrays to the device buffers
    	status = clEnqueueWriteBuffer(cmdQueue, bufferA, CL_FALSE, 0, datasize, A, 0, NULL, NULL);
    	status = clEnqueueWriteBuffer(cmdQueue, bufferB, CL_FALSE, 0, datasize, A, 0, NULL, NULL);
    	// Create a program
    	program = clCreateProgramWithSource(context, 1, (const char**)&programSource, NULL, &status);
    	//Build the program for the devices
    	status = clBuildProgram(program, numDevices, devices, NULL, NULL, NULL);
    	// Create kernel from the vector addition function
    	kernel = clCreateKernel(program, "vecadd", &status);
    	// Associate the input and output buffers with the kernel
    	status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferA);
    	status |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferB);
    	status |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &bufferC);
    	// Define an index space (global work size) of work items for execution
    	// A workgroup size (local work size) is not required but can be used
    	size_t globalWorkSize[1];
    	// There are 'elements' work-items
    	globalWorkSize[0] = elements;
    	// Execute the kernel
    	status = clEnqueueNDRangeKernel(cmdQueue, kernel, 1 , NULL, globalWorkSize, NULL, 0, NULL, NULL);
    	clEnqueueReadBuffer(cmdQueue, bufferC, CL_TRUE, 0, datasize, C, 0 , NULL, NULL);
    	// Verify the output
    	bool result = true;
    	for (i = 0; i < elements; i++)
    		if(C[i] != i+i)
    			result = false;
    		printf("Output is correct\n");
    		printf("Output is incorrect\n");
    	int c;
    	// Print out the results
    	for (c = 0; c < 20; c++)
    		printf("%d ", C[c]);
    	// Free OpenCl resources
    	// Free host resources
    	return (0);

    Here's my implementation. Runs fine. You can check for the differences. Sorry but i don't have time to do so now. Try to use the build-in OpenCL error code to localize the problem. (I also didn't do this in this implementation)

  4. #4

    Re: program error

    this error is giving for GPUDevices[0] now i m not getting that what is the problem regarding gpudevices?
    if any suggestion

  5. #5
    Junior Member
    Join Date
    Sep 2012

    Re: program error

    Quote Originally Posted by jai
    this error is giving for GPUDevices[0] now i m not getting that what is the problem regarding gpudevices?
    if any suggestion
    OpenCL has its own error codes. Try to use them to figure out whats going wrong.
    And compare both sourcecodes to find differences.

Similar Threads

  1. Program Build Error
    By dlw in forum OpenCL
    Replies: 5
    Last Post: 06-28-2011, 01:34 PM

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
Proudly hosted by Digital Ocean