matrix addition problem

Hi!

I’m having some trouble with this code:


#include <oclUtils.h>
#include <stdio.h>
#include <stdlib.h>
#include <tchar.h>
#include <CL/cl.h>
#include <conio.h>

#define N 10

void write(int A[N][N])
{
	int s = 0;
	for (int i = 0; i < N; i++)
	{
		for (int j = 0; j < N; j++)
		{
			if (s > N-1)
			{
				printf("
");
				s = 0;
			}
			printf(" %d", A[i][j]);
			s++;
		}
	}
	printf("
 
");
}

const char* OpenCLSource1[] = {
"__kernel void matrixAdd(__global int* c, __global int* a, __global int* b)
",
"{
",
"       unsigned int x = get_global_id(0);
",
"       unsigned int y = get_global_id(1);
",
"       
",
"       c[x] = a[x] + b[x];
",
"}
"
};

int main(int argc, const char** argv) 
{
	int host_vector1[N][N], host_vector2[N][N];
	int host_vector[N][N];
  
	for(int i = 0; i < N; i++)
    {
		for(int j = 0; j < N; j++)
		{
			host_vector1[i][j] = j;
			host_vector2[i][j] = j;
		}
    }

	write(host_vector1);
	write(host_vector2);
 
    cl_int error = 0;
 
    cl_uint numPlatforms;
    cl_platform_id* clSelectedPlatformID = NULL;
    clGetPlatformIDs(0, NULL, &numPlatforms);
    clSelectedPlatformID = (cl_platform_id*)malloc(sizeof(cl_platform_id)*numPlatforms);
    error = clGetPlatformIDs(numPlatforms, clSelectedPlatformID, NULL);
    if(error != CL_SUCCESS) 
        return 0;
 
    cl_uint ciDeviceCount;
    cl_device_id* clDevices =  NULL;
    error = clGetDeviceIDs(clSelectedPlatformID[0], CL_DEVICE_TYPE_GPU, 0, NULL, &ciDeviceCount);

    clDevices = (cl_device_id*) malloc(sizeof(cl_device_id) * ciDeviceCount);
    error = clGetDeviceIDs(clSelectedPlatformID[0], CL_DEVICE_TYPE_GPU, ciDeviceCount, clDevices, &ciDeviceCount);
 
	cl_context GPU_context = clCreateContext(0, 1, clDevices, NULL, NULL, &error);
 
    cl_command_queue GPUCommandQueue = clCreateCommandQueue(GPU_context, clDevices[0], CL_QUEUE_PROFILING_ENABLE, NULL);
	cl_event event1;

	cl_mem GPU_vector1 = clCreateBuffer(GPU_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * N*N, host_vector1, NULL);
    cl_mem GPU_vector2 = clCreateBuffer(GPU_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * N*N, host_vector2, NULL);
    cl_mem GPU_result_vector = clCreateBuffer(GPU_context, CL_MEM_WRITE_ONLY, sizeof(int) * N*N, NULL, NULL);

	cl_program OpenCLProgram = clCreateProgramWithSource(GPU_context, 7, OpenCLSource1, NULL, &error);

    error = clBuildProgram(OpenCLProgram, 0, NULL, NULL, NULL, NULL);
  
	cl_kernel OpenCLVectorAdd = clCreateKernel(OpenCLProgram, "matrixAdd", NULL); 
    clSetKernelArg(OpenCLVectorAdd, 0, sizeof(cl_mem),(void*)&GPU_result_vector);
    clSetKernelArg(OpenCLVectorAdd, 1, sizeof(cl_mem), (void*)&GPU_vector1);
    clSetKernelArg(OpenCLVectorAdd, 2, sizeof(cl_mem), (void*)&GPU_vector2);
 
	size_t WorkSize[2] = {N, N};
    cl_int temp = clEnqueueNDRangeKernel(GPUCommandQueue, OpenCLVectorAdd, 2, NULL, WorkSize, NULL, 0, NULL, &event1);
	
    clEnqueueReadBuffer(GPUCommandQueue, GPU_result_vector, CL_TRUE, 0, N*N * sizeof(int), host_vector, 0, NULL, NULL);

	write(host_vector);

	_getch();
 
    return 0;
}

It runs now, but if I remove the

"       
"

line form the kernel, the program crashes. Also my original goal is a simple matrix addition, but if I add this (instead of the current c[x] = a[x] + b[x]; line) to the kernel

c[x][y] = a[x][y] + b[x][y];

it just won’t work. I get a matrix filled with zeros.

Any thoughts?

Where does the program crash? How does it crash (any error messages?)?

As general advice, I would replace:

cl_program OpenCLProgram = clCreateProgramWithSource(GPU_context, 7, OpenCLSource1, NULL, &error);

with


cl_program OpenCLProgram = clCreateProgramWithSource(GPU_context, sizeof(OpenCLSource1)/sizeof(OpenCLSource1[0]), OpenCLSource1, NULL, &error);

As for “If I add this (instead of the current c[x] = a[x] + b[x]; line) to the kernel it just won’t work”, the code you posted is syntactically incorrect. Have you checked whether clBuildProgram() returns any compilation errors?

Where does the program crash? How does it crash (any error messages?)?

The message was:
Unhandled exception at 0x7593b760 in ocl1.exe: Microsoft C++ exception: std::bad_alloc at memory location 0x0012f878…

cl_program OpenCLProgram = clCreateProgramWithSource(GPU_context, sizeof(OpenCLSource1)/sizeof(OpenCLSource1[0]), OpenCLSource1, NULL, &error);

With this, that problem is gone. THX!

I haven’t checked clBuildProgram for compilation errors. You mean this is incorrect?

const char* OpenCLSource1[] = {
"__kernel void matrixAdd(__global int* c, __global int* a, __global int* b)
",
"{
",
"       unsigned int x = get_global_id(0);
",
"       unsigned int y = get_global_id(1);
",
"       c[x][y] = a[x][y] + b[x][y];
",
"}
"
};

you can use clGetProgramBuildInfo with the CL_PROGRAM_BUILD_LOG option to get detailed compilation error/warning messages.

http://www.khronos.org/registry/cl/sdk/ … dInfo.html

I found a solution (which I will post a bit later), thx for the replies. Obviously my kernel was wrong, but my problem was, that I wasn’t even sure, if my host code was OK.

Hello Bubu,
as per i know, for new line in kernel function us delimiter \ instead of
; 2 dimensional matrix operation wont work here.

praveenraj
BE student
Pesit

as per i know, for new line in kernel function us delimiter \ instead of

That is not correct. Newlines (
) in OpenCL C work the same as in C99.

Backslash () is the line continuation character in the preprocessor, which is a different thing.