matrix addition problem

bubu · April 14, 2011, 7:41am

Hi!

I’m having some trouble with this code:


#include <oclUtils.h>
#include <stdio.h>
#include <stdlib.h>
#include <tchar.h>
#include <CL/cl.h>
#include <conio.h>

#define N 10

/*
 * Print an N x N integer matrix to stdout.
 *
 * Every element is written as " %d". Rows are separated by a newline and
 * the matrix is followed by a newline, a space and another newline, so
 * consecutive matrices are visually separated.
 *
 * A: the matrix to print; it is only read.
 */
void write(int A[N][N])
{
	for (int row = 0; row < N; row++)
	{
		/* A line break goes before every row except the first one. */
		if (row > 0)
		{
			printf("\n");
		}
		for (int col = 0; col < N; col++)
		{
			printf(" %d", A[row][col]);
		}
	}
	printf("\n \n");
}

/*
 * OpenCL C source of the matrixAdd kernel, one source line per string.
 *
 * The kernel is meant to be launched over a 2-D range with one work-item per
 * matrix element. The matrices are passed as flat buffers, so the element
 * handled by work-item (x, y) lives at y * get_global_size(0) + x.
 *
 * The originally posted statement was "c[x] = a[x] + b[x];", which ignores y
 * and therefore only ever writes the first get_global_size(0) elements of c.
 *
 * The number of strings (7, including the blank line) is kept unchanged.
 */
const char* OpenCLSource1[] = {
"__kernel void matrixAdd(__global int* c, __global int* a, __global int* b)\n",
"{\n",
"       unsigned int x = get_global_id(0);\n",
"       unsigned int y = get_global_id(1);\n",
"       \n",
"       c[y * get_global_size(0) + x] = a[y * get_global_size(0) + x] + b[y * get_global_size(0) + x];\n",
"}\n"
};

int main(int argc, const char** argv) 
{
	int host_vector1[N][N], host_vector2[N][N];
	int host_vector[N][N];
  
	for(int i = 0; i < N; i++)
    {
		for(int j = 0; j < N; j++)
		{
			host_vector1[i][j] = j;
			host_vector2[i][j] = j;
		}
    }

	write(host_vector1);
	write(host_vector2);
 
    cl_int error = 0;
 
    cl_uint numPlatforms;
    cl_platform_id* clSelectedPlatformID = NULL;
    clGetPlatformIDs(0, NULL, &numPlatforms);
    clSelectedPlatformID = (cl_platform_id*)malloc(sizeof(cl_platform_id)*numPlatforms);
    error = clGetPlatformIDs(numPlatforms, clSelectedPlatformID, NULL);
    if(error != CL_SUCCESS) 
        return 0;
 
    cl_uint ciDeviceCount;
    cl_device_id* clDevices =  NULL;
    error = clGetDeviceIDs(clSelectedPlatformID[0], CL_DEVICE_TYPE_GPU, 0, NULL, &ciDeviceCount);

    clDevices = (cl_device_id*) malloc(sizeof(cl_device_id) * ciDeviceCount);
    error = clGetDeviceIDs(clSelectedPlatformID[0], CL_DEVICE_TYPE_GPU, ciDeviceCount, clDevices, &ciDeviceCount);
 
	cl_context GPU_context = clCreateContext(0, 1, clDevices, NULL, NULL, &error);
 
    cl_command_queue GPUCommandQueue = clCreateCommandQueue(GPU_context, clDevices[0], CL_QUEUE_PROFILING_ENABLE, NULL);
	cl_event event1;

	cl_mem GPU_vector1 = clCreateBuffer(GPU_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * N*N, host_vector1, NULL);
    cl_mem GPU_vector2 = clCreateBuffer(GPU_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * N*N, host_vector2, NULL);
    cl_mem GPU_result_vector = clCreateBuffer(GPU_context, CL_MEM_WRITE_ONLY, sizeof(int) * N*N, NULL, NULL);

	cl_program OpenCLProgram = clCreateProgramWithSource(GPU_context, 7, OpenCLSource1, NULL, &error);

    error = clBuildProgram(OpenCLProgram, 0, NULL, NULL, NULL, NULL);
  
	cl_kernel OpenCLVectorAdd = clCreateKernel(OpenCLProgram, "matrixAdd", NULL); 
    clSetKernelArg(OpenCLVectorAdd, 0, sizeof(cl_mem),(void*)&GPU_result_vector);
    clSetKernelArg(OpenCLVectorAdd, 1, sizeof(cl_mem), (void*)&GPU_vector1);
    clSetKernelArg(OpenCLVectorAdd, 2, sizeof(cl_mem), (void*)&GPU_vector2);
 
	size_t WorkSize[2] = {N, N};
    cl_int temp = clEnqueueNDRangeKernel(GPUCommandQueue, OpenCLVectorAdd, 2, NULL, WorkSize, NULL, 0, NULL, &event1);
	
    clEnqueueReadBuffer(GPUCommandQueue, GPU_result_vector, CL_TRUE, 0, N*N * sizeof(int), host_vector, 0, NULL, NULL);

	write(host_vector);

	_getch();
 
    return 0;
}

It runs now, but if I remove the

"       \n"

line from the kernel, the program crashes. Also my original goal is a simple matrix addition, but if I add this (instead of the current c[x] = a[x] + b[x]; line) to the kernel

c[x][y] = a[x][y] + b[x][y];

it just won’t work. I get a matrix filled with zeros.

Any thoughts?

david.garcia · April 14, 2011, 10:01am

Where does the program crash? How does it crash (any error messages?)?

As general advice, I would replace:

cl_program OpenCLProgram = clCreateProgramWithSource(GPU_context, 7, OpenCLSource1, NULL, &error);

with


cl_program OpenCLProgram = clCreateProgramWithSource(GPU_context, sizeof(OpenCLSource1)/sizeof(OpenCLSource1[0]), OpenCLSource1, NULL, &error);

As for “If I add this (instead of the current c[x] = a[x] + b[x]; line) to the kernel it just won’t work”, the code you posted is syntactically incorrect. Have you checked whether clBuildProgram() returns any compilation errors?

bubu · April 14, 2011, 10:42am

Where does the program crash? How does it crash (any error messages?)?

The message was:
Unhandled exception at 0x7593b760 in ocl1.exe: Microsoft C++ exception: std::bad_alloc at memory location 0x0012f878…

cl_program OpenCLProgram = clCreateProgramWithSource(GPU_context, sizeof(OpenCLSource1)/sizeof(OpenCLSource1[0]), OpenCLSource1, NULL, &error);

With this, that problem is gone. THX!

I haven’t checked clBuildProgram for compilation errors. You mean this is incorrect?

/*
 * Kernel variant asked about in this post, one source line per string.
 *
 * NOTE: c, a and b are declared as __global int*, so c[x] is a plain int and
 * c[x][y] tries to subscript an int. The OpenCL C compiler rejects that, so
 * clBuildProgram fails for this source. A flat buffer has to be indexed with
 * a single computed offset instead.
 */
const char* OpenCLSource1[] = {
"__kernel void matrixAdd(__global int* c, __global int* a, __global int* b)\n",
"{\n",
"       unsigned int x = get_global_id(0);\n",
"       unsigned int y = get_global_id(1);\n",
"       c[x][y] = a[x][y] + b[x][y];\n",
"}\n"
};

fangq · April 15, 2011, 11:53am

you can use clGetProgramBuildInfo with the CL_PROGRAM_BUILD_LOG option to get detailed compilation error/warning messages.

http://www.khronos.org/registry/cl/sdk/ … dInfo.html

bubu · April 16, 2011, 1:44am

I found a solution (which I will post a bit later), thx for the replies. Obviously my kernel was wrong, but my problem was, that I wasn’t even sure, if my host code was OK.

paviraj_1989 · April 23, 2011, 10:27am

Hello Bubu,
As far as I know, for a new line in a kernel function you should use the delimiter \ instead of \n; 2-dimensional matrix operations won’t work here.

praveenraj
BE student
Pesit

david.garcia · April 23, 2011, 1:29pm

as per i know, for new line in kernel function us delimiter \ instead of \n

That is not correct. Newlines (\n) in OpenCL C work the same as in C99.

Backslash (\) is the line continuation character in the preprocessor, which is a different thing.