Hi!
I’m having some trouble with this code:
#include <oclUtils.h>
#include <stdio.h>
#include <stdlib.h>
#include <tchar.h>
#include <CL/cl.h>
#include <conio.h>
#define N 10
void write(int A[N][N])
{
int s = 0;
for (int i = 0; i < N; i++)
{
for (int j = 0; j < N; j++)
{
if (s > N-1)
{
printf("
");
s = 0;
}
printf(" %d", A[i][j]);
s++;
}
}
printf("
");
}
const char* OpenCLSource1[] = {
"__kernel void matrixAdd(__global int* c, __global int* a, __global int* b)
",
"{
",
" unsigned int x = get_global_id(0);
",
" unsigned int y = get_global_id(1);
",
"
",
" c[x] = a[x] + b[x];
",
"}
"
};
int main(int argc, const char** argv)
{
int host_vector1[N][N], host_vector2[N][N];
int host_vector[N][N];
for(int i = 0; i < N; i++)
{
for(int j = 0; j < N; j++)
{
host_vector1[i][j] = j;
host_vector2[i][j] = j;
}
}
write(host_vector1);
write(host_vector2);
cl_int error = 0;
cl_uint numPlatforms;
cl_platform_id* clSelectedPlatformID = NULL;
clGetPlatformIDs(0, NULL, &numPlatforms);
clSelectedPlatformID = (cl_platform_id*)malloc(sizeof(cl_platform_id)*numPlatforms);
error = clGetPlatformIDs(numPlatforms, clSelectedPlatformID, NULL);
if(error != CL_SUCCESS)
return 0;
cl_uint ciDeviceCount;
cl_device_id* clDevices = NULL;
error = clGetDeviceIDs(clSelectedPlatformID[0], CL_DEVICE_TYPE_GPU, 0, NULL, &ciDeviceCount);
clDevices = (cl_device_id*) malloc(sizeof(cl_device_id) * ciDeviceCount);
error = clGetDeviceIDs(clSelectedPlatformID[0], CL_DEVICE_TYPE_GPU, ciDeviceCount, clDevices, &ciDeviceCount);
cl_context GPU_context = clCreateContext(0, 1, clDevices, NULL, NULL, &error);
cl_command_queue GPUCommandQueue = clCreateCommandQueue(GPU_context, clDevices[0], CL_QUEUE_PROFILING_ENABLE, NULL);
cl_event event1;
cl_mem GPU_vector1 = clCreateBuffer(GPU_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * N*N, host_vector1, NULL);
cl_mem GPU_vector2 = clCreateBuffer(GPU_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * N*N, host_vector2, NULL);
cl_mem GPU_result_vector = clCreateBuffer(GPU_context, CL_MEM_WRITE_ONLY, sizeof(int) * N*N, NULL, NULL);
cl_program OpenCLProgram = clCreateProgramWithSource(GPU_context, 7, OpenCLSource1, NULL, &error);
error = clBuildProgram(OpenCLProgram, 0, NULL, NULL, NULL, NULL);
cl_kernel OpenCLVectorAdd = clCreateKernel(OpenCLProgram, "matrixAdd", NULL);
clSetKernelArg(OpenCLVectorAdd, 0, sizeof(cl_mem),(void*)&GPU_result_vector);
clSetKernelArg(OpenCLVectorAdd, 1, sizeof(cl_mem), (void*)&GPU_vector1);
clSetKernelArg(OpenCLVectorAdd, 2, sizeof(cl_mem), (void*)&GPU_vector2);
size_t WorkSize[2] = {N, N};
cl_int temp = clEnqueueNDRangeKernel(GPUCommandQueue, OpenCLVectorAdd, 2, NULL, WorkSize, NULL, 0, NULL, &event1);
clEnqueueReadBuffer(GPUCommandQueue, GPU_result_vector, CL_TRUE, 0, N*N * sizeof(int), host_vector, 0, NULL, NULL);
write(host_vector);
_getch();
return 0;
}
It runs now, but if I remove the
"
"
line form the kernel, the program crashes. Also my original goal is a simple matrix addition, but if I add this (instead of the current c[x] = a[x] + b[x]; line) to the kernel
c[x][y] = a[x][y] + b[x][y];
it just won’t work. I get a matrix filled with zeros.
Any thoughts?