Hi everyone,
I’ve created a simple OpenCL program that performs matrix multiplication. My kernel works with CUDA, but with OpenCL it only works with datasets of square matrices. Could someone please help me figure out why?
Thanks
IN THE HOST
…
/* Work-group (local) size: 16 x 16 work-items. */
size_t bd[] = {16, 16};
/*
 * Global size: each dimension is rounded up to the next multiple of the
 * work-group size. Dimension 0 spans the columns of C and dimension 1 the
 * rows of C, matching how the kernel maps get_global_id(0)/(1). The
 * work-items that fall in the padding are discarded by the kernel's
 * bounds check.
 */
size_t gd[] = {
    bd[0] * ((numCColumns + bd[0] - 1) / bd[0]),
    bd[1] * ((numCRows + bd[1] - 1) / bd[1])
};
/* Launch the kernel over the 2-D range; the status code is left in err. */
err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, gd,
                             bd, 0, NULL, NULL);
…
DEVICE
/*
 * Computes C = A * B, one output element per work-item.
 *
 * All three matrices are indexed as flat row-major arrays:
 * element (row, col) of a matrix with W columns lives at [row * W + col].
 *
 * A, B         input matrices (read only)
 * C            output matrix
 * numAColumns  number of columns of A; used as the length of the dot product
 * numBColumns  row stride of B
 * numCRows,
 * numCColumns  extent of C; also the bounds for the work-item guard
 * numARows,
 * numBRows     accepted for interface compatibility, not read by this kernel
 *
 * NOTE(review): the kernel presumes numAColumns == numBRows and that the
 * host passes the dimensions in exactly this argument order - confirm
 * against the clSetKernelArg calls.
 */
__kernel void matrixMultiply(__global const float *A, __global const float *B,
                             __global float *C,
                             int numARows, int numAColumns,
                             int numBRows, int numBColumns,
                             int numCRows, int numCColumns)
{
    /* Dimension 0 selects the column of C, dimension 1 the row. */
    int col = get_global_id(0);
    int row = get_global_id(1);

    /* Work-items outside the extent of C have nothing to do. */
    if (col >= numCColumns || row >= numCRows) {
        return;
    }

    /* Dot product of row `row` of A with column `col` of B. */
    float sum = 0.0f;
    for (int k = 0; k < numAColumns; k++) {
        sum += A[row * numAColumns + k] * B[k * numBColumns + col];
    }
    C[row * numCColumns + col] = sum;
}
dataset 0
MAT A MAT B
64x64 64x64 OK
dataset 1
128x64 64x128 OK
dataset 2
100x128 128x56 NOT WORKING
dataset 3
128x64 64x128 OK
dataset 4
128x32 32x128 OK
dataset 5
200x100 100x256 NOT WORKING
dataset 6
256x256 256x256 OK
dataset 7
300x256 256x300 OK
dataset 8
128x64 64x128 OK
dataset 9
256x256 256x257 NOT WORKING