Hello All,
As a general rule, I try to implement something in a very basic (serial) kernel before parallelizing it, just to be sure that everything outputs correctly and that I have a good function to expand on (i.e., the results match the current implementation). I am attempting to port a set of if statements from C into a kernel, but they error out after one run, at index 682. I’ve confirmed that every array and value that is transferred to the kernel reads correctly from within the kernel, but the results I receive are still incorrect. The C code is listed below, followed by the host-side overhead and the kernel code.
/*
 * Serial host-side reference: fills fluxQ1[] for direction 1 at every node
 * listed in mapI (first loop) and mapF (second loop).
 *
 * For each listed node the projection of direction 1 onto (nx, ny) is
 * computed; fluxQ1 is set to 0 unless that projection is negative, in which
 * case it is the bracketed expression times the projection.
 * NOTE(review): nx/ny are presumably the boundary-normal components and
 * mapI/mapF the inflow / far-field node lists -- confirm against the caller.
 */
int i, id;
/* Nine 2-D direction vectors: e[k][0] is the x component, e[k][1] the y component. */
double e[9][2] = { {0, 0}, {1, 0}, {0, 1}, {-1, 0}, {0, -1}, {1, 1}, {-1, 1}, {-1, -1}, {1, -1}};
double t_1;
double uf, vf;
double e1_dot_n;   /* projection of direction 1 onto (nx[id], ny[id]) */

t_1 = 1.0 / 9.0;

/* First pass: nodes in mapI, with prescribed velocity (uf, vf) = (0, 0). */
uf = 0.;
vf = 0.;
for (i = 0; i < Ninflow; ++i) {
    id = mapI[i];
    e1_dot_n = e[1][0]*nx[id] + e[1][1]*ny[id];
    fluxQ1[id] = 0.;
    if (e1_dot_n < 0) {
        /* The velocity projection pairs e[1][0] with uf and e[1][1] with vf
         * (the y term previously reused e[1][0]; with vf == 0 the value is
         * the same, but it would be wrong for any non-zero vf). */
        fluxQ1[id] = (Q1[id] - Q3[id] - 2.*t_1*1.*(e[1][0]*uf + e[1][1]*vf)*3.) * e1_dot_n;
    }
}

/* Second pass: nodes in mapF, with prescribed velocity (uf, vf) = (0.01, 0). */
uf = 0.01;
vf = 0.;
for (i = 0; i < Nfar; ++i) {
    id = mapF[i];
    e1_dot_n = e[1][0]*nx[id] + e[1][1]*ny[id];
    fluxQ1[id] = 0.;
    if (e1_dot_n < 0) {
        fluxQ1[id] = (Q1[id] - Q3[id] - 2.*t_1*1.*(e[1][0]*uf + e[1][1]*vf)*3.) * e1_dot_n;
    }
}
The host-side overhead (setting the kernel arguments):
err = clSetKernelArg(kernel[8], 0, sizeof(cl_int), (void*)&Ninflowcl);
err = clSetKernelArg(kernel[8], 1, sizeof(cl_int), (void*)&Nfarcl);
err = clSetKernelArg(kernel[8], 2, sizeof(cl_mem), (void*)&mapIcl);
err = clSetKernelArg(kernel[8], 3, sizeof(cl_mem), (void*)&nxcl);
err = clSetKernelArg(kernel[8], 4, sizeof(cl_mem), (void*)&nycl);
err = clSetKernelArg(kernel[8], 5, sizeof(cl_mem), (void*)&mapFcl);
err = clSetKernelArg(kernel[8], 6, sizeof(cl_mem), (void*)&Q1cl);
err = clSetKernelArg(kernel[8], 7, sizeof(cl_mem), (void*)&Q3cl);
err = clSetKernelArg(kernel[8], 8, sizeof(cl_mem), (void*)&fluxQ1cl);
The Kernel that I am trying to implement:
/*
 * umBC -- serial boundary-flux kernel for direction 1.
 *
 * Ninflow : number of entries of mapI to process
 * Nfar    : number of entries of mapF to process
 * mapI    : node indices handled with (uf, vf) = (0, 0)
 * nx, ny  : per-node vector components the direction is projected onto
 *           (presumably the boundary normal -- confirm against the host code)
 * mapF    : node indices handled with (uf, vf) = (0.01, 0)
 * Q1, Q3  : per-node input values
 * fluxQ1  : per-node output; written only at the indices found in mapI/mapF
 *
 * The kernel does not use the work-item id: every work-item that runs it
 * executes both loops in full.
 * NOTE(review): `double` in OpenCL C may require the cl_khr_fp64 extension
 * to be enabled on some platforms -- confirm for the target device.
 */
__kernel void umBC(int Ninflow,
                   int Nfar,
                   __global int* mapI,
                   __global double* nx,
                   __global double* ny,
                   __global int* mapF,
                   __global double* Q1,
                   __global double* Q3,
                   __global double* fluxQ1)
{
    /* Nine 2-D direction vectors: e[k][0] is the x component, e[k][1] the y component. */
    const double e[9][2] = { {0.0, 0.0}, {1.0, 0.0}, {0.0, 1.0}, {-1.0, 0.0}, {0.0, -1.0},
                             {1.0, 1.0}, {-1.0, 1.0}, {-1.0, -1.0}, {1.0, -1.0} };
    const double t_1 = 1.0 / 9.0;
    double uf, vf;
    double e1_dot_n;   /* projection of direction 1 onto (nx[id], ny[id]) */
    int i, j;
    int id;

    /* First pass: nodes in mapI, prescribed velocity (0, 0). */
    uf = 0.0;
    vf = 0.0;
    for (i = 0; i < Ninflow; ++i) {
        id = mapI[i];
        e1_dot_n = e[1][0]*nx[id] + e[1][1]*ny[id];
        /* Select instead of multiplying by the 0/1 comparison result:
         * 0 * NaN (or 0 * Inf) is NaN, so the multiply form leaks a bad
         * Q1/Q3 value into nodes where a plain `if` would store 0. */
        fluxQ1[id] = (e1_dot_n < 0)
            ? (Q1[id] - Q3[id] - 2.*t_1*1.*(e[1][0]*uf + e[1][1]*vf)*3.) * e1_dot_n
            : 0.0;
    }

    /* Second pass: nodes in mapF, prescribed velocity (0.01, 0). */
    uf = 0.01;
    vf = 0.0;
    for (j = 0; j < Nfar; ++j) {
        id = mapF[j];
        e1_dot_n = e[1][0]*nx[id] + e[1][1]*ny[id];
        /* The velocity projection pairs e[1][0] with uf and e[1][1] with vf
         * (the y term previously reused e[1][0]; identical while vf == 0). */
        fluxQ1[id] = (e1_dot_n < 0)
            ? (Q1[id] - Q3[id] - 2.*t_1*1.*(e[1][0]*uf + e[1][1]*vf)*3.) * e1_dot_n
            : 0.0;
    }
}
I’m at a complete loss as to why this will return everything as expected, except for the actual calculation. Any help would be greatly appreciated!