I have a struct in C++ and I have problems to pass a pointer to a subroutine.
#pragma pack(push, 1) // exact fit - no padding
typedef struct my_struct{
cl_uchar8 ck;
cl_uint8 iA;
} my_struct;
#pragma pack(pop)
I create a buffer on host side:
my_struct *result_pk = (my_struct *)_aligned_malloc(sizeof(my_struct), 4096);
Than I pass this buffer to the kernel as second parameter by CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR:
typedef struct __attribute__ ((packed)) my_struct{
uchar8 ck;
uint8 iA;
} my_struct;
__kernel void do_calc(__global uchar8 *in, __global my_struct *out)
{
out->ck = *in; // works
out->iA = (__global uint8)(1,2,3,4,5,6,7,8); // works
out->iA.s0 = (__global uint)(8); // works
// for subroutine a pointer to iA is needed to modify the data
__global uint8 *test = (__global uint8 *)(&(out->iA)); // works, printf is showing me that test & out-iA have the same pointer address
*test = (__global uint8) (6,6,6,6,6,6,6,6); // fails
}
Why this fails on a Nvidia GPU? It is working on an AMD GPU.
I get no build error and the kernel is starting but also the kernel crash and nothing is done.