Hello to everyone,
I have a problem with this piece of code:
int f_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
const unsigned char *in_arg, unsigned int nbytes) {
int result = d_true, error;
size_t global_work_size[1] = {nbytes}, local_work_size[1] = {16};
void *pointer;
memcpy(out_arg, in_arg, nbytes);
v_cl_out_buffer = clCreateBuffer(v_cl_context, (CL_MEM_READ_WRITE|
CL_MEM_USE_HOST_PTR),
nbytes, out_arg, &error);
if ((error = clSetKernelArg(v_cl_kernel, 1, sizeof(v_cl_out_buffer),
(void *)&v_cl_out_buffer)) == CL_SUCCESS) {
if (v_space_initialized == d_false) {
v_cl_key_buffer = clCreateBuffer(v_cl_context, CL_MEM_READ_ONLY,
const_b, NULL, &error);
clEnqueueWriteBuffer(v_cl_command_queue, v_cl_key_buffer,
CL_TRUE, 0, const_b, expanded_key, 0,
NULL, NULL);
if (((error = clSetKernelArg(v_cl_kernel, 0,
sizeof(v_cl_key_buffer),
(void *)&v_cl_key_buffer))
!= CL_SUCCESS) ||
((error = clSetKernelArg(v_cl_kernel, 2,
sizeof(int),
(void *)&const_a))
!= CL_SUCCESS)) {
d_debug("can't hook parameters to the kernel");
result = d_false;
}
v_space_initialized = d_true;
}
}
if (error == CL_SUCCESS) {
if ((error = clEnqueueNDRangeKernel(v_cl_command_queue,
v_cl_kernel, 1, NULL,
global_work_size,
local_work_size,
0, NULL, NULL))
== CL_SUCCESS) {
pointer = clEnqueueMapBuffer(v_cl_command_queue,
v_cl_out_buffer, CL_TRUE,
CL_MAP_READ, nbytes, 0, 0, NULL,
NULL, NULL);
memcpy(out_arg, pointer, nbytes);
clEnqueueUnmapMemObject(v_cl_command_queue, v_cl_out_buffer,
pointer, 0, NULL, NULL);
}
clReleaseMemObject(v_cl_out_buffer);
}
return result;
}
This piece of code is called many times (in my personal test case only two times) with a different nbytes value (but nbytes%16 is always 0), a different pointer to an in_arg string (initial_in_arg+last_nbytes foreach step) and a different pointer to an already allocated out_arg (out_arg has the same dimension of the in_arg parameter and work in the same way).
expanded_key and const_a are global variables and they don’t change their content.
My kernel is very simple (it’s just for test):
__kernel void example(__global uint *rkey, __global uint *odata, uint steps) {
int executions;
unsigned int *pointer[4], box[4];
unsigned char *character;
character = (unsigned char *)odata[0];
character[0] = 'T';
character[1] = 'H';
character[2] = 'I';
character[3] = 'S';
character = (unsigned char *)odata[1];
character[0] = ' ';
character[1] = 'I';
character[2] = 'S';
character[3] = ' ';
character = (unsigned char *)odata[2];
character[0] = 'A';
character[1] = ' ';
character[2] = 'T';
character[3] = 'E';
character = (unsigned char *)odata[3];
character[0] = 'S';
character[1] = 'T';
character[2] = '!';
character[3] = '!';
}
Now, when I execute this piece of code on GPU everything freeze (the entire system, Mountain Lion) and I’ve to force the shutdown pressing the power button for 4/5 seconds.
No logs are written in this time and I’ve got no relevant informations from my system. So, my question is: what’s wrong with this? I can’t test my kernel or my kernel’s loader code if everything freeze!
Thank you so much for your patience!