Hello again,
I managed to modify my code so that now I’m sure that anything override anything else. But still I have problem with the memory object and I really need an explaination here. I think I’m completly mistaken in how to make a good use of the cl_mem objects.
Here is the code of the creation of my mem objects:
this->distanceVectorMem = clCreateBuffer(context,
CL_MEM_READ_WRITE, sizeof(int) * summitCount,
NULL, NULL);
if (!this->distanceVectorMem)
{
cout << "Error: Failed to create distanceVectorMem" << endl;
exit(EXIT_FAILURE);
}
this->usedSummitVectorMem = clCreateBuffer(context,
CL_MEM_READ_WRITE, sizeof(int) * summitCount,
NULL, NULL);
if (!this->usedSummitVectorMem)
{
cout << "Error: Failed to create usedSummitVectorMem" << endl;
exit(EXIT_FAILURE);
}
Now I run it this way:
// Set the arguments to our compute kernel
err = 0;
err = clSetKernelArg(this->initKernel, 0, sizeof(int), &summitIndex);
err |= clSetKernelArg(this->initKernel, 1, sizeof(cl_mem),
&this->distanceVectorMem);
err |= clSetKernelArg(this->initKernel, 2, sizeof(cl_mem),
&this->usedSummitVectorMem);
err |= clSetKernelArg(this->initKernel, 3, sizeof(int),
&size);
err |= clSetKernelArg(this->initKernel, 4, sizeof(int),
&startIndex);
err |= clSetKernelArg(this->initKernel, 5, sizeof(int),
&count);
err |= clSetKernelArg(this->initKernel, 6, sizeof(cl_mem),
&this->adjacencyListMem);
err |= clSetKernelArg(this->initKernel, 7, sizeof(cl_mem),
&this->adjacencyListDistancesMem);
if (err != CL_SUCCESS)
{
printf("Error: Failed to set kernel arguments
");
exit(EXIT_FAILURE);
}
size_t global = size;
if (size % 2 != 0) global++;
// Execute the kernel over the entire range of the data set
err = clEnqueueNDRangeKernel(this->queue, this->initKernel, 1, NULL,
&global, NULL, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
printf("Error: Failed to execute the kernel
");
exit(EXIT_FAILURE);
}
With this kernel code:
__kernel void init_int(const int summitIndex, __global int* distanceVector,
__global int* usedSummitVector,
const int size, const int startIndex, const int count,
__global const int* adjacencyList,
__global const int* adjacencyListDistances)
{
int i = get_global_id(0);
printf("entering in init (%d)
", i);
if (i < size)
{
distanceVector[i] = -1;
usedSummitVector[i] = 0;
}
distanceVector[summitIndex] = 0;
usedSummitVector[summitIndex] = 1;
if (i == 0)
{
for (int k = startIndex + 1; k < startIndex + 1 + count; k++)
{
distanceVector[adjacencyList[k] - 1] = adjacencyListDistances[k];
printf("distanceVector[%d] = %d
", adjacencyList[k] - 1, distanceVector[adjacencyList[k] - 1]);
}
}
printf("exiting in init (%d)
", i);
}
The trace of printf result is actually good something like that :
entering in init (0)
distanceVector[1] = 10
distanceVector[3] = 4
distanceVector[5] = 10
exiting in init (0)
After that I directly use distanceVector and usedSummitVector in my find min function:
err = 0;
err = clSetKernelArg(this->findMinKernel, 0, sizeof(cl_mem),
&this->distanceVectorMem);
err |= clSetKernelArg(this->findMinKernel, 1, sizeof(cl_mem),
&this->usedSummitVectorMem);
err |= clSetKernelArg(this->findMinKernel, 2, sizeof(int),
&size);
err |= clSetKernelArg(this->findMinKernel, 3, sizeof(cl_mem),
&this->pFindMinMem);
err |= clSetKernelArg(this->findMinKernel, 4, sizeof(cl_mem),
&this->findMinMem);
err |= clSetKernelArg(this->findMinKernel, 5, sizeof(int),
&firstPass);
err |= clSetKernelArg(this->findMinKernel, 6, sizeof(int),
&lmax);
if (err != CL_SUCCESS)
{
printf("Error: Failed to set kernel arguments
");
exit(EXIT_FAILURE);
}
// Execute the kernel over the entire range of the data set
err = clEnqueueNDRangeKernel(this->queue, this->findMinKernel, 1,
NULL,
&global, NULL, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
printf("Error: Failed to execute the kernel
");
exit(EXIT_FAILURE);
}
without doing anything between the kernels executions. Here is the find min kernel:
__kernel void find_min(__global int* distanceVector,
__global int* usedSummitVector, const int size,
__global int* oldMin, __global int* lMin, const int firstPass,
const int lmax)
{
int k = get_global_id(0);
printf("entering in find min (%d)
", k);
int i = k * 2;
if (firstPass == 1) {
int index = i;
printf("select %d as index in %d with size %d
", i, k, size);
if (index + 1 < size)
{
printf("select %d as index + 1 in %d with size %d
", i + 1, k, size);
printf("distanceVector[%d] = %d
", index, distanceVector[index]);
printf("usedSummitVector[%d] = %d
", index + 1, usedSummitVector[index + 1]);
printf("usedSummitVector[%d] = %d
", index, usedSummitVector[index]);
if (distanceVector[index] <= 0 && usedSummitVector[index + 1] == 0
|| usedSummitVector[index] != 0)
{
index = i + 1;
}
else
{
printf("distanceVector[%d] = %d
", index + 1, distanceVector[index + 1]);
printf("distanceVector[%d] = %d
", index, distanceVector[index]);
printf("usedSummitVector[%d] = %d
", index + 1, usedSummitVector[index + 1]);
if (distanceVector[index + 1] > 0
&& distanceVector[index + 1] < distanceVector[index]
&& usedSummitVector[index + 1] == 0)
{
index = i + 1;
}
}
}
lMin[k] = index;
}
else
{
int index = oldMin[i];
printf("distanceVector[%d] = %d
", index, distanceVector[index]);
printf("usedSummitVector[%d] = %d
", index, usedSummitVector[index]);
if (i + 1 < lmax)
{
int index2 = oldMin[i + 1];
printf("distanceVector[%d] = %d
", index2, distanceVector[index2]);
printf("usedSummitVector[%d] = %d
", index2, usedSummitVector[index2]);
if (distanceVector[index] <= 0
&& usedSummitVector[index2] == 0
|| usedSummitVector[index] != 0)
index = index2;
else
{
if (distanceVector[index2] > 0
&& distanceVector[index2] < distanceVector[index]
&& usedSummitVector[index2] == 0)
{
index = index2;
}
}
}
lMin[k] = index;
}
printf("found %d as min in %d first pass ? %d
", lMin[k], k, firstPass);
printf("exiting find min (%d)
", k);
}
And the printf trace tells me here:
distanceVector[1] = -1
Why on the earth the memory isn’t committed between the two kernels execution ? What am I missing to do or what am I doing wrong here ?
Thank you so much in advance for the answer,
Benjamin.