ssarangi

03-24-2010, 08:05 PM

Hi All,

I am implementing a simple bounding box algorithm on opencl and I have problem with the computations.

I have a set of vertices which I pass into the kernel and I compute the bounding box accordingly. I also pass in another array just to read back the vertices back

from the kernel to find that they are all different. I am running on WinXP with Quadro FX 570M and 197.15 (Nvidia Driver).

The code for the C part is this.

float bbox[8], results[8];

memset(results, 0, 8 * sizeof(float));

bbox[0] = (float)mesh->vertex[0].pt[0]; bbox[4] = (float)mesh->vertex[0].pt[0];

bbox[1] = (float)mesh->vertex[0].pt[1]; bbox[5] = (float)mesh->vertex[0].pt[1];

bbox[2] = (float)mesh->vertex[0].pt[2]; bbox[6] = (float)mesh->vertex[0].pt[2];

// GPU Calculation

float* vertices = static_cast<float *>(malloc(4 * mesh->vertex_count * sizeof(float)));

float* fnew_verts = static_cast<float *>(malloc(4 * mesh->vertex_count * sizeof(float)));

memset(vertices, 0, 4 * mesh->vertex_count * sizeof(float));

memset(fnew_verts, 0, 4 * mesh->vertex_count * sizeof(float));

float* vertex = vertices;

for (int i = 0; i < mesh->vertex_count; i++)

{

*vertex++ = (float)mesh->vertex[i].pt[0];

*vertex++ = (float)mesh->vertex[i].pt[1];

*vertex++ = (float)mesh->vertex[i].pt[2];

*vertex++;

}

ShaderManager* shm = ShaderManager::Instance();

cl_int err;

cl_mem vertices_mem, bbox_mem, new_verts;

vertices_mem = clCreateBuffer(shm->opencl_context(), CL_MEM_READ_ONLY, 4 * mesh->vertex_count * sizeof(float), NULL, NULL);

err = clEnqueueWriteBuffer(shm->opencl_command_queue(), vertices_mem, CL_TRUE, 0, 4 * mesh->vertex_count * sizeof(float),

(void *)vertices, 0, NULL, NULL);

bbox_mem = clCreateBuffer(shm->opencl_context(), CL_MEM_READ_WRITE, 8 * sizeof(float), NULL, NULL);

err = clEnqueueWriteBuffer(shm->opencl_command_queue(), bbox_mem, CL_TRUE, 0, 8 * sizeof(float), (void *)bbox, 0, NULL, NULL);

new_verts = clCreateBuffer(shm->opencl_context(), CL_MEM_WRITE_ONLY, 4 * mesh->vertex_count * sizeof(float), NULL, NULL);

clFinish(shm->opencl_command_queue());

err = clSetKernelArg(shm->opencl_kernel(), 0, sizeof(cl_mem), &vertices_mem);

err |= clSetKernelArg(shm->opencl_kernel(), 1, sizeof(cl_mem), &bbox_mem);

err |= clSetKernelArg(shm->opencl_kernel(), 2, sizeof(cl_mem), &new_verts);

size_t global_work_size = mesh->vertex_count;

err = clEnqueueNDRangeKernel(shm->opencl_command_queue(), shm->opencl_kernel(), 1, NULL,

&global_work_size, NULL, 0, NULL, NULL);

#pragma warning(disable:4189)

const char* str = errorString(err);

clFinish(shm->opencl_command_queue());

err = clEnqueueReadBuffer(shm->opencl_command_queue(), bbox_mem, CL_TRUE, 0, 8 * sizeof(float),

results, 0, NULL, NULL);

err = clEnqueueReadBuffer(shm->opencl_command_queue(), new_verts, CL_TRUE, 0, 4 * mesh->vertex_count * sizeof(float),

fnew_verts, 0, NULL, NULL);

clFinish(shm->opencl_command_queue());

// Verify with input data

for (int i = 0; i < 4 * mesh->vertex_count; i++)

assert(fnew_verts[i] == vertices[i]);

for (int i = 0; i < mesh->vertex_count; i++)

{

mesh->vertex[i].pt[0] = (double)(fnew_verts[0]);

mesh->vertex[i].pt[1] = (double)(fnew_verts[1]);

mesh->vertex[i].pt[2] = (double)(fnew_verts[2]);

fnew_verts += 4;

}

free(vertices);

The kernel is defined like this as follows:

__kernel void bound_mesh(__global float4* vertices, __global float4* bbox, __global float4 new_verts)

{

int gid = get_global_id(0);

if (vertices[gid].x < bbox[0].x)

bbox[0].x = vertices[gid].x;

if (vertices[gid].y < bbox[0].y)

bbox[0].y = vertices[gid].y;

if (vertices[gid].z < bbox[0].z)

bbox[0].z = vertices[gid].z;

if (vertices[gid].x > bbox[1].x)

bbox[1].x = vertices[gid].x;

if (vertices[gid].y > bbox[1].y)

bbox[1].y = vertices[gid].y;

if (vertices[gid].z > bbox[2].z)

bbox[1].z = vertices[gid].z;

new_verts[gid = vertices[gid];

bbox[0].w = -1.0; bbox[1].w = -1.0;

}

So new_verts should atleast give me the correct vertices considering I am assigning them the same value for the vertices.

So neither the bounding box nor the new_verts has the correct values in them. Any suggestions as to what I might be doing wrong.

Thanks for your help.

I am implementing a simple bounding box algorithm on opencl and I have problem with the computations.

I have a set of vertices which I pass into the kernel and I compute the bounding box accordingly. I also pass in another array just to read back the vertices back

from the kernel to find that they are all different. I am running on WinXP with Quadro FX 570M and 197.15 (Nvidia Driver).

The code for the C part is this.

float bbox[8], results[8];

memset(results, 0, 8 * sizeof(float));

bbox[0] = (float)mesh->vertex[0].pt[0]; bbox[4] = (float)mesh->vertex[0].pt[0];

bbox[1] = (float)mesh->vertex[0].pt[1]; bbox[5] = (float)mesh->vertex[0].pt[1];

bbox[2] = (float)mesh->vertex[0].pt[2]; bbox[6] = (float)mesh->vertex[0].pt[2];

// GPU Calculation

float* vertices = static_cast<float *>(malloc(4 * mesh->vertex_count * sizeof(float)));

float* fnew_verts = static_cast<float *>(malloc(4 * mesh->vertex_count * sizeof(float)));

memset(vertices, 0, 4 * mesh->vertex_count * sizeof(float));

memset(fnew_verts, 0, 4 * mesh->vertex_count * sizeof(float));

float* vertex = vertices;

for (int i = 0; i < mesh->vertex_count; i++)

{

*vertex++ = (float)mesh->vertex[i].pt[0];

*vertex++ = (float)mesh->vertex[i].pt[1];

*vertex++ = (float)mesh->vertex[i].pt[2];

*vertex++;

}

ShaderManager* shm = ShaderManager::Instance();

cl_int err;

cl_mem vertices_mem, bbox_mem, new_verts;

vertices_mem = clCreateBuffer(shm->opencl_context(), CL_MEM_READ_ONLY, 4 * mesh->vertex_count * sizeof(float), NULL, NULL);

err = clEnqueueWriteBuffer(shm->opencl_command_queue(), vertices_mem, CL_TRUE, 0, 4 * mesh->vertex_count * sizeof(float),

(void *)vertices, 0, NULL, NULL);

bbox_mem = clCreateBuffer(shm->opencl_context(), CL_MEM_READ_WRITE, 8 * sizeof(float), NULL, NULL);

err = clEnqueueWriteBuffer(shm->opencl_command_queue(), bbox_mem, CL_TRUE, 0, 8 * sizeof(float), (void *)bbox, 0, NULL, NULL);

new_verts = clCreateBuffer(shm->opencl_context(), CL_MEM_WRITE_ONLY, 4 * mesh->vertex_count * sizeof(float), NULL, NULL);

clFinish(shm->opencl_command_queue());

err = clSetKernelArg(shm->opencl_kernel(), 0, sizeof(cl_mem), &vertices_mem);

err |= clSetKernelArg(shm->opencl_kernel(), 1, sizeof(cl_mem), &bbox_mem);

err |= clSetKernelArg(shm->opencl_kernel(), 2, sizeof(cl_mem), &new_verts);

size_t global_work_size = mesh->vertex_count;

err = clEnqueueNDRangeKernel(shm->opencl_command_queue(), shm->opencl_kernel(), 1, NULL,

&global_work_size, NULL, 0, NULL, NULL);

#pragma warning(disable:4189)

const char* str = errorString(err);

clFinish(shm->opencl_command_queue());

err = clEnqueueReadBuffer(shm->opencl_command_queue(), bbox_mem, CL_TRUE, 0, 8 * sizeof(float),

results, 0, NULL, NULL);

err = clEnqueueReadBuffer(shm->opencl_command_queue(), new_verts, CL_TRUE, 0, 4 * mesh->vertex_count * sizeof(float),

fnew_verts, 0, NULL, NULL);

clFinish(shm->opencl_command_queue());

// Verify with input data

for (int i = 0; i < 4 * mesh->vertex_count; i++)

assert(fnew_verts[i] == vertices[i]);

for (int i = 0; i < mesh->vertex_count; i++)

{

mesh->vertex[i].pt[0] = (double)(fnew_verts[0]);

mesh->vertex[i].pt[1] = (double)(fnew_verts[1]);

mesh->vertex[i].pt[2] = (double)(fnew_verts[2]);

fnew_verts += 4;

}

free(vertices);

The kernel is defined like this as follows:

__kernel void bound_mesh(__global float4* vertices, __global float4* bbox, __global float4 new_verts)

{

int gid = get_global_id(0);

if (vertices[gid].x < bbox[0].x)

bbox[0].x = vertices[gid].x;

if (vertices[gid].y < bbox[0].y)

bbox[0].y = vertices[gid].y;

if (vertices[gid].z < bbox[0].z)

bbox[0].z = vertices[gid].z;

if (vertices[gid].x > bbox[1].x)

bbox[1].x = vertices[gid].x;

if (vertices[gid].y > bbox[1].y)

bbox[1].y = vertices[gid].y;

if (vertices[gid].z > bbox[2].z)

bbox[1].z = vertices[gid].z;

new_verts[gid = vertices[gid];

bbox[0].w = -1.0; bbox[1].w = -1.0;

}

So new_verts should atleast give me the correct vertices considering I am assigning them the same value for the vertices.

So neither the bounding box nor the new_verts has the correct values in them. Any suggestions as to what I might be doing wrong.

Thanks for your help.