more than 1 read_only image2d_t freezes video card

Hi,

I’m trying to read in from more than 1 image2d_t object in my kernel. With one ojbect everything works fine, but with 2, my video card freezes. What am I doing wrong?

here’s the host code:


#include "deferred.h"
#include "../resources/generated_lights.h"
#include "objs.h"

void deferred::init()
{
  objs::get()->shader_loader.load_shader( "shaders/deferred/texture.sc", &fs_quad );
  objs::get()->compute_shader_loader.load_shader_control_file( "shaders/deferred/compute_light.sc", &compute_light_shader );

  //two triangles, 6 indices
  quad.faces = new unsigned int[6];
  quad.faces[0] = 0;
  quad.faces[1] = 1;
  quad.faces[2] = 3;

  quad.faces[3] = 3;
  quad.faces[4] = 2;
  quad.faces[5] = 0;

  //4 vertices
  quad.vertices = new float[12];

  //store far clip plane corners' positions (lower left)
  quad.vertices[3 * 0 + 0] = objs::get()->frm.far_ll.get()->v[0];
  quad.vertices[3 * 0 + 1] = objs::get()->frm.far_ll.get()->v[1];
  quad.vertices[3 * 0 + 2] = objs::get()->frm.far_ll.get()->v[2];

  //lower right
  quad.vertices[3 * 1 + 0] = objs::get()->frm.far_lr.get()->v[0];
  quad.vertices[3 * 1 + 1] = objs::get()->frm.far_lr.get()->v[1];
  quad.vertices[3 * 1 + 2] = objs::get()->frm.far_lr.get()->v[2];

  //upper left
  quad.vertices[3 * 2 + 0] = objs::get()->frm.far_ul.get()->v[0];
  quad.vertices[3 * 2 + 1] = objs::get()->frm.far_ul.get()->v[1];
  quad.vertices[3 * 2 + 2] = objs::get()->frm.far_ul.get()->v[2];

  //upper right
  quad.vertices[3 * 3 + 0] = objs::get()->frm.far_ur.get()->v[0];
  quad.vertices[3 * 3 + 1] = objs::get()->frm.far_ur.get()->v[1];
  quad.vertices[3 * 3 + 2] = objs::get()->frm.far_ur.get()->v[2];

  //4 tex coords
  quad.tex_coords = new float[8];

  quad.tex_coords[2 * 0 + 0] = 0.0f;
  quad.tex_coords[2 * 0 + 1] = 0.0f;

  quad.tex_coords[2 * 1 + 0] = 1.0f;
  quad.tex_coords[2 * 1 + 1] = 0.0f;

  quad.tex_coords[2 * 2 + 0] = 0.0f;
  quad.tex_coords[2 * 2 + 1] = 1.0f;

  quad.tex_coords[2 * 3 + 0] = 1.0f;
  quad.tex_coords[2 * 3 + 1] = 1.0f;

  quad.faces_count = 6;
  quad.vertices_count = 4;
  quad.tex_coords_count = 4;

  //upload it to the VGA
  objs::get()->mesh_loader.upload_mesh_from_memory( &quad, fs_quad );
  quad.valid = true;

  unsigned int w = ( unsigned int )objs::get()->conf.SCREEN_WIDTH;
  unsigned int h = ( unsigned int )objs::get()->conf.SCREEN_HEIGHT;

  fbo.create();
  fbo.bind();

  GLenum modes[] = { GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1, GL_COLOR_ATTACHMENT2 };
  glDrawBuffers( 3, modes );

  albedo.create();
  normals.create();
  depth.create();

  albedo.valid = true;
  normals.valid = true;
  depth.valid = true;

  glActiveTexture( GL_TEXTURE5 );
  albedo.bind();
  glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
  glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
  glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR );
  glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR );
  glTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA, ( GLsizei )w, ( GLsizei )h, 0, GL_RGBA, GL_FLOAT, 0 ); //diffuse G-Buffer component
  albedo.width = w;
  albedo.height = h;

  glActiveTexture( GL_TEXTURE6 );
  normals.bind();
  glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
  glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
  glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR );
  glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR );
  glTexImage2D( GL_TEXTURE_2D, 0, GL_RG16F, ( GLsizei )w, ( GLsizei )h, 0, GL_RGBA, GL_FLOAT, 0 ); //normal G-Buffer component
  normals.width = w;
  normals.height = h;

  glActiveTexture( GL_TEXTURE7 );
  depth.bind();
  glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
  glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
  glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST );
  glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST );
  glTexImage2D( GL_TEXTURE_2D, 0, GL_R32F, ( GLsizei )w, ( GLsizei )h, 0, GL_RGBA, GL_FLOAT, 0 ); //depth G-Buffer component, OpenCL doesn't take DEPTH_COMPONENT, YET!
  depth.width = w;
  depth.height = h;

  glActiveTexture( GL_TEXTURE0 );

  rbo.create();
  rbo.bind();
  rbo.set_storage_format( GL_DEPTH_COMPONENT, w, h );
  rbo.width = w;
  rbo.height = h;
  rbo.attach_to_frame_buffer( GL_DEPTH_ATTACHMENT, &fbo );

  albedo.attach_to_frame_buffer( GL_COLOR_ATTACHMENT0, &fbo );
  normals.attach_to_frame_buffer( GL_COLOR_ATTACHMENT1, &fbo );
  depth.attach_to_frame_buffer( GL_COLOR_ATTACHMENT2, &fbo );

  fbo.check();

  fbo.unbind();

  /*
   * Set up OpenCL
   */

  cl_int error;
  albedo_cl = clCreateFromGLTexture2D( objs::get()->the_compute_context.the_context, CL_MEM_READ_ONLY, GL_TEXTURE_2D, 0, albedo.data, &error );
  objs::get()->get_opencl_error( error );
  normals_cl = clCreateFromGLTexture2D( objs::get()->the_compute_context.the_context, CL_MEM_READ_ONLY, GL_TEXTURE_2D, 0, normals.data, &error );
  objs::get()->get_opencl_error( error );
  depth_cl = clCreateFromGLTexture2D( objs::get()->the_compute_context.the_context, CL_MEM_READ_ONLY, GL_TEXTURE_2D, 0, depth.data, &error );
  objs::get()->get_opencl_error( error );

  global_ws = new size_t[2];
  global_ws[0] = ( size_t )w;
  global_ws[1] = ( size_t )h;

  result.create();
  result.valid = true;
  result.bind();
  glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
  glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
  glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR );
  glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR );
  glTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA16F, ( GLsizei )w, ( GLsizei )h, 0, GL_RGBA, GL_FLOAT, 0 ); //might wanna consider using a GL_RG16F and a GL_R16F so the last 16 bits aren't wasted
  result.width = w;
  result.height = h;

  result_cl = clCreateFromGLTexture2D( objs::get()->the_compute_context.the_context, CL_MEM_WRITE_ONLY, GL_TEXTURE_2D, 0, result.data, &error );
  objs::get()->get_opencl_error( error );

  objs::get()->compute_shader_loader.get_compute_shader( compute_light_shader )->set_kernel_arg( "main", 0, sizeof( cl_mem ), &albedo_cl );
  objs::get()->get_opencl_error( error );
  //UNCOMMENT for more than one image2d_t
  /*objs::get()->compute_shader_loader.get_compute_shader( compute_light_shader )->set_kernel_arg( "main", 0, sizeof( cl_mem ), &normals_cl );
  objs::get()->get_opencl_error( error );*/
  /*objs::get()->compute_shader_loader.get_compute_shader( compute_light_shader )->set_kernel_arg( "main", 1, sizeof( cl_mem ), &depth_cl );
  objs::get()->get_opencl_error( error );*/
  objs::get()->compute_shader_loader.get_compute_shader( compute_light_shader )->set_kernel_arg( "main", 1, sizeof( cl_mem ), &result_cl );
  objs::get()->get_opencl_error( error );

  objs::get()->get_opengl_error();
}

void deferred::start()
{
  fbo.bind();
}

void deferred::end()
{
  fbo.unbind();
}

void deferred::render()
{
  glFinish();

  cl_int error;

  error = clEnqueueAcquireGLObjects( objs::get()->the_compute_context.the_command_queue, 1, &albedo_cl, 0, 0, 0 );
  objs::get()->get_opencl_error( error );
  //UNCOMMENT for more than one image2d_t
  /*error = clEnqueueAcquireGLObjects( objs::get()->the_compute_context.the_command_queue, 1, &normals_cl, 0, 0, 0 );
  objs::get()->get_opencl_error( error );*/
  /*error = clEnqueueAcquireGLObjects( objs::get()->the_compute_context.the_command_queue, 1, &depth_cl, 0, 0, 0 );
  objs::get()->get_opencl_error( error );*/
  error = clEnqueueAcquireGLObjects( objs::get()->the_compute_context.the_command_queue, 1, &result_cl, 0, 0, 0 );
  objs::get()->get_opencl_error( error );

  //draw the image
  objs::get()->compute_shader_loader.get_compute_shader( compute_light_shader )->execute( "main", 2, global_ws, 0 );

  error = clEnqueueReleaseGLObjects( objs::get()->the_compute_context.the_command_queue, 1, &albedo_cl, 0, 0, 0 );
  objs::get()->get_opencl_error( error );
  //UNCOMMENT for more than one image2d_t
  /*error = clEnqueueReleaseGLObjects( objs::get()->the_compute_context.the_command_queue, 1, &normals_cl, 0, 0, 0 );
  objs::get()->get_opencl_error( error );*/
  /*error = clEnqueueReleaseGLObjects( objs::get()->the_compute_context.the_command_queue, 1, &depth_cl, 0, 0, 0 );
  objs::get()->get_opencl_error( error );*/
  error = clEnqueueReleaseGLObjects( objs::get()->the_compute_context.the_command_queue, 1, &result_cl, 0, 0, 0 );
  objs::get()->get_opencl_error( error );

  error = clFinish( objs::get()->the_compute_context.the_command_queue );
  objs::get()->get_opencl_error( error );

  //lets show what opencl has created
  glDisable( GL_DEPTH_TEST );
  objs::get()->shader_loader.get_shader( fs_quad )->bind();
  result.bind();

  objs::get()->shader_loader.get_shader( fs_quad )->pass_m4x4( objs::get()->ppl.get_projection_matrix(), "m4_p" );
  objs::get()->shader_loader.get_shader( fs_quad )->pass_m4x4( objs::get()->ppl.get_model_view_matrix(), "m4_mv" );
  objs::get()->shader_loader.get_shader( fs_quad )->pass_int( 0, "texture0" );

  quad.render();

  objs::get()->shader_loader.get_shader( fs_quad )->unbind();
  glEnable( GL_DEPTH_TEST );
}

the kernel code:

__constant sampler_t the_sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;

__kernel void main( __read_only image2d_t albedo, //diffuse surface color from the g-buffer
                    //__read_only image2d_t normals, //normals encoded using spheremap encoding
                    //__read_only image2d_t depth, //linear depth
                    __write_only image2d_t result )//the output buffer that stores lighting data
{
  /*
   * Per pixel calculations (global)
   */

  int2 coords = ( int2 )( get_global_id( 0 ), get_global_id( 1 ) );
  float4 raw_albedo = read_imagef( albedo, the_sampler, coords );
  //float4 raw_normal = read_imagef( normals, the_sampler, coords ); //this will store the decoded normals

  float4 out_color = ( float4 )( 0.0f ); //this will store the resulting color
  out_color.w = 1.0f;

  //float4 raw_depth = read_imagef( depth, the_sampler, coords ); //this will store the decoded view space position
  
  out_color = raw_albedo;
  
  write_imagef( result, coords, out_color );
}