Hi,
I’m trying to read in from more than 1 image2d_t object in my kernel. With one ojbect everything works fine, but with 2, my video card freezes. What am I doing wrong?
here’s the host code:
#include "deferred.h"
#include "../resources/generated_lights.h"
#include "objs.h"
void deferred::init()
{
objs::get()->shader_loader.load_shader( "shaders/deferred/texture.sc", &fs_quad );
objs::get()->compute_shader_loader.load_shader_control_file( "shaders/deferred/compute_light.sc", &compute_light_shader );
//two triangles, 6 indices
quad.faces = new unsigned int[6];
quad.faces[0] = 0;
quad.faces[1] = 1;
quad.faces[2] = 3;
quad.faces[3] = 3;
quad.faces[4] = 2;
quad.faces[5] = 0;
//4 vertices
quad.vertices = new float[12];
//store far clip plane corners' positions (lower left)
quad.vertices[3 * 0 + 0] = objs::get()->frm.far_ll.get()->v[0];
quad.vertices[3 * 0 + 1] = objs::get()->frm.far_ll.get()->v[1];
quad.vertices[3 * 0 + 2] = objs::get()->frm.far_ll.get()->v[2];
//lower right
quad.vertices[3 * 1 + 0] = objs::get()->frm.far_lr.get()->v[0];
quad.vertices[3 * 1 + 1] = objs::get()->frm.far_lr.get()->v[1];
quad.vertices[3 * 1 + 2] = objs::get()->frm.far_lr.get()->v[2];
//upper left
quad.vertices[3 * 2 + 0] = objs::get()->frm.far_ul.get()->v[0];
quad.vertices[3 * 2 + 1] = objs::get()->frm.far_ul.get()->v[1];
quad.vertices[3 * 2 + 2] = objs::get()->frm.far_ul.get()->v[2];
//upper right
quad.vertices[3 * 3 + 0] = objs::get()->frm.far_ur.get()->v[0];
quad.vertices[3 * 3 + 1] = objs::get()->frm.far_ur.get()->v[1];
quad.vertices[3 * 3 + 2] = objs::get()->frm.far_ur.get()->v[2];
//4 tex coords
quad.tex_coords = new float[8];
quad.tex_coords[2 * 0 + 0] = 0.0f;
quad.tex_coords[2 * 0 + 1] = 0.0f;
quad.tex_coords[2 * 1 + 0] = 1.0f;
quad.tex_coords[2 * 1 + 1] = 0.0f;
quad.tex_coords[2 * 2 + 0] = 0.0f;
quad.tex_coords[2 * 2 + 1] = 1.0f;
quad.tex_coords[2 * 3 + 0] = 1.0f;
quad.tex_coords[2 * 3 + 1] = 1.0f;
quad.faces_count = 6;
quad.vertices_count = 4;
quad.tex_coords_count = 4;
//upload it to the VGA
objs::get()->mesh_loader.upload_mesh_from_memory( &quad, fs_quad );
quad.valid = true;
unsigned int w = ( unsigned int )objs::get()->conf.SCREEN_WIDTH;
unsigned int h = ( unsigned int )objs::get()->conf.SCREEN_HEIGHT;
fbo.create();
fbo.bind();
GLenum modes[] = { GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1, GL_COLOR_ATTACHMENT2 };
glDrawBuffers( 3, modes );
albedo.create();
normals.create();
depth.create();
albedo.valid = true;
normals.valid = true;
depth.valid = true;
glActiveTexture( GL_TEXTURE5 );
albedo.bind();
glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR );
glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR );
glTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA, ( GLsizei )w, ( GLsizei )h, 0, GL_RGBA, GL_FLOAT, 0 ); //diffuse G-Buffer component
albedo.width = w;
albedo.height = h;
glActiveTexture( GL_TEXTURE6 );
normals.bind();
glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR );
glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR );
glTexImage2D( GL_TEXTURE_2D, 0, GL_RG16F, ( GLsizei )w, ( GLsizei )h, 0, GL_RGBA, GL_FLOAT, 0 ); //normal G-Buffer component
normals.width = w;
normals.height = h;
glActiveTexture( GL_TEXTURE7 );
depth.bind();
glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST );
glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST );
glTexImage2D( GL_TEXTURE_2D, 0, GL_R32F, ( GLsizei )w, ( GLsizei )h, 0, GL_RGBA, GL_FLOAT, 0 ); //depth G-Buffer component, OpenCL doesn't take DEPTH_COMPONENT, YET!
depth.width = w;
depth.height = h;
glActiveTexture( GL_TEXTURE0 );
rbo.create();
rbo.bind();
rbo.set_storage_format( GL_DEPTH_COMPONENT, w, h );
rbo.width = w;
rbo.height = h;
rbo.attach_to_frame_buffer( GL_DEPTH_ATTACHMENT, &fbo );
albedo.attach_to_frame_buffer( GL_COLOR_ATTACHMENT0, &fbo );
normals.attach_to_frame_buffer( GL_COLOR_ATTACHMENT1, &fbo );
depth.attach_to_frame_buffer( GL_COLOR_ATTACHMENT2, &fbo );
fbo.check();
fbo.unbind();
/*
* Set up OpenCL
*/
cl_int error;
albedo_cl = clCreateFromGLTexture2D( objs::get()->the_compute_context.the_context, CL_MEM_READ_ONLY, GL_TEXTURE_2D, 0, albedo.data, &error );
objs::get()->get_opencl_error( error );
normals_cl = clCreateFromGLTexture2D( objs::get()->the_compute_context.the_context, CL_MEM_READ_ONLY, GL_TEXTURE_2D, 0, normals.data, &error );
objs::get()->get_opencl_error( error );
depth_cl = clCreateFromGLTexture2D( objs::get()->the_compute_context.the_context, CL_MEM_READ_ONLY, GL_TEXTURE_2D, 0, depth.data, &error );
objs::get()->get_opencl_error( error );
global_ws = new size_t[2];
global_ws[0] = ( size_t )w;
global_ws[1] = ( size_t )h;
result.create();
result.valid = true;
result.bind();
glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR );
glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR );
glTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA16F, ( GLsizei )w, ( GLsizei )h, 0, GL_RGBA, GL_FLOAT, 0 ); //might wanna consider using a GL_RG16F and a GL_R16F so the last 16 bits aren't wasted
result.width = w;
result.height = h;
result_cl = clCreateFromGLTexture2D( objs::get()->the_compute_context.the_context, CL_MEM_WRITE_ONLY, GL_TEXTURE_2D, 0, result.data, &error );
objs::get()->get_opencl_error( error );
objs::get()->compute_shader_loader.get_compute_shader( compute_light_shader )->set_kernel_arg( "main", 0, sizeof( cl_mem ), &albedo_cl );
objs::get()->get_opencl_error( error );
//UNCOMMENT for more than one image2d_t
/*objs::get()->compute_shader_loader.get_compute_shader( compute_light_shader )->set_kernel_arg( "main", 0, sizeof( cl_mem ), &normals_cl );
objs::get()->get_opencl_error( error );*/
/*objs::get()->compute_shader_loader.get_compute_shader( compute_light_shader )->set_kernel_arg( "main", 1, sizeof( cl_mem ), &depth_cl );
objs::get()->get_opencl_error( error );*/
objs::get()->compute_shader_loader.get_compute_shader( compute_light_shader )->set_kernel_arg( "main", 1, sizeof( cl_mem ), &result_cl );
objs::get()->get_opencl_error( error );
objs::get()->get_opengl_error();
}
void deferred::start()
{
fbo.bind();
}
void deferred::end()
{
fbo.unbind();
}
void deferred::render()
{
glFinish();
cl_int error;
error = clEnqueueAcquireGLObjects( objs::get()->the_compute_context.the_command_queue, 1, &albedo_cl, 0, 0, 0 );
objs::get()->get_opencl_error( error );
//UNCOMMENT for more than one image2d_t
/*error = clEnqueueAcquireGLObjects( objs::get()->the_compute_context.the_command_queue, 1, &normals_cl, 0, 0, 0 );
objs::get()->get_opencl_error( error );*/
/*error = clEnqueueAcquireGLObjects( objs::get()->the_compute_context.the_command_queue, 1, &depth_cl, 0, 0, 0 );
objs::get()->get_opencl_error( error );*/
error = clEnqueueAcquireGLObjects( objs::get()->the_compute_context.the_command_queue, 1, &result_cl, 0, 0, 0 );
objs::get()->get_opencl_error( error );
//draw the image
objs::get()->compute_shader_loader.get_compute_shader( compute_light_shader )->execute( "main", 2, global_ws, 0 );
error = clEnqueueReleaseGLObjects( objs::get()->the_compute_context.the_command_queue, 1, &albedo_cl, 0, 0, 0 );
objs::get()->get_opencl_error( error );
//UNCOMMENT for more than one image2d_t
/*error = clEnqueueReleaseGLObjects( objs::get()->the_compute_context.the_command_queue, 1, &normals_cl, 0, 0, 0 );
objs::get()->get_opencl_error( error );*/
/*error = clEnqueueReleaseGLObjects( objs::get()->the_compute_context.the_command_queue, 1, &depth_cl, 0, 0, 0 );
objs::get()->get_opencl_error( error );*/
error = clEnqueueReleaseGLObjects( objs::get()->the_compute_context.the_command_queue, 1, &result_cl, 0, 0, 0 );
objs::get()->get_opencl_error( error );
error = clFinish( objs::get()->the_compute_context.the_command_queue );
objs::get()->get_opencl_error( error );
//lets show what opencl has created
glDisable( GL_DEPTH_TEST );
objs::get()->shader_loader.get_shader( fs_quad )->bind();
result.bind();
objs::get()->shader_loader.get_shader( fs_quad )->pass_m4x4( objs::get()->ppl.get_projection_matrix(), "m4_p" );
objs::get()->shader_loader.get_shader( fs_quad )->pass_m4x4( objs::get()->ppl.get_model_view_matrix(), "m4_mv" );
objs::get()->shader_loader.get_shader( fs_quad )->pass_int( 0, "texture0" );
quad.render();
objs::get()->shader_loader.get_shader( fs_quad )->unbind();
glEnable( GL_DEPTH_TEST );
}
the kernel code:
__constant sampler_t the_sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
__kernel void main( __read_only image2d_t albedo, //diffuse surface color from the g-buffer
//__read_only image2d_t normals, //normals encoded using spheremap encoding
//__read_only image2d_t depth, //linear depth
__write_only image2d_t result )//the output buffer that stores lighting data
{
/*
* Per pixel calculations (global)
*/
int2 coords = ( int2 )( get_global_id( 0 ), get_global_id( 1 ) );
float4 raw_albedo = read_imagef( albedo, the_sampler, coords );
//float4 raw_normal = read_imagef( normals, the_sampler, coords ); //this will store the decoded normals
float4 out_color = ( float4 )( 0.0f ); //this will store the resulting color
out_color.w = 1.0f;
//float4 raw_depth = read_imagef( depth, the_sampler, coords ); //this will store the decoded view space position
out_color = raw_albedo;
write_imagef( result, coords, out_color );
}