Problem with clCreateFromGLTexture2D

I tried it this way (here is the most important code):

Code for the texture


...
// Create one texture object and bind it as the current GL_TEXTURE_2D.
glGenTextures(1, &textur);	
glBindTexture(GL_TEXTURE_2D, textur);
// Texture environment mode is set to GL_REPLACE.
glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);

// Nearest-neighbour filtering for both minification and magnification.
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST );
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST );	
// Upload the bitmap: internal format GL_RGBA, source pixels described as GL_BGR_EXT bytes.
// NOTE(review): the reply further down suspects this format combination is what
// breaks the CL/GL sharing - confirm before reusing this call.
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, BMP.bmWidth,  BMP.bmHeight, 0, GL_BGR_EXT, GL_UNSIGNED_BYTE, BMP.bmBits);

Code to get the context (Windows)


// Zero-terminated property list that ties the CL context to the OpenGL
// context and device context current on this thread, and to the chosen platform.
cl_context_properties props[] = {
	CL_GL_CONTEXT_KHR,   (cl_context_properties) wglGetCurrentContext(),
	CL_WGL_HDC_KHR,      (cl_context_properties) wglGetCurrentDC(),
	CL_CONTEXT_PLATFORM, (cl_context_properties) selectedPlatformID,
	0
};

// Pass the array declared above; the excerpt previously named an undeclared `properties`.
cl_context  context = clCreateContext(props, 1, &selectedDeviceID, NULL, NULL, &err);

Code to get the texture object from OpenGL to OpenCL


// Wrap mip level 0 of the GL texture as a write-only CL image.
// The API name carries the `cl` prefix and takes a trailing cl_int* for the error code.
imageOutObject = clCreateFromGLTexture2D(context, CL_MEM_WRITE_ONLY, GL_TEXTURE_2D, 0, textur, &err);

Code in the main loop



// Work-group of 8x8 work-items; the global range covers width x height.
size_t localWorkSize[2] = { 8, 8 };		
// NOTE(review): workGroupSize is not used anywhere in this excerpt.
unsigned int workGroupSize = 64;		

size_t globalWorkSize[2] =  {width, height};
		
// Argument 0 of the kernel is the shared image object.
clSetKernelArg( kernel, 0, sizeof( imageOutObject ), &imageOutObject );
				
// Hand the GL texture over to OpenCL before the kernel touches it.
clEnqueueAcquireGLObjects(commands,1,&imageOutObject,0,NULL,NULL);
					
clEnqueueNDRangeKernel(commands, kernel, 2, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL);

// Give the texture back to OpenGL, then wait for the queue to drain.
// NOTE(review): none of the CL return codes are checked in this excerpt.
clEnqueueReleaseGLObjects(commands,1,&imageOutObject,0,NULL,NULL);
clFinish(commands);


// Draw one quad textured with the (supposedly updated) texture.
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glLoadIdentity();					
glBindTexture(GL_TEXTURE_2D, textur);

glBegin(GL_QUADS);
		
     glTexCoord2f(0.0f, 0.0f); glVertex3f(-1.0f, -1.0f,  1.0f);
     glTexCoord2f(1.0f, 0.0f); glVertex3f( 1.0f, -1.0f,  1.0f);
     glTexCoord2f(1.0f, 1.0f); glVertex3f( 1.0f,  1.0f,  1.0f);
     glTexCoord2f(0.0f, 1.0f); glVertex3f(-1.0f,  1.0f,  1.0f);
glEnd();

// Wait for GL to finish before presenting the frame.
glFinish();
SwapBuffers(hDC);	 

The kernel

// Writes the constant color (1, 0, 0, 1) to the pixel addressed by this work-item.
// output: write-only 2D image; one work-item is expected per pixel.
__kernel void kernel1(write_only image2d_t output)
   {
       // x comes from global dimension 0, y from global dimension 1
       // (the second call was misspelled as get_global_i).
       int2 coordi = (int2)( get_global_id(0), get_global_id(1) );
       float4 color = (float4)(1.0f, 0.0f, 0.0f, 1.0f);    //set new color
       write_imagef( output, coordi, color );
   }

I use a picture with many different colors for the texture, and it is shown on the screen.
But the kernel does not change the color.

I get error code -5 at
clEnqueueAcquireGLObjects
and
clEnqueueReleaseGLObjects

But this error code is not documented!
None of the errors listed here:
http://www.khronos.org/registry/cl/sdk/ … jects.html

http://www.khronos.org/registry/cl/sdk/ … jects.html

matches my error code -5.

No matter what I write in the kernel, nothing happens to the texture.

(Sorry for not perfect english)

What is wrong in my program?

Error -5 is CL_OUT_OF_RESOURCES in OpenCL's cl.h (it only looks like errno's EIO), which doesn’t tell you much apart from ‘it failed’. If anything fails, you can’t expect the rest to work.

I would guess that the problem is the texture format:

glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, BMP.bmWidth, BMP.bmHeight, 0, GL_BGR_EXT, GL_UNSIGNED_BYTE, BMP.bmBits);

Only a limited number of formats are guaranteed to inter-operate. See The OpenCL specification 1.1, section 9.8.3.1 “List of OpenGL and corresponding OpenCL Image Formats”, and if you’re using opencl 1.0 the list is even smaller.

I’ve used GL_RGBA8 and GL_RGBA successfully in the past, e.g.:

glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, BMP.bmWidth, BMP.bmHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);

(there’s no need to copy a bitmap from the host if you’re writing to it from device?)

This will equate to the opencl image format CL_RGBA with data type CL_UNORM_INT8

Now I use this for making the texture.


FREE_IMAGE_FORMAT format = FreeImage_GetFileType(pictureFile, 0);
	FIBITMAP* image = FreeImage_Load(format, pictureFile);

	FIBITMAP* temp = image;
	image = FreeImage_ConvertTo32Bits(image);
	FreeImage_Unload(temp);

	textureWidth = FreeImage_GetWidth(image);
	textureHeight = FreeImage_GetHeight(image);

	char *buffer = new char[textureWidth * textureHeight * 4];
	memcpy(buffer, FreeImage_GetBits(image), textureWidth * textureHeight * 4);

	FreeImage_Unload(image);

	glGenTextures(1, &myTexture);	
	glBindTexture(GL_TEXTURE_2D, myTexture);
	glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);

	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST );
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST );	

	glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, textureWidth,  textureHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, buffer);

And this is my main Loop


void Render(void)
{
	
	//Der OpenCL Anteil
	//The OpenCL Part
	size_t localWorkSize[2] = { 8, 8 };
	unsigned int workGroupSize = 64;		

	size_t globalWorkSize[2] =  {textureWidth, textureHeight};

	err = clEnqueueAcquireGLObjects(commands,1,&imageOutObject,0,NULL,NULL);
	if(err != CL_SUCCESS){ clEnqueueAcquireGLObjectsMessage(err);}
	f << "2" << std::endl;

	err = clEnqueueNDRangeKernel(commands, kernel, 2, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL);
	if(err != CL_SUCCESS){clEnqueueNDRangeKernelMessage(err);}
	f << "3" << std::endl;

	err = clEnqueueReleaseGLObjects(commands,1,&imageOutObject,0,NULL,NULL);
	if(err != CL_SUCCESS) {clEnqueueReleaseGLObjectsMessage(err);}
	f << "4" << std::endl;

	clFinish(commands);
	f << "5" << std::endl;


	//Der OpenGL Anteil
	//The OpenGL Part
	glClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT); 
	glMatrixMode(GL_PROJECTION);
	glLoadIdentity();
	gluPerspective(60.0, 1.0, 1.0, 200.0);     

	glMatrixMode(GL_MODELVIEW);
	glLoadIdentity();


	glTranslatef (0.0, 0.0 ,-4.0);   
	glRotatef (rotX, 1.0, 0.0, 0.0);   
	glRotatef (rotY, 0.0, 1.0, 0.0);  
	glTranslatef (-0.5, -0.5 , -0.5); 

	glBindTexture(GL_TEXTURE_2D, myTexture);

	glBegin (GL_QUADS);
	glTexCoord2f(0.0f, 0.0f); glVertex3f(-1.0f, -1.0f,  1.0f);
	glTexCoord2f(1.0f, 0.0f); glVertex3f( 1.0f, -1.0f,  1.0f);
	glTexCoord2f(1.0f, 1.0f); glVertex3f( 1.0f,  1.0f,  1.0f);
	glTexCoord2f(0.0f, 1.0f); glVertex3f(-1.0f,  1.0f,  1.0f);
	glEnd ();


	
	glutSwapBuffers();
}

I get a crash, but no error code, in the main loop at

clEnqueueNDRangeKernel(commands, kernel, 2, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL);

Here are all information I get with my program:

Platform index: 1
Platform number: 00765598
Platform Name: Intel(R) OpenCL
Platform Profile: FULL_PROFILE
Platform Version: OpenCL 1.1 WINDOWS
Platform Vedndor: Intel(R) Corporation
Platform Extensions: cl_khr_fp64 cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_gl_sharing cl_khr_byte_addressable_store cl_intel_printf cl_ext_device_fission cl_khr_icd

Device index: 1
Device number: 05841908
Device Address Bits: 32
Device Available: true
Device Compiler Available: true
Double precision floating-point capability (denorms are supported): true
Double precision floating-point capability (INF and NaNs are supported): true
Double precision floating-point capability (round to nearest even rounding mode supported): true
Double precision floating-point capability (round to zero rounding mode supported): true
Double precision floating-point capability (round to +ve and -ve infinity rounding modes supported): true
Double precision floating-point capability (IEEE754-2008 fused multiply-add is supported): true
Is true if the OpenCL device is a little endian device and false otherwise: true
Is true if the device implements error correction for the memories, caches, registers etc. in the device: false
Describes the execution capabilities of the device(CL_EXEC_KERNEL): true
Describes the execution capabilities of the device(CL_EXEC_NATIVE_KERNEL ): false
list of device extension names: cl_khr_byte_addressable_store cl_khr_icd cl_khr_gl_sharing cl_nv_d3d9_sharing cl_nv_d3d10_sharing cl_khr_d3d10_sharing cl_nv_d3d11_sharing cl_nv_compiler_options cl_nv_device_attribute_query cl_nv_pragma_unroll cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_fp64
Size of global memory cache in bytes: 131072
cl_device_mem_cache_type (CL_NONE): false
cl_device_mem_cache_type (CL_READ_ONLY_CACHE): false
cl_device_mem_cache_type (CL_READ_WRITE_CACHE): true
Size of global memory cache line in bytes: 128
Size of global device memory in bytes: 1041694720
Half precision floating-point capability (denorms are supported): false
Half precision floating-point capability (INF and NaNs are supported): false
Half precision floating-point capability (round to nearest even rounding mode supported): false
Half precision floating-point capability (round to zero rounding mode supported): false
Half precision floating-point capability (round to +ve and -ve infinity rounding modes supported): false
Half precision floating-point capability (IEEE754-2008 fused multiply-add is supported): false
Is true if images are supported by the OpenCL device and false otherwise: true
Size of global device memory in bytes: 1041694720
Max height of 2D image in pixels: 16384
Max width of 2D image in pixels: 16384
Max depth of 3D image in pixels: 2048
Max height of 3D image in pixels: 2048
Max width of 3D image in pixels: 2048
Size of local memory arena in bytes: 49152
Type of local memory supported(CL_LOCAL): true
Type of local memory supported(CL_GLOBAL): false
Maximum configured clock frequency of the device in MHz: 1645
The number of parallel compute cores on the OpenCL device: 8
Max number of arguments declared with the __constant qualifier in a kernel: 9
Max size in bytes of a constant buffer allocation: 65536
Max size of memory object allocation in bytes: 260423680
Max size in bytes of the arguments that can be passed to a kernel: 4352
Max number of simultaneous image objects that can be read by a kernel: 128
Maximum number of samplers that can be used in a kernel: 16
Maximum number of work-items in a work-group executing a kernel using the data parallel execution model: 1024
Maximum dimensions that specify the global and local work-item IDs used by the data parallel execution model: 3
Maximum number of work-items that can be specified in each dimension of the work-group to clEnqueueNDRangeKernel: (1024, 1024, 64)
Max number of simultaneous image objects that can be written to by a kernel: 8
Describes the alignment in bits of the base address of any allocated memory object: 4096
The smallest alignment in bytes which can be used for any data type: 128
Device name string: GeForce GTX 560 Ti
The platform associated with this device: 05841860
Preferred native vector width size for built-in scalar types that can be put into vectors. Thevector width is defined as the number of scalar elements that can be stored in the vector (Char): 1
Preferred native vector width size for built-in scalar types that can be put into vectors. Thevector width is defined as the number of scalar elements that can be stored in the vector (Short): 1
Preferred native vector width size for built-in scalar types that can be put into vectors. Thevector width is defined as the number of scalar elements that can be stored in the vector (Int): 1
Preferred native vector width size for built-in scalar types that can be put into vectors. Thevector width is defined as the number of scalar elements that can be stored in the vector (Long): 1
Preferred native vector width size for built-in scalar types that can be put into vectors. Thevector width is defined as the number of scalar elements that can be stored in the vector (Float): 1
Preferred native vector width size for built-in scalar types that can be put into vectors. Thevector width is defined as the number of scalar elements that can be stored in the vector (Double): 1
Returns the profile name supported by the device: FULL_PROFILE
Describes the resolution of device timer. This is measured in nanoseconds: 1000
Describes the command-queue properties supported by the device (CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE): true
Describes the command-queue properties supported by the device (CL_QUEUE_PROFILING_ENABLE): true
Describes single precision floating-point capability of the device (CL_FP_DENORM): true
Describes single precision floating-point capability of the device (CL_FP_INF_NAN ): true
Describes single precision floating-point capability of the device (CL_FP_ROUND_TO_NEAREST ): true
Describes single precision floating-point capability of the device (CL_FP_ROUND_TO_ZERO ): true
Describes single precision floating-point capability of the device (CL_FP_ROUND_TO_INF ): true
Describes single precision floating-point capability of the device (CL_FP_FMA): true
The OpenCL device type (CL_DEVICE_TYPE_CPU): false
The OpenCL device type (CL_DEVICE_TYPE_GPU): true
The OpenCL device type (CL_DEVICE_TYPE_ACCELERATOR): false
The OpenCL device type (CL_DEVICE_TYPE_DEFAULT): false
Vendor name string: NVIDIA Corporation
A unique device vendor identifier: 4318
OpenCL version string: OpenCL 1.1 CUDA
OpenCL software driver version string in the form major_number.minor_number: 280.26

imageOutObject
Actual size of memobj in bytes: 0
Map count. The map count returned should be considered immediately stale: 0
Return memobj reference count: 1
Created with clCreateImage2D
Context: 058419D8

The only problem I found is "Actual size of memobj in bytes: 0" (I think a memory size of 0 is not good), which comes from this code:


//Erzeuge das Image-Objet aus der OpenGL-Textur
	//Get an image Object from the OpenGL texture
	imageOutObject= clCreateFromGLTexture2D( context, CL_MEM_WRITE_ONLY, GL_TEXTURE_2D,0, myTexture,  &err);
	if (err != CL_SUCCESS)
	{
		clCreateFromGLTexture2DMessage(err);			
	}
	//Informationen über das gewonnene OpenCL-Texturobject ausgeben
	//Show Information about the OpenCL image
	clGetMEMMessages(imageOutObject, "imageOutObject");

void clGetMEMMessages(cl_mem memobj, char* anmerkung)
{
	cl_int err;


	size_t memSizeInfo;
	cl_uint mapCount;
	cl_uint memReferenceCount;
	cl_mem_object_type memType;
	cl_context memContext;

	err = clGetMemObjectInfo(memobj, CL_MEM_TYPE, sizeof(cl_uint), &memType, NULL);
	err = clGetMemObjectInfo(memobj, CL_MEM_SIZE,sizeof(size_t), &memSizeInfo, NULL);
	err = clGetMemObjectInfo(memobj, CL_MEM_MAP_COUNT,sizeof(cl_uint), &mapCount, NULL);
	err = clGetMemObjectInfo(memobj, CL_MEM_REFERENCE_COUNT, sizeof(cl_uint), &memReferenceCount, NULL);
	err = clGetMemObjectInfo(memobj, CL_MEM_CONTEXT, sizeof(cl_uint), &memContext, NULL);


	f << anmerkung << std::endl;
	f << "Actual size of memobj in bytes: " << memSizeInfo << std::endl;
	f << "Map count. The map count returned should be considered immediately stale: " << mapCount << std::endl;
	f << "Return memobj reference count: " << memReferenceCount << std::endl;
	if (memType == CL_MEM_OBJECT_BUFFER ) f << "Created with clCreateBuffer" << std::endl;
	if (memType == CL_MEM_OBJECT_IMAGE2D ) f << "Created with clCreateImage2D" << std::endl;
	if (memType == CL_MEM_OBJECT_IMAGE3D ) f << "Created with clCreateImage3D" << std::endl; 
	f << "Context: " << memContext << std::endl << std::endl;
}

Please help me.

Correction: I did not copy all of the information:

Platform index: 0
Platform number: 04DD1860
Platform Name: NVIDIA CUDA
Platform Profile: FULL_PROFILE
Platform Version: OpenCL 1.1 CUDA 4.0.1
Platform Vedndor: NVIDIA Corporation
Platform Extensions: cl_khr_byte_addressable_store cl_khr_icd cl_khr_gl_sharing cl_nv_d3d9_sharing cl_nv_d3d10_sharing cl_khr_d3d10_sharing cl_nv_d3d11_sharing cl_nv_compiler_options cl_nv_device_attribute_query cl_nv_pragma_unroll

Platform index: 1
Platform number: 01204F98
Platform Name: Intel(R) OpenCL
Platform Profile: FULL_PROFILE
Platform Version: OpenCL 1.1 WINDOWS
Platform Vedndor: Intel(R) Corporation
Platform Extensions: cl_khr_fp64 cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_gl_sharing cl_khr_byte_addressable_store cl_intel_printf cl_ext_device_fission cl_khr_icd

Device index: 0
Device number: 04DD1908
Device Address Bits: 32
Device Available: true
Device Compiler Available: true
Double precision floating-point capability (denorms are supported): true
Double precision floating-point capability (INF and NaNs are supported): true
Double precision floating-point capability (round to nearest even rounding mode supported): true
Double precision floating-point capability (round to zero rounding mode supported): true
Double precision floating-point capability (round to +ve and -ve infinity rounding modes supported): true
Double precision floating-point capability (IEEE754-2008 fused multiply-add is supported): true
Is true if the OpenCL device is a little endian device and false otherwise: true
Is true if the device implements error correction for the memories, caches, registers etc. in the device: false
Describes the execution capabilities of the device(CL_EXEC_KERNEL): true
Describes the execution capabilities of the device(CL_EXEC_NATIVE_KERNEL ): false
list of device extension names: cl_khr_byte_addressable_store cl_khr_icd cl_khr_gl_sharing cl_nv_d3d9_sharing cl_nv_d3d10_sharing cl_khr_d3d10_sharing cl_nv_d3d11_sharing cl_nv_compiler_options cl_nv_device_attribute_query cl_nv_pragma_unroll cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_fp64
Size of global memory cache in bytes: 131072
cl_device_mem_cache_type (CL_NONE): false
cl_device_mem_cache_type (CL_READ_ONLY_CACHE): false
cl_device_mem_cache_type (CL_READ_WRITE_CACHE): true
Size of global memory cache line in bytes: 128
Size of global device memory in bytes: 1041694720
Half precision floating-point capability (denorms are supported): false
Half precision floating-point capability (INF and NaNs are supported): false
Half precision floating-point capability (round to nearest even rounding mode supported): true
Half precision floating-point capability (round to zero rounding mode supported): true
Half precision floating-point capability (round to +ve and -ve infinity rounding modes supported): false
Half precision floating-point capability (IEEE754-2008 fused multiply-add is supported): false
Is true if images are supported by the OpenCL device and false otherwise: true
Size of global device memory in bytes: 1041694720
Max height of 2D image in pixels: 16384
Max width of 2D image in pixels: 16384
Max depth of 3D image in pixels: 2048
Max height of 3D image in pixels: 2048
Max width of 3D image in pixels: 2048
Size of local memory arena in bytes: 49152
Type of local memory supported(CL_LOCAL): true
Type of local memory supported(CL_GLOBAL): false
Maximum configured clock frequency of the device in MHz: 1645
The number of parallel compute cores on the OpenCL device: 8
Max number of arguments declared with the __constant qualifier in a kernel: 9
Max size in bytes of a constant buffer allocation: 65536
Max size of memory object allocation in bytes: 260423680
Max size in bytes of the arguments that can be passed to a kernel: 4352
Max number of simultaneous image objects that can be read by a kernel: 128
Maximum number of samplers that can be used in a kernel: 16
Maximum number of work-items in a work-group executing a kernel using the data parallel execution model: 1024
Maximum dimensions that specify the global and local work-item IDs used by the data parallel execution model: 3
Maximum number of work-items that can be specified in each dimension of the work-group to clEnqueueNDRangeKernel: (1024, 1024, 64)
Max number of simultaneous image objects that can be written to by a kernel: 8
Describes the alignment in bits of the base address of any allocated memory object: 4096
The smallest alignment in bytes which can be used for any data type: 128
Device name string: GeForce GTX 560 Ti
The platform associated with this device: 04DD1860
Preferred native vector width size for built-in scalar types that can be put into vectors. Thevector width is defined as the number of scalar elements that can be stored in the vector (Char): 1
Preferred native vector width size for built-in scalar types that can be put into vectors. Thevector width is defined as the number of scalar elements that can be stored in the vector (Short): 1
Preferred native vector width size for built-in scalar types that can be put into vectors. Thevector width is defined as the number of scalar elements that can be stored in the vector (Int): 1
Preferred native vector width size for built-in scalar types that can be put into vectors. Thevector width is defined as the number of scalar elements that can be stored in the vector (Long): 1
Preferred native vector width size for built-in scalar types that can be put into vectors. Thevector width is defined as the number of scalar elements that can be stored in the vector (Float): 1
Preferred native vector width size for built-in scalar types that can be put into vectors. Thevector width is defined as the number of scalar elements that can be stored in the vector (Double): 1
Returns the profile name supported by the device: FULL_PROFILE
Describes the resolution of device timer. This is measured in nanoseconds: 1000
Describes the command-queue properties supported by the device (CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE): true
Describes the command-queue properties supported by the device (CL_QUEUE_PROFILING_ENABLE): true
Describes single precision floating-point capability of the device (CL_FP_DENORM): true
Describes single precision floating-point capability of the device (CL_FP_INF_NAN ): true
Describes single precision floating-point capability of the device (CL_FP_ROUND_TO_NEAREST ): true
Describes single precision floating-point capability of the device (CL_FP_ROUND_TO_ZERO ): true
Describes single precision floating-point capability of the device (CL_FP_ROUND_TO_INF ): true
Describes single precision floating-point capability of the device (CL_FP_FMA): true
The OpenCL device type (CL_DEVICE_TYPE_CPU): false
The OpenCL device type (CL_DEVICE_TYPE_GPU): true
The OpenCL device type (CL_DEVICE_TYPE_ACCELERATOR): false
The OpenCL device type (CL_DEVICE_TYPE_DEFAULT): false
Vendor name string: NVIDIA Corporation
A unique device vendor identifier: 4318
OpenCL version string: OpenCL 1.1 CUDA
OpenCL software driver version string in the form major_number.minor_number: 280.26

imageOutObject
Actual size of memobj in bytes: 0
Map count. The map count returned should be considered immediately stale: 0
Return memobj reference count: 1
Created with clCreateImage2D
Context: 04DD19D8

I tried it with a lot of code from the internet that people say works.

In every example, “clCreateFromGLTexture2D()” gives me a cl_mem object with:
Actual size of memobj in bytes: 0
(But no error message from the function itself)

And an error in “clEnqueueAcquireGLObjects()” when using the created mem object.

Is it possible that the function “clCreateFromGLTexture2D()” is implemented in nvidia driver as a dummy function? (I use the latest Nvidia driver for windows 7 64 Bit)

Is it possible that the function “clCreateFromGLTexture2D()” is implemented in nvidia driver as a dummy function? (I use the latest Nvidia driver for windows 7 64 Bit)

No, it works. I’ve tested both on that and linux with `no problems’. Although I had a lot of hassle getting it to work and abandoned it later anyway.

Can you please post code of a (simple) working example for Windows?
(My one and all I found do not work)

I might be able to find one, but it’s Java, so if that’s no use to you there isn’t much point.

Hi,

It is difficult to use a Java program to find the correct way to write a C program, so
I have made my (not working) program as simple as possible.



#include <windows.h>
#include <gl/gl.h>
#include <GL/glut.h>
#include <CL/cl.h>
#include <CL/cl_gl.h>
#include <cstring>
#include <iostream>
#include <vector>

// Shared OpenCL state, filled in by InitCL() and used by Render().
cl_int err;                      // status code of the most recent OpenCL call
cl_uint selectedPlatform;        // index into the platform list (InitCL sets it to 0)
cl_device_id selectedDeviceID; 
cl_context  context;
cl_command_queue commands;
cl_program program;
cl_kernel kernel;
cl_mem imageOutObject;           // CL image created from myTexture

// OpenGL texture shared with OpenCL and its size in pixels (set in InitGL()).
GLuint myTexture;
GLuint textureWidth;
GLuint textureHeight;

// One-time setup, called from main() after the window exists.
void InitGL(void);
void InitCL(void);

// GLUT callbacks registered in main().
void Render(void);
void Resize(int width, int height);
void SpecialKey(int key, int x, int y);  

// Window size: initial request, later updated by Resize().
int wnd_width  = 800;
int wnd_height = 600; 

// Rotation angles applied to the quad in Render(), changed by SpecialKey().
float rotX=0;
float rotY=0;

// Entry point: opens the GLUT window, initialises the GL and CL state,
// registers the callbacks and hands control to the GLUT main loop.
int main(int argc, char **argv)
{
	// Window setup: double-buffered RGB with a depth buffer.
	glutInit(&argc, argv);
	glutInitDisplayMode(GLUT_RGB | GLUT_DEPTH | GLUT_DOUBLE);
	glutInitWindowSize(wnd_width, wnd_height);
	glutCreateWindow("OpenGL-CL interraction!");

	// InitGL creates the texture that InitCL then shares, so it runs first.
	InitGL();
	InitCL();

	// Callback registration.
	glutSpecialFunc(SpecialKey);
	glutReshapeFunc(Resize);
	glutDisplayFunc(Render);

	glutMainLoop();
}
void InitGL(void)
{
	glClearColor(0.0,0.0,0.0,0.0);          
	glEnable(GL_DEPTH_TEST);
	glEnable(GL_TEXTURE_2D);	
	 
	//make OpenGL texture
	textureWidth = 256;
	textureHeight = 256;
	char *buffer = new char[textureWidth * textureHeight * 4];
	for (unsigned int i = 0; i < textureWidth * textureHeight * 4; i++)
	{
		buffer[i] = (char)255;	// RGBA = (255,255,255,255) = white
	}
	
	glGenTextures(1, &myTexture);	
	glBindTexture(GL_TEXTURE_2D, myTexture);
	glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);

	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST );
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST );	
	glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, textureWidth,  textureHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
	glBindTexture(GL_TEXTURE_2D, 0);
}


void InitCL(void)
{
	//Get all Platforms and select a GPU one
	cl_uint numPlatforms;
	clGetPlatformIDs (65536, NULL, &numPlatforms); 
	std::cout << "Platforms detected: " << numPlatforms << std::endl;

	cl_platform_id* platformIDs;
	platformIDs = new cl_platform_id[numPlatforms];

	err = clGetPlatformIDs(numPlatforms, platformIDs, NULL);
	if(err != CL_SUCCESS)
	{
		std::cout << "error at clGetPlatformIDs :" << err << std::endl;

	}

	selectedPlatform=0;  // simply take first platform(index 0), code for taking correct one is long and not postet here

	cl_platform_id selectedPlatformID = platformIDs[selectedPlatform];
	delete[] platformIDs; 	

	
	//Select a GPU device
	err = clGetDeviceIDs(selectedPlatformID, CL_DEVICE_TYPE_GPU, 1, &selectedDeviceID, NULL);
	if(err != CL_SUCCESS)
	{
		std::cout << "error at clGetDeviceIDs :" << err << std::endl;		
	}
	
	char cDeviceNameBuffer[1024];
	clGetDeviceInfo (selectedDeviceID, CL_DEVICE_NAME, sizeof(char) *  1024, cDeviceNameBuffer, NULL);
	std::cout  <<": Device Name: "		<< cDeviceNameBuffer << std::endl; 
	std::cout << std::endl;

	//Get a context with OpenGL connection
	cl_context_properties props[] = { CL_GL_CONTEXT_KHR, (cl_context_properties)wglGetCurrentContext(),  CL_WGL_HDC_KHR, (cl_context_properties) wglGetCurrentDC(), CL_CONTEXT_PLATFORM, (cl_context_properties) selectedPlatformID, 0};

	context = clCreateContext(props,1, &selectedDeviceID, NULL, NULL, &err);
	if(!context || err!= CL_SUCCESS)
	{
		std::cout << "error at clCreateContext :" << err << std::endl;	

	}

	//create a command queue
	commands = clCreateCommandQueue(context, selectedDeviceID, 0,&err);
	if(!commands || err!= CL_SUCCESS)
	{
		std::cout << "error at clCreateCommandQueue :" << err << std::endl;	
	}

	//use  the kernel-source code to create a program
	char* kernelSource = "  \
    __kernel void kernel1(write_only image2d_t output)            
\
   {															                 
\
	   int2 coordi = (int2)( get_global_id(0), get_global_id(1) ); 
\
       float4 color;			                                     
\
       color = (float4)(1.0,0.0,0.0,1.0);		                   
\
       write_imagef( output,coordi , color );                     
\
   }";  

	int szKernelLength = strlen(kernelSource); 
	program = clCreateProgramWithSource(context, 1,(const char**)& kernelSource, NULL, &err);
	if (!program)
	{	
		std::cout << "error at clCreateProgramWithSource :" << err << std::endl;	
		if (kernelSource) {delete[] kernelSource;}
	}


	//Compile the kernel and get errors if exits
	err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
	if(err != CL_SUCCESS)
	{
		size_t len;
		char buffer[2048];

		std::cout << "error at clBuildProgram :" << err << std::endl;	
		clGetProgramBuildInfo(program, selectedDeviceID, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
		std::cout << "The kernel has bugs: "<< std::endl << buffer << std::endl;
	}

	//select the kernel name
	kernel = clCreateKernel(program, "kernel1", &err);
	if (!kernel || err != CL_SUCCESS)
	{
		std::cout << "error at  clCreateKernel :" << err << std::endl;	
	}

	//Get an image Object from the OpenGL texture
	imageOutObject= clCreateFromGLTexture2D( context, CL_MEM_WRITE_ONLY, GL_TEXTURE_2D,0, myTexture,  &err);
	if (err != CL_SUCCESS)
	{
		std::cout << "error at  clCreateFromGLTexture2D:" << err << std::endl;	 
	}
}

// GLUT special-key callback. The arrow keys change rotX / rotY by 5 per press
// and request a redraw; any other key is ignored without redrawing.
// x and y (mouse position) are unused.
void SpecialKey(int key, int x, int y)
{
	const float step = 5;

	if (key == GLUT_KEY_UP)
		rotX -= step;
	else if (key == GLUT_KEY_DOWN)
		rotX += step;
	else if (key == GLUT_KEY_LEFT)
		rotY -= step;
	else if (key == GLUT_KEY_RIGHT)
		rotY += step;
	else
		return;	// not an arrow key: nothing changed, no redraw needed

	glutPostRedisplay();
}
void Resize(int width, int height)
{
	glViewport(0, 0, (GLint)width, (GLint)height); 
	wnd_width = width;
	wnd_height= height;
}
// GLUT display callback: runs the OpenCL kernel on the shared texture and then
// draws a rotated quad textured with it. Every failing OpenCL call is reported
// on std::cout; the GL part is drawn regardless.
void Render(void)
{
	//The OpenCL Part
	// The 8x8 work-group requires texture dimensions that are multiples of 8
	// (InitGL uses 256x256).
	size_t localWorkSize[2] = { 8, 8 };
	size_t globalWorkSize[2] =  {textureWidth, textureHeight};

	err = clSetKernelArg( kernel, 0, sizeof( imageOutObject ), &imageOutObject );
	if(err != CL_SUCCESS)
	{
		std::cout << "error at clSetKernelArg: " << err << std::endl;	 
	}

	// OpenGL must be done with the texture before OpenCL acquires it; this
	// matters for the first frame, where the upload from InitGL may be pending.
	glFinish();

	err = clEnqueueAcquireGLObjects(commands,1,&imageOutObject,0,NULL,NULL);
	if(err != CL_SUCCESS)
	{
		std::cout << "error at clEnqueueAcquireGLObjects: " << err << std::endl;
	}
	else
	{
		// Only touch the image when it was really acquired.
		err = clEnqueueNDRangeKernel(commands, kernel, 2, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL);
		if(err != CL_SUCCESS)
		{
			std::cout << "error at clEnqueueNDRangeKernel: " << err << std::endl;
		}

		// Release even if the kernel failed, so GL gets the texture back.
		err = clEnqueueReleaseGLObjects(commands,1,&imageOutObject,0,NULL,NULL);
		if(err != CL_SUCCESS) 
		{
			std::cout << "error at clEnqueueReleaseGLObjects: " << err << std::endl;	
		}
	}
	// Wait for the CL work before GL samples the texture.
	clFinish(commands);

	//The OpenGL Part (simply a quad with the texture)
	glClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT); 
	glMatrixMode(GL_PROJECTION);
	glLoadIdentity();
	gluPerspective(60.0, 1.0, 1.0, 200.0);     

	glMatrixMode(GL_MODELVIEW);
	glLoadIdentity();

	// Move the quad in front of the camera and apply the user-controlled rotation.
	glTranslatef (0.0, 0.0 ,-4.0);   
	glRotatef (rotX, 1.0, 0.0, 0.0);   
	glRotatef (rotY, 0.0, 1.0, 0.0);  
	glTranslatef (-0.5, -0.5 , -0.5); 

	glBindTexture(GL_TEXTURE_2D, myTexture);

	glBegin (GL_QUADS);
	glTexCoord2f(0.0f, 0.0f); glVertex3f(-1.0f, -1.0f,  1.0f);
	glTexCoord2f(1.0f, 0.0f); glVertex3f( 1.0f, -1.0f,  1.0f);
	glTexCoord2f(1.0f, 1.0f); glVertex3f( 1.0f,  1.0f,  1.0f);
	glTexCoord2f(0.0f, 1.0f); glVertex3f(-1.0f,  1.0f,  1.0f);
	glEnd ();
	glBindTexture(GL_TEXTURE_2D, 0);

	glFinish();
	glutSwapBuffers();
}

The texture is set to white. The kernel should turn it blue (BGRA mode).
But the texture stays white on the screen.

Please help me to find the error.

GL_RGBA32F Works !!!

But only on AMD gpu not on my Nvidia :evil:

The code is not working on a geforce gtx 560 ti on windows-7 64 bit.

It is working on a geforce gtx 260 on windows-7 64 bit without any problems.

=> It is a driver or GPU bug

Some people write about same problems at the Nvidia forum.
All of them use a gtx 5xx GPU too.