Hi everyone,
I’m trying to use the atom_cmpxchg (OpenCL version is 1.0, GPU is NVIDIA 9600M GT) function, but I cant manage to get the expected result: the swap dont happen.
I tried to find a example (my code is rather long, so I looked for something simpler), but I can’t make it work either (this code can be found on the internet, but I just dont remember the address right now):
The output result:
Old A = 500
New A 500
I may not have understood what this function is supposed to do…
Given the prototype of the function atom_cmpxchg (__global int *p, int cmp, int val): I want to swap the value at *p by val if and only if *p == cmp (store the old value of *p if *p!=cmp). Is that right? Or I am missing something?
Thanks for your help!
The kernel:
__kernel void atomiccmpxchg(__global int *old, __global int *new)
{
__local int v,v1;
v = 500;
v1=10;
*old = atom_cmpxchg(new,v,v1);
}
The host code:
#include <iostream>
#include <cstdlib>
#include <fstream>
#include <string>
#if defined __APPLE__ || defined (MACOSX)
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
using namespace std;
void err_check( int err, string err_code ) {
if ( err != CL_SUCCESS ) {
cout << "Error: " << err_code << "(" << err << ")" << endl;
exit(-1);
}
}
int main()
{
cl_platform_id platform_id = NULL;
cl_device_id device_id = NULL;
cl_context context = NULL;
cl_command_queue command_queue = NULL;
cl_mem mobj_a = NULL;
cl_mem mobj_b = NULL;
cl_program program = NULL;
cl_kernel kernel = NULL;
cl_uint ret_num_devices;
cl_uint ret_num_platforms;
cl_int err;
int a, b;
a = 500;
b = 500;
// Get platform/device information
err = clGetPlatformIDs( 1, &platform_id, &ret_num_platforms );
err_check( err, "clGetPlatformIDs" );
// Get information about the device
err = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices );
err_check( err, "clGetDeviceIDs" );
// Create OpenCL Context
context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &err );
err_check( err, "clCreateContext" );
// Create Command Queue
command_queue = clCreateCommandQueue( context, device_id, CL_QUEUE_PROFILING_ENABLE, &err );
err_check( err, "clCreateCommandQueue" );
// Create memory objects and tranfer the data to memory buffer
mobj_a = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof(int), NULL, &err );
err = clEnqueueWriteBuffer( command_queue, mobj_a, CL_TRUE, 0, sizeof(int), &a, 0, NULL, NULL );
err_check( err, "clEnqueueWriteBuffer" );
mobj_b = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof(int), NULL, &err );
err = clEnqueueWriteBuffer( command_queue, mobj_b, CL_TRUE, 0, sizeof(int), &b, 0, NULL, NULL );
err_check( err, "clEnqueueWriteBuffer" );
// Read kernel file
ifstream file("atomic_cmpxchg.cl");
string prog( istreambuf_iterator<char>( file ), ( istreambuf_iterator<char>() ) );
const char *source_str = prog.c_str();
// Create Kernel program from the read in source
program = clCreateProgramWithSource( context, 1, (const char **) &source_str, 0, &err );
err_check( err, "clCreateProgramWithSource" );
// Build Kernel Program
err = clBuildProgram( program, 1, &device_id, NULL, NULL, NULL );
size_t len;
char buffer[2048];
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG,
sizeof(buffer), buffer, &len);
printf("--- Build log ---
%s
", buffer);
err_check( err, "clBuildProgram" );
// Create OpenCL Kernel
kernel = clCreateKernel( program, "atomiccmpxchg", &err );
err_check( err, "clCreateKernel" );
// Set OpenCL kernel argument
err = clSetKernelArg( kernel, 0, sizeof( cl_mem ), (void *) &mobj_a );
err_check( err, "clSetKernelArg" );
err = clSetKernelArg( kernel, 1, sizeof( cl_mem ), (void *) &mobj_b );
err_check( err, "clSetKernelArg" );
// Execute OpenCL kernel in task parallel
clEnqueueTask( command_queue, kernel, 0, NULL, NULL );
err_check( err, "clEnqueueTask" );
// Read (Transfer result) from the memory buffer
err = clEnqueueReadBuffer( command_queue, mobj_a, CL_TRUE, 0, sizeof(int), &a, 0, NULL, NULL );
err = clEnqueueReadBuffer( command_queue, mobj_b, CL_TRUE, 0, sizeof(int), &b, 0, NULL, NULL );
// Free objects
err = clFlush( command_queue );
err = clFinish( command_queue );
err = clReleaseKernel( kernel );
err = clReleaseProgram( program );
err = clReleaseMemObject( mobj_a );
err = clReleaseMemObject( mobj_b );
err = clReleaseCommandQueue( command_queue );
err = clReleaseContext( context );
// Display result
cout << " Old A = " << a << endl;
cout << " New A " << b << endl;
return 0;
}