Crash after trying recreate buffers

Hello everyone!
I am beginner in OpenCL, and i’m testing to use it in simple Autodesk Maya plugin.
Plugin implements command that pushes vertices along their normals.
After some number of executions (often after first), Maya crushes at this point

bufA = clCreateBuffer(context,  CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,  problemSize, clf3points, &err);

I suppose that i’m not correctly recreate buffers.

Here is the full code

Thanks for your help! :slight_smile:


#define __NO_STD_VECTOR // Use cl::vector instead of STL version
#define __CL_ENABLE_EXCEPTIONS
#include <maya/MIOStream.h>
#include <maya/MSimple.h>
#include <maya/MGlobal.h>
#include <maya/MSelectionList.h>
#include <maya/MPlug.h>
#include <maya/MDagPath.h>
#include <maya/MPointArray.h>
#include <maya/MFloatPointArray.h>
#include <maya/MFnMesh.h>

#include <maya/MFnMesh.h>
#include <maya/MFnDependencyNode.h>

#include <maya/MItMeshVertex.h>
#include <maya/MItSelectionList.h>

#include <utility>
#include <iostream>
#include <fstream>
#include <string>
#include <time.h>

#include "CL/cl_platform.h"
#include "CL/cl.h"
#include "maya/MVectorArray.h"



#define MCheckStatus(status, message) \
	if (MStatus::kFailure == status) { \
	MGlobal::displayError(message); \
	return MStatus::kFailure; \
	}

class testCl: public MPxCommand
{
private:
	cl_float4* mclPushAlongNormals(cl_command_queue commands, cl_context context ,cl_kernel kernel, ::size_t local, cl_float4* clf3points, cl_float4* clf3normals, cl_float clfOffset, long count, MStatus &status);
	static std::string loadClSource(std::string filename);
	void mPushAlongNormals(MDagPath& dagPath, float offset, MStatus& status);

public:

	static cl_device_id device_id;             // compute device id
	static cl_context context;                 // compute context
	static cl_command_queue commands;          // compute command queue
	static cl_program program;                 // compute program
	static cl_kernel kernel;                   // compute kernel
	static cl_uint platforms_number;
	static cl_platform_id platform;
	static cl_int err;

	static ::size_t local;                       // local domain size for our calculation

	testCl();
	virtual ~testCl();

	virtual MStatus doIt( const MArgList& argList );
	
	static void* creator();
	static MStatus initCL();
	static MStatus freeCL();

};

cl_device_id testCl::device_id;             // compute device id
cl_context testCl::context;                 // compute context
cl_command_queue testCl::commands;          // compute command queue
cl_program testCl::program;                 // compute program
cl_kernel testCl::kernel;                   // compute kernel
cl_uint testCl::platforms_number;
cl_platform_id testCl::platform;
cl_int testCl::err;

::size_t testCl::local;  

testCl::testCl(){};
testCl::~testCl(){};

MStatus testCl::initCL()
{
	MStatus status = MStatus::kSuccess;
	platform = NULL;
	err = 0;
	//Initialization 
	//////////////////////////////////////////////////////////////////////////


	err = clGetPlatformIDs(0, NULL, &platforms_number);
	if (err != CL_SUCCESS)
	{
		std::cout << "Error: Failed to get platforms " << err << std::endl;
		status = MStatus::kFailure;
	}
	else
	{
		cl_platform_id* platforms = (cl_platform_id *)malloc(platforms_number*sizeof(cl_platform_id));
		err = clGetPlatformIDs(platforms_number, platforms, NULL);
		platform = platforms[0];
		delete platforms;

	}

	// Connect to a compute device
	//
	int gpu = 0;
	err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
	if (err != CL_SUCCESS)
	{
		printf("Error: Failed to create a device group!
 %i", err);
		status = MStatus::kFailure;
	}

	// Create a compute context
	//
	context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
	if (!context)
	{
		printf("Error: Failed to create a compute context!
");
		status = MStatus::kFailure;
	}

	// Create a command commands
	//
	commands = clCreateCommandQueue(context, device_id, 0, &err);
	if (!commands)
	{
		printf("Error: Failed to create a command commands!
");
		status = MStatus::kFailure;
	}

	// Create the compute program from the source buffer
	//
	std::string sourceCode = loadClSource("F:/Dev/clTest/clTest/vector_add.cl");
	const char* c_sourceCode = sourceCode.c_str();

	program = clCreateProgramWithSource(context, 1, (const char**)&c_sourceCode, NULL, &err);
	if (!program)
	{
		printf("Error: Failed to create compute program!
");
		status = MStatus::kFailure;
	}

	// Build the program executable
	//
	err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
	if (err != CL_SUCCESS)
	{
		::size_t len;
		char buffer[2048];

		printf("Error: Failed to build program executable!
");
		clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
		printf("%s
", buffer);
		status = MStatus::kFailure;
	}

	// Create the compute kernel in the program we wish to run
	//
	kernel = clCreateKernel(program, "vector_add", &err);
	if (!kernel || err != CL_SUCCESS)
	{
		printf("Error: Failed to create compute kernel!
");
		status = MStatus::kFailure;
	}

	return status;

}

MStatus testCl::freeCL()
{
	clReleaseProgram(program);
	clReleaseKernel(kernel);
	clReleaseCommandQueue(commands);
	clReleaseContext(context);
	return MStatus::kSuccess;
}

void* testCl::creator()
{
	return new testCl;
}


std::string testCl::loadClSource(std::string filename){
	std::ifstream sourceFile(filename.data());
	std::string sourceCode( 
		std::istreambuf_iterator<char>(sourceFile),
		(std::istreambuf_iterator<char>()));
	return sourceCode;
}


cl_float4* testCl::mclPushAlongNormals(cl_command_queue commands, cl_context context ,cl_kernel kernel, ::size_t local, cl_float4* clf3points, cl_float4* clf3normals, cl_float clfOffset, long count, MStatus &status)
{


	// Get the maximum work group size for executing the kernel on the device
	//
	err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL);
	if (err != CL_SUCCESS)
	{
		printf("Error: Failed to retrieve kernel work group info! %d
", err);
		status = MStatus::kFailure;
	}
	printf("Workgroup size is %u", local);

	cl_mem bufA;						// device memory used for the input array
	cl_mem bufB;     
	cl_mem bufC;						// device memory used for the output array

	int err;
	::size_t global;

	::size_t problemPack = local*((count/local)+1);
	::size_t problemSize = problemPack*sizeof(cl_float4);

	cl_float4 *clf3result = new cl_float4[problemSize];

	// Create the input and output arrays in device memory for our calculation
	//

        //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
        //!!!  I T  C R A S H E S   H E R E !!!!
       //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

	bufA = clCreateBuffer(context,  CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,  problemSize, clf3points, &err);
	bufB = clCreateBuffer(context,  CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,  problemSize, clf3normals, &err);
	bufC = clCreateBuffer(context, CL_MEM_WRITE_ONLY,  problemSize, NULL, &err);
	if (err != CL_SUCCESS)
	{
		printf("Error: Failed to allocate device memory!
");
		status = MStatus::kFailure;
	}
	if (!bufA || !bufB || !bufC)
	{
		printf("Error: Failed to allocate device memory!
");
		status = MStatus::kFailure;
	}   
	//// Write our data set into the input array in device memory
	////
	//err = clEnqueueWriteBuffer(commands, bufA, CL_FALSE, 0, problemSize, clf3points, 0, NULL, NULL);
	//err = clEnqueueWriteBuffer(commands, bufB, CL_FALSE, 0, problemSize, clf3normals, 0, NULL, NULL);
	//if (err != CL_SUCCESS)
	//{
	//	printf("Error: Failed to write to source array!
");
	//	status = MStatus::kFailure;
	//}

	// Set the arguments to our compute kernel
	//
	err = 0;
	err  = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufA);
	err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufB);
	err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &bufC);
	err |= clSetKernelArg(kernel, 3, sizeof(cl_float), &clfOffset);
	if (err != CL_SUCCESS)
	{
		printf("Error: Failed to set kernel arguments! %d
", err);
		status = MStatus::kFailure;
	}

	//// Execute the kernel over the entire range of our 1d input data set
	//// using the maximum number of work group items for this device
	////

	global = problemPack;
	err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
	if (err)
	{
		printf("Error: Failed to execute kernel! %d
", err);
		status = MStatus::kFailure;
	}

	// Wait for the command commands to get serviced before reading back results
	//
	clFinish(commands);

	// Read back the results from the device to verify the output
	//
	err = clEnqueueReadBuffer( commands, bufC, CL_TRUE, 0, problemSize, clf3result, 0, NULL, NULL ); 
	if (err != CL_SUCCESS)
	{
		printf("Error: Failed to read output array! %d
", err);
		status = MStatus::kFailure;
	}

	err = clReleaseMemObject(bufA);
	err = clReleaseMemObject(bufB);
	err = clReleaseMemObject(bufC);
	if (err != CL_SUCCESS)
	{
		printf("Error: Failed to release buffers! %d
", err);
		status = MStatus::kFailure;
	}
	
	return clf3result;

}

void testCl::mPushAlongNormals(MDagPath& dagPath, float offset, MStatus& status)
{
	MFnMesh mesh(dagPath);
	MItMeshVertex itInputMesh(dagPath);
	itInputMesh.geomChanged();
	MPointArray points;
	for (; !itInputMesh.isDone(); itInputMesh.next()) {
		MPoint point;
		point = itInputMesh.position(MSpace::kObject, &status);
		if (MStatus::kSuccess == status)
		{
			MVector normal;
			MPoint finPoint;
			mesh.getVertexNormal(itInputMesh.index(),normal);
			normal *= offset;
			finPoint = point + normal;
			points.append(finPoint);
		}
	}
	mesh.setPoints(points);
	status = itInputMesh.updateSurface();
}

MStatus testCl::doIt( const MArgList& argList )
{   
	clock_t tstart;
	clock_t tend;
	double tdiff;
	bool use_cl = 0;
	bool use_maya = 1;
	MString timer_message("Operation running time with OpenCl: ");

	MStatus status = MStatus::kSuccess;
	float offset = 1.0;

	for (unsigned int i = 0; i < argList.length(); i++ )
	{
		if ( argList.asString(i) == MString("-opencl") || 
			argList.asString(i) == MString("-c") )
		{
			bool tmp = argList.asBool(++i, &status);
			if(status)
				use_cl = tmp;
		}
		else if ( argList.asString(i) == MString("-maya") || 
			argList.asString(i) == MString("-m") )
		{
			bool tmp = argList.asBool(++i, &status);
			if(status)
				use_maya = tmp;
		}
		else if ( argList.asString(i) == MString("-offset") || 
			argList.asString(i) == MString("-o") )
		{
			float tmp = argList.asDouble(++i, &status);
			if (status)
				offset = tmp;
		}
		else
			break;
	}

	MSelectionList selList;
	MGlobal::getActiveSelectionList( selList );

	MItSelectionList itSelList( selList );
	itSelList.setFilter( MFn::kMesh );
	//Start time
	
	for (; !itSelList.isDone(); itSelList.next()) {
		MDagPath dagPath; 
		status = itSelList.getDagPath(dagPath);
		MItMeshVertex itInputMesh(dagPath);
		//OPENCL
		//////////////////////////////////////////////////////////////////////////
		tstart = clock();
		if (use_cl){
			itInputMesh.reset();
			
			const int count = itInputMesh.count();

			cl_float4 *clf3points = new cl_float4[count];
			cl_float4 *clf3normals = new cl_float4[count];
			cl_float4 *clf3result = new cl_float4[count];
			cl_float clfOffset = offset;
			
			MFnMesh cmesh(dagPath);
			MPointArray fpPoints; 
			cmesh.getPoints(fpPoints);

			MVector normal;
			int prevIdx = itInputMesh.index();

			for (unsigned int idx=0; idx < fpPoints.length(); idx++) {

				clf3points[idx].s[0] = fpPoints[idx].x;
				clf3points[idx].s[1] = fpPoints[idx].y;
				clf3points[idx].s[2] = fpPoints[idx].z;

				itInputMesh.setIndex(idx, prevIdx);
				itInputMesh.getNormal(normal);

				clf3normals[idx].s[0] = normal.x;
				clf3normals[idx].s[1] = normal.y;
				clf3normals[idx].s[2] = normal.z;
				prevIdx = idx;

			}
			

			clf3result = mclPushAlongNormals(commands, context, kernel, local, clf3points, clf3normals, clfOffset, count, status);

			if (status == MStatus::kSuccess)
			{
				
				MPointArray cpoints;
				for (unsigned int idx = 0; idx < count; idx++) 
					cpoints.append(clf3result[idx].s[0], clf3result[idx].s[1],clf3result[idx].s[2]);
				cmesh.setPoints(cpoints);
				status = itInputMesh.updateSurface();
			}
			itInputMesh.geomChanged();

			//Make some clean
			//////////////////////////////////////////////////////////////////////////

			delete[] clf3points;
			delete[] clf3normals;
			delete[] clf3result;
		}
		//Pure Maya
		////////////////////////////
		if (use_maya){
			mPushAlongNormals(dagPath, offset, status);
			MCheckStatus(status, "Maya push failed!");
		}
	}
	tend = clock();
	tdiff = tend - tstart;
	timer_message = "Operation running time : ";
	timer_message += tdiff;
	//How long it takes
	MGlobal::displayInfo(timer_message);

	return status;
}


MStatus initializePlugin( MObject obj )
{
	MStatus   status;
	MFnPlugin plugin( obj, "DK", "1.0", "Any");

	status = plugin.registerCommand( "testCl", testCl::creator );
	if (!status) {
		status.perror("registerCommand");
		return status;
	}
	status = testCl::initCL();

	return status;
}

MStatus uninitializePlugin( MObject obj )
{
	MStatus   status;
	MFnPlugin plugin( obj );
	testCl::freeCL();
	status = plugin.deregisterCommand( "testCl" );
	if (!status) {
		status.perror("deregisterCommand");
		return status;
	}

	return status;
}

a little explanation:
initializePlugin method calls when plugin loads in maya, and this method calls initCL method that doing openCL initialization.

doIt method calls everytime when command that plugin implements, executes in maya. it gets vertex data from maya, and pass it to this method that implements opencl execution and return result back to doIt method that updates mesh with new vertex data.

cl_float4* testCl::mclPushAlongNormals(cl_command_queue commands, cl_context context ,cl_kernel kernel, ::size_t local, cl_float4* clf3points, cl_float4* clf3normals, cl_float clfOffset, long count, MStatus &status)

hope, it’s more clearly now.
so problem is in mclPushAlongNormals, when i executing command in maya more than once, on this call

 bufA = clCreateBuffer(context,  CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,  problemSize, clf3points, &err);

or maybe someone can provide me simple example how i must implement calls, when i need to execute kernel multiple times with different data. it will be simplier. thanks! :slight_smile:

Hi,
I was just wondering, you say it crashes after the first time, do you mean it always runs first
time but can crash anytime after that ?

Also, can you explain what is happening here

::size_t problemPack = local*((count/local)+1);
::size_t problemSize = problemPack*sizeof(cl_float4);
cl_float4 *clf3result = new cl_float4[problemSize];

The bit I don’t get is why you are allocating clf3result to be a multiple of “sizeof(cl_float4)”
when it is already a cl_float4?

You then do
bufA = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, problemSize, clf3points, &err);

but clf3points comes in as size “count” but “problemSize” is much larger (count*sizeof(float4)) . . .
Did you really mean to do that ?


jason

Hello Jason!

Yes, it always runs first time.

Ooh! Thank you! I have changed it from malloc to new, and automatically don’t noticed it. It much clearer now. :slight_smile: