I’m taking the oclParticles example program and changing it to an SPH simulation. However, when the computePressure kernel is called I get the error:
Unhandled exception at 0x049ed72b in oclParticles.exe: 0xC0000005: Access violation reading location 0x00008068
With some print statements I can see that my computePressure kernel is called but doesn’t finish. I should also mention that when I comment out all of the code inside the kernel (ie. so it doesn’t do anything) I still get the same error. This leads me to believe the problem isn’t in the kernel code but perhaps in the way I’ve called the kernel. Anyway I’ve spent a few days looking for my problem with no success, so I thought I’d post here to see if someone can see what my problem is. Thanks for looking.
Here is the array allocation:
//Allocate GPU data
shrLog("Allocating GPU Data buffers...
");
allocateArray(&m_dPos, m_numParticles * 4 * sizeof(float));
allocateArray(&m_dVel, m_numParticles * 4 * sizeof(float));
allocateArray(&m_dDen, m_numParticles * sizeof(float) );
allocateArray(&m_dPre, m_numParticles * sizeof(float) );
allocateArray(&m_dReorderedPos, m_numParticles * 4 * sizeof(float));
allocateArray(&m_dReorderedVel, m_numParticles * 4 * sizeof(float));
allocateArray(&m_dReorderedDen, m_numParticles * sizeof(float) );
allocateArray(&m_dReorderedPre, m_numParticles * sizeof(float) );
allocateArray(&m_dHash, m_numParticles * sizeof(uint));
allocateArray(&m_dIndex, m_numParticles * sizeof(uint));
allocateArray(&m_dCellStart, m_numGridCells * sizeof(uint));
allocateArray(&m_dCellEnd, m_numGridCells * sizeof(uint));
shrLog("Allocation of GPU Data buffers was successful...
");
Here is where the kernel is called from particleSystem_class.cpp:
std::cout << "Here we launch the kernel computePressure..." << std::endl;
computePressure(
m_dDen,
m_dPre,
m_dReorderedPos,
m_dIndex,
m_dCellStart,
m_dCellEnd,
m_numParticles,
m_numGridCells
);
std::cout << "...but we never get here." << std::endl;
Declaration in particleSystem_engine.h:
extern "C" void computePressure(
memHandle_t d_Den,
memHandle_t d_Pre,
memHandle_t d_ReorderedPos,
memHandle_t d_Index,
memHandle_t d_CellStart,
memHandle_t d_CellEnd,
uint numParticles,
uint numCells
);
Defn in oclParticles_launcher.cpp:
extern "C" void computePressure(
memHandle_t d_Den,
memHandle_t d_Pre,
memHandle_t d_ReorderedPos,
memHandle_t d_Index,
memHandle_t d_CellStart,
memHandle_t d_CellEnd,
uint numParticles,
uint numCells
){
cl_int ciErrNum;
size_t globalWorkSize = uSnap(numParticles, wgSize);
ciErrNum = clSetKernelArg(ckComputePressure, 0, sizeof(cl_mem), (void *)&d_Den);
ciErrNum |= clSetKernelArg(ckComputePressure, 1, sizeof(cl_mem), (void *)&d_Pre);
ciErrNum |= clSetKernelArg(ckComputePressure, 2, sizeof(cl_mem), (void *)&d_ReorderedPos);
ciErrNum |= clSetKernelArg(ckComputePressure, 3, sizeof(cl_mem), (void *)&d_Index);
ciErrNum |= clSetKernelArg(ckComputePressure, 4, sizeof(cl_mem), (void *)&d_CellStart);
ciErrNum |= clSetKernelArg(ckComputePressure, 5, sizeof(cl_mem), (void *)&d_CellEnd);
ciErrNum |= clSetKernelArg(ckComputePressure, 6, sizeof(cl_mem), (void *)¶ms);
ciErrNum |= clSetKernelArg(ckComputePressure, 7, sizeof(uint), (void *)&numParticles);
oclCheckError(ciErrNum, CL_SUCCESS);
ciErrNum = clEnqueueNDRangeKernel(cqDefaultCommandQue, ckComputePressure, 1, NULL, &globalWorkSize, &wgSize, 0, NULL, NULL);
oclCheckError(ciErrNum, CL_SUCCESS);
}
Here is the OpenCL code for the kernel computePressure:
__kernel void computePressure(
__global float *d_Den, //output: new density
__global float *d_Pre, //output: new pressure
__global const float4 *d_ReorderedPos, //input: reordered positions
__global const uint *d_Index, //input: reordered particle indices
__global const uint *d_CellStart, //input: beginning of cell boundary
__global const uint *d_CellEnd, //input: end of cell boundary
__constant simParams_t *params,
uint numParticles
){
uint index = get_global_id(0);
if(index >= numParticles)
return;
float4 pos = d_ReorderedPos[index];
float sum = 0.0, distanceSqrd = 0.0, dx, dy, dz;
//Get address in grid
int4 gridPos = getGridPos(pos, params);
//Accumulate surrounding cells
for(int z = -1; z <= 1; z++)
for(int y = -1; y <= 1; y++)
for(int x = -1; x <= 1; x++)
{
//Get start particle index for this cell
uint hash = getGridHash(gridPos + (int4)(x, y, z, 0), params);
uint startI = d_CellStart[hash];
//Skip empty cell
if(startI == 0xFFFFFFFFU)
continue;
//Iterate over particles in this cell
uint endI = d_CellEnd[hash];
for(uint j = startI; j < endI; j++)
{
if(j == index)
continue;
float4 pos2 = d_ReorderedPos[j];
dx = pos.x - pos2.x;
dy = pos.y - pos2.y;
dz = pos.z - pos2.z;
//if a particle is within the smoothing length of the query particle include it in the summation
distanceSqrd = ( dx * dx ) + ( dy * dy ) + ( dz * dz );
if( distanceSqrd <= params->smoothingRadiusSqrd )
{
distanceSqrd = params->smoothingRadiusSqrd - distanceSqrd;
sum += distanceSqrd * distanceSqrd * distanceSqrd;
}
}
}
//Now we update the density and pressure to the original unsorted location
float density = ( sum * params->particleMass * params->poly6Kern );
d_Pre[d_Index[index]] = ( density - params->restDensity ) * params->internalStiffness;
d_Den[d_Index[index]] = 1.0 / density;
}