Hi, I’m getting a CL_OUT_OF_RESOURCES error. I’ll post two implementations that are practically identical.
The first one runs OK: I call a function in a loop, and each call runs my kernel with different arguments.
float Parallel::step(){
size_t i=0,max=0;
float diff=0;
//PAY ATTENTION HERE AND TO IMPLEMANTION OF stepGRoup()
for(i=0;i<groups.size();++i){
stepGroup(i);
}
try{
queue.enqueueBarrier();
//Read x buffer back
queue.enqueueReadBuffer(d_x, CL_TRUE, 0, sizeof(float)*rows, x);
} catch ( cl::Error& err ) {
std::cerr << "Caught exception: " << err.what() << '(' << err.err() << ')' << std::endl;
}
}
void Parallel::stepGroup(size_t grp_idx){
size_t group_rows=groups[grp_idx].size();
size_t rows_offset=0;
for(size_t i=0;i<grp_idx;++i)
rows_offset+=groups[i].size();
try{
size_t local_size=512;
size_t global_size;
if(group_rows%local_size)
global_size=(group_rows/local_size+1)*local_size;
else
global_size=group_rows/local_size;
//Kernel setargs
kernel.setArg(0,d_values);
kernel.setArg(1,d_col_idx);
kernel.setArg(2,d_row_index);
kernel.setArg(3,d_b);
kernel.setArg(4,d_x);
kernel.setArg(5,rows_offset);
kernel.setArg(6,group_rows);
kernel.setArg(7,max_nz);
kernel.setArg(8,rows);
//Enque the kernel now
//std::cerr<<"global_size: "<<global_size<<" local_size: "<<local_size<<std::endl;
queue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(global_size),cl::NDRange(local_size));
} catch ( cl::Error& err ) {
std::cerr << "Caught exception: " << err.what() << '(' << err.err() << ')' << std::endl;
}
}
In the second one, instead of looping over the groups and calling stepGroup() for each, I call a function named stepAllGroups() that contains the loop over the groups:
float Parallel::step(){
size_t i=0,max=0;
float diff=0;
//PAY ATTENTION HERE AND TO IMPLEMANTION OF stepAllGroups()
stepAllGroups();
try{
queue.enqueueBarrier();
//Read x buffer back
queue.enqueueReadBuffer(d_x, CL_TRUE, 0, sizeof(float)*rows, x);
} catch ( cl::Error& err ) {
std::cerr << "Caught exception: " << err.what() << '(' << err.err() << ')' << std::endl;
}
}
void Parallel::stepAllGroups(){
size_t group_rows;
size_t rows_offset=0;
size_t local_size=512;
size_t global_size;
queue.enqueueBarrier();
for(size_t i=0;i<groups.size();++i){
group_rows=groups[i].size();
rows_offset+=group_rows;
std::cerr<<"Calling kernel on group "<<i<<" with group_rows "<<group_rows<<std::endl;
try{
if(groups[i].size()%local_size)
global_size=(group_rows/local_size+1)*local_size;
else
global_size=group_rows/local_size;
kernel.setArg(0,d_values);
kernel.setArg(1,d_col_idx);
kernel.setArg(2,d_row_index);
kernel.setArg(3,d_b);
kernel.setArg(4,d_x);
kernel.setArg(5,rows_offset);
kernel.setArg(6,group_rows);
kernel.setArg(7,max_nz);
kernel.setArg(8,rows);
queue.enqueueBarrier();
queue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(global_size),cl::NDRange(local_size));
} catch ( cl::Error& err ) {
std::cerr << "Caught exception: " << err.what() << '(' << err.err() << ')' << std::endl;
}
}
queue.enqueueBarrier();
}
In the second implementation, I’m getting CL_OUT_OF_RESOURCES when calling enqueueReadBuffer().
By the way, I added the enqueueBarrier() calls in an effort to solve this, with no success.
I don’t understand where the difference is, since it seems to be only a matter of how I chose to structure my code and has nothing to do with OpenCL, but something is definitely happening.
Any pointers will be appreciated.
Best regards
Mat