Hey, I am fairly new to OpenCL and I am currently writing a program using the C++ bindings and “OpenCLUtilities/openCLUtilities.hpp”. I know that data stays on the device unless the enqueueReadBuffer method is called, but how do I make a pointer to that data? I’d like to perform some computation in one kernel (grad) and use its output (ang, mag) in a second kernel without copying the buffers back to the host and then to the device again. If you could tell me where and what I am doing wrong in my code, or provide a relevant example, I would be very grateful.
// Create the OpenCL context from the command-line arguments and build the
// program from the kernel source file (both via the OpenCLUtilities helpers).
Context context = createCLContextFromArguments(argc, argv);
Program program = buildProgramFromSource(context, "/Users/Mateusz/Desktop/grad.cl");

// Pick the device to run on. The second device in the context is preferred,
// but indexing devices[1] unconditionally is out of range on machines whose
// context holds a single device, so fall back to the first one in that case.
// at() throws std::out_of_range if the context reports no devices at all.
std::vector<Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
cl::Device dev0 = devices.at(devices.size() > 1 ? 1 : 0);

// The queue is created without CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, so
// commands enqueued on it execute in submission order.
CommandQueue queue = CommandQueue(context, dev0, CL_QUEUE_PROFILING_ENABLE);

// Report which device was selected.
std::string name;
dev0.getInfo(CL_DEVICE_NAME, &name);
std::cout << "Used device: " << name << std::endl;
// Upload the input picture as a read-only RGBA image, 8 bits per channel.
// The channel type must be CL_UNORM_INT8 (not CL_UNSIGNED_INT8): the grad
// kernel samples the image with read_imagef, which is only defined for
// normalized-integer and floating-point formats. With CL_UNORM_INT8 each
// channel is returned as a float in [0.0, 1.0]; the host byte layout is the
// same as for CL_UNSIGNED_INT8.
Image2D clImage1 = Image2D(context,
    CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
    ImageFormat(CL_RGBA, CL_UNORM_INT8),
    image1.columns(), image1.rows(), 0, image1_pixels);

// Device-side result buffers, one element per pixel. They are allocated
// without a host pointer, so their contents live on the device until they
// are explicitly read back.
//   clResultAng - gradient orientation (float)
//   clResultMag - gradient magnitude   (double, matching the kernel signature)
//   clResultOut - output of the second kernel (float)
Buffer clResultAng = Buffer(context, CL_MEM_READ_WRITE, sizeof(float)*image1.rows()*image1.columns());
Buffer clResultMag = Buffer(context, CL_MEM_READ_WRITE, sizeof(double)*image1.rows()*image1.columns());
Buffer clResultOut = Buffer(context, CL_MEM_READ_WRITE, sizeof(float)*image1.rows()*image1.columns());
Kernel gradients = Kernel(program, "grad");
gradients.setArg(0, clImage1);
gradients.setArg(1, clResultAng);
gradients.setArg(2, clResultMag);
Event kernel_event, read_event;
queue.enqueueNDRangeKernel(gradients, NullRange,
NDRange(image1.columns(), image1.rows() ),
NullRange, NULL, &kernel_event);
// Program program1 = buildProgramFromSource(context, "/Users/Mateusz/Desktop/histograms.cl");
Kernel histograms = Kernel(program, "histograms");
histograms.setArg(0, clResultAng);
histograms.setArg(1, clResultMag);
histograms.setArg(2, clResultOut);
//Synchronize?
kernel_event.wait();
// Transfer image back to host
//queue.enqueueReadBuffer(clResultMag, CL_TRUE, 0, sizeof(double)*image1.columns()*image1.rows(), mag);
queue.enqueueReadBuffer(clResultOut, CL_TRUE, 0, sizeof(float)*image1.columns()*image1.rows(), test);
queue.enqueueReadBuffer(clResultAng, CL_TRUE, 0, sizeof(float)*image1.columns()*image1.rows(), ang);
// DEBUGGING OUTPUT
// std::cout<<std::endl;
for (int i=0; i<image1_size/4; i++) {
if(ang[i]>0)
std::cout << "Test: " << test[i] << " vs. Angle: " << ang[i] << std::endl;
}
// The kernels in this file use the double type. On OpenCL 1.0/1.1 compilers
// that is only legal once the cl_khr_fp64 extension has been enabled, so turn
// it on whenever the device advertises it.
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif

// Image sampler shared by the kernels below: integer pixel coordinates,
// out-of-range coordinates clamped to the nearest edge pixel, and no
// interpolation between pixels.
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
/*
 * grad - per-pixel gradient orientation and magnitude.
 *
 * One work-item handles one pixel; the 2-D global size is used as the
 * image width (dimension 0) and height (dimension 1).
 *
 * input_image  RGBA image sampled with read_imagef. read_imagef is only
 *              defined for normalized-integer or floating-point channel
 *              types (e.g. CL_UNORM_INT8), so the host must create the
 *              image with such a format.
 * ang          out: gradient orientation in degrees, range [-180, 180],
 *              stored row-major with a row stride of get_global_size(0).
 * mag          out: gradient magnitude, same layout as ang.
 *
 * Following Dalal and Triggs: "For colour images, we calculate separate
 * gradients for each colour channel, and take the one with the largest
 * norm as the pixel's gradient vector." The alpha channel is ignored.
 */
__kernel void grad(
    __read_only image2d_t input_image,
    __global float * ang,
    __global double * mag
) {
    const int2 pos = (int2)((int)get_global_id(0), (int)get_global_id(1));
    const size_t idx = pos.x + pos.y * get_global_size(0);

    // Horizontal gradient with the [1, 0, -1] kernel: left minus right
    // neighbour (the centre tap has weight zero).
    const float4 dx = read_imagef(input_image, sampler, (int2)(pos.x - 1, pos.y))
                    - read_imagef(input_image, sampler, (int2)(pos.x + 1, pos.y));
    // Vertical gradient with the transposed kernel: upper minus lower neighbour.
    const float4 dy = read_imagef(input_image, sampler, (int2)(pos.x, pos.y - 1))
                    - read_imagef(input_image, sampler, (int2)(pos.x, pos.y + 1));

    // Squared gradient norm of every channel. Comparing squared norms picks
    // the same winner as comparing norms and avoids three square roots.
    const float4 norm2 = dx * dx + dy * dy;

    // Select ONE channel - the one with the largest norm - and take both
    // components from it, so x and y always belong to the same gradient
    // vector. (Comparing the signed dx / dy values instead would favour
    // positive gradients and could mix components of different channels.)
    float x = dx.x;
    float y = dy.x;
    float best = norm2.x;
    if (norm2.y >= best) { x = dx.y; y = dy.y; best = norm2.y; }
    if (norm2.z >= best) { x = dx.z; y = dy.z; best = norm2.z; }

    // atan2pi returns atan2(y, x) / pi, i.e. a value in [-1, 1]; multiplying
    // by 180 converts it to degrees.
    ang[idx] = atan2pi(y, x) * 180.0f;

    // The magnitude is accumulated in double because the output buffer is double.
    const double xx = (double)x * x;
    const double yy = (double)y * y;
    mag[idx] = sqrt(xx + yy);
}
/*
 * histograms - currently a pass-through: each work-item copies its element
 * of ang into out. mag is accepted but not read.
 *
 * Elements are addressed row-major over the 2-D global range, with a row
 * stride of get_global_size(0).
 */
__kernel void histograms(
    __global float * ang,
    __global double * mag,
    __global float * out
) {
    const int col = get_global_id(0);
    const int row = get_global_id(1);
    const size_t cell = col + row * get_global_size(0);

    out[cell] = ang[cell];
}