Hello guys, I’m new to OpenCL and am trying to write a program that simply copies one image2d_t image to another. The problem is that the values in the returned image are totally different from the values in the input image.
This is the kernel:
/*
 * Copies one texel from `image` to the same integer coordinate of `output`.
 *
 * Each work-item handles the texel at (get_global_id(0), get_global_id(1)).
 * Texels are read as float4 through a sampler that uses unnormalized
 * coordinates, clamp addressing and nearest filtering.
 *
 * image:  source image, read-only.
 * output: destination image, write-only.
 */
__kernel void image_test(__read_only image2d_t image, __write_only image2d_t output)
{
    const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;

    int x = get_global_id(0);
    int y = get_global_id(1);

    // Writing outside the destination extent is undefined, so work-items
    // that fall beyond it (e.g. when the host rounds the global size up)
    // must not write anything.
    const int2 extent = get_image_dim(output);
    if (x >= extent.x || y >= extent.y)
        return;

    const int2 coord = (int2) (x, y);
    const float4 pixel = read_imagef(image, smp, coord);
    write_imagef(output, coord, pixel);
}
And this is the complete OpenCL host program (I’m using the OpenCL C++ wrapper, by the way):
#define __CL_ENABLE_EXCEPTIONS
#include <highgui.h>
#include "cl.hpp"
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <sys/stat.h>
#include <iostream>
#include <string>
#include <vector>
const char* loadKernel(const char* filename);
int main()
{
int width = 4;
int height = 2;
int image_size = width * height * 4;
float* input = (float*) calloc(image_size, sizeof(float));
float* output = (float*) calloc(image_size, sizeof(float));
srand (time(NULL));
for (int a = 0; a < image_size; a++)
input[a] = (float) (rand() % 100);
for (int a = 0; a < image_size; a++)
output[a] = 255.0;
// Initialize OpenCL
cl::Context* context;
std::vector<cl::Device> devices;
cl::CommandQueue queue;
cl::Kernel* kernel;
// The origin is the same as the indices for the first value
// The z component must be 0 for 2D images
cl::size_t<3> origin;
origin.push_back(0);
origin.push_back(0);
origin.push_back(0);
// The region is the same as the dimensions of the image
// The z component must be 1 for 2D images
cl::size_t<3> region;
region.push_back(width);
region.push_back(height);
region.push_back(1);
// The row pitch is the number of bytes in each row
size_t row_pitch = width * 4 * sizeof(float);
try
{
// Get available platforms
std::vector<cl::Platform> platforms;
cl::Platform::get(&platforms);
if (platforms.size() == 0)
printf("Err: platform size 0!
");
// Select the default platform and create
// a context using this platform and the GPU
cl_context_properties cps[] = {
CL_CONTEXT_PLATFORM,
(cl_context_properties) (platforms[0]) (),
0
};
context = new cl::Context(CL_DEVICE_TYPE_GPU, cps);
// Get a list of devices on this platform
devices = context->getInfo<CL_CONTEXT_DEVICES>();
// Create a command queue and use the first device
queue = cl::CommandQueue(*context, devices[0]);
}
catch (cl::Error error)
{
printf("%s (%d)
", error.what(), error.err());
}
// Load and build Program and make kernel
try
{
// Read kernel file
const char* filename = "kernel.cl";
std::string source_code(loadKernel(filename));
cl::Program::Sources source(1, std::make_pair(source_code.data(), source_code.length() + 1));
// Make program of the source code in the context
cl::Program program(*context, source);
// Build program for these specific devices;
program.build(devices);
// Make kernel
kernel = new cl::Kernel(program, "image_test");
}
catch (cl::Error error)
{
printf("%s (%d)
", error.what(), error.err());
}
// Memory buffers
cl::Image2D* image_a;
cl::Image2D* image_b;
try
{
// Create memory buffers
image_a = new cl::Image2D(*context,
CL_MEM_READ_ONLY,
cl::ImageFormat(CL_RGBA, CL_FLOAT),
width, height, 0);
image_b = new cl::Image2D(*context,
CL_MEM_WRITE_ONLY,
cl::ImageFormat(CL_RGBA, CL_FLOAT),
width, height, 0);
// Copy lists a and b to the memory buffers
queue.enqueueWriteImage(*image_a,
CL_TRUE,
origin, region,
row_pitch, 0,
(void*) input);
}
catch (cl::Error error)
{
printf("%s (%d)
", error.what(), error.err());
}
// Run kernel
try
{
// Set arguments to kernel
kernel->setArg(0, *image_a);
kernel->setArg(1, *image_b);
// Execute the program
queue.enqueueNDRangeKernel(*kernel, cl::NullRange, cl::NDRange(width, height), cl::NullRange);
queue.finish();
}
catch (cl::Error error)
{
printf("%s (%d)
", error.what(), error.err());
}
// Get answer buffer back
try
{
queue.enqueueReadImage(*image_b,
CL_TRUE,
origin, region,
row_pitch, 0,
(void*) output);
}
catch (cl::Error error)
{
printf("%s (%d)
", error.what(), error.err());
}
for (int a = 0; a < image_size; a++)
{
if (a % (width * 4) == 0)
printf("
");
printf("%d ", (int) input[a]);
}
printf("
");
for (int a = 0; a < image_size; a++)
{
if (a % (width * 4) == 0)
printf("
");
printf("%d ", (int) output[a]);
}
printf("
");
return 0;
}
/**
 * Reads a whole file into a newly allocated, NUL-terminated C string.
 *
 * @param filename Path of the file to read.
 * @return Pointer to a buffer holding the file contents followed by '\0',
 *         or a null pointer if the file cannot be opened, its size cannot
 *         be determined, allocation fails, or reading fails. The buffer is
 *         allocated with calloc(); the caller owns it and releases it with
 *         free().
 */
const char* loadKernel(const char* filename)
{
    FILE* file_handler = fopen(filename, "r");
    if (!file_handler)
        return NULL;

    struct stat file_stat;
    if (stat(filename, &file_stat) != 0 || file_stat.st_size < 0)
    {
        fclose(file_handler);
        return NULL;
    }

    const size_t capacity = static_cast<size_t>(file_stat.st_size);
    char* source = static_cast<char*>(calloc(capacity + 1, sizeof(char)));
    if (!source)
    {
        fclose(file_handler);
        return NULL;
    }

    // Read byte-wise so the return value is the number of bytes actually
    // stored; in text mode that can be fewer than st_size.
    const size_t bytes_read = fread(source, 1, capacity, file_handler);
    const bool read_failed = ferror(file_handler) != 0;

    // The handle is no longer needed whether or not the read succeeded.
    fclose(file_handler);

    if (read_failed)
    {
        free(source);
        return NULL;
    }

    source[bytes_read] = '\0';
    return source;
}
The program simply creates two float arrays for an image of width 4 and height 2. Every pixel holds RGBA information, so each array ends up with 32 elements. The program fills the input with random values, then I initialize OpenCL, load the kernel and execute it. At the end I read back the returned array and print the two arrays to compare them, but the values have changed. This is an example of the program’s output:
~/opencl_test/src $ ./opencl
4 66 93 64 45 35 59 1 66 77 20 85 32 91 95 45
97 41 9 22 0 28 29 21 87 92 19 82 59 3 19 64
45 35 59 1 32 91 95 45 0 0 2 0 0 0 -2147483648 0
0 28 29 21 59 3 19 64 -2147483648 0 0 0 0 -576 0 0
As you guys can see, the second array is different from the first one.
What’s going on?
PS: I would really love to see some examples of image2d_t usage; I can’t find much information about it on the internet.
Thanks a lot!