Hello everyone,
I'm trying to develop a simple program (using clCreateImage2D and read_image/write_image) that reads a BMP from a file, processes it in a kernel (a plain copy in this case), and then saves the result to another BMP file. Unfortunately, when I run the program the output file is empty, and the program reports no errors. I have no idea what is causing the problem.
Any ideas what is wrong with my code?
vec.cpp
#include <stdio.h>
#include <stdlib.h>

#include <oclUtils.h>
// File name of the OpenCL C source that is loaded and compiled at run time.
const char* cSourceFile = "kernel.cl";
// OpenCL Vars
cl_context cxGPUContext; // OpenCL context
cl_command_queue cqCommandQue; // OpenCL command que
cl_device_id* cdDevices; // OpenCL device list
cl_program cpProgram; // OpenCL program
cl_kernel ckKernel; // OpenCL kernel
cl_mem cmDevSrcA; // OpenCL device source buffer A
cl_mem cmDevSrcB; // OpenCL device source buffer B
cl_mem cmDevDst; // OpenCL device destination buffer
// The kernel is enqueued with work_dim == 2, so both work sizes must be
// two-element arrays (one entry per dimension); a scalar size_t cannot be
// initialised from a two-element brace list.
size_t szGlobalWorkSize[2] = {512, 512};
size_t szLocalWorkSize[2] = {16, 16};
size_t szParmDataBytes; // Byte size of context information
size_t szKernelLength; // Byte size of kernel code
cl_int ciErr1, ciErr2; // Error code var
char* cPathAndName = NULL; // var for full paths to data, src, etc.
char* cSourceCL = NULL; // Buffer to hold source for compilation
shrBOOL bNoPrompt = shrFALSE;
// Main function
// ************************************************************
int main(int argc, char argv)
{
// get command line arg for quick test, if provided
bNoPrompt = shrCheckCmdLineFlag(argc, (const char)argv, “noprompt”);
// start logs
shrSetLogFileName (“vec.txt”);
void *image = fopen(“in.bmp”, “rb”);
void *image2 = fopen(“wynik.bmp”, “wb”);
image = (void )malloc(8 * (5125123+54));
image2 = (void )malloc(8 * (5125123+54));
// Create the OpenCL context on a GPU device
cxGPUContext = clCreateContextFromType(0, CL_DEVICE_TYPE_GPU, NULL, NULL, &ciErr1);
shrLog(LOGBOTH, 0.0, "clCreateContextFromType…
");
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clCreateContextFromType, Line %u in file %s !!!
", LINE, FILE);}
// Get the list of GPU devices associated with context
ciErr1 = clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
cdDevices = (cl_device_id*)malloc(szParmDataBytes);
ciErr1 |= clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
shrLog(LOGBOTH, 0.0, "clGetContextInfo…
");
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clGetContextInfo, Line %u in file %s !!!
", LINE, FILE);}
// Create a command-queue
cqCommandQue = clCreateCommandQueue(cxGPUContext, cdDevices[0], 0, &ciErr1);
shrLog(LOGBOTH, 0.0, "clCreateCommandQueue…
");
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clCreateCommandQueue, Line %u in file %s !!!
", LINE, FILE);}
// Allocate the OpenCL buffer memory objects for source and result on the device GMEM
size_t width = 512;
size_t height = 512;
size_t rowpitch = 0;
cl_image_format format;
format.image_channel_order = CL_RGBA;
format.image_channel_data_type = CL_UNSIGNED_INT8;
cl_mem_flags flags;
flags = CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR;
cl_mem myClImage = clCreateImage2D(
cxGPUContext,
flags,
&format,
width,
height,
rowpitch,
image,
&ciErr1
);
cl_mem_flags flags2;
flags2 = CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR;
cl_mem myClImage2 = clCreateImage2D(
cxGPUContext, // a valid OpenCL context
flags2, // option flags [1]
&format, // image format properties [2]
width, // width of the image in pixels
height, // height of the image in pixels
rowpitch, // scan-line pitch in bytes [3]
image2, // pointer to the image data
&ciErr2 // on return, the result code
);
ciErr1 |= ciErr2;
shrLog(LOGBOTH, 0.0, "clCreateBuffer…
");
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clCreateBuffer, Line %u in file %s !!!
", LINE, FILE);}
// Read the OpenCL kernel in from source file
shrLog(LOGBOTH, 0.0, "oclLoadProgSource (%s)…
", cSourceFile);
cPathAndName = shrFindFilePath(cSourceFile, argv[0]);
cSourceCL = oclLoadProgSource(cPathAndName, “”, &szKernelLength);
// Create the program
cpProgram = clCreateProgramWithSource(cxGPUContext, 1, (const char **)&cSourceCL, &szKernelLength, &ciErr1);
shrLog(LOGBOTH, 0.0, "clCreateProgramWithSource…
");
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clCreateProgramWithSource, Line %u in file %s !!!
", LINE, FILE);}
// Build the program
ciErr1 = clBuildProgram(cpProgram, 0, NULL, NULL, NULL, NULL);
shrLog(LOGBOTH, 0.0, "clBuildProgram…
");
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clBuildProgram, Line %u in file %s !!!
", LINE, FILE);}
// Create the kernel
ckKernel = clCreateKernel(cpProgram, “copy”, &ciErr1);
shrLog(LOGBOTH, 0.0, "clCreateKernel (copy)…
");
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clCreateKernel, Line %u in file %s !!!
", LINE, FILE);}
// Set the Argument values
ciErr1 = clSetKernelArg(ckKernel, 0, sizeof(cl_mem), (void*)&myClImage);
ciErr1 |= clSetKernelArg(ckKernel, 1, sizeof(cl_mem), (void*)&myClImage2);
shrLog(LOGBOTH, 0.0, "clSetKernelArg 0 - 3…
");
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clSetKernelArg, Line %u in file %s !!!
", LINE, FILE);}
// --------------------------------------------------------
// Start Core sequence… copy input data to GPU, compute, copy results back
const size_t origin = {0, 0, 0};
const size_t region = {1, 1, 1};
ciErr1 = clEnqueueWriteImage (
cqCommandQue,
myClImage, // cl_mem image,
CL_TRUE, // cl_bool blocking_read,
origin,// const size_t origin[3],
region, // const size_t region[3],
0, // size_t row_pitch,
0, // size_t slice_pitch,
image, // void *ptr,
0, // cl_uint num_events_in_wait_list,
NULL, // const cl_event *event_wait_list,
NULL // cl_event *event)
);
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clEnqueueReadImage, Line %u in file %s !!!
", LINE, FILE);}
// Launch kernel
ciErr1 = clEnqueueNDRangeKernel(cqCommandQue, ckKernel, 2, NULL, szGlobalWorkSize, szLocalWorkSize, 0, NULL, NULL);
shrLog(LOGBOTH, 0.0, "clEnqueueNDRangeKernel (copy)…
");
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "leRROR Error in clEnqueueNDRangeKernel, Line %u in file %s !!!
", LINE, FILE);}
//collect results
ciErr1 = clEnqueueReadImage (
cqCommandQue,
myClImage2, // cl_mem image,
CL_TRUE, // cl_bool blocking_read,
origin, // const size_t origin[3],
region, // const size_t region[3],
0, // size_t row_pitch,
0, // size_t slice_pitch,
image2, // void *ptr,
0, // cl_uint num_events_in_wait_list,
NULL, // const cl_event *event_wait_list,
NULL // cl_event *event)
);
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clEnqueueReadImage, Line %u in file %s !!!
", LINE, FILE);}
FILE nk = fopen(“wynik.bmp”, “wb”);
fwrite(image2, 1, sizeof(8(5125123+54)), nk);
shrLog(LOGBOTH, 0.0, "END
");
}
kernel.cl
// Copies one pixel per work-item from imageIn to imageOut.
// Both images are accessed with unsigned-integer reads/writes, so they must
// use an unsigned integer channel data type. Launch as a 2-D NDRange;
// dimension 0 is the x coordinate and dimension 1 the y coordinate.
__kernel void copy(__read_only image2d_t imageIn, __write_only image2d_t imageOut)
{
    // Unnormalised integer pixel coordinates, no filtering.
    const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;

    const int x = (int)get_global_id(0);
    const int y = (int)get_global_id(1);

    // The global size may be rounded up past the image; writing outside the
    // image with write_imageui is undefined, so surplus work-items exit here.
    if (x >= get_image_width(imageOut) || y >= get_image_height(imageOut)) {
        return;
    }

    const uint4 pixel = read_imageui(imageIn, sampler, (int2)(x, y));
    write_imageui(imageOut, (int2)(x, y), pixel);
}
As the input file I use the standard Lena image (renamed to in.bmp) from http://www.bilsen.com/aic/tests/lena/lena.bmp