stripes over output image

I’ve decided to start a new beginners’ thread because the old one was posted in the wrong place, under ‘technical discussions’.

I’m trying to remove the R and G color channels from an 8-bit-per-channel BMP file using image buffers.
The problem is that my output is covered in vertical RGB stripes (similar to what you see on a CRT monitor).
In my opinion the kernel code is OK, and so is the cl_image_format (CL_RGBA, CL_UNSIGNED_INT8). I have doubts about how I pass the image to the image buffer, but I don’t know what causes the problem (I’ve checked the error codes and no error is reported).
So my question is how to remove these unwanted stripes.

kernel.cl

// Copies imageIn to imageOut, one pixel per work-item, with the first two
// channels (.x and .y) of every pixel forced to zero.
//
// imageIn   source image, read as unsigned integer channels.
// imageOut  destination image; receives the modified pixel at the same
//           coordinates.
//
// Which colours .x and .y hold depends on the image's channel order and on
// how the host packed the pixel bytes; for a CL_RGBA image they are the
// channels the host stored as red and green.
__kernel void copy(__read_only image2d_t imageIn, __write_only image2d_t imageOut)
{
    // Integer pixel coordinates, nearest texel, no interpolation.
    const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |
                              CLK_ADDRESS_CLAMP |
                              CLK_FILTER_NEAREST;

    const int2 coord = (int2)((int)get_global_id(0), (int)get_global_id(1));

    // Work-items launched beyond the image (e.g. a padded global size) must
    // not write: out-of-range image writes are undefined.
    if (coord.x >= get_image_width(imageOut) || coord.y >= get_image_height(imageOut))
    {
        return;
    }

    uint4 pixel = read_imageui(imageIn, sampler, coord);
    pixel.x = 0;
    pixel.y = 0;
    write_imageui(imageOut, coord, pixel);
}

vec.cpp

#include <oclUtils.h>

const char* cSourceFile = “kernel.cl”;

// OpenCL Vars
cl_context cxGPUContext; // OpenCL context
cl_command_queue cqCommandQue; // OpenCL command que
cl_device_id* cdDevices; // OpenCL device list
cl_program cpProgram; // OpenCL program
cl_kernel ckKernel; // OpenCL kernel
cl_mem cmDevSrcA; // OpenCL device source buffer A
cl_mem cmDevSrcB; // OpenCL device source buffer B
cl_mem cmDevDst; // OpenCL device destination buffer

size_t szGlobalWorkSize[2];
size_t szLocalWorkSize[2];

size_t szParmDataBytes; // Byte size of context information
size_t szKernelLength; // Byte size of kernel code
cl_int ciErr1, ciErr2; // Error code var
char* cPathAndName = NULL; // var for full paths to data, src, etc.
char* cSourceCL = NULL; // Buffer to hold source for compilation

shrBOOL bNoPrompt = shrFALSE;

// Main function
// ************************************************************


int main(int argc, char **argv)
{

// get command line arg for quick test, if provided
bNoPrompt = shrCheckCmdLineFlag(argc, (const char**)argv, “noprompt”);

// start logs
shrSetLogFileName (“vec.txt”);

size_t result=0;
unsigned char header [54];

FILE *input = fopen(“in.bmp”, “rb”);
result = fread(header,1,54,input);
fseek (input, 54, SEEK_SET);

unsigned char tab;
tab = (unsigned char
)malloc(5125124);
result = fread(tab, 1, 3512512, input);
fclose(input);
void * image= (unsigned char *)tab;

unsigned char tab2;
tab2 = (unsigned char
)malloc(5125124);
void * image2= (unsigned char *)tab2;

// Create the OpenCL context on a GPU device
cxGPUContext = clCreateContextFromType(0, CL_DEVICE_TYPE_GPU, NULL, NULL, &ciErr1);
shrLog(LOGBOTH, 0.0, "clCreateContextFromType…
");
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clCreateContextFromType, Line %u in file %s !!!

", LINE, FILE);}

// Get the list of GPU devices associated with context
ciErr1 = clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
cdDevices = (cl_device_id*)malloc(szParmDataBytes);
ciErr1 |= clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
shrLog(LOGBOTH, 0.0, "clGetContextInfo…
");
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clGetContextInfo, Line %u in file %s !!!

", LINE, FILE);}

// Create a command-queue
cqCommandQue = clCreateCommandQueue(cxGPUContext, cdDevices[0], 0, &ciErr1);
shrLog(LOGBOTH, 0.0, "clCreateCommandQueue…
");
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clCreateCommandQueue, Line %u in file %s !!!

", LINE, FILE);}

// Allocate the OpenCL buffer memory objects for source and result on the device GMEM

size_t width = 512;
size_t height = 512;
size_t rowpitch = 0;

cl_image_format format;
format.image_channel_order = CL_RGBA;
format.image_channel_data_type = CL_UNSIGNED_INT8;

cl_mem_flags flags;
flags = CL_MEM_READ_ONLY;
shrLog(LOGBOTH, 0.0, "clCreateImage 1…
");

cl_mem myClImage = clCreateImage2D(
cxGPUContext,
flags,
&format,
width,
height,
rowpitch,
0,
&ciErr1
);

if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clCreateImage2d 1, Line %u in file %s !!!

", LINE, FILE);}

shrLog(LOGBOTH, 0.0, "clCreateImage 2…
");
cl_mem_flags flags2;
flags2 = CL_MEM_WRITE_ONLY;

cl_mem myClImage2 = clCreateImage2D(
cxGPUContext,
flags2,
&format,
width,
height,
rowpitch,
0,//image2,
&ciErr1
);

//ciErr1 |= ciErr2;
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clCreateImage2D 2, Line %u in file %s !!!

", LINE, FILE);}

// Read the OpenCL kernel in from source file
shrLog(LOGBOTH, 0.0, "oclLoadProgSource (%s)…
", cSourceFile);
cPathAndName = shrFindFilePath(cSourceFile, argv[0]);
cSourceCL = oclLoadProgSource(cPathAndName, “”, &szKernelLength);

// Create the program
cpProgram = clCreateProgramWithSource(cxGPUContext, 1, (const char **)&cSourceCL, &szKernelLength, &ciErr1);
shrLog(LOGBOTH, 0.0, "clCreateProgramWithSource…
");
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clCreateProgramWithSource, Line %u in file %s !!!

", LINE, FILE);}

// Build the program
ciErr1 = clBuildProgram(cpProgram, 0, NULL, NULL, NULL, NULL);
shrLog(LOGBOTH, 0.0, "clBuildProgram…
");
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clBuildProgram, Line %u in file %s !!!

", LINE, FILE);}

// Create the kernel
ckKernel = clCreateKernel(cpProgram, “copy”, &ciErr1);
shrLog(LOGBOTH, 0.0, "clCreateKernel (copy)…
");
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clCreateKernel, Line %u in file %s !!!

", LINE, FILE);}

// Set the Argument values

ciErr1 = clSetKernelArg(ckKernel, 0, sizeof(cl_mem), (void*)&myClImage);
shrLog(LOGBOTH, 0.0, "clSetKernelArg 0…
");
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clSetKernelArg, Line %u in file %s !!!

", LINE, FILE);}

ciErr1 = clSetKernelArg(ckKernel, 1, sizeof(cl_mem), (void*)&myClImage2);
shrLog(LOGBOTH, 0.0, "clSetKernelArg 1…
");
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clSetKernelArg, Line %u in file %s !!!

", LINE, FILE);}

// --------------------------------------------------------
// Start Core sequence… copy input data to GPU, compute, copy results back

size_t origin[3];
origin[0] = 0;
origin[1] = 0;
origin[2] = 0;

size_t region[3];
region[0] = width;
region[1] = height;
region[2] = 1;

shrLog(LOGBOTH, 0.0, "clEnqueueWriteImage…
");

ciErr1 = clEnqueueWriteImage (
cqCommandQue,
myClImage,
CL_TRUE,
origin,
region,
width*sizeof(char)4, //size_t input_row_pitch,
0, //width
sizeof(char)*height,//size_t input_slice_pitch,
image, //const void * ptr,
0,
NULL,
NULL
);

if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clEnqueueWriteImage, Line %u in file %s !!!

", LINE, FILE);}

// write of data to GPU device

szGlobalWorkSize[0] = 512;
szGlobalWorkSize[1] = 512;
szLocalWorkSize[0] = 16;
szLocalWorkSize[1] = 16;

// Launch kernel
ciErr1 = clEnqueueNDRangeKernel(cqCommandQue, ckKernel, 2, NULL, szGlobalWorkSize, szLocalWorkSize, 0, NULL, NULL);
clFinish(cqCommandQue);
shrLog(LOGBOTH, 0.0, "clEnqueueNDRangeKernel (VectorAdd)…
");

if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "leRROR Error in clEnqueueNDRangeKernel, Line %u in file %s !!!

", LINE, FILE);}

shrLog(LOGBOTH, 0.0, "clEnqueueReadImage …
");

ciErr1 = clEnqueueReadImage (
cqCommandQue,
myClImage2,
CL_TRUE,
origin,
region,
0, //widthsizeof(char), //size_t row_pitch,
0, //width
sizeof(char)*height,
image2, //void *ptr,
0,
NULL,
NULL
);

if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clEnqueueReadImage, Line %u in file %s !!!

", LINE, FILE);}

FILE nk = fopen(“outputt.bmp”, “wb”);
fwrite(header,1,54,nk);
fwrite(image2, 1, (3
512*512), nk);
fclose(nk);

shrLog(LOGBOTH, 0.0, "Finish success

");

}

output file
http://img641.imageshack.us/img641/9598/outputt.jpg
zoomed output file
http://img13.imageshack.us/img13/6292/zooms.jpg
input file

If anyone’s interested: the problem was that the file was 24 bpp; with a 32 bpp file everything works perfectly.

I was about to say that it sounded like you were missing a channel in your RGBA.