Dear all,
I am trying to write code for image processing.
I want to write one kernel and call it from another kernel. Is that possible?
I have learned that a kernel can call ordinary functions; I would also like to try calling a kernel from a kernel.
Please help me with this.
Here is my current situation:
//creatematrix.cl
/*
 * createMat - allocate an iRows x iColumns matrix of floats and set every
 * element to Initvalue.
 *
 * The matrix is an array of iRows row pointers; each row is a separately
 * allocated array of iColumns floats, so elements are addressed as
 * result[row][column].
 *
 * Returns the array of row pointers, or NULL if iRows or iColumns is not
 * positive or if any allocation fails. On failure every row allocated so far
 * is released first, so nothing leaks. The caller owns the result and must
 * free() each row and then the row-pointer array.
 *
 * NOTE(review): this relies on malloc()/free(). OpenCL C device code has no
 * heap allocator, so this function can only be built as host code; it cannot
 * be compiled into, or called from, a kernel.
 */
float **createMat(int iRows, int iColumns, float Initvalue)
{
    if (iRows <= 0 || iColumns <= 0) {
        return NULL;
    }

    float **pMatrix = malloc((size_t)iRows * sizeof *pMatrix);
    if (pMatrix == NULL) {
        return NULL;
    }

    for (int i = 0; i < iRows; i++) {
        pMatrix[i] = malloc((size_t)iColumns * sizeof *pMatrix[i]);
        if (pMatrix[i] == NULL) {
            /* Undo the rows allocated so far so a failure leaks nothing. */
            while (i-- > 0) {
                free(pMatrix[i]);
            }
            free(pMatrix);
            return NULL;
        }
        for (int j = 0; j < iColumns; j++) {
            pMatrix[i][j] = Initvalue;
        }
    }

    return pMatrix;
}
// Convolution.cl
__kernel float* IMConvolution(const __global float * pInput,
__constant float * pFilter,
const int nInWidth,
const int nFilterWidth,
const int nWidth)
{
float *pOutput;
//const int nWidth = get_global_size(0);
const int xOut = get_global_id(0);
const int yOut = get_global_id(1);
const int xInTopLeft = xOut;
const int yInTopLeft = yOut;
float sum = 0;
for (int r = 0; r < nFilterWidth; r++)
{
const int idxFtmp = r * nFilterWidth;
const int yIn = yInTopLeft + r;
const int idxIntmp = yIn * nInWidth + xInTopLeft;
for (int c = 0; c < nFilterWidth; c++)
{
const int idxF = idxFtmp + c;
const int idxIn = idxIntmp + c;
sum += pFilter[idxF]*pInput[idxIn];
}
} //for (int r = 0...
/*if (sum > 1)
sum = 1;
if (sum < -1)
sum = -1;*/
const int idxOut = yOut * nWidth + xOut;
pOutput[idxOut] = sum;
return(pOutput);
}
// CNNonGPU.cl
#include "Convolution.cl"
#include "creatematrix.cl"
/*
 * CNNonGPU - iterative filter built from two IMConvolution passes.
 *
 * What the code below does, as written:
 *   1. Calls IMConvolution on pInput with tempB once and keeps the result
 *      in TBimg.
 *   2. Repeats `iterations` times: calls IMConvolution on the current state
 *      (initially pInput) with tempA, adds TBimg and `bias` element-wise,
 *      maps each sum s to 0.5 * (|s + 1| - |s - 1|), stores the mapped values
 *      in the interior of a fresh tempimg matrix, then copies tempimg back
 *      over the state.
 *   3. Copies the interior of the last tempimg into pOutput.
 *
 * Both IMConvolution calls assume it returns a float* result buffer.
 *
 * NOTE(review): this block cannot build as OpenCL C in its current form:
 *   - `new float[...]` is a C++ operator; kernels cannot allocate memory
 *     dynamically (the same applies to createMat if it relies on malloc).
 *   - `bias`, `iHeightExtended` and `iWidthExtended` are not declared in this
 *     block and are not kernel arguments.
 *   - The saturation expression has one more '(' than ')'.
 *   - `ind` is declared inside the iteration loop body but is also used after
 *     that loop has closed.
 *   - `extYimg = pInput` assigns a const __global pointer to an unqualified
 *     float pointer, and the state copy later writes through it.
 *   - abs() on a float operand: presumably fabs() is needed - confirm
 *     against the OpenCL C built-in function list.
 *
 * NOTE(review): nothing in this block reads get_global_id(), so every
 * work-item executes the same loops and writes the same pOutput elements.
 * Each iteration needs the complete result of the previous one; presumably
 * the loop belongs on the host, enqueueing one kernel per step with
 * pre-allocated buffers - confirm the intended design.
 */
__kernel void CNNonGPU(const __global float * pInput,
__constant float * tempA,
__constant float * tempB,
__global float * pOutput,
const int nInWidth,
const int nInHeight,
const int nFilterWidth,
const int iterations)
{
// Output row length is taken from the launch size in dimension 0.
const int nWidth = get_global_size(0);
float *TBimg, *Yimg, *extYimg, *TAimg;
float **tempimg;
int Elements = 0;
int inElements = 0;
float *sum;
// The output is treated as nWidth x nWidth elements.
Elements = nWidth * nWidth;
// inElements is computed here but never read afterwards.
inElements = nInWidth * nInHeight;
TBimg = new float [Elements];
TAimg = new float [Elements];
sum = new float [Elements];
// The buffer just assigned to TBimg is replaced by the call's return value.
TBimg = IMConvolution(pInput,tempB,nInWidth,nFilterWidth,nWidth);
// The state starts out aliasing the (const) input image.
extYimg = pInput;
for(int i = 0; i < iterations;i++)
{
Yimg = extYimg;
// Likewise replaces the TAimg buffer allocated before the loop.
TAimg = IMConvolution(Yimg,tempA,nInWidth,nFilterWidth,nWidth);
for(int j = 0; j < Elements;j++)
{
sum[j] = TBimg[j] + TAimg[j] + bias;
}
// A new matrix is created on every iteration; none is released in this block.
tempimg = createMat(nInWidth,nInHeight,0);
int ind = 0;
// Fill the interior of tempimg, skipping index 0 and the last index in both
// dimensions. This inner `i` shadows the iteration counter.
for (int i = 1; i < iHeightExtended-1; i++)
{
for (int j = 1; j < iWidthExtended-1; j++)
{
tempimg[i][j] = 0.5 * ((abs(sum[ind] + 1) - (abs(sum[ind] - 1)));
ind++;
}
}
ind = 0;
// Copy tempimg back into the flat state buffer. The outer loop runs over
// nInHeight and the inner over nInWidth, while tempimg is indexed [i][j].
// NOTE(review): this writes through the pointer that aliases pInput.
for(int j = 0; j < nInHeight; j++)
{
for(int i = 0; i < nInWidth;i++)
{
extYimg[ind] = tempimg[i][j];
ind++;
}
}
}
// NOTE(review): tempimg is only assigned inside the loop above, so it is
// uninitialised here when iterations is 0.
ind = 0;
// Copy the interior of the final tempimg into the flat output buffer.
for (int i = 1; i < iHeightExtended-1; i++)
{
for (int j = 1; j < iWidthExtended-1; j++)
{
pOutput[ind] = tempimg[i][j];
ind++;
}
}
}
Here, in CNNonGPU, I want to use the convolution kernel many times, sometimes 1000 times or even more.
I also want to use the createMat function once. Is it possible to do so?
I tried to run the code above by invoking CNNonGPU from main() on the host (CPU), but clBuildProgram fails with the error CL_BUILD_PROGRAM_FAILURE.
Thanks in advance.