I am using the NVIDIA SDK to run OpenCL programs. The sample codes provided with it run fine, but in my code, shrFindFilePath(cSourceFile, argv[0]) returns NULL every time. Even if I copy and paste the code exactly from the sample itself, it returns NULL.
This is the code of my .cpp file:
#include<oclUtils.h>
#include<shrQATest.h>
const char* cSourceFile="./test.cl";//name of the cl file
void *srcA,*srcB,*C;//host buffers
//open cl type variables
cl_platform_id cpPlatform;//Opencl platform
cl_device_id cdDevice;//Opencl device
cl_context cxGPUContext;//opencl context
cl_command_queue cqCommandQueue;//opencl commandque
cl_program cpProgram;//opencl program
cl_kernel ckKernel;//Opencl Kernel
cl_mem cmDevSrcA;//opencl deice source buifffer for A;
cl_mem cmDevSrcB;//"
cl_mem cmDevSrcC;
size_t szGlobalWorkSize;
size_t szLocalWorkSize;
size_t szKernelLength;
cl_int ciErr;
char *cPathName=NULL;
char cSourceCL=NULL;
const char cExecutableName=NULL;
int iNumEl=256;
void Cleanup(int iExitCode);
void (*pCleanup)(int) = &Cleanup;
int main(int argc,char **argv){
//int ii=0;
shrQAStart(argc,argv);
cExecutableName=argv[0];
shrSetLogFileName(“pkAddcl.txt”);
shrLog("%s Starting…
of float elements per Array = %u
", argv[0], iNumEl);
szLocalWorkSize=256;
szGlobalWorkSize=shrRoundUp((int)szLocalWorkSize,iNumEl);
shrLog("Global Work Size = %u
Local Work Size = %u
of Work Groups = %u
",szGlobalWorkSize, szLocalWorkSize, (szGlobalWorkSize % szLocalWorkSize + szGlobalWorkSize/szLocalWorkSize));
//allocating host memory
shrLog("allocating Host and Init mems…
");
srcA=(void*)malloc(sizeof(cl_float4)szGlobalWorkSize);
srcB=(void)malloc(sizeof(cl_float4)szGlobalWorkSize);
C=(void)malloc(sizeof(cl_float4)szGlobalWorkSize);
shrFillArray((float)srcA,4iNumEl);
shrFillArray((float)srcB,5*iNumEl);
char buff[2048];
//Create everything
//get platform
ciErr=oclGetPlatformID(&cpPlatform);
oclCheckError(ciErr, CL_SUCCESS);
ciErr=clGetPlatformInfo (cpPlatform, CL_PLATFORM_NAME, sizeof(buff), buff, NULL);
shrLog( "the platform is…%s ",buff);
//get a GPU dev
ciErr=clGetDeviceIDs(cpPlatform,CL_DEVICE_TYPE_GPU,1,&cdDevice,NULL);
oclCheckError(ciErr, CL_SUCCESS);
shrLog("
the value of CL_SUCCESSs is …%d",CL_SUCCESS);
//create context
cxGPUContext=clCreateContext(0,1,&cdDevice,NULL,NULL,&ciErr);
oclCheckError(ciErr, CL_SUCCESS);
//creatint a command u
cqCommandQueue=clCreateCommandQueue(cxGPUContext,cdDevice,0,&ciErr);
oclCheckError(ciErr, CL_SUCCESS);
//allocating mem at the device
shrLog("
creating and allocating memory on the device");
cmDevSrcA=clCreateBuffer(cxGPUContext,CL_MEM_READ_ONLY,sizeof(cl_float)szGlobalWorkSize4,NULL,&ciErr);
oclCheckError(ciErr, CL_SUCCESS);
shrLog("
first buffer A %d",ciErr);
cmDevSrcB=clCreateBuffer(cxGPUContext,CL_MEM_READ_ONLY,sizeof(cl_float)szGlobalWorkSize4,NULL,&ciErr);
oclCheckError(ciErr, CL_SUCCESS);
shrLog("
2 buffer B %d",ciErr);
cmDevSrcC=clCreateBuffer(cxGPUContext,CL_MEM_WRITE_ONLY,sizeof(cl_float)*szGlobalWorkSize,NULL,&ciErr);
oclCheckError(ciErr, CL_SUCCESS);
shrLog("
3 buffer C %d",ciErr);
//read the Open cl file to form the kernel
shrLog("
reading from file callLoadProgWithSource…%s",cSourceFile);
cPathName=shrFindFilePath(cSourceFile,argv[0]);
///////////////////////////////////////////////////////////////////////////////////////////////
shrLog("
the cPathName value is…%s",cPathName);
oclCheckError(ciErr, CL_SUCCESS);
cSourceCL=oclLoadProgSource(cPathName,"",&szKernelLength);
oclCheckError(ciErr, CL_SUCCESS);
shrLog("
the cSourceCL value is…%s ",cSourceCL);
//create program
shrLog("
clCreateProgramWithSource…creating program with source
");
cpProgram=clCreateProgramWithSource(cxGPUContext,1,(const char**)&cSourceCL,&szKernelLength,&ciErr);
shrLog(“the cpProgram is %s”,cpProgram);
//Building the program
shrLog("
building the program…clBuildProgram
“);
ciErr=clBuildProgram(cpProgram,0,NULL,NULL,NULL,NULL);
shrLog(”
Eror at program = %d",ciErr);
if(ciErr!=CL_SUCCESS){
shrLog("
printed Upto here");
shrLogEx(LOGBOTH|ERRORMSG,ciErr,STDERROR);
oclLogBuildInfo(cpProgram,oclGetFirstDev(cxGPUContext));
oclLogPtx(cpProgram,oclGetFirstDev(cxGPUContext),“pkAddcl.ptx”);
}
//create kernel
shrLog("
Creating kernel…clCreateKernel
“);
ckKernel=clCreateKernel(cpProgram,“add”,&ciErr);
//setting the args
ciErr=clSetKernelArg(ckKernel,0,sizeof(cl_mem),(void*)&cmDevSrcA);
ciErr|=clSetKernelArg(ckKernel,1,sizeof(cl_mem),(void*)&cmDevSrcB);
ciErr|=clSetKernelArg(ckKernel,2,sizeof(cl_mem),(void*)&cmDevSrcC);
oclCheckError(ciErr, CL_SUCCESS);
//core seq…copy iput to GPU and Results back to host
//asynch write of data to GPU
shrLog(”
clEnqueue…srcA and srcB
“);
ciErr=clEnqueueWriteBuffer(cqCommandQueue,cmDevSrcA,CL_FALSE,0,sizeof(cl_float)szGlobalWorkSize4,srcA,0,NULL,NULL);
ciErr|=clEnqueueWriteBuffer(cqCommandQueue,cmDevSrcB,CL_FALSE,0,sizeof(cl_float)szGlobalWorkSize4,srcB,0,NULL,NULL);
oclCheckError(ciErr, CL_SUCCESS);
//launch kernel
shrLog(”
clEnqueueNDRangeKernel…
");
ciErr=clEnqueueNDRangeKernel(cqCommandQueue,ckKernel,1,NULL,&szGlobalWorkSize,&szLocalWorkSize,0,NULL,NULL);
oclCheckError(ciErr, CL_SUCCESS);
//read back the results
shrLog("
clEnqueueReadBuffer…read the results from the buffer
");
ciErr=clEnqueueReadBuffer(cqCommandQueue,cmDevSrcC,CL_TRUE,0,sizeof(cl_float)*szGlobalWorkSize,C,0,NULL,NULL);
oclCheckErrorEX(ciErr, CL_SUCCESS,pCleanup);
Cleanup(EXIT_SUCCESS);
}
void Cleanup(int iExitCode)
{
// Cleanup allocated objects
shrLog("Starting Cleanup…
");
if(cPathName)free(cPathName);
if(cSourceCL)free(cSourceCL);
if(ckKernel)clReleaseKernel(ckKernel);
if(cpProgram)clReleaseProgram(cpProgram);
if(cqCommandQueue)clReleaseCommandQueue(cqCommandQueue);
if(cxGPUContext)clReleaseContext(cxGPUContext);
if (cmDevSrcA)clReleaseMemObject(cmDevSrcA);
if (cmDevSrcB)clReleaseMemObject(cmDevSrcB);
if (cmDevSrcC)clReleaseMemObject(cmDevSrcC);
// Free host memory
free(srcA);
free(srcB);
free (C);
}
And this is the .cl file:
/*
 * Element-wise sum of two float arrays: c[i] = a[i] + b[i].
 *
 * a, b          input arrays in global memory
 * c             output array in global memory
 * iNumElements  number of valid elements; work-items at or beyond this index
 *               do nothing
 */
__kernel void add(__global const float* a, __global const float* b, __global float* c, int iNumElements)
{
    const int i = (int)get_global_id(0);

    // The global work size can be larger than the element count, so
    // work-items past the end must not touch memory.
    if (i >= iNumElements)
    {
        return;
    }

    c[i] = a[i] + b[i];
}
Both files are in the same directory, test, which is located at /piyush/NVIDIA_GPU_Computing_SDK/OpenCL/src/test
Any help on the matter will be appreciated.
Thanks
Piyush