Hi, i am being able to compile this code but it doesn’t give me any output and goes in some indefinite loop (or something else)… i dont understand whats happening. I’m a newbie to OpenCL and this is my first program… All i’m trying to do is add two vectors. I get no errors during the compilation. If i am not wrong, i should see the ‘End’ that i’ve printed after the gpu portion completes right?
This is the machine detail that i’m running the code on:
Linux gpu02.cluster 2.6.18-92.1.22.el5 #1 SMP Tue Dec 16 11:57:43 EST 2008 x86_64 x86_64 x86_64 GNU/Linux
#include <stdio.h>
#include <CL/cl.h>
#define SIZE 10
int va[SIZE];
int vb[SIZE];
int vc[SIZE];
char* load_program_source(const char*);
int Init(){
int i;
srand(20);
for(i=0;i<SIZE;i++){
va[i]=rand()%10;
vb[i]=rand()%10;
}
}
char* load_program_source(const char *filename)
{
int fileSize = 0;
FILE *pFile = fopen(filename, "r");
rewind(pFile);
fseek(pFile, 0, SEEK_END);
fileSize = ftell(pFile);
rewind(pFile);
char *data = (char*) calloc(sizeof(char), fileSize+1);
fread(data, 1, fileSize, pFile);
data[fileSize]='\0';
fclose(pFile);
return data;
}
int main(){
if(!Init()){
printf("Unable to initialize data");
return 1;
}
cl_context GPUContext = clCreateContextFromType(0,CL_DEVICE_TYPE_GPU, NULL, NULL, NULL);
if(!GPUContext){
printf("Error: Failed to create context");
return 1;
}
//Get the list of GPU devices associated with this context
size_t ParmDataBytes;
clGetContextInfo(GPUContext, CL_CONTEXT_DEVICES, 0, NULL, &ParmDataBytes);
cl_device_id* GPUDevices = (cl_device_id*)malloc(ParmDataBytes);
clGetContextInfo(GPUContext, CL_CONTEXT_DEVICES, ParmDataBytes,GPUDevices,NULL);
//Create a command queue on first gpu device
cl_command_queue GPUCommandQueue = clCreateCommandQueue(GPUContext, GPUDevices[0],0,NULL);
if(!GPUCommandQueue){
printf("Error: Failed to create a command queue");
return 1;
}
//Allocate memory
cl_mem GPUva = clCreateBuffer(GPUContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int)*SIZE, va, NULL);
cl_mem GPUvb = clCreateBuffer(GPUContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int)*SIZE, vb, NULL);
cl_mem GPUvc = clCreateBuffer(GPUContext, CL_MEM_WRITE_ONLY, sizeof(int)*SIZE, NULL, NULL);
//Create OCL program reading the source code from the file
char * OclSource = load_program_source("vectoradd.cl");
cl_program OpenCLProgram = clCreateProgramWithSource(GPUContext,1,(const char**)&OclSource,NULL,NULL);
//Build the program
clBuildProgram(OpenCLProgram,0,NULL,NULL,NULL,NULL);
//obtain the handle for the kernel
cl_kernel OpenCLVectorAdd = clCreateKernel(OpenCLProgram,"VectorAdd",NULL);
//associate GPU memory with the kernel
clSetKernelArg(OpenCLVectorAdd, 0, sizeof(cl_mem), (void*)&GPUvc);
clSetKernelArg(OpenCLVectorAdd, 1, sizeof(cl_mem), (void*)&GPUvb);
clSetKernelArg(OpenCLVectorAdd, 2, sizeof(cl_mem), (void*)&GPUva);
//Launch the kernel in the GPU
size_t WorkSize[1] = {SIZE};
clEnqueueNDRangeKernel(GPUCommandQueue, OpenCLVectorAdd,1,NULL,WorkSize,NULL,0,NULL,NULL);
//copy the result back to the main memory
clEnqueueReadBuffer(GPUCommandQueue, GPUvc, CL_TRUE,0,sizeof(int) * SIZE, vc, 0, NULL, NULL);
//cleanup
free(GPUDevices);
clReleaseKernel(OpenCLVectorAdd);
clReleaseProgram(OpenCLProgram);
clReleaseCommandQueue(GPUCommandQueue);
clReleaseContext(GPUContext);
clReleaseMemObject(GPUva);
clReleaseMemObject(GPUvb);
clReleaseMemObject(GPUvc);
printf("End");
return 0;
}
this is my kernel code:
__kernel void VectorAdd(__global int* vc, __global int* vb, __global int* va){
int i;
for(i=0;i<100;i++){
vc[i]=vb[i]+va[i];
}
}
could anyone please help me with it?
Thanks a lot