i am using gpu of type nvidia,i am using opencl but when i run the program using ctrl+F5(start without debugging )then i get result in which gpu takes more time than cpu but when i run the program cpu takes more time than gpu and result is also i am giving
start without debugging -> cpu time=6127 ms gpu time= 6240 ms
start with debug-> cpu time= 18354 ms gpu time= 9125 ms
wt is the reason in this difference…
visual studio 2010 i am using
the code is here. wt is going wrong.?..thanks
// Hello.cpp : Defines the entry point for the console application.
//
//#include <stdafx.h>
#include<stdio.h>
#include<stdlib.h>
#include<conio.h>
#include<time.h>
#include “CL/cl.h”
#define DATA_SIZE 100000
const char *KernelSource =
"kernel void hello(global float *input , global float *output)
"
"{
"
" size_t id =get_global_id(0);
"
"output[id] =input[id]*input[id];
"
"} "
"
"
"
";
//float start_time,end_time;
int main(void)
{
double start_time,end_time;
start_time=clock();
cl_context context;
cl_context_properties properties[3];
cl_kernel kernel;
cl_command_queue command_queue;
cl_program program;
cl_int err;
cl_uint num_of_platforms=0;
cl_platform_id platform_id;
cl_device_id device_id;
cl_uint num_of_devices=0;
cl_mem input,output;
size_t global;
float inputData[100000];
for(int j=0;j<100000;j++)
{
inputData[j]=(float)j;
}
float results[DATA_SIZE];//={0};
// int i;
//retrieve a list of platform variable
if(clGetPlatformIDs(1,&platform_id,&num_of_platforms)!=CL_SUCCESS)
{
printf("Unable to get platform_id
");
return 1;
}
//try to get supported GPU DEvice
if(clGetDeviceIDs(platform_id,CL_DEVICE_TYPE_CPU,1,&device_id,
&num_of_devices)!=CL_SUCCESS)
{
printf("unable to get device_id
");
return 1;
}
//context properties list -must be terminated with 0
properties[0]=CL_CONTEXT_PLATFORM;
properties[1]=(cl_context_properties) platform_id;
properties[2]=0;
//create a context with the GPU device
context=clCreateContext(properties,1,&device_id,NULL,NULL,&err);
//create command queue using the context and device
command_queue=clCreateCommandQueue(context,device_id,0,&err);
//create a program from the kernel source code
program=clCreateProgramWithSource(context,1,(const char**)
&KernelSource,NULL,&err);
//compile the program
err=clBuildProgram(program,0,NULL,NULL,NULL,NULL);
if((err!=CL_SUCCESS))
{
printf("build error
“,err);
size_t len;
char buffer[4096];
//get the build log
clGetProgramBuildInfo(program,device_id,CL_PROGRAM_BUILD_LOG,sizeof(buffer),buffer,&len);
printf(”----build Log—
%s
",buffer);
exit(1);
// return 1;
}
//specify which kernel from the program to execute
kernel=clCreateKernel(program,"hello",&err);
//create buffers for the input and output
input=clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float)*DATA_SIZE,NULL,NULL);
output=clCreateBuffer(context,CL_MEM_WRITE_ONLY,sizeof(float)*DATA_SIZE,NULL,NULL);
//load data into the input buffer
clEnqueueWriteBuffer(command_queue,input,CL_TRUE,0,
sizeof(float)*DATA_SIZE,inputData,0,NULL,NULL);
//set the argument list for the kernel command
clSetKernelArg(kernel,0,sizeof(cl_mem),&input);
clSetKernelArg(kernel,1,sizeof(cl_mem),&output);
global=DATA_SIZE;
//enqueue the kernel command for execution
clEnqueueNDRangeKernel(command_queue,kernel,1,NULL,&global,NULL,0,NULL,NULL);
clFinish(command_queue);
//copy the results from out of the buffer
clEnqueueReadBuffer(command_queue,output,CL_TRUE,0,sizeof(float)*DATA_SIZE,results,0,
NULL,NULL);
//print the results
printf("output:");
for(int i=0;i<DATA_SIZE;i++)
{
printf("%f
",results[i]);
//printf("no. of times loop run %d
",count);
}
//cleanup-release OpenCL resources
clReleaseMemObject(input);
clReleaseMemObject(output);
clReleaseProgram(program);
clReleaseKernel(kernel);
clReleaseCommandQueue(command_queue);
clReleaseContext(context);
end_time=clock();
printf("execution time is%f",end_time-start_time);
_getch();
return 0;
}