#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <fstream>
#include <iostream>
#include <string>
#include <vector>

#include <CL\cl.h>
/**
 * Reads the entire contents of a file into a string.
 *
 * The file is opened in binary mode, so the bytes are stored exactly as they
 * appear on disk (including any embedded NUL bytes).
 *
 * @param filename  Path of the file to read.
 * @param s         Receives the file contents on success; left untouched on
 *                  failure.
 * @return 0 on success, 1 if the file cannot be opened or read (a message is
 *         printed in that case).
 */
int convertToString(const char *filename, std::string& s)
{
    std::ifstream f(filename, std::ios::in | std::ios::binary);
    if (!f.is_open())
    {
        printf("Error: Failed to open file %s\n", filename);
        return 1;
    }

    // Determine the file size by seeking to the end, then rewind.
    f.seekg(0, std::ios::end);
    const std::streamoff fileSize = f.tellg();
    f.seekg(0, std::ios::beg);
    if (fileSize < 0 || !f)
    {
        printf("Error: Failed to read file %s\n", filename);
        return 1;
    }

    // Read straight into a string so no manual new[]/delete[] is needed.
    std::string contents((size_t)fileSize, '\0');
    if (fileSize > 0)
    {
        f.read(&contents[0], (std::streamsize)fileSize);
        if (!f)
        {
            printf("Error: Failed to read file %s\n", filename);
            return 1;
        }
    }

    // Only touch the caller's string once the whole read has succeeded.
    s.swap(contents);
    return 0;
}
// Host program. The OpenCL kernel is loaded from a separate source file at run time; each work item takes care of one element of c
int main( int argc, char* argv[] )
{
// vector length
long int n = 100;
// vector
float *h_a;
float *h_b;
// vector
float *h_c;
float *cpu_c;
// device input
cl_float *d_a;
cl_float *d_b;
cl_float *d_c;
/*
cl_mem d_a;
cl_mem d_b;
cl_mem d_c;*/
cl_platform_id cpPlatform; // OpenCL
cl_device_id device_id; // device ID
cl_context context; // context
cl_command_queue queue; // command queue
cl_kernel kernel; // kernel
//
size_t bytes = n*sizeof(float);
h_a = (float*)malloc(bytes);
h_b = (float*)malloc(bytes);
h_c = (float*)malloc(bytes);
cpu_c= (float*)malloc(bytes);
int i;
srand( (unsigned)time( NULL ) );
for(i = 0; i < n; i++)
h_a[i] = rand()%50;
srand( (unsigned)time( NULL ) +1000);
for(i = 0; i < n; i++)
h_b[i] = rand()%50;
//cpu computer
for( i=0; i < n; i++)
{
cpu_c[i] = h_a[i]+h_b[i];
}
size_t globalSize, localSize;
cl_int err;
//
localSize = 2;
//
globalSize = (size_t)ceil(n/(float)localSize)*localSize;
err = clGetPlatformIDs(1, &cpPlatform, NULL);
err = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_CPU, 1, &device_id, NULL);
context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
queue = clCreateCommandQueue(context, device_id, 0, &err);
//=====================Create memory====================
d_a=(cl_float*)clSVMAlloc(context,CL_MEM_READ_WRITE,bytes,0);
d_b=(cl_float*)clSVMAlloc(context,CL_MEM_READ_WRITE,bytes,0);
d_c=(cl_float*)clSVMAlloc(context,CL_MEM_WRITE_ONLY,bytes,0);
clEnqueueSVMMap(queue,CL_TRUE,CL_MAP_WRITE,d_a,bytes,0,0,0);
clEnqueueSVMMap(queue,CL_TRUE,CL_MAP_WRITE,d_b,bytes,0,0,0);
memcpy(d_a,h_a,bytes);
memcpy(d_b,h_b,bytes);
clEnqueueSVMUnmap(queue,d_a,0,0,0);
clEnqueueSVMUnmap(queue,d_a,0,0,0);
const char * filename = "mulmatrix.cl";
std::string sourceStr;
err = convertToString(filename, sourceStr);
const char * source = sourceStr.c_str();
size_t sourceSize[] = { strlen(source) };
cl_program program = clCreateProgramWithSource(
context,
1,
&source,
sourceSize,
NULL);
err = clBuildProgram( program, 1, &device_id, NULL, NULL, NULL );
if(err != 0)
{
printf("clBuild failed:%d
“, err);
char tbuf[0x10000];
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0x10000, tbuf, NULL);
printf(”
%s
", tbuf);
return -1;
}
kernel = clCreateKernel( program, "vecAdd", NULL );
clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
kernel = clCreateKernel(program, "vecAdd", &err);
err = clSetKernelArgSVMPointer(kernel, 0, &d_a);
err = clSetKernelArgSVMPointer(kernel, 1, &d_b);
err = clSetKernelArgSVMPointer(kernel, 2, &d_c);
err = clSetKernelArgSVMPointer(kernel, 3, &n);
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalSize, &localSize,0, NULL, NULL);
clFinish(queue);
// Read the results from the device
//==============================================output===============================
clEnqueueSVMMap(queue,CL_TRUE,CL_MAP_READ,d_c,bytes,0,0,0);
memcpy(h_c,d_c,bytes);
clEnqueueSVMUnmap(queue,d_c,0,0,0);
float sum = 0;
for(i = 0; i < n; i++)
{
printf("(%.5f,%.5f)
",h_c[i],cpu_c[i]);
if(abs(cpu_c[i] - h_c[i]) > 0.0001)
{
printf("check failed
");
break;
}
}
if(i ==n)
printf("check passed
");
clSVMFree(context,d_a);
clSVMFree(context,d_b);
clSVMFree(context,d_c);
clReleaseProgram(program);
clReleaseKernel(kernel);
clReleaseCommandQueue(queue);
clReleaseContext(context);
free(h_a);
free(h_b);
free(h_c);
return 0;
}