After writing a large program that works on Windows but not on Linux, I decided to debug with a Hello World program. I realize there are better ways to write this program; I am just trying to debug my hardware / install.
This runs correctly on Windows using CUDA 5.5 and the Intel OpenCL libraries. I have a CentOS 6.4 server I would like to run it on as well. I installed CUDA 5.5 and the Intel XE SDK. Neither will run Hello World. If I choose the NVIDIA GPU, I get a -45 (CL_INVALID_PROGRAM_EXECUTABLE) error on clCreateKernel. If I choose to run on my Intel Xeon Phi, I actually get the message “1 warning generated.”; however, I cannot figure out how to view the warning!
Can anyone provide insight, or suggest what to debug from here?
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>
#include <utility>
#include <vector>

#include <CL/cl.h>
// OpenCL C source for the "hello" kernel, handed to clCreateProgramWithSource.
//
// Each work-item copies one byte of the constant string `hw` into `out`, so
// the host has to launch one work-item per byte it wants copied (the string
// plus its terminating NUL is sizeof(hw) bytes) and `out` must be at least
// that large.
//
// Every line of the kernel ends in an explicit "\n" escape: the `#pragma`
// directive must sit on a line of its own, and a C++ string literal may not
// contain a raw line break.
const char* kernel_text =
"#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable\n"
"__constant char hw[] = \"Hello World\\n\";\n"
"__kernel void hello(__global char * out){\n"
"size_t tid = get_global_id(0);\n"
// Index the destination: assigning hw[tid] to the pointer `out` itself is a
// char-to-pointer conversion and never writes to the buffer. The guard keeps
// surplus work-items from reading past the end of `hw`.
"if (tid < sizeof(hw)) out[tid] = hw[tid];}\n";
int main(void)
{
cl_int err;
cl_uint platformCount, devCount;
cl_int status;
char nameBuf[1000];
// Get list of OpenCL compatible platforms
err = clGetPlatformIDs(0, NULL, &platformCount);
// Allocate memory, get list of platform handles
cl_platform_id *platforms =
(cl_platform_id *) malloc(platformCount*sizeof(cl_platform_id));
err = clGetPlatformIDs(platformCount, platforms, NULL);
// List platform(s) and vendor(s)
for(unsigned int i = 0; i < platformCount; i++)
{
err = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR,sizeof(nameBuf), nameBuf, NULL);
std::cerr << "Platform " << i << " is by: " << nameBuf << std::endl;
}
std::string selection = "";
std::cout << "Please select a platform (0 to " << platformCount-1 << ")" << std::endl;
getline(std::cin, selection);
// Let user make selection
cl_platform_id selectedPlatform = platforms[stoi(selection)];
// allocate memory, get list of device handles in platform
err = clGetDeviceIDs(platforms[stoi(selection)], CL_DEVICE_TYPE_ALL, 0, NULL, &devCount);
cl_device_id *devices =
(cl_device_id *) malloc(devCount*sizeof(cl_device_id));
err = clGetDeviceIDs(platforms[stoi(selection)], CL_DEVICE_TYPE_ALL, devCount, devices, NULL);
cl_device_id device = devices[0];
// Create platform context and don't ask why the platform ID
// and properties are the same value with different typecasts...
cl_context_properties cprops[3] =
{CL_CONTEXT_PLATFORM, (cl_context_properties)selectedPlatform, 0};
cl_context context = clCreateContext(cprops, 1, &device, NULL, NULL, &status);
// create a command queue
cl_command_queue_properties queueProps = 0;
cl_command_queue queue = clCreateCommandQueue(context, device, queueProps, &status);
// Setup result buffer for OpenCL
char * outH = new char[hw.length()+1];
cl_mem outCL = clCreateBuffer(context,
CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
hw.length()+1,
outH,
&err);
clFinish(queue);
//const char *srcStr = kernel_text;
const char **str = &kernel_text;//&srcStr;
// create an OpenCL program (may have multiple kernels)
size_t kernelSize[] = {strlen(kernel_text)};
cl_program program = clCreateProgramWithSource(context, 1, str, kernelSize, &status);
std::cout << "sts " << status << std::endl;
// build it
status = clBuildProgram(program, devCount, &devices[0], NULL, NULL, NULL);
if (status != 0) {
// Determine the size of the log
size_t log_size;
clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
// Allocate memory for the log
char *log = (char *) malloc(log_size);
// Get the log
clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
// Print the log
printf("%s
", log);
free(log);
}
// fish the kernel out of the program
const char* kernelName = "hello";
cl_kernel kernel = clCreateKernel(program, kernelName, &status);
std::cout << "sts " << status << std::endl;
clFinish(queue);
// Device computations use a command queue. This is a 1-to-1 mapping
// with the device, associated with a context.
size_t wgSize[]= {1};
size_t block_size[] = {1};
clSetKernelArg(kernel, 0, hw.length(), outCL);
clEnqueueNDRangeKernel(queue,kernel,1,NULL,block_size,wgSize,0,NULL,NULL);
clFinish(queue);
clEnqueueReadBuffer(queue,outCL,CL_TRUE,0,hw.length()+1,outH,0,NULL,NULL);
std::cout << outH;
std::cout << "Press any key to exit..." << std::endl;
getline(std::cin, selection);
//free(kernel_text);
return 0;
}