Hi,
I would like to compile kernel offline using SPIR, and then load the binary.
Several entry in this forum touch the subject ( https://www.khronos.org/message_boards/showthread.php/9482-Offline-Binary-Kernel-Generation , https://www.khronos.org/message_boards/showthread.php/8763-Offline-Kernel-Compiling-in-Ubuntu , https://www.khronos.org/message_boards/showthread.php/5796-offline-compilation ), but none of them answers my questions.
I followed the instructions given in GitHub - KhronosGroup/SPIR to build the SPIR generator using Clang. I the following snippets of code, clang refers to this version build from KhronosGroup github.
I tried on a very simple kernel, but failed to use it offline.
Kernel Code
I have a very simple kernel, in file bidon.cl :
__kernel void vecAdd(__global float* a)
{
int gid = get_global_id(0);
a[gid] += a[gid];
}
Online Compilation
I can call it online, using the following file offline.cpp :
#include <stdio.h>
#include <stdlib.h>
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
#define MEM_SIZE (32)
#define MAX_SOURCE_SIZE (0x100000)
int main()
{
cl_platform_id platform_id = NULL;
cl_device_id device_id = NULL;
cl_context context = NULL;
cl_command_queue command_queue = NULL;
cl_mem memobj = NULL;
cl_program program = NULL;
cl_kernel kernel = NULL;
cl_uint ret_num_devices;
cl_uint ret_num_platforms;
cl_int ret;
float mem[MEM_SIZE];
FILE *fp;
const char fileName[] = "./bidon.cl";
size_t source_size;
char *source_str;
cl_int i;
/* Load kernel source code */
fp = fopen(fileName, "r");
if (!fp) {
}
source_str = (char *)malloc(MAX_SOURCE_SIZE);
source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);
fclose(fp);
/*Initialize Data */
for (i = 0; i < MEM_SIZE; i++) {
mem[i] = i;
}
/* Get platform/device information */
ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices);
/* Create OpenCL Context */
context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
/* Create Command Queue */
command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
/* Create memory buffer*/
memobj = clCreateBuffer(context, CL_MEM_READ_WRITE, MEM_SIZE * sizeof(float), NULL, &ret);
/* Transfer data to memory buffer */
ret = clEnqueueWriteBuffer(command_queue, memobj, CL_TRUE, 0, MEM_SIZE * sizeof(float), mem, 0, NULL, NULL);
/* Create Kernel program from the read in source */
program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret);
/* Build Kernel Program */
ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
/* Create OpenCL Kernel */
kernel = clCreateKernel(program, "vecAdd", &ret);
fprintf(stdout, "err: %d
", ret);
/* Set OpenCL kernel argument */
ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&memobj);
size_t global_work_size[3] = {MEM_SIZE, 0, 0};
size_t local_work_size[3] = {MEM_SIZE, 0, 0};
/* Execute OpenCL kernel */
ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
/* Transfer result from the memory buffer */
ret = clEnqueueReadBuffer(command_queue, memobj, CL_TRUE, 0, MEM_SIZE * sizeof(float), mem, 0, NULL, NULL);
/* Display result */
for (i=0; i < MEM_SIZE; i++) {
fprintf(stdout, "mem[%d]: %f
", i, mem[i]);
}
/* Finalization */
ret = clFlush(command_queue);
ret = clFinish(command_queue);
ret = clReleaseKernel(kernel);
ret = clReleaseProgram(program);
ret = clReleaseMemObject(memobj);
ret = clReleaseCommandQueue(command_queue);
ret = clReleaseContext(context);
free(source_str);
return 0;
}
I compile this by running :
clang -I/usr/local/cuda-7.0/include online.cpp -lOpenCL
I run it and it works properly.
Offline Compilation
I compile the kernel with the follwing command :
clang -cc1 -emit-llvm-bc -triple spir64-unknown-unknown -include opencl_spir.h -o bidon.bc bidon.cl
This creates the file bidon.bc
I have a file offline.cpp (which is almost exactly the same as online.cpp, the only major difference is that clCreateProgramWithSource() is replaced with clCreateProgramWithBinary() :
#include <stdio.h>
#include <stdlib.h>
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
#define MEM_SIZE (32)
#define MAX_BINARY_SIZE (0x100000)
int main()
{
cl_platform_id platform_id = NULL;
cl_device_id device_id = NULL;
cl_context context = NULL;
cl_command_queue command_queue = NULL;
cl_mem memobj = NULL;
cl_program program = NULL;
cl_kernel kernel = NULL;
cl_uint ret_num_devices;
cl_uint ret_num_platforms;
cl_int ret;
float mem[MEM_SIZE];
FILE *fp;
char fileName[] = "./bidon.bc";
size_t binary_size;
char *binary_buf;
cl_int binary_status;
cl_int i;
/* Load kernel binary */
fp = fopen(fileName, "r");
if (!fp) {
fprintf(stderr, "Could not read the kernel file: %s
", fileName);
exit(1);
}
binary_buf = (char *)malloc(MAX_BINARY_SIZE);
binary_size = fread(binary_buf, 1, MAX_BINARY_SIZE, fp);
fclose(fp);
/* Initialize input data */
for (i = 0; i < MEM_SIZE; i++) {
mem[i] = i;
}
/* Get platform/device information */
ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices);
/* Create OpenCL context*/
context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
/* Create command queue */
command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
/* Create memory buffer */
memobj = clCreateBuffer(context, CL_MEM_READ_WRITE, MEM_SIZE * sizeof(float), NULL, &ret);
/* Transfer data over to the memory buffer */
ret = clEnqueueWriteBuffer(command_queue, memobj, CL_TRUE, 0, MEM_SIZE * sizeof(float), mem, 0, NULL, NULL);
/* Create kernel program from the kernel binary */
program = clCreateProgramWithBinary(context, 1, &device_id, (const size_t *)&binary_size,
(const unsigned char **)&binary_buf, &binary_status, &ret);
fprintf(stdout, "clCreateProgramWithBinary err: %d
", ret);
fprintf(stdout, "binary_status: %d
", binary_status);
ret = clBuildProgram( program, 1, &device_id, NULL, NULL, NULL );
fprintf(stdout, "clBuildProgram: %d
", ret);
/* Create OpenCL kernel */
kernel = clCreateKernel(program, "vecAdd", &ret);
printf("clCreateKernel err:%d
", ret);
/* Set OpenCL kernel arguments */
ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&memobj);
size_t global_work_size[3] = {MEM_SIZE, 0, 0};
size_t local_work_size[3] = {MEM_SIZE, 0, 0};
/* Execute OpenCL kernel */
ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
/* Copy result from the memory buffer */
ret = clEnqueueReadBuffer(command_queue, memobj, CL_TRUE, 0, MEM_SIZE * sizeof(float), mem, 0, NULL, NULL);
/* Display results */
for (i=0; i < MEM_SIZE; i++) {
fprintf(stdout, "mem[%d]: %f
", i, mem[i]);
}
/* Finalization */
ret = clFlush(command_queue);
ret = clFinish(command_queue);
ret = clReleaseKernel(kernel);
ret = clReleaseProgram(program);
ret = clReleaseMemObject(memobj);
ret = clReleaseCommandQueue(command_queue);
ret = clReleaseContext(context);
free(binary_buf);
return 0;
}
I build this with the following command :
clang -I/usr/local/cuda-7.0/include offline.cpp -lOpenCL
But when I run it I get error codes.
clCreateProgramWithBinary() return code is (0) : OK
clBuildProgram() returns an error code (-42) : CL_INVALID_BINARY (I tried both with and without the call to clBuildProgram() : https://www.khronos.org/message_boards/showthread.php/9482-Offline-Binary-Kernel-Generation )
clCreateKernel() returns an error code (-45) : CL_INVALID_PROGRAM_EXECUTABLE
Did I do something wrong ? Any suggestion about what should I try to make it work ?