We are trying to process images from a framegrabber. The data appears on a mmaped buffer.
Every time we use the mmapped buffer with OpenCL, an error message appears in syslog and the Slab entry in /proc/meminfo increases. On our system we are leaking around 1 MB per second!
syslog error: [fglrx:MCIL_LockMemory] ERROR Could not lock memory into GART space
To demonstrate this error we have written a simple program with a loop that maps and unmaps an mmapped buffer. While the program is running, run dmesg and examine /proc/meminfo. We are using the AMD implementation of OpenCL: driver linux_x64 13.4.
Is there anything special we should do when handling mmaped memory?
This is the demo program:
#include <unistd.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <errno.h>
#include <CL/opencl.h>
/* Size in bytes of the host mapping and of each OpenCL buffer. */
#define BUF_SIZE 4096

/*
 * cl_err_exit(errnum, errstring)
 *
 * If errnum is not CL_SUCCESS, print "<errstring> failed on line <N>: <errnum>"
 * to stderr, where N is the line of the macro invocation, and terminate the
 * process with exit status 1. Otherwise do nothing.
 *
 * errnum is evaluated exactly once. The do { } while (0) wrapper makes the
 * macro behave like a single statement, so it is safe in an unbraced
 * if/else.
 */
#define cl_err_exit(errnum, errstring)                                      \
    do {                                                                    \
        const int cl_err_exit_code_ = (int)(errnum);                        \
        if (cl_err_exit_code_ != CL_SUCCESS) {                              \
            fprintf(stderr, "%s failed on line %d: %d\n",                   \
                    (errstring), __LINE__, cl_err_exit_code_);              \
            exit(1);                                                        \
        }                                                                   \
    } while (0)
int main ()
{
cl_platform_id platform_id = NULL;
cl_device_id device_id = NULL;
cl_context context = NULL;
cl_command_queue gpu_queue = NULL;
cl_int err;
cl_mem pinned_buffer = NULL;
cl_mem device_buffer = NULL;
void *pinned_mem;
int fd;
fd = open("/dev/mem", O_RDONLY);
if (fd == -1) {
perror("open");
exit(1);
}
pinned_mem = mmap(NULL, BUF_SIZE, PROT_READ, MAP_SHARED, fd, 0);
if (pinned_mem == MAP_FAILED) {
perror("mmap");
exit(1);
}
err = clGetPlatformIDs(1, &platform_id, NULL);
cl_err_exit(err, "clGetPlatformIDs");
err = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
cl_err_exit(err, "clGetDeviceIDs");
context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &err);
cl_err_exit(err, "clCreateContext");
gpu_queue = clCreateCommandQueue(context, device_id, 0, &err);
cl_err_exit(err, "clCreateCommandQueue");
while (1) {
pinned_buffer = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, BUF_SIZE, pinned_mem, &err);
cl_err_exit(err, "clCreateBuffer");
device_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY, BUF_SIZE, NULL, NULL);
cl_err_exit(err, "clCreateBuffer");
pinned_mem = (float *) clEnqueueMapBuffer(gpu_queue, pinned_buffer, CL_TRUE, CL_MAP_WRITE, 0, BUF_SIZE, 0, NULL, NULL, &err);
cl_err_exit(err, "clEnqueueMapBuffer");
err = clEnqueueWriteBuffer(gpu_queue, device_buffer, CL_FALSE, 0, BUF_SIZE, pinned_mem, 0, NULL, NULL);
cl_err_exit(err, "clEnqueueWriteBuffer");
err = clEnqueueUnmapMemObject(gpu_queue, pinned_buffer, pinned_mem, 0, NULL, NULL);
cl_err_exit(err, "clEnqueueUnmapMemObject");
clFinish(gpu_queue);
clReleaseMemObject(pinned_buffer);
clReleaseMemObject(device_buffer);
}
clReleaseCommandQueue(gpu_queue);
clReleaseContext(context);
munmap(pinned_mem, BUF_SIZE);
close(fd);
return 0;
}