Does anyone here have much experience with python/ctypes? I have a very simple program,which when directly ported to python via ctypes sporadically segfaults. It’s driving me quite insane. The program initializes opencl and then enters a loop where it repeatedly compiles a blank kernel & executes it. For some reason, repeated compilation & execution of a kernel in python (at least on my system) quite rapidly segfaults, and I have no idea why. Any ideas at all would be appreciated. I’m running Ubuntu 11.10 amd64 & using a nvidia 485m card, with the 285.05.09 driver. Thanks!
The simple c program:
#include <CL/cl.h>
#include <stdio.h>
cl_context ctx; // OpenCL context
cl_command_queue queue; // OpenCL command que
cl_platform_id platform; // OpenCL platform
cl_device_id device; // OpenCL device
cl_program program; // OpenCL program
cl_kernel kernel; // OpenCL kernel
cl_event event;
cl_int err1, err2; // Error code var
size_t global_size; // 1D var for Total # of work items
size_t local_size; // 1D var for # of work items in the work group
int main(){
global_size = 256;
local_size = 16;
// initialize opencl
err1 = clGetPlatformIDs(1, &platform, NULL);
err1 = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
ctx = clCreateContext(0, 1, &device, NULL, NULL, &err1);
queue = clCreateCommandQueue(ctx, device, 0, &err1);
printf("queue: %ld
", (long)queue);
char* source = “__kernel void main(){}”;
// repeatedly compile & execute a kernel
while(1){
program = clCreateProgramWithSource(ctx, 1, (const char **)&source, 0, &err1);
printf("program: %ld
", (long)program);
err1 = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
kernel = clCreateKernel(program, "main", &err1);
printf("kernel: %ld
", (long)kernel);
err1 = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL);
if(err1 == CL_SUCCESS)
printf("successfully executed
");
}
}
and the python port:
#! /usr/bin/python
from ctypes import *
openCL = CDLL(“libOpenCL.so”)
block_size = 16
kernel_dim = 256
DEVICE_TYPE_GPU = 1<<2
intalize opencl
platforms = create_string_buffer(4)
openCL.clGetPlatformIDs (1, platforms, None)
platform = cast(platforms, POINTER(c_int))[0]
device = create_string_buffer(4)
openCL.clGetDeviceIDs(platform, DEVICE_TYPE_GPU, 1, device, None);
device = cast(device, POINTER(c_int))[0]
ctx = openCL.clCreateContext(0, 1, (c_int * 1)(device), None, None, None);
queue = openCL.clCreateCommandQueue(ctx, device, 0, None);
contents = ‘’’ __kernel void main(){}’’’
contents = c_char_p(contents)
contents = (c_char_p * 1)(contents)
repeatedly build & execute kernel
while(True):
program = openCL.clCreateProgramWithSource(ctx, 1, contents, 0, None)
print "PROGRAM:", program
openCL.clBuildProgram(program, 0, None, None, None, None)
main_kernel = openCL.clCreateKernel(program, "main", None)
print "MAIN KERNEL:", main_kernel
openCL.clEnqueueNDRangeKernel(queue, main_kernel, 1, None, (c_long * 1)(kernel_dim), (c_long * 1)(block_size), 0, None, None)
#none of these seem to help
#openCL.clReleaseKernel(main_kernel)
#openCL.clReleaseProgram(program)
#openCL.clFinish(queue)
#openCL.clFlush(queue)