Hi everyone:
I’m a new user of OpenCL and would like to ask the experts out there for your experience and advice on this question.
I am trying to write a ‘struct’ into device memory with the command “queue.enqueueWriteBuffer ( - - - )”, but unfortunately I am receiving a “-38” error, which is CL_INVALID_MEM_OBJECT (invalid memory object).
I believe the ‘struct’ that I am using is 8 bytes in size, since it only groups two integer variables, as shown below:
// Plain aggregate of two integers; this is the record the host copies
// to and from the device.
struct test_struct
{
    int A;  // first integer field
    int B;  // second integer field
};
Also I have attached the kernel code known as “second_cl.cl” – since I am writing my second CL program
/* Kernel-side record: two integers, declared packed so that no padding
 * is inserted between or after the members. */
struct __attribute__ ((packed)) test_struct
{
    int A;  /* first integer field */
    int B;  /* second integer field */
};
/* Copies one test_struct per work-item: element get_global_id(0) of `a`
 * is copied, field by field, into the same element of `b`. */
__kernel void second_cl (__global struct test_struct *a, __global struct test_struct *b)
{
    const int gid = get_global_id(0);

    /* Address the work-item's own source and destination records once. */
    __global const struct test_struct *src = a + gid;
    __global struct test_struct *dst = b + gid;

    dst->A = src->A;
    dst->B = src->B;
}
As you can see, the kernel simply just takes the structure data from A and writes it to B. This is a simple exercise that I want to do if I wish to use structs in my OpenCL development – which is very likely the case.
Below is the host code that I have developed (I removed the error-message handling in order to keep this post short).
/////*********************************************************/////
/////***** SOURCE FILE: simple_mem_write_read_struct.cpp *****/////
/////*********************************************************/////
#define __CL_ENABLE_EXCEPTIONS
#include <iostream>
#include <string>
#include <fstream>
#include "CL/cl.hpp"
using namespace std;
// Host-side record transferred to and from the device: two plain integers.
struct test_struct
{
    int A;  // first integer field
    int B;  // second integer field
};
int main(int argc, char *argv[])
{
cl::STRING_CLASS platformName, cl_program_name, cl_kernel_name;
string K_FileName, kernel_name;
cl::Device cl_device; // DEVICE IS GPU
cl::Context cl_context; // CONTEXT IDENTIFIER (AMD + GPU)
cl::Program::Sources cl_source; // SOURCE OF THE KERNEL
cl::Program cl_program; // PROGRAM OBJECT
cl::Kernel cl_kernel_program; // KERNEL PROGRAM OBJECT
cl::Event event; // EVENT
cl::CommandQueue queue; // COMMAND Q TO SUBMIT WORK TO THE GPU
vector <cl::Platform> platformList; // PLATFORM LIST TO QUERY AMD PLATFORM
vector <cl::Device> deviceList; // DEVICE LIST TO QUERY LIST OF GPU CARDS (only one is available)
cl::Buffer device_buf_A, device_buf_B; // Create Buffer Memory
test_struct *host_struct_in = new test_struct [1]; // HOST STRUCT OBJECT (FOR INPUT INTO GPU MEMORY)
test_struct *host_struct_out = new test_struct [1]; // HOST STRUCT OBJECT (FOR OUTPUT FROM GPU MEMORY)
// INITIALIZE RANDOM VALUES //
host_struct_in[0].A = 10;
host_struct_in[0].B = 100;
if (argc == 1)
{
cerr << "----- ERROR! Please specify kernel -----" << endl;
exit(1);
}
else
{
K_FileName = argv[1];
}
// Initialize the CL_SOURCE OBJECT
ifstream kernel_file (K_FileName.c_str());
string kernel_program (istreambuf_iterator<char>(kernel_file), (istreambuf_iterator<char>()));
cl_source = cl::Program::Sources (1, make_pair(kernel_program.c_str(), kernel_program.length() + 1)); // cl_source TO SOURCE OF KERNEL
try
{
cl::Platform::get(&platformList);
cl_context_properties cprops[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)(platformList[0])(), 0}; // AUTOMATICALLY CHOOSE THE FIRST SINCE I'AM USING AMD
cl_context = cl::Context (CL_DEVICE_TYPE_GPU, cprops);
platformList[0].getDevices(CL_DEVICE_TYPE_GPU, &deviceList);
cl_device = deviceList[0]; // AUTOMATICALLY CHOOSE THE FIRST SINCE I ONLY HAVE ON GPU DEVICE
cl_program = cl::Program (cl_context, cl_source); // cl_program to CL PROGRAM OBJECT
vector <cl::Device> device_list;
device_list.push_back(cl_device);
// BUILD THE SIMPLE MEM WRITE/READ KERNEL
try
{
cl_program.build(device_list);
}
catch (cl::Error& err)
{
cerr << "Build failed!" << err.what() << '(' << err.err() << ')' << endl;
cerr << "Build log: " << endl << endl << cl_program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device_list[0]);
exit(1);
}
cout << "Build Success! ";
kernel_name = K_FileName.substr(0, K_FileName.find(".cl", 0));
cl_kernel_program = cl::Kernel (cl_program, kernel_name.c_str()); // SET PRIVATE cl_kernel_program TO COMPILED KERNEL OBJECT
cl_kernel_program.getInfo((cl_kernel_info)CL_KERNEL_FUNCTION_NAME, &cl_kernel_name);
cout << "Kernal found: \'" << cl_kernel_name << "\'" << endl;
// Initialize the command queue with the created context and device
queue = cl::CommandQueue (cl_context, cl_device(), 0);
// I GET THE ERROR OVER HERE
queue.enqueueWriteBuffer (device_buf_A, CL_TRUE, 0, sizeof (test_struct) * 1, host_struct_in); // WRITE host_struct_in INTO device_buf_A memory
/*
cl_kernel_program.setArg(0, device_buf_A);
cl_kernel_program.setArg(1, device_buf_B);
queue.enqueueNDRangeKernel(cl_kernel_program, cl::NullRange, cl::NDRange(1), cl::NDRange(1,1), NULL, &event); // EXECUTE THE KERNEL
event.wait();
queue.enqueueReadBuffer(device_buf_B, CL_TRUE, 0, sizeof (test_struct) * 1, host_struct_out); // READ device_buf_B and store it into host_struct_out host memory
*/
}
catch (cl::Error &err)
{
cout << "----- ERROR: " << err.what() << " Code: " << err.err() << endl;
}
}
Summary:
I created two test_structs, host_struct_in and host_struct_out: the _in struct holds the input data to be written to the device, and the _out struct holds the resulting data read back from the GPU device memory.
After that, I declared two buffers, device_buf_A and device_buf_B, for host_struct_in and host_struct_out respectively. Both transfers use “sizeof (test_struct)” as their size.
The comment “// I GET THE ERROR OVER HERE” marks the location where the queue.enqueueWriteBuffer command gave me the -38 (invalid memory object) error. The code below that command is commented out, but it does compile.
After scouring a lot of boards and blogs, I am unfortunately at a loss as to how to rectify this problem. Can someone please tell or show me how to solve this?
My system information is as follows:
CPU: Phenom II X6 1055T
Mem: 4GB DDR3 OCX
GPU: ATI FireStream 9270 (RV770 architecture)
SDK: ATI Stream SDK 2.3
Catalyst Version: 10.12
If someone can kindly please respond to my post, it would be greatly appreciated!
Thanks and with best regards